ARMInstrNEON.td revision 194710
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Type profile shared by all vector-compare nodes below: one result, which
// must be an integer type, and two operands constrained to have the same
// type as each other.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

// Vector compare / test nodes; each one wraps the ARMISD opcode named in its
// string argument.
def NEONvceq  : SDNode<"ARMISD::VCEQ",  SDTARMVCMP>;
def NEONvcge  : SDNode<"ARMISD::VCGE",  SDTARMVCMP>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT",  SDTARMVCMP>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST",  SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSH    : SDTypeProfile<1, 2, [SDTCisInt<0>,
                                        SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHX   : SDTypeProfile<1, 2, [SDTCisInt<0>,
                                        SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>,
                                        SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>,
                                        SDTCisVT<3, i32>]>;

// Shift-by-immediate nodes.  Nodes using SDTARMVSH keep the operand type;
// nodes using SDTARMVSHX allow the result type to differ from the operand.
def NEONvshl      : SDNode<"ARMISD::VSHL",      SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs",     SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu",     SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs",    SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu",    SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi",    SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN",     SDTARMVSHX>;

def NEONvrshrs    : SDNode<"ARMISD::VRSHRs",    SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu",    SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN",    SDTARMVSHX>;

def NEONvqshls    : SDNode<"ARMISD::VQSHLs",    SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu",    SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu",   SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs",   SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu",   SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu",  SDTARMVSHX>;

def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs",  SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu",  SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert nodes (three operands: two vectors plus the i32 amount).
def NEONvsli      : SDNode<"ARMISD::VSLI",      SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI",      SDTARMVSHINS>;

// Lane extraction: i32 result, integer vector operand, i32 lane operand.
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
                                         SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Lane duplication; only the second operand is constrained (to i32).
def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
                           SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// addrmode_neonldstm := reg
//
/* TODO: Take advantage of vldm.
def addrmode_neonldstm : Operand<i32>,
                ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> {
  let PrintMethod = "printAddrNeonLdStMOperand";
  let MIOperandInfo = (ops GPR, i32imm);
}
*/

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

/* TODO: Take advantage of vldm.
let mayLoad = 1 in {
def VLDMD : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               "vldm${addr:submode} ${addr:base}, $dst1",
               []>;

def VLDMS : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               "vldm${addr:submode} ${addr:base}, $dst1",
               []>;
}
*/

// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI<(outs QPR:$dst),
               (ins GPR:$addr),
               "vldmia $addr, ${dst:dregpair}",
               [(set QPR:$dst, (v2f64 (load GPR:$addr)))]>;

// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI<(outs),
               (ins QPR:$src, GPR:$addr),
               "vstmia $addr, ${src:dregpair}",
               [(store (v2f64 QPR:$src), GPR:$addr)]>;


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Each transform divides the immediate by the divisor shown (8, 4, 2, or
// none for f64) and adds the base value 5.
def SubReg_i8_reg  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
}]>;
def SubReg_i16_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
}]>;
def SubReg_i32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
}]>;
def SubReg_f64_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs.
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations, both double- and quad-register.
// The D form passes 0 and the Q form passes 1 as the sixth N2V argument;
// otherwise the two classes differ only in register class (DPR vs. QPR).
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Narrow 2-register intrinsics.
// Reads a Q register and writes a D register; op6 is supplied by the user
// of the class rather than being fixed.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4,
        (outs DPR:$dst),
        (ins QPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics.  (This is currently only used for VMOVL and is
// derived from N2VImm instead of N2V because of the way the size is encoded.)
class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op6, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src),
           !strconcat(OpcodeStr, "\t$dst, $src"),
           "",
           [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// Basic 3-register operations, both double- and quad-register.
// The Commutable template argument is forwarded to isCommutable.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set DPR:$dst,
              (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Multiply-Add/Sub operations, both double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        // $src1 is tied to $dst, so it does not appear in the asm string.
        "$src1 = $dst",
        [(set DPR:$dst,
              (Ty (OpNode DPR:$src1,
                          (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        // $src1 is tied to $dst, so it does not appear in the asm string.
        "$src1 = $dst",
        [(set QPR:$dst,
              (Ty (OpNode QPR:$src1,
                          (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2, DPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (OpTy DPR:$src1),
                            (OpTy DPR:$src2),
                            (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2, QPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (OpTy QPR:$src1),
                            (OpTy QPR:$src2),
                            (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register.
// The remaining two sources are D registers.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2, DPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1),
                          (TyD DPR:$src2),
                          (TyD DPR:$src3))))]>;

// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set DPR:$dst,
              (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two D-register sources produce a Q-register result.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Wide 3-register intrinsics.
// The first source is a Q register, the second a D register.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"),
        "",
        [(set QPR:$dst,
              (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"),
        "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst),
        (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src2"),
        "$src1 = $dst",
        [(set DPR:$dst,
              (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst),
        (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src2"),
        "$src1 = $dst",
        [(set QPR:$dst,
              (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// The shift amount is an i32 immediate operand named $SIMM.
class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
             ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Mirror image of N2VLSh: Q-register source, D-register result.
class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
             ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs DPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           // The accumulator $src1 is tied to the destination.
           "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (add DPR:$src1,
                          (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           // The accumulator $src1 is tied to the destination.
           "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (add QPR:$src1,
                          (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set DPR:$dst,
                 (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"),
           "$src1 = $dst",
           [(set QPR:$dst,
                 (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst),
           (ins DPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set DPR:$dst,
                 (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst),
           (ins QPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"),
           "",
           [(set QPR:$dst,
                 (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// (size field 0b00 / 0b01 / 0b10 pairs with the "8" / "16" / "32" suffix)
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4,
                   !strconcat(OpcodeStr, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4,
                   !strconcat(OpcodeStr, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4,
                   !strconcat(OpcodeStr, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4,
                   !strconcat(OpcodeStr, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4,
                   !strconcat(OpcodeStr, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4,
                   !strconcat(OpcodeStr, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, OpcodeStr, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4,
                   !strconcat(OpcodeStr, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4,
                   !strconcat(OpcodeStr, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
// (the suffix names the source element size; the def name is the result type)
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4, string OpcodeStr,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      !strconcat(OpcodeStr, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      !strconcat(OpcodeStr, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      !strconcat(OpcodeStr, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
479194710Sed// source operand element sizes of 16, 32 and 64 bits: 480194710Sedmulticlass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, 481194710Sed bit op4, string OpcodeStr, Intrinsic IntOp> { 482194710Sed def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4, 483194710Sed !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; 484194710Sed def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4, 485194710Sed !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; 486194710Sed def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4, 487194710Sed !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; 488194710Sed} 489194710Sed 490194710Sed 491194710Sed// Neon 3-register vector intrinsics. 492194710Sed 493194710Sed// First with only element sizes of 16 and 32 bits: 494194710Sedmulticlass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 495194710Sed string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { 496194710Sed // 64-bit vector types. 497194710Sed def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), 498194710Sed v4i16, v4i16, IntOp, Commutable>; 499194710Sed def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), 500194710Sed v2i32, v2i32, IntOp, Commutable>; 501194710Sed 502194710Sed // 128-bit vector types. 
503194710Sed def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), 504194710Sed v8i16, v8i16, IntOp, Commutable>; 505194710Sed def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), 506194710Sed v4i32, v4i32, IntOp, Commutable>; 507194710Sed} 508194710Sed 509194710Sed// ....then also with element size of 8 bits: 510194710Sedmulticlass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 511194710Sed string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> 512194710Sed : N3VInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { 513194710Sed def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), 514194710Sed v8i8, v8i8, IntOp, Commutable>; 515194710Sed def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), 516194710Sed v16i8, v16i8, IntOp, Commutable>; 517194710Sed} 518194710Sed 519194710Sed// ....then also with element size of 64 bits: 520194710Sedmulticlass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 521194710Sed string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> 522194710Sed : N3VInt_QHS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { 523194710Sed def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"), 524194710Sed v1i64, v1i64, IntOp, Commutable>; 525194710Sed def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"), 526194710Sed v2i64, v2i64, IntOp, Commutable>; 527194710Sed} 528194710Sed 529194710Sed 530194710Sed// Neon Narrowing 3-register vector intrinsics, 531194710Sed// source operand element sizes of 16, 32 and 64 bits: 532194710Sedmulticlass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, 533194710Sed string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { 534194710Sed def v8i8 : N3VNInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr,"16"), 535194710Sed v8i8, v8i16, IntOp, Commutable>; 536194710Sed def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, 
!strconcat(OpcodeStr,"32"), 537194710Sed v4i16, v4i32, IntOp, Commutable>; 538194710Sed def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"64"), 539194710Sed v2i32, v2i64, IntOp, Commutable>; 540194710Sed} 541194710Sed 542194710Sed 543194710Sed// Neon Long 3-register vector intrinsics. 544194710Sed 545194710Sed// First with only element sizes of 16 and 32 bits: 546194710Sedmulticlass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 547194710Sed string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { 548194710Sed def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), 549194710Sed v4i32, v4i16, IntOp, Commutable>; 550194710Sed def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), 551194710Sed v2i64, v2i32, IntOp, Commutable>; 552194710Sed} 553194710Sed 554194710Sed// ....then also with element size of 8 bits: 555194710Sedmulticlass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 556194710Sed string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> 557194710Sed : N3VLInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { 558194710Sed def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), 559194710Sed v8i16, v8i8, IntOp, Commutable>; 560194710Sed} 561194710Sed 562194710Sed 563194710Sed// Neon Wide 3-register vector intrinsics, 564194710Sed// source operand element sizes of 8, 16 and 32 bits: 565194710Sedmulticlass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 566194710Sed string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { 567194710Sed def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), 568194710Sed v8i16, v8i8, IntOp, Commutable>; 569194710Sed def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), 570194710Sed v4i32, v4i16, IntOp, Commutable>; 571194710Sed def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), 572194710Sed v2i64, v2i32, IntOp, Commutable>; 
}


// Neon Multiply-Op vector operations,
//   element sizes of 8, 16 and 32 bits.
// Each def passes "mul" together with the caller-supplied OpNode to the
// underlying N3VDMulOp / N3VQMulOp class.
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4,
                        !strconcat(OpcodeStr, "8"),
                        v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4,
                        !strconcat(OpcodeStr, "16"),
                        v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4,
                        !strconcat(OpcodeStr, "32"),
                        v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4,
                        !strconcat(OpcodeStr, "8"),
                        v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4,
                        !strconcat(OpcodeStr, "16"),
                        v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4,
                        !strconcat(OpcodeStr, "32"),
                        v4i32, mul, OpNode>;
}


// Neon 3-argument intrinsics,
//   element sizes of 8, 16 and 32 bits.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4,
                       !strconcat(OpcodeStr, "8"),
                       v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4,
                       !strconcat(OpcodeStr, "16"),
                       v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4,
                       !strconcat(OpcodeStr, "32"),
                       v2i32, v2i32, IntOp>;

  // 128-bit vector types.
611194710Sed def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, 612194710Sed !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>; 613194710Sed def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, 614194710Sed !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>; 615194710Sed def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, 616194710Sed !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>; 617194710Sed} 618194710Sed 619194710Sed 620194710Sed// Neon Long 3-argument intrinsics. 621194710Sed 622194710Sed// First with only element sizes of 16 and 32 bits: 623194710Sedmulticlass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 624194710Sed string OpcodeStr, Intrinsic IntOp> { 625194710Sed def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, 626194710Sed !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; 627194710Sed def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, 628194710Sed !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; 629194710Sed} 630194710Sed 631194710Sed// ....then also with element size of 8 bits: 632194710Sedmulticlass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 633194710Sed string OpcodeStr, Intrinsic IntOp> 634194710Sed : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> { 635194710Sed def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4, 636194710Sed !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; 637194710Sed} 638194710Sed 639194710Sed 640194710Sed// Neon 2-register vector intrinsics, 641194710Sed// element sizes of 8, 16 and 32 bits: 642194710Sedmulticlass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 643194710Sed bits<5> op11_7, bit op4, string OpcodeStr, 644194710Sed Intrinsic IntOp> { 645194710Sed // 64-bit vector types. 
646194710Sed def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 647194710Sed !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>; 648194710Sed def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 649194710Sed !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>; 650194710Sed def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 651194710Sed !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>; 652194710Sed 653194710Sed // 128-bit vector types. 654194710Sed def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 655194710Sed !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>; 656194710Sed def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 657194710Sed !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>; 658194710Sed def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 659194710Sed !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>; 660194710Sed} 661194710Sed 662194710Sed 663194710Sed// Neon Pairwise long 2-register intrinsics, 664194710Sed// element sizes of 8, 16 and 32 bits: 665194710Sedmulticlass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 666194710Sed bits<5> op11_7, bit op4, 667194710Sed string OpcodeStr, Intrinsic IntOp> { 668194710Sed // 64-bit vector types. 669194710Sed def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 670194710Sed !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>; 671194710Sed def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 672194710Sed !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>; 673194710Sed def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 674194710Sed !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>; 675194710Sed 676194710Sed // 128-bit vector types. 
677194710Sed def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 678194710Sed !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>; 679194710Sed def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 680194710Sed !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>; 681194710Sed def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 682194710Sed !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>; 683194710Sed} 684194710Sed 685194710Sed 686194710Sed// Neon Pairwise long 2-register accumulate intrinsics, 687194710Sed// element sizes of 8, 16 and 32 bits: 688194710Sedmulticlass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 689194710Sed bits<5> op11_7, bit op4, 690194710Sed string OpcodeStr, Intrinsic IntOp> { 691194710Sed // 64-bit vector types. 692194710Sed def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 693194710Sed !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>; 694194710Sed def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 695194710Sed !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>; 696194710Sed def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 697194710Sed !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>; 698194710Sed 699194710Sed // 128-bit vector types. 
700194710Sed def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 701194710Sed !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>; 702194710Sed def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 703194710Sed !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>; 704194710Sed def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 705194710Sed !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>; 706194710Sed} 707194710Sed 708194710Sed 709194710Sed// Neon 2-register vector shift by immediate, 710194710Sed// element sizes of 8, 16, 32 and 64 bits: 711194710Sedmulticlass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 712194710Sed string OpcodeStr, SDNode OpNode> { 713194710Sed // 64-bit vector types. 714194710Sed def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, 715194710Sed !strconcat(OpcodeStr, "8"), v8i8, OpNode>; 716194710Sed def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, 717194710Sed !strconcat(OpcodeStr, "16"), v4i16, OpNode>; 718194710Sed def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, 719194710Sed !strconcat(OpcodeStr, "32"), v2i32, OpNode>; 720194710Sed def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, 721194710Sed !strconcat(OpcodeStr, "64"), v1i64, OpNode>; 722194710Sed 723194710Sed // 128-bit vector types. 
724194710Sed def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, 725194710Sed !strconcat(OpcodeStr, "8"), v16i8, OpNode>; 726194710Sed def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, 727194710Sed !strconcat(OpcodeStr, "16"), v8i16, OpNode>; 728194710Sed def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, 729194710Sed !strconcat(OpcodeStr, "32"), v4i32, OpNode>; 730194710Sed def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, 731194710Sed !strconcat(OpcodeStr, "64"), v2i64, OpNode>; 732194710Sed} 733194710Sed 734194710Sed 735194710Sed// Neon Shift-Accumulate vector operations, 736194710Sed// element sizes of 8, 16, 32 and 64 bits: 737194710Sedmulticlass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 738194710Sed string OpcodeStr, SDNode ShOp> { 739194710Sed // 64-bit vector types. 740194710Sed def v8i8 : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4, 741194710Sed !strconcat(OpcodeStr, "8"), v8i8, ShOp>; 742194710Sed def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4, 743194710Sed !strconcat(OpcodeStr, "16"), v4i16, ShOp>; 744194710Sed def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4, 745194710Sed !strconcat(OpcodeStr, "32"), v2i32, ShOp>; 746194710Sed def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4, 747194710Sed !strconcat(OpcodeStr, "64"), v1i64, ShOp>; 748194710Sed 749194710Sed // 128-bit vector types. 
750194710Sed def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4, 751194710Sed !strconcat(OpcodeStr, "8"), v16i8, ShOp>; 752194710Sed def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4, 753194710Sed !strconcat(OpcodeStr, "16"), v8i16, ShOp>; 754194710Sed def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4, 755194710Sed !strconcat(OpcodeStr, "32"), v4i32, ShOp>; 756194710Sed def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4, 757194710Sed !strconcat(OpcodeStr, "64"), v2i64, ShOp>; 758194710Sed} 759194710Sed 760194710Sed 761194710Sed// Neon Shift-Insert vector operations, 762194710Sed// element sizes of 8, 16, 32 and 64 bits: 763194710Sedmulticlass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 764194710Sed string OpcodeStr, SDNode ShOp> { 765194710Sed // 64-bit vector types. 766194710Sed def v8i8 : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4, 767194710Sed !strconcat(OpcodeStr, "8"), v8i8, ShOp>; 768194710Sed def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4, 769194710Sed !strconcat(OpcodeStr, "16"), v4i16, ShOp>; 770194710Sed def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4, 771194710Sed !strconcat(OpcodeStr, "32"), v2i32, ShOp>; 772194710Sed def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4, 773194710Sed !strconcat(OpcodeStr, "64"), v1i64, ShOp>; 774194710Sed 775194710Sed // 128-bit vector types. 
776194710Sed def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4, 777194710Sed !strconcat(OpcodeStr, "8"), v16i8, ShOp>; 778194710Sed def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4, 779194710Sed !strconcat(OpcodeStr, "16"), v8i16, ShOp>; 780194710Sed def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4, 781194710Sed !strconcat(OpcodeStr, "32"), v4i32, ShOp>; 782194710Sed def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4, 783194710Sed !strconcat(OpcodeStr, "64"), v2i64, ShOp>; 784194710Sed} 785194710Sed 786194710Sed//===----------------------------------------------------------------------===// 787194710Sed// Instruction Definitions. 788194710Sed//===----------------------------------------------------------------------===// 789194710Sed 790194710Sed// Vector Add Operations. 791194710Sed 792194710Sed// VADD : Vector Add (integer and floating-point) 793194710Seddefm VADD : N3V_QHSD<0, 0, 0b1000, 0, "vadd.i", add, 1>; 794194710Seddef VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd, 1>; 795194710Seddef VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, "vadd.f32", v4f32, v4f32, fadd, 1>; 796194710Sed// VADDL : Vector Add Long (Q = D + D) 797194710Seddefm VADDLs : N3VLInt_QHS<0,1,0b0000,0, "vaddl.s", int_arm_neon_vaddls, 1>; 798194710Seddefm VADDLu : N3VLInt_QHS<1,1,0b0000,0, "vaddl.u", int_arm_neon_vaddlu, 1>; 799194710Sed// VADDW : Vector Add Wide (Q = Q + D) 800194710Seddefm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; 801194710Seddefm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; 802194710Sed// VHADD : Vector Halving Add 803194710Seddefm VHADDs : N3VInt_QHS<0,0,0b0000,0, "vhadd.s", int_arm_neon_vhadds, 1>; 804194710Seddefm VHADDu : N3VInt_QHS<1,0,0b0000,0, "vhadd.u", int_arm_neon_vhaddu, 1>; 805194710Sed// VRHADD : Vector Rounding Halving Add 806194710Seddefm VRHADDs : N3VInt_QHS<0,0,0b0001,0, "vrhadd.s", int_arm_neon_vrhadds, 1>; 807194710Seddefm VRHADDu : 
N3VInt_QHS<1,0,0b0001,0, "vrhadd.u", int_arm_neon_vrhaddu, 1>; 808194710Sed// VQADD : Vector Saturating Add 809194710Seddefm VQADDs : N3VInt_QHSD<0,0,0b0000,1, "vqadd.s", int_arm_neon_vqadds, 1>; 810194710Seddefm VQADDu : N3VInt_QHSD<1,0,0b0000,1, "vqadd.u", int_arm_neon_vqaddu, 1>; 811194710Sed// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) 812194710Seddefm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; 813194710Sed// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) 814194710Seddefm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; 815194710Sed 816194710Sed// Vector Multiply Operations. 817194710Sed 818194710Sed// VMUL : Vector Multiply (integer, polynomial and floating-point) 819194710Seddefm VMUL : N3V_QHS<0, 0, 0b1001, 1, "vmul.i", mul, 1>; 820194710Seddef VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v8i8, v8i8, 821194710Sed int_arm_neon_vmulp, 1>; 822194710Seddef VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v16i8, v16i8, 823194710Sed int_arm_neon_vmulp, 1>; 824194710Seddef VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul, 1>; 825194710Seddef VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, "vmul.f32", v4f32, v4f32, fmul, 1>; 826194710Sed// VQDMULH : Vector Saturating Doubling Multiply Returning High Half 827194710Seddefm VQDMULH : N3VInt_HS<0,0,0b1011,0, "vqdmulh.s", int_arm_neon_vqdmulh, 1>; 828194710Sed// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half 829194710Seddefm VQRDMULH : N3VInt_HS<1,0,0b1011,0, "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; 830194710Sed// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) 831194710Seddefm VMULLs : N3VLInt_QHS<0,1,0b1100,0, "vmull.s", int_arm_neon_vmulls, 1>; 832194710Seddefm VMULLu : N3VLInt_QHS<1,1,0b1100,0, "vmull.u", int_arm_neon_vmullu, 1>; 833194710Seddef VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8, 834194710Sed int_arm_neon_vmullp, 1>; 
// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>;
// VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
// VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
// VMLS     : Vector Multiply Subtract (integer and floating-point)
// The integer forms share every opcode field with VMLA except bit 24
// (the first argument): 0 selects accumulate, 1 selects subtract.
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, "vmls.i", sub>;
// The floating-point forms are distinguished from VMLA by the op21_20
// argument (0b10 instead of 0b00).
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>;
// VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
// VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.

// None of the subtract forms below is commutable: swapping the two source
// operands changes the result, so the trailing Commutable argument is 0
// throughout.

// VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, "vsub.i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, "vsub.f32", v4f32, v4f32, fsub, 0>;
// VSUBL    : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 0>;
// VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
// VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>;
// VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>;
// VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
// VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.

// The floating-point compares take f32 vector operands and produce an i32
// vector of the same lane count.

// VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, "vceq.i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0, 0, 0b00, 0b1110, 0, "vceq.f32",
                     v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq   : N3VQ<0, 0, 0b00, 0b1110, 0, "vceq.f32",
                     v4i32, v4f32, NEONvceq, 1>;
// VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, "vcge.s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, "vcge.u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1, 0, 0b00, 0b1110, 0, "vcge.f32",
                     v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq   : N3VQ<1, 0, 0b00, 0b1110, 0, "vcge.f32",
                     v4i32, v4f32, NEONvcge, 0>;
// VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, "vcgt.s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, "vcgt.u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1, 0, 0b10, 0b1110, 0, "vcgt.f32",
                     v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1, 0, 0b10, 0b1110, 0, "vcgt.f32",
                     v4i32, v4f32, NEONvcgt, 0>;
// VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32",
                        v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32",
                        v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32",
                        v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32",
                        v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, "vtst.i", NEONvtst, 1>;

// Vector Bitwise Operations.

// All bitwise patterns below are written on v2i32 (D registers) and v4i32
// (Q registers).

// VAND     : Vector Bitwise AND
def  VANDd    : N3VD<0, 0, 0b00, 0b0001, 1, "vand", v2i32, v2i32, and, 1>;
def  VANDq    : N3VQ<0, 0, 0b00, 0b0001, 1, "vand", v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VD<1, 0, 0b00, 0b0001, 1, "veor", v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQ<1, 0, 0b00, 0b0001, 1, "veor", v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd    : N3VD<0, 0, 0b10, 0b0001, 1, "vorr", v2i32, v2i32, or, 1>;
def  VORRq    : N3VQ<0, 0, 0b10, 0b0001, 1, "vorr", v4i32, v4i32, or, 1>;

// VBIC     : Vector Bitwise Bit Clear (AND NOT)
def  VBICd    : N3V<0, 0, 0b01, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                    "vbic\t$dst, $src1, $src2", "",
                    [(set DPR:$dst,
                      (v2i32 (and DPR:$src1,(vnot DPR:$src2))))]>;
def  VBICq    : N3V<0, 0, 0b01, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                    "vbic\t$dst, $src1, $src2", "",
                    [(set QPR:$dst,
                      (v4i32 (and QPR:$src1,(vnot QPR:$src2))))]>;

// VORN     : Vector Bitwise OR NOT
def  VORNd    : N3V<0, 0, 0b11, 0b0001, 0, 1,
                    (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                    "vorn\t$dst, $src1, $src2", "",
                    [(set DPR:$dst,
                      (v2i32 (or DPR:$src1, (vnot DPR:$src2))))]>;
def  VORNq    : N3V<0, 0, 0b11, 0b0001, 1, 1,
                    (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                    "vorn\t$dst, $src1, $src2", "",
                    [(set QPR:$dst,
                      (v4i32 (or QPR:$src1, (vnot QPR:$src2))))]>;

// VMVN     : Vector Bitwise NOT
def  VMVNd    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                    (outs DPR:$dst), (ins DPR:$src),
                    "vmvn\t$dst, $src", "",
                    [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
def  VMVNq    : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                    (outs QPR:$dst), (ins QPR:$src),
                    "vmvn\t$dst, $src", "",
                    [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
// Also select VMVN for the vnot_conv form of the NOT pattern.
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;

// VBSL     : Vector Bitwise Select
// $src1 is tied to $dst and acts as the selection mask: the result is
// (src2 & src1) | (src3 & ~src1).
def  VBSLd    : N3V<1, 0, 0b01, 0b0001, 0, 1,
                    (outs DPR:$dst),
                    (ins DPR:$src1, DPR:$src2, DPR:$src3),
                    "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                    [(set DPR:$dst,
                      (v2i32 (or (and DPR:$src2, DPR:$src1),
                                 (and DPR:$src3, (vnot DPR:$src1)))))]>;
def  VBSLq    : N3V<1, 0, 0b01, 0b0001, 1, 1,
                    (outs QPR:$dst),
                    (ins QPR:$src1, QPR:$src2, QPR:$src3),
                    "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                    [(set QPR:$dst,
                      (v4i32 (or (and QPR:$src2, QPR:$src1),
                                 (and QPR:$src3, (vnot QPR:$src1)))))]>;

// VBIF     : Vector Bitwise Insert if False
//            like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
// VBIT     : Vector Bitwise Insert if True
//            like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
// These are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32,
                        int_arm_neon_vabdf, 0>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32,
                        int_arm_neon_vabdf, 0>;

// VABDL    : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>;
defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>;

// VABA     : Vector Absolute Difference and Accumulate
// Same opcode fields as integer VABD (op23 = 0, op11_8 = 0b0111) with
// op4 = 1; this keeps the encoding distinct from VABAL below.
defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>;
defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>;

// VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.

// VMAX     : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32,
                        int_arm_neon_vmaxf, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32,
                        int_arm_neon_vmaxf, 1>;

// VMIN     : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32,
                        int_arm_neon_vminf, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
                        int_arm_neon_vminf, 1>;

// Vector Pairwise Operations.

// VPADD    : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
                        int_arm_neon_vpaddi, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
                        int_arm_neon_vpaddi, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
                        int_arm_neon_vpaddi, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
                        int_arm_neon_vpaddf, 0>;

// VPADDL   : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
                             int_arm_neon_vpaddlu>;

// VPADAL   : Vector Pairwise Add and Accumulate Long
// op11_7 is 0b0110x for VPADAL (x = 0 signed, 1 unsigned); VPADDL above
// uses 0b0010x, so the two families have distinct encodings.
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
                              int_arm_neon_vpadalu>;

// VPMAX    : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32,
                        int_arm_neon_vpmaxf, 0>;

// VPMIN    : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8,
                        int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16,
                        int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32,
                        int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8,
                        int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16,
                        int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32,
                        int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32,
                        int_arm_neon_vpminf, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.

// VRECPE   : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
                        v2f32, v2f32, int_arm_neon_vrecpef>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
                        v4f32, v4f32, int_arm_neon_vrecpef>;

// VRECPS   : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32,
                        int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32,
                        int_arm_neon_vrecps, 1>;

// VRSQRTE  : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
                         v2f32, v2f32, int_arm_neon_vrsqrtef>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
                         v4f32, v4f32, int_arm_neon_vrsqrtef>;

// VRSQRTS  : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32,
                        int_arm_neon_vrsqrts, 1>;
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32,
int_arm_neon_vrsqrts, 1>; 1102194710Sed 1103194710Sed// Vector Shifts. 1104194710Sed 1105194710Sed// VSHL : Vector Shift 1106194710Seddefm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>; 1107194710Seddefm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>; 1108194710Sed// VSHL : Vector Shift Left (Immediate) 1109194710Seddefm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, "vshl.i", NEONvshl>; 1110194710Sed// VSHR : Vector Shift Right (Immediate) 1111194710Seddefm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>; 1112194710Seddefm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>; 1113194710Sed 1114194710Sed// VSHLL : Vector Shift Left Long 1115194710Seddef VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", 1116194710Sed v8i16, v8i8, NEONvshlls>; 1117194710Seddef VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16", 1118194710Sed v4i32, v4i16, NEONvshlls>; 1119194710Seddef VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32", 1120194710Sed v2i64, v2i32, NEONvshlls>; 1121194710Seddef VSHLLu8 : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8", 1122194710Sed v8i16, v8i8, NEONvshllu>; 1123194710Seddef VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16", 1124194710Sed v4i32, v4i16, NEONvshllu>; 1125194710Seddef VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32", 1126194710Sed v2i64, v2i32, NEONvshllu>; 1127194710Sed 1128194710Sed// VSHLL : Vector Shift Left Long (with maximum shift count) 1129194710Seddef VSHLLi8 : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", 1130194710Sed v8i16, v8i8, NEONvshlli>; 1131194710Seddef VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", 1132194710Sed v4i32, v4i16, NEONvshlli>; 1133194710Seddef VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", 1134194710Sed v2i64, v2i32, NEONvshlli>; 1135194710Sed 1136194710Sed// VSHRN : Vector Shift Right and Narrow 1137194710Seddef VSHRN16 : N2VNSh<0, 1, 0b001000, 
0b1000, 0, 0, 1, "vshrn.i16", 1138194710Sed v8i8, v8i16, NEONvshrn>; 1139194710Seddef VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32", 1140194710Sed v4i16, v4i32, NEONvshrn>; 1141194710Seddef VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64", 1142194710Sed v2i32, v2i64, NEONvshrn>; 1143194710Sed 1144194710Sed// VRSHL : Vector Rounding Shift 1145194710Seddefm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>; 1146194710Seddefm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>; 1147194710Sed// VRSHR : Vector Rounding Shift Right 1148194710Seddefm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, "vrshr.s", NEONvrshrs>; 1149194710Seddefm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>; 1150194710Sed 1151194710Sed// VRSHRN : Vector Rounding Shift Right and Narrow 1152194710Seddef VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16", 1153194710Sed v8i8, v8i16, NEONvrshrn>; 1154194710Seddef VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32", 1155194710Sed v4i16, v4i32, NEONvrshrn>; 1156194710Seddef VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64", 1157194710Sed v2i32, v2i64, NEONvrshrn>; 1158194710Sed 1159194710Sed// VQSHL : Vector Saturating Shift 1160194710Seddefm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>; 1161194710Seddefm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>; 1162194710Sed// VQSHL : Vector Saturating Shift Left (Immediate) 1163194710Seddefm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>; 1164194710Seddefm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>; 1165194710Sed// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) 1166194710Seddefm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>; 1167194710Sed 1168194710Sed// VQSHRN : Vector Saturating Shift Right and Narrow 1169194710Seddef VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16", 
1170194710Sed v8i8, v8i16, NEONvqshrns>; 1171194710Seddef VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32", 1172194710Sed v4i16, v4i32, NEONvqshrns>; 1173194710Seddef VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64", 1174194710Sed v2i32, v2i64, NEONvqshrns>; 1175194710Seddef VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16", 1176194710Sed v8i8, v8i16, NEONvqshrnu>; 1177194710Seddef VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32", 1178194710Sed v4i16, v4i32, NEONvqshrnu>; 1179194710Seddef VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64", 1180194710Sed v2i32, v2i64, NEONvqshrnu>; 1181194710Sed 1182194710Sed// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) 1183194710Seddef VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16", 1184194710Sed v8i8, v8i16, NEONvqshrnsu>; 1185194710Seddef VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32", 1186194710Sed v4i16, v4i32, NEONvqshrnsu>; 1187194710Seddef VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64", 1188194710Sed v2i32, v2i64, NEONvqshrnsu>; 1189194710Sed 1190194710Sed// VQRSHL : Vector Saturating Rounding Shift 1191194710Seddefm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s", 1192194710Sed int_arm_neon_vqrshifts, 0>; 1193194710Seddefm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u", 1194194710Sed int_arm_neon_vqrshiftu, 0>; 1195194710Sed 1196194710Sed// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow 1197194710Seddef VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16", 1198194710Sed v8i8, v8i16, NEONvqrshrns>; 1199194710Seddef VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32", 1200194710Sed v4i16, v4i32, NEONvqrshrns>; 1201194710Seddef VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64", 1202194710Sed v2i32, v2i64, NEONvqrshrns>; 1203194710Seddef VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16", 1204194710Sed 
v8i8, v8i16, NEONvqrshrnu>; 1205194710Seddef VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32", 1206194710Sed v4i16, v4i32, NEONvqrshrnu>; 1207194710Seddef VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64", 1208194710Sed v2i32, v2i64, NEONvqrshrnu>; 1209194710Sed 1210194710Sed// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) 1211194710Seddef VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16", 1212194710Sed v8i8, v8i16, NEONvqrshrnsu>; 1213194710Seddef VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32", 1214194710Sed v4i16, v4i32, NEONvqrshrnsu>; 1215194710Seddef VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64", 1216194710Sed v2i32, v2i64, NEONvqrshrnsu>; 1217194710Sed 1218194710Sed// VSRA : Vector Shift Right and Accumulate 1219194710Seddefm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; 1220194710Seddefm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>; 1221194710Sed// VRSRA : Vector Rounding Shift Right and Accumulate 1222194710Seddefm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>; 1223194710Seddefm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>; 1224194710Sed 1225194710Sed// VSLI : Vector Shift Left and Insert 1226194710Seddefm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>; 1227194710Sed// VSRI : Vector Shift Right and Insert 1228194710Seddefm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>; 1229194710Sed 1230194710Sed// Vector Absolute and Saturating Absolute. 
// VABS : Vector Absolute Value
//   Integer forms come from the N2VInt_QHS multiclass; the f32 forms are
//   spelled out per register class.
defm VABS   : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
                         "vabs.s", int_arm_neon_vabs>;
def  VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                      "vabs.f32", v2f32, v2f32, int_arm_neon_vabsf>;
def  VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                      "vabs.f32", v4f32, v4f32, int_arm_neon_vabsf>;

// VQABS : Vector Saturating Absolute Value
defm VQABS  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
                         "vqabs.s", int_arm_neon_vqabs>;

// Vector Negate.

// Integer negation is matched as a subtraction from an all-zeros vector.
// vneg uses immAllZerosV; vneg_conv uses immAllZerosV_bc.
def vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;

// Integer VNEG on a D register.  "size" fills the element-size field,
// "OpcodeStr" is the mnemonic and "Ty" the vector type being negated.
class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0,
        (outs DPR:$dst), (ins DPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
// Integer VNEG on a Q register; same parameters as VNEGD.
class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0,
        (outs QPR:$dst), (ins QPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;

// VNEG : Vector Negate
def VNEGs8d  : VNEGD<0b00, "vneg.s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg.s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                   (outs DPR:$dst), (ins DPR:$src),
                   "vneg.f32\t$dst, $src", "",
                   [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$dst), (ins QPR:$src),
                   "vneg.f32\t$dst, $src", "",
                   [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

// Map the vneg_conv form onto the same integer VNEG instructions.
def : Pat<(v8i8  (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                        "vqneg.s", int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                        "vcls.s", int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ  : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                        "vclz.i", int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def  VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     "vcnt.8", v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                     "vcnt.8", v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Move Operations.
// VMOV : Vector Move (Register)
//   Neither record carries a selection pattern (the pattern list is empty).

def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1,
                (outs DPR:$dst), (ins DPR:$src),
                "vmov\t$dst, $src", "", []>;
def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1,
                (outs QPR:$dst), (ins QPR:$src),
                "vmov\t$dst, $src", "", []>;

// VMOV : Vector Move (Immediate)
//
// Each element size gets a pair of records: an SDNodeXForm that turns a
// build_vector into the immediate operand via ARM::getVMOVImm, and a PatLeaf
// that accepts a build_vector only when ARM::getVMOVImm returns a non-null
// node.  The second argument to getVMOVImm is 1, 2 or 4 for the i8, i16 and
// i32 variants respectively.

// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 1, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;

// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 2, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;

// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 4, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;

// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 8, *CurDAG);
}]>;
// Accepts a build_vector only when ARM::getVMOVImm yields a non-null node.
def vmovImm64 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;

// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.

def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1,
                         (outs DPR:$dst), (ins i8imm:$SIMM),
                         "vmov.i8\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1,
                         (outs QPR:$dst), (ins i8imm:$SIMM),
                         "vmov.i8\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1,
                         (outs DPR:$dst), (ins i16imm:$SIMM),
                         "vmov.i16\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1,
                         (outs QPR:$dst), (ins i16imm:$SIMM),
                         "vmov.i16\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;

def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1,
                         (outs DPR:$dst), (ins i32imm:$SIMM),
                         "vmov.i32\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1,
                         (outs QPR:$dst), (ins i32imm:$SIMM),
                         "vmov.i32\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1,
                         (outs DPR:$dst), (ins i64imm:$SIMM),
                         "vmov.i64\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1,
                         (outs QPR:$dst), (ins i64imm:$SIMM),
                         "vmov.i64\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;

// VMOV : Vector Get Lane (move scalar to ARM core register)
//   The instructions themselves read a lane of a D register.  The s8/s16 and
//   u8/u16 forms match NEONvgetlanes / NEONvgetlaneu; the 32-bit form matches
//   a plain extractelt.

def VGETLNs8  : NVGetLane<0b11100101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".s8\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".s16\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNu8  : NVGetLane<0b11101101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".u8\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>;
def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".u16\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>;
def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".32\t$dst, $src[$lane]",
                          [(set GPR:$dst,
                                (extractelt (v2i32 DPR:$src), imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td

// Lane reads from a Q register: extract the D subregister picked by
// SubReg_*_reg, then read the lane given by SubReg_*_lane from it.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8
             (v8i8 (EXTRACT_SUBREG QPR:$src, (SubReg_i8_reg imm:$lane))),
             (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16
             (v4i16 (EXTRACT_SUBREG QPR:$src, (SubReg_i16_reg imm:$lane))),
             (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8
             (v8i8 (EXTRACT_SUBREG QPR:$src, (SubReg_i8_reg imm:$lane))),
             (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16
             (v4i16 (EXTRACT_SUBREG QPR:$src, (SubReg_i16_reg imm:$lane))),
             (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32
             (v2i32 (EXTRACT_SUBREG QPR:$src, (SubReg_i32_reg imm:$lane))),
             (SubReg_i32_lane imm:$lane))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)
//   $src1 is tied to $dst for all three instructions.  The 8- and 16-bit
//   forms match vector_insert; the 32-bit form matches insertelt.

let Constraints = "$src1 = $dst" in {
def VSETLNi8  : NVSetLane<0b11100100, 0b1011, 0b00,
                          (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
                          "vmov", ".8\t$dst[$lane], $src2",
                          [(set DPR:$dst,
                                (vector_insert (v8i8 DPR:$src1),
                                               GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01,
                          (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
                          "vmov", ".16\t$dst[$lane], $src2",
                          [(set DPR:$dst,
                                (vector_insert (v4i16 DPR:$src1),
                                               GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00,
                          (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
                          "vmov", ".32\t$dst[$lane], $src2",
                          [(set DPR:$dst,
                                (insertelt (v2i32 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
}

// Lane writes into a Q register: extract the affected D subregister, set the
// lane in it, and insert the result back at the same subregister index.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG
                    QPR:$src1,
                    (VSETLNi8
                       (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                             (SubReg_i8_reg imm:$lane))),
                       GPR:$src2, (SubReg_i8_lane imm:$lane)),
                    (SubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG
                    QPR:$src1,
                    (VSETLNi16
                       (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                              (SubReg_i16_reg imm:$lane))),
                       GPR:$src2, (SubReg_i16_lane imm:$lane)),
                    (SubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG
                    QPR:$src1,
                    (VSETLNi32
                       (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                              (SubReg_i32_reg imm:$lane))),
                       GPR:$src2, (SubReg_i32_lane imm:$lane)),
                    (SubReg_i32_reg imm:$lane)))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

// Matches a vector_shuffle that is a splat of element 0.
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
                       (vector_shuffle node:$lhs, node:$rhs), [{
  ShuffleVectorSDNode *Shuf = cast<ShuffleVectorSDNode>(N);
  return Shuf->isSplat() && Shuf->getSplatIndex() == 0;
}]>;

// VDUP from a core register into a D register: a splat_lo of
// scalar_to_vector of the GPR operand, typed as Ty.
class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs DPR:$dst), (ins GPR:$src),
          "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set DPR:$dst,
                (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
// Same as VDUPD, but the destination is a Q register.
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3,
          (outs QPR:$dst), (ins GPR:$src),
          "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set QPR:$dst,
                (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;

def VDUP8d  : VDUPD<0b11101100, 0b00, ".8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>;
def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>;
def VDUP8q  : VDUPQ<0b11101110, 0b00, ".8", v16i8>;
def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>;

// f32 splats: the GPR operand is bitconverted to f32 before the splat.
def VDUPfd : NVDup<0b11101000, 0b1011, 0b00,
                   (outs DPR:$dst), (ins GPR:$src),
                   "vdup", ".32\t$dst, $src",
                   [(set DPR:$dst,
                         (v2f32 (splat_lo
                                   (scalar_to_vector
                                      (f32 (bitconvert GPR:$src))),
                                   undef)))]>;
def VDUPfq : NVDup<0b11101010, 0b1011, 0b00,
                   (outs QPR:$dst), (ins GPR:$src),
                   "vdup", ".32\t$dst, $src",
                   [(set QPR:$dst,
                         (v4f32 (splat_lo
                                   (scalar_to_vector
                                      (f32 (bitconvert GPR:$src))),
                                   undef)))]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

// Converts a splat vector_shuffle into its splat index as an i32 target
// constant.
def SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *Shuf = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(Shuf->getSplatIndex(), MVT::i32);
}]>;

// Matches any splat vector_shuffle; the lane operand is produced by
// SHUFFLE_get_splat_lane.
def splat_lane : PatFrag<(ops node:$lhs, node:$rhs),
                         (vector_shuffle node:$lhs, node:$rhs), [{
  ShuffleVectorSDNode *Shuf = cast<ShuffleVectorSDNode>(N);
  return Shuf->isSplat();
}], SHUFFLE_get_splat_lane>;
// VDUP of one lane of a D register into a D register, matched as a
// splat_lane shuffle of $src with undef.
class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
        (outs DPR:$dst),
        (ins DPR:$src, i32imm:$lane),
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>;

// vector_shuffle requires that the source and destination types match, so
// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node.
class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
              ValueType ResTy, ValueType OpTy>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
        (outs QPR:$dst),
        (ins DPR:$src, i32imm:$lane),
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set QPR:$dst,
              (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>;

// D-register results.
def VDUPLN8d  : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>;
def VDUPLNfd  : VDUPLND<0b01, 0b00, "vdup.32", v2f32>;
// Q-register results (the source is still a D register).
def VDUPLN8q  : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;

// VMOVN : Vector Narrowing Move
defm VMOVN   : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0,
                           "vmovn.i", int_arm_neon_vmovn>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0,
                           "vqmovn.s", int_arm_neon_vqmovns>;
defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0,
                           "vqmovn.u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0,
                            "vqmovun.s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VLInt_QHS<0,1,0b1010,0,0,1,
                          "vmovl.s", int_arm_neon_vmovls>;
defm VMOVLu : N2VLInt_QHS<1,1,0b1010,0,0,1,
                          "vmovl.u", int_arm_neon_vmovlu>;

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
//   Selected directly from the generic fp_to_sint / fp_to_uint /
//   sint_to_fp / uint_to_fp nodes.

// D-register forms.
def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
                    "vcvt.s32.f32", v2i32, v2f32, fp_to_sint>;
def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
                    "vcvt.u32.f32", v2i32, v2f32, fp_to_uint>;
def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
                    "vcvt.f32.s32", v2f32, v2i32, sint_to_fp>;
def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
                    "vcvt.f32.u32", v2f32, v2i32, uint_to_fp>;

// Q-register forms.
def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0,
                    "vcvt.s32.f32", v4i32, v4f32, fp_to_sint>;
def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0,
                    "vcvt.u32.f32", v4i32, v4f32, fp_to_uint>;
def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0,
                    "vcvt.f32.s32", v4f32, v4i32, sint_to_fp>;
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0,
                    "vcvt.f32.u32", v4f32, v4i32, uint_to_fp>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
// Note: Some of the opcode bits in the following VCVT instructions need to
// be encoded based on the immed values.
// Fixed-point conversions, D-register forms (selected from intrinsics).
def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.s32.f32", v2i32, v2f32,
                        int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.u32.f32", v2i32, v2f32,
                        int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.s32", v2f32, v2i32,
                        int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.u32", v2f32, v2i32,
                        int_arm_neon_vcvtfxu2fp>;

// Fixed-point conversions, Q-register forms.
def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.s32.f32", v4i32, v4f32,
                        int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1,
                        "vcvt.u32.f32", v4i32, v4f32,
                        int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.s32", v4f32, v4i32,
                        int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1,
                        "vcvt.f32.u32", v4f32, v4i32,
                        int_arm_neon_vcvtfxu2fp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert
//   Every pattern below rewrites a bitconvert to the same register with the
//   destination type; no instruction appears in the output.

// 64-bit (DPR) types: v1i64, v2i32, v4i16, v8i8, f64, v2f32.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;

def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;

def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;

def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;

def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;

def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

// 128-bit (QPR) types: v2i64, v4i32, v8i16, v16i8, v4f32, v2f64.
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;

def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;

def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;

def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;

def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;

def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
// Remaining bitconvert patterns producing v2f64 in a Q register.
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))),
          (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))),
          (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))),
          (v2f64 QPR:$src)>;