ARMInstrNEON.td revision 194710
//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Profile for vector comparisons: one integer-vector result and two source
// operands of the same (vector) type.
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;

// Vector compare and test nodes, all sharing the comparison profile above.
def NEONvceq  : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvcge  : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt  : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst  : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
// Shift profiles: the shift amount is always the trailing i32 operand.
// SDTARMVSH    - result and first operand have the same type.
// SDTARMVSHX   - result and first operand may differ (long/narrow shifts).
// SDTARMVSHINS - two vector operands (insert target and shifted source).
def SDTARMVSH    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHX   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshl   : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs  : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru  : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn  : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shift variants.
def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shift variants.
def NEONvqshls   : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu   : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu  : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns  : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu  : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// Saturating rounding narrow shift variants.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert (VSLI/VSRI).
def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Extract a lane into an i32; the lane number is the trailing i32 operand.
def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                        SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Duplicate a lane of a Q register; the lane number is the i32 operand.
def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
                           SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;

//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//

// addrmode_neonldstm := reg
//
/* TODO: Take advantage of vldm.
def addrmode_neonldstm : Operand<i32>,
                ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> {
  let PrintMethod = "printAddrNeonLdStMOperand";
  let MIOperandInfo = (ops GPR, i32imm);
}
*/

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

/* TODO: Take advantage of vldm.
let mayLoad = 1 in {
def VLDMD : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               "vldm${addr:submode} ${addr:base}, $dst1",
               []>;

def VLDMS : NI<(outs),
               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
               "vldm${addr:submode} ${addr:base}, $dst1",
               []>;
}
*/

// Use vldmia to load a Q register as a D register pair.
// Matches a plain v2f64 load from a GPR base address; the "dregpair"
// modifier prints the Q register as its two D halves.
def VLDRQ : NI<(outs QPR:$dst), (ins GPR:$addr),
               "vldmia $addr, ${dst:dregpair}",
               [(set QPR:$dst, (v2f64 (load GPR:$addr)))]>;

// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr),
               "vstmia $addr, ${src:dregpair}",
               [(store (v2f64 QPR:$src), GPR:$addr)]>;


//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Map a Q-register element index to the D sub-register index (5 or 6)
// that contains that element; the divisor is the element count per D reg.
def SubReg_i8_reg  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
}]>;
def SubReg_i16_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
}]>;
def SubReg_i32_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
}]>;
def SubReg_f64_reg : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs
// (i.e. reduce the lane index modulo the element count per D reg).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations, both double- and quad-register.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
// Same as N2VD but for quad registers: the fixed bit after op11_7 is 1
// instead of 0 and the operands use QPR.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Identical to N2VD/N2VQ except the pattern invokes an Intrinsic.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Narrow 2-register intrinsics.
// Q-register source, D-register (narrowed) result.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;

// Long 2-register intrinsics.  (This is currently only used for VMOVL and is
// derived from N2VImm instead of N2V because of the way the size is encoded.)
class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op6, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, (outs QPR:$dst),
           (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
           [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;

// Basic 3-register operations, both double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  // Allow the two sources to be swapped when the operation is commutative.
  let isCommutable = Commutable;
}
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, ValueType ResTy, ValueType OpTy,
           SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
  let isCommutable = Commutable;
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Multiply-Add/Sub operations, both double- and quad-register.
// $src1 is tied to $dst: dst = OpNode(src1, MulOp(src2, src3)).
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;

// Neon Long 3-argument intrinsic.  The destination register is
// a quad-register and is also used as the first source operand register;
// the second and third sources are double registers.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3),
        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
        [(set QPR:$dst,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;

// Narrowing 3-register intrinsics: two Q-register sources, D-register result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyD, ValueType TyQ,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$dst), (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
// Two D-register sources, Q-register (widened) result.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Wide 3-register intrinsics: first source is a Q register, second is a
// D register, result is a Q register.
class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, ValueType TyQ, ValueType TyD,
              Intrinsic IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
        [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
        [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
        [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;

// Shift by immediate,
// both double- and quad-register.
// $SIMM is the immediate shift amount, passed to the node as an i32.
class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;

// Long shift by immediate.
// D-register source, Q-register (widened) result.
class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
             ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Narrow shift by immediate: Q-register source, D-register result.
class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
             ValueType OpTy, SDNode OpNode>
  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
           (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
                                          (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
// $src1 is tied to $dst: dst = src1 + ShOp(src2, SIMM).
class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set DPR:$dst, (Ty (add DPR:$src1,
                                (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set QPR:$dst, (Ty (add QPR:$src1,
                                (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;

// Shift by immediate and insert,
// both double- and quad-register.
// $src1 is tied to $dst and supplies the bits not overwritten by the
// shifted $src2 (VSLI/VSRI semantics come from the ShOp node).
class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
           [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;

// Convert, with fractional bits immediate,
// both double- and quad-register.
// $SIMM is the number of fraction bits for the fixed-point conversion.
class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
              Intrinsic IntOp>
  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM),
           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
           [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
// (The op21_20 size field encodes the element size throughout this file:
// 0b00 = 8, 0b01 = 16, 0b10 = 32, 0b11 = 64 bits, matching the "8"/"16"/
// "32"/"64" mnemonic suffixes.)
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, OpcodeStr, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4, string OpcodeStr,
                       Intrinsic IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 8, 16 and 32 bits:
multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                       bit op4, string OpcodeStr, Intrinsic IntOp> {
  def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
                      !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
  def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
                      !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
                      !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                     string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
                      v4i32, v4i32, IntOp, Commutable>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}

// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"),
                      v2i64, v2i64, IntOp, Commutable>;
}


// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr,"16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
                      v2i64, v2i32, IntOp, Commutable>;
}


// Neon Multiply-Op vector operations (dst = OpNode(dst, src2 * src3)),
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>;
}


// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4,
                       !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4,
                       !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4,
                       !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4,
                       !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4,
                       !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4,
                       !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long 3-argument intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4,
                       !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4,
                       !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
  // The size field must be 0b00 for 8-bit elements; 0b01 is the 16-bit
  // encoding and would collide with the inherited v4i32 def above.  This
  // matches the 8-bit entries of N3VLInt_QHS and N3VInt_QHS.
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4,
                       !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4, string OpcodeStr,
                      Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
}


// Neon Pairwise long 2-register intrinsics,
// element sizes of 8, 16 and 32 bits (the result type has half as many
// elements, each twice as wide, as the source type):
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon Pairwise long 2-register accumulate intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"), v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"), v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"), v2i64, v4i32, IntOp>;
}


// Neon 2-register vector shift by immediate,
// element sizes of 8, 16, 32 and 64 bits.
// NOTE(review): the 64-bit variants pass op21_16 = 0 with op7 = 1
// (presumably the L bit of the shift-immediate encoding) — the actual
// immediate bits are filled in elsewhere.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "8"), v8i8, OpNode>;
  def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "16"), v4i16, OpNode>;
  def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "32"), v2i32, OpNode>;
  def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4,
                     !strconcat(OpcodeStr, "64"), v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "8"), v16i8, OpNode>;
  def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "16"), v8i16, OpNode>;
  def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "32"), v4i32, OpNode>;
  def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4,
                     !strconcat(OpcodeStr, "64"), v2i64, OpNode>;
}


// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v8i8, ShOp>;
  def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
  def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
  def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"), v16i8, ShOp>;
  def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"), v8i16, ShOp>;
  def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"), v4i32, ShOp>;
  def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"), v2i64, ShOp>;
}


// Neon Shift-Insert vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
766 def v8i8 : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4, 767 !strconcat(OpcodeStr, "8"), v8i8, ShOp>; 768 def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4, 769 !strconcat(OpcodeStr, "16"), v4i16, ShOp>; 770 def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4, 771 !strconcat(OpcodeStr, "32"), v2i32, ShOp>; 772 def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4, 773 !strconcat(OpcodeStr, "64"), v1i64, ShOp>; 774 775 // 128-bit vector types. 776 def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4, 777 !strconcat(OpcodeStr, "8"), v16i8, ShOp>; 778 def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4, 779 !strconcat(OpcodeStr, "16"), v8i16, ShOp>; 780 def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4, 781 !strconcat(OpcodeStr, "32"), v4i32, ShOp>; 782 def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4, 783 !strconcat(OpcodeStr, "64"), v2i64, ShOp>; 784} 785 786//===----------------------------------------------------------------------===// 787// Instruction Definitions. 788//===----------------------------------------------------------------------===// 789 790// Vector Add Operations. 
// VADD : Vector Add (integer and floating-point)
// NOTE(review): the trailing 0/1 argument to these multiclasses appears to
// mark commutable operations (1 for add-like ops, 0 for the wide forms) --
// confirm against the N3V*/N3VInt* class definitions earlier in this file.
defm VADD    : N3V_QHSD<0, 0, 0b1000, 0, "vadd.i", add, 1>;
def  VADDfd  : N3VD<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd, 1>;
def  VADDfq  : N3VQ<0, 0, 0b00, 0b1101, 0, "vadd.f32", v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs  : N3VLInt_QHS<0,1,0b0000,0, "vaddl.s", int_arm_neon_vaddls, 1>;
defm VADDLu  : N3VLInt_QHS<1,1,0b0000,0, "vaddl.u", int_arm_neon_vaddlu, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
defm VADDWs  : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>;
defm VADDWu  : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>;
// VHADD : Vector Halving Add
defm VHADDs  : N3VInt_QHS<0,0,0b0000,0, "vhadd.s", int_arm_neon_vhadds, 1>;
defm VHADDu  : N3VInt_QHS<1,0,0b0000,0, "vhadd.u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, "vrhadd.s", int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, "vrhadd.u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
defm VQADDs  : N3VInt_QHSD<0,0,0b0000,1, "vqadd.s", int_arm_neon_vqadds, 1>;
defm VQADDu  : N3VInt_QHSD<1,0,0b0000,1, "vqadd.u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN  : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>;

// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, "vmul.i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v8i8, v8i8,
                        int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v16i8, v16i8,
                        int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, "vmul.f32", v4f32, v4f32, fmul, 1>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0,0,0b1011,0, "vqdmulh.s", int_arm_neon_vqdmulh, 1>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1,0,0b1011,0, "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs   : N3VLInt_QHS<0,1,0b1100,0, "vmull.s", int_arm_neon_vmulls, 1>;
defm VMULLu   : N3VLInt_QHS<1,1,0b1100,0, "vmull.u", int_arm_neon_vmullu, 1>;
def  VMULLp   : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8,
                        int_arm_neon_vmullp, 1>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.
// VMLA : Vector Multiply Accumulate (integer and floating-point)
defm VMLA    : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>;
def  VMLAfd  : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>;
def  VMLAfq  : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs  : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALu  : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
// FIX: integer VMLS is distinguished from integer VMLA by bit 24 (op = 1);
// with op24 = 0 this def would duplicate the VMLA encoding above.
defm VMLS    : N3VMulOp_QHS<1, 0, 0b1001, 0, "vmls.i", sub>;
def  VMLSfd  : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>;
def  VMLSfq  : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs  : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLu  : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;

// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
defm VSUB    : N3V_QHSD<1, 0, 0b1000, 0, "vsub.i", sub, 0>;
def  VSUBfd  : N3VD<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq  : N3VQ<0, 0, 0b10, 0b1101, 0, "vsub.f32", v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
// NOTE(review): the trailing '1' looks like a commutability flag, which is
// suspicious for a subtraction (VSUBW/VHSUB/VQSUB below all use 0) --
// verify against the N3VLInt_QHS definition.
defm VSUBLs  : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 1>;
defm VSUBLu  : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 1>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
defm VSUBWs  : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
defm VSUBWu  : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs  : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>;
defm VHSUBu  : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
defm VQSUBs  : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>;
defm VQSUBu  : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN  : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;

// Vector Comparisons.
// VCEQ : Vector Compare Equal
// Comparisons produce integer masks of the same width as the operands
// (all-ones per lane on true), hence the v2i32/v2f32 result/source pairing.
defm VCEQ  : N3V_QHS<1, 0, 0b1000, 1, "vceq.i", NEONvceq, 1>;
def  VCEQfd : N3VD<0,0,0b00,0b1110,0, "vceq.f32", v2i32, v2f32, NEONvceq, 1>;
def  VCEQfq : N3VQ<0,0,0b00,0b1110,0, "vceq.f32", v4i32, v4f32, NEONvceq, 1>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, "vcge.s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, "vcge.u", NEONvcgeu, 0>;
def  VCGEfd : N3VD<1,0,0b00,0b1110,0, "vcge.f32", v2i32, v2f32, NEONvcge, 0>;
def  VCGEfq : N3VQ<1,0,0b00,0b1110,0, "vcge.f32", v4i32, v4f32, NEONvcge, 0>;
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, "vcgt.s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, "vcgt.u", NEONvcgtu, 0>;
def  VCGTfd : N3VD<1,0,0b10,0b1110,0, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>;
def  VCGTfq : N3VQ<1,0,0b10,0b1110,0, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v2i32, v2f32,
                      int_arm_neon_vacged, 0>;
def  VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v4i32, v4f32,
                      int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v2i32, v2f32,
                      int_arm_neon_vacgtd, 0>;
def  VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v4i32, v4f32,
                      int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST  : N3V_QHS<0, 0, 0b1000, 1, "vtst.i", NEONvtst, 1>;

// Vector Bitwise Operations.
// VAND : Vector Bitwise AND
def  VANDd : N3VD<0, 0, 0b00, 0b0001, 1, "vand", v2i32, v2i32, and, 1>;
def  VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, "vand", v4i32, v4i32, and, 1>;

// VEOR : Vector Bitwise Exclusive OR
def  VEORd : N3VD<1, 0, 0b00, 0b0001, 1, "veor", v2i32, v2i32, xor, 1>;
def  VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, "veor", v4i32, v4i32, xor, 1>;

// VORR : Vector Bitwise OR
def  VORRd : N3VD<0, 0, 0b10, 0b0001, 1, "vorr", v2i32, v2i32, or, 1>;
def  VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, "vorr", v4i32, v4i32, or, 1>;

// VBIC : Vector Bitwise Bit Clear (AND NOT)
def  VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                 (ins DPR:$src1, DPR:$src2), "vbic\t$dst, $src1, $src2", "",
                 [(set DPR:$dst, (v2i32 (and DPR:$src1,(vnot DPR:$src2))))]>;
def  VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                 (ins QPR:$src1, QPR:$src2), "vbic\t$dst, $src1, $src2", "",
                 [(set QPR:$dst, (v4i32 (and QPR:$src1,(vnot QPR:$src2))))]>;

// VORN : Vector Bitwise OR NOT
def  VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
                 (ins DPR:$src1, DPR:$src2), "vorn\t$dst, $src1, $src2", "",
                 [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot DPR:$src2))))]>;
def  VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
                 (ins QPR:$src1, QPR:$src2), "vorn\t$dst, $src1, $src2", "",
                 [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot QPR:$src2))))]>;

// VMVN : Vector Bitwise NOT
def  VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                 (outs DPR:$dst), (ins DPR:$src), "vmvn\t$dst, $src", "",
                 [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
def  VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                 (outs QPR:$dst), (ins QPR:$src), "vmvn\t$dst, $src", "",
                 [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select: (dst & src1) | (src3 & ~src1)
def  VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
                 (ins DPR:$src1, DPR:$src2, DPR:$src3),
                 "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                 [(set DPR:$dst,
                   (v2i32 (or (and DPR:$src2, DPR:$src1),
                              (and DPR:$src3, (vnot DPR:$src1)))))]>;
def  VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
                 (ins QPR:$src1, QPR:$src2, QPR:$src3),
                 "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                 [(set QPR:$dst,
                   (v4i32 (or (and QPR:$src2, QPR:$src1),
                              (and QPR:$src3, (vnot QPR:$src1)))))]>;

// VBIF : Vector Bitwise Insert if False
//   like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
// VBIT : Vector Bitwise Insert if True
//   like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
// These are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs  : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>;
defm VABDu  : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>;
def  VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32,
                      int_arm_neon_vabdf, 0>;
def  VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32,
                      int_arm_neon_vabdf, 0>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>;
defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs  : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>;
defm VABAu  : N3VInt3_QHS<1,1,0b0101,0, "vaba.u", int_arm_neon_vabau>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs  : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>;
defm VMAXu  : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>;
def  VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32,
                      int_arm_neon_vmaxf, 1>;
def  VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32,
                      int_arm_neon_vmaxf, 1>;

// VMIN : Vector Minimum
defm VMINs  : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>;
defm VMINu  : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>;
def  VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32,
                      int_arm_neon_vminf, 1>;
def  VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
                      int_arm_neon_vminf, 1>;

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
                        int_arm_neon_vpaddi, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
                        int_arm_neon_vpaddi, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
                        int_arm_neon_vpaddi, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
                        int_arm_neon_vpaddf, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
// FIX: VPADAL encodes bits 11-7 as 0110 op 0 (0b01100 signed / 0b01101
// unsigned); the previous 0b00100/0b00101 duplicated the VPADDL encodings
// directly above.
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32,
                        int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32,
                        int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32,
                        int_arm_neon_vpmaxf, 0>;

// VPMIN : Vector Pairwise Minimum
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8,
                        int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16,
                        int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32,
                        int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8,
                        int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16,
                        int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32,
                        int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32,
                        int_arm_neon_vpminf, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
// VRECPE : Vector Reciprocal Estimate
def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
                        v2i32, v2i32, int_arm_neon_vrecpe>;
def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
                        v4i32, v4i32, int_arm_neon_vrecpe>;
def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
                        v2f32, v2f32, int_arm_neon_vrecpef>;
def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
                        v4f32, v4f32, int_arm_neon_vrecpef>;

// VRECPS : Vector Reciprocal Step
def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32,
                        int_arm_neon_vrecps, 1>;
def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32,
                        int_arm_neon_vrecps, 1>;

// VRSQRTE : Vector Reciprocal Square Root Estimate
def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
                         v2i32, v2i32, int_arm_neon_vrsqrte>;
def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
                         v4i32, v4i32, int_arm_neon_vrsqrte>;
def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
                         v2f32, v2f32, int_arm_neon_vrsqrtef>;
def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
                         v4f32, v4f32, int_arm_neon_vrsqrtef>;

// VRSQRTS : Vector Reciprocal Square Root Step
def  VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32,
                         int_arm_neon_vrsqrts, 1>;
def  VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32,
                         int_arm_neon_vrsqrts, 1>;

// Vector Shifts.
// VSHL : Vector Shift
defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>;
defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
// FIX: VSHL (immediate) uses opcode bits 11-8 = 0101 (matching VSLI, which
// differs only in bit 24); 0b0111 here would duplicate the VQSHL (immediate)
// encoding of VQSHLsi defined below.
defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, "vshl.i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>;
defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>;

// VSHLL : Vector Shift Left Long
def VSHLLs8  : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8",
                      v8i16, v8i8, NEONvshlls>;
def VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16",
                      v4i32, v4i16, NEONvshlls>;
def VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32",
                      v2i64, v2i32, NEONvshlls>;
def VSHLLu8  : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8",
                      v8i16, v8i8, NEONvshllu>;
def VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16",
                      v4i32, v4i16, NEONvshllu>;
def VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32",
                      v2i64, v2i32, NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
def VSHLLi8  : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8",
                      v8i16, v8i8, NEONvshlli>;
def VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16",
                      v4i32, v4i16, NEONvshlli>;
def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32",
                      v2i64, v2i32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, "vshrn.i16",
                     v8i8, v8i16, NEONvshrn>;
def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32",
                     v4i16, v4i32, NEONvshrn>;
def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64",
                     v2i32, v2i64, NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>;
defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, "vrshr.s", NEONvrshrs>;
defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16",
                      v8i8, v8i16, NEONvrshrn>;
def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32",
                      v4i16, v4i32, NEONvrshrn>;
def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64",
                      v2i32, v2i64, NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>;
defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>;
defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16",
                       v8i8, v8i16, NEONvqshrns>;
def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32",
                       v4i16, v4i32, NEONvqshrns>;
def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64",
                       v2i32, v2i64, NEONvqshrns>;
def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16",
                       v8i8, v8i16, NEONvqshrnu>;
def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32",
                       v4i16, v4i32, NEONvqshrnu>;
def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64",
                       v2i32, v2i64, NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16",
                       v8i8, v8i16, NEONvqshrnsu>;
def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32",
                       v4i16, v4i32, NEONvqshrnsu>;
def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64",
                       v2i32, v2i64, NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s",
                           int_arm_neon_vqrshifts, 0>;
defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u",
                           int_arm_neon_vqrshiftu, 0>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16",
                       v8i8, v8i16, NEONvqrshrns>;
def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32",
                       v4i16, v4i32, NEONvqrshrns>;
def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64",
                       v2i32, v2i64, NEONvqrshrns>;
def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16",
                       v8i8, v8i16, NEONvqrshrnu>;
def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32",
                       v4i16, v4i32, NEONvqrshrnu>;
def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64",
                       v2i32, v2i64, NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16",
                       v8i8, v8i16, NEONvqrshrnsu>;
def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32",
                       v4i16, v4i32, NEONvqrshrnsu>;
def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64",
                       v2i32, v2i64, NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
// (the trailing '.' in the opcode string is completed with the element size
// by !strconcat in N2VShIns_QHSD, giving "vsli.8", "vsli.16", etc.)
defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>;
// VSRI : Vector Shift Right and Insert
defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;

// Vector Absolute and Saturating Absolute.

// VABS : Vector Absolute Value
defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, "vabs.s",
                       int_arm_neon_vabs>;
def  VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
                      v2f32, v2f32, int_arm_neon_vabsf>;
def  VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
                      v4f32, v4f32, int_arm_neon_vabsf>;

// VQABS : Vector Saturating Absolute Value
defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
                        int_arm_neon_vqabs>;

// Vector Negate.

// Negation is matched as a subtraction from an all-zeros vector; the _conv
// form matches through a bitconvert of the zero vector.
def vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
        !strconcat(OpcodeStr, "\t$dst, $src"), "",
        [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;

// VNEG : Vector Negate
def VNEGs8d  : VNEGD<0b00, "vneg.s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
def VNEGs8q  : VNEGQ<0b00, "vneg.s8", v16i8>;
def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                   (outs DPR:$dst), (ins DPR:$src), "vneg.f32\t$dst, $src", "",
                   [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                   (outs QPR:$dst), (ins QPR:$src), "vneg.f32\t$dst, $src", "",
                   [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

def : Pat<(v8i8  (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, "vqneg.s",
                        int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, "vcls.s",
                       int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, "vclz.i",
                       int_arm_neon_vclz>;
// VCNT : Vector Count One Bits
def  VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
                     v8i8, v8i8, int_arm_neon_vcnt>;
def  VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
                     v16i8, v16i8, int_arm_neon_vcnt>;

// Vector Move Operations.

// VMOV : Vector Move (Register)

def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
                "vmov\t$dst, $src", "", []>;
def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
                "vmov\t$dst, $src", "", []>;

// VMOV : Vector Move (Immediate)

// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 1, *CurDAG);
}]>;
// Matches only build_vectors representable as a VMOV.i8 immediate.
def vmovImm8 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;

// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 2, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;

// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 4, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;

// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
  return ARM::getVMOVImm(N, 8, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;

// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.
def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;

def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
                         (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
                         (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
                         (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "",
                         [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
                         (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "",
                         [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;

// VMOV     : Vector Get Lane (move scalar to ARM core register)

def VGETLNs8  : NVGetLane<0b11100101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".s8\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
                                           imm:$lane))]>;
def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".s16\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
                                           imm:$lane))]>;
def VGETLNu8  : NVGetLane<0b11101101, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".u8\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
                                           imm:$lane))]>;
def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".u16\t$dst, $src[$lane]",
                          [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
                                           imm:$lane))]>;
def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
                          "vmov", ".32\t$dst, $src[$lane]",
                          [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
                                           imm:$lane))]>;
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Extract from a Q register by extracting the containing D subregister and
// using the D-register get-lane instruction on the remapped lane index.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (SubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (SubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (SubReg_i8_reg imm:$lane))),
                    (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (SubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (SubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;


// VMOV     : Vector Set Lane (move ARM core register to scalar)

// Set-lane instructions read-modify-write the destination register.
let Constraints = "$src1 = $dst" in {
def VSETLNi8  : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
                          "vmov", ".8\t$dst[$lane], $src2",
                          [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
                          "vmov", ".16\t$dst[$lane], $src2",
                          [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
                          "vmov", ".32\t$dst[$lane], $src2",
                          [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
                                           GPR:$src2, imm:$lane))]>;
}
// Insert into a Q register by updating the containing D subregister and
// re-inserting it into the original Q value.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (SubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane)),
                  (SubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (SubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane)),
                  (SubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (SubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane)),
                  (SubReg_i32_reg imm:$lane)))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;

// VDUP     : Vector Duplicate (from ARM core register to all elements)

// splat_lo matches a vector_shuffle that is a splat of element 0.
def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
                       (vector_shuffle node:$lhs, node:$rhs), [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
}]>;

class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
          "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set DPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
          "vdup", !strconcat(asmSize, "\t$dst, $src"),
          [(set QPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, ".8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, ".16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, ".32", v2i32>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, ".8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, ".32", v4i32>;

def  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
                      "vdup", ".32\t$dst, $src",
                      [(set DPR:$dst, (v2f32 (splat_lo
                                              (scalar_to_vector
                                               (f32 (bitconvert GPR:$src))),
                                              undef)))]>;
def  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
                      "vdup", ".32\t$dst, $src",
                      [(set QPR:$dst, (v4f32 (splat_lo
                                              (scalar_to_vector
                                               (f32 (bitconvert GPR:$src))),
                                              undef)))]>;

// VDUP     : Vector Duplicate Lane (from scalar to all elements)

// Xform: extract the splat lane index from a splat vector_shuffle.
def SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return CurDAG->getTargetConstant(SVOp->getSplatIndex(), MVT::i32);
}]>;

// splat_lane matches any splat shuffle and transforms it to its lane index.
def splat_lane : PatFrag<(ops node:$lhs, node:$rhs),
                         (vector_shuffle node:$lhs, node:$rhs), [{
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  return SVOp->isSplat();
}], SHUFFLE_get_splat_lane>;
class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
        (outs DPR:$dst), (ins DPR:$src, i32imm:$lane),
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>;

// vector_shuffle requires that the source and destination types match, so
// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node.
class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
              ValueType ResTy, ValueType OpTy>
  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
        (outs QPR:$dst), (ins DPR:$src, i32imm:$lane),
        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
        [(set QPR:$dst, (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>;

def VDUPLN8d  : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>;
def VDUPLNfd  : VDUPLND<0b01, 0b00, "vdup.32", v2f32>;
def VDUPLN8q  : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>;
def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
def VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;

// VMOVN    : Vector Narrowing Move
defm VMOVN    : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i",
                            int_arm_neon_vmovn>;
// VQMOVN   : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, "vqmovn.s",
                            int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, "vqmovn.u",
                            int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, "vqmovun.s",
                            int_arm_neon_vqmovnsu>;
// VMOVL    : Vector Lengthening Move
defm VMOVLs   : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>;
defm VMOVLu   : N2VLInt_QHS<1,1,0b1010,0,0,1, "vmovl.u", int_arm_neon_vmovlu>;

// Vector Conversions.

// VCVT     : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
// Note: Some of the opcode bits in the following VCVT instructions need to
// be encoded based on the immed values.
def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;

def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert: bitcasts between vector types in the same register class are
// no-ops and select to the source register unchanged.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;