ARMInstrNEON.td revision 263508
//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//

// Generic NEON modified-immediate operand (printed via the NEON modimm
// printer; no parser match class, so not used for asm matching).
def nModImm : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
}

// Per-element-size splat immediates and VMOV immediates.  Each operand pairs
// an AsmOperandClass (for the asm matcher) with the NEON modimm printer.
def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
// Floating-point immediate for VMOV.f32; uses the VFP FP-immediate printer.
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printNEONModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

// Lane-index operands.  The ImmLeaf predicates bound the index by element
// count: 8 byte lanes, 4 half-word lanes, 2 word lanes per D register.
def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers),
// with "all lanes" subscripting.
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                         "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}


// Lane-indexed vector lists: the MI operand is a (register, lane-index) pair,
// rendered by addVecListIndexedOperands.
// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Alignment-qualified load/store fragments: each predicate checks the memory
// node's alignment so patterns can select between NEON and core ld/st forms.
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Vector compares: result is an integer vector of the same width; the "Z"
// profiles take a single operand (compare against zero).
def SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def SDTARMVCMPZ   : SDTypeProfile<1, 1, []>;

def NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
def NEONvceqz     : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
def NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def NEONvcgez     : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def NEONvclez     : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def NEONvcgtz     : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def NEONvcltz     : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

// Plain, long ("LL"), and narrowing ("RN") shifts by immediate.
def NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
def NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
def NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
def NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
def NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
def NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
def NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;

// Rounding shifts.
def NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
def NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
def NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;

// Saturating shifts.
def NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
def NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
def NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
def NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
def NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
def NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;

// Saturating rounding narrowing shifts.
def NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
def NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;

// Shift-and-insert.
def NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
def NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;

// Lane extraction to an i32 scalar (unsigned/signed extension).
def SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
                                         SDTCisVT<2, i32>]>;
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;

// Vector constants built from a NEON modified-immediate encoding (i32).
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm   : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm   : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

// VORR/VBIC with a modified-immediate second operand.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                           SDTCisVT<2, i32>]>;
def NEONvorrImm   : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvbicImm   : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;

// Bitwise select: all four vectors share one type.
def NEONvbsl      : SDNode<"ARMISD::VBSL",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def NEONvdup      : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;

// VDUPLANE can produce a quad-register result from a double-register source,
// so the result is not constrained to match the source.
def NEONvduplane  : SDNode<"ARMISD::VDUPLANE",
                           SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                                SDTCisVT<2, i32>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def NEONvrev64    : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32    : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16    : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;

// Two-result shuffles (VZIP/VUZP/VTRN produce a register pair).
def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                         SDTCisSameAs<1, 2>]>;
def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>]>;
def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;

// Match a VMOVIMM whose decoded modified-immediate is an all-zeros 32-bit
// element (i.e. a vector of all zero bits).
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 32 && EltVal == 0);
}]>;

// Match a VMOVIMM whose decoded modified-immediate is an all-ones 8-bit
// element (i.e. a vector of all one bits).
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
  unsigned EltBits = 0;
  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
  return (EltBits == 8 && EltVal == 0xff);
}]>;

//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                   IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                   IIC_fpStore_m, "",
                   [(store (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {

// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;   // Rm == 0b1111 encodes the no-writeback form.
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VLD1d8   : VLD1D<{0,0,0,?}, "8">;
def VLD1d16  : VLD1D<{0,1,0,?}, "16">;
def VLD1d32  : VLD1D<{1,0,0,?}, "32">;
def VLD1d64  : VLD1D<{1,1,0,?}, "64">;

def VLD1q8   : VLD1Q<{0,0,?,?}, "8">;
def VLD1q16  : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32  : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64  : VLD1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8">;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8">;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
// NOTE(review): the 3- and 4-register writeback forms below reuse the
// IIC_VLD1x2u itinerary rather than an x3u/x4u one — confirm intended.
multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T  : VLD1D3<{0,0,0,?}, "8">;
def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8">;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;

def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q  : VLD1D4<{0,0,?,?}, "8">;
def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;

defm VLD1d8Qwb  : VLD1D4WB<{0,0,?,?}, "8">;
defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;

def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;

// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins addrmode6:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
def VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
def VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;

def VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
def VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
def VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;

def VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;

// ...with double-spaced registers
def VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
def VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
def VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;

// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8  : VLD3D<0b0100, {0,0,0,?}, "8">;
def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">;
def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>;
def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;

// ...with double-spaced registers:
def VLD3q8     : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16    : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32    : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo  : VLDQQQQPseudo<IIC_VLD3>;
def
VLD3q16oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 891def VLD3q32oddPseudo : VLDQQQQPseudo<IIC_VLD3>; 892 893def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 894def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 895def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>; 896 897// VLD4 : Vector Load (multiple 4-element structures) 898class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> 899 : NLdSt<0, 0b10, op11_8, op7_4, 900 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), 901 (ins addrmode6:$Rn), IIC_VLD4, 902 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { 903 let Rm = 0b1111; 904 let Inst{5-4} = Rn{5-4}; 905 let DecoderMethod = "DecodeVLDST4Instruction"; 906} 907 908def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; 909def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; 910def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; 911 912def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>; 913def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>; 914def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>; 915 916// ...with address register writeback: 917class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 918 : NLdSt<0, 0b10, op11_8, op7_4, 919 (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), 920 (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, 921 "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", 922 "$Rn.addr = $wb", []> { 923 let Inst{5-4} = Rn{5-4}; 924 let DecoderMethod = "DecodeVLDST4Instruction"; 925} 926 927def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; 928def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; 929def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; 930 931def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 932def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 933def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>; 934 935// ...with double-spaced registers: 936def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; 937def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; 938def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; 939def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; 940def VLD4q16_UPD : 
VLD4DWB<0b0001, {0,1,?,?}, "16">; 941def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; 942 943def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 944def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 945def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 946 947// ...alternate versions to be allocated odd register numbers: 948def VLD4q8oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 949def VLD4q16oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 950def VLD4q32oddPseudo : VLDQQQQPseudo<IIC_VLD4>; 951 952def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 953def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 954def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; 955 956} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 957 958// Classes for VLD*LN pseudo-instructions with multi-register operands. 959// These are expanded to real instructions after register allocation. 960class VLDQLNPseudo<InstrItinClass itin> 961 : PseudoNLdSt<(outs QPR:$dst), 962 (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 963 itin, "$src = $dst">; 964class VLDQLNWBPseudo<InstrItinClass itin> 965 : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), 966 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 967 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 968class VLDQQLNPseudo<InstrItinClass itin> 969 : PseudoNLdSt<(outs QQPR:$dst), 970 (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 971 itin, "$src = $dst">; 972class VLDQQLNWBPseudo<InstrItinClass itin> 973 : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), 974 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 975 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; 976class VLDQQQQLNPseudo<InstrItinClass itin> 977 : PseudoNLdSt<(outs QQQQPR:$dst), 978 (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 979 itin, "$src = $dst">; 980class VLDQQQQLNWBPseudo<InstrItinClass itin> 981 : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), 982 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 983 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = 
$dst">; 984 985// VLD1LN : Vector Load (single element to one lane) 986class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 987 PatFrag LoadOp> 988 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 989 (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), 990 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 991 "$src = $Vd", 992 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 993 (i32 (LoadOp addrmode6:$Rn)), 994 imm:$lane))]> { 995 let Rm = 0b1111; 996 let DecoderMethod = "DecodeVLD1LN"; 997} 998class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 999 PatFrag LoadOp> 1000 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), 1001 (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), 1002 IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", 1003 "$src = $Vd", 1004 [(set DPR:$Vd, (vector_insert (Ty DPR:$src), 1005 (i32 (LoadOp addrmode6oneL32:$Rn)), 1006 imm:$lane))]> { 1007 let Rm = 0b1111; 1008 let DecoderMethod = "DecodeVLD1LN"; 1009} 1010class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln> { 1011 let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), 1012 (i32 (LoadOp addrmode6:$addr)), 1013 imm:$lane))]; 1014} 1015 1016def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { 1017 let Inst{7-5} = lane{2-0}; 1018} 1019def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { 1020 let Inst{7-6} = lane{1-0}; 1021 let Inst{5-4} = Rn{5-4}; 1022} 1023def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { 1024 let Inst{7} = lane{0}; 1025 let Inst{5-4} = Rn{5-4}; 1026} 1027 1028def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>; 1029def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>; 1030def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>; 1031 1032def : Pat<(vector_insert (v2f32 DPR:$src), 1033 (f32 (load addrmode6:$addr)), imm:$lane), 1034 (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1035def : Pat<(vector_insert (v4f32 QPR:$src), 1036 (f32 (load addrmode6:$addr)), imm:$lane), 1037 
(VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1038 1039let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { 1040 1041// ...with address register writeback: 1042class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1043 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), 1044 (ins addrmode6:$Rn, am6offset:$Rm, 1045 DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, 1046 "\\{$Vd[$lane]\\}, $Rn$Rm", 1047 "$src = $Vd, $Rn.addr = $wb", []> { 1048 let DecoderMethod = "DecodeVLD1LN"; 1049} 1050 1051def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { 1052 let Inst{7-5} = lane{2-0}; 1053} 1054def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { 1055 let Inst{7-6} = lane{1-0}; 1056 let Inst{4} = Rn{4}; 1057} 1058def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { 1059 let Inst{7} = lane{0}; 1060 let Inst{5} = Rn{4}; 1061 let Inst{4} = Rn{4}; 1062} 1063 1064def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1065def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1066def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>; 1067 1068// VLD2LN : Vector Load (single 2-element structure to one lane) 1069class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 1070 : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), 1071 (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), 1072 IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", 1073 "$src1 = $Vd, $src2 = $dst2", []> { 1074 let Rm = 0b1111; 1075 let Inst{4} = Rn{4}; 1076 let DecoderMethod = "DecodeVLD2LN"; 1077} 1078 1079def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { 1080 let Inst{7-5} = lane{2-0}; 1081} 1082def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { 1083 let Inst{7-6} = lane{1-0}; 1084} 1085def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { 1086 let Inst{7} = lane{0}; 1087} 1088 1089def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1090def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1091def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>; 1092 
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
            "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;

// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
             nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;

// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
            IIC_VLD3lnu, "vld3", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
            []> {
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;

def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
  let Inst{7} = lane{0};
}

def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;

// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
            (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
             nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
            "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
            "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
}

def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;

// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;

// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4,
            (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
            (ins addrmode6:$Rn, am6offset:$Rm,
             DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
            IIC_VLD4lnu, "vld4", Dt,
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
            []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4LN" ;
}

def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;

def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{5};
}

def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
          (ins addrmode6dup:$Rn),
          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListOneDAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}
def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;

// f32 splats reuse the 32-bit integer dup-load instructions.
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPd32 addrmode6:$addr)>;

class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD1dup,
          "vld1", Dt, "$Vd, $Rn", "",
          [(set VecListDPairAllLanes:$Vd,
                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD1DupInstruction";
}

def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;

def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
          (VLD1DUPq32 addrmode6:$addr)>;

let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}
multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD1DupInstruction";
  }
}

defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8">;
defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;

defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;

// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
          (ins addrmode6dup:$Rn), IIC_VLD2dup,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD2DupInstruction";
}

def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;

// ...with double-spaced registers
def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
                     (outs VdTy:$Vd, GPR:$wb),
                     (ins addrmode6dup:$Rn), IIC_VLD2dupu,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
                        (outs VdTy:$Vd, GPR:$wb),
                        (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLD2DupInstruction";
  }
}

defm VLD2DUPd8wb  : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;

defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;

// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6dup:$Rn), IIC_VLD3dup,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;

def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;

// ...with double-spaced registers (not used for codegen):
def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;

// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = 0;
  let DecoderMethod = "DecodeVLD3DupInstruction";
}

def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;

def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;

def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;

// VLD4DUP : Vector Load (single 4-element structure to all lanes)
class VLD4DUP<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6dup:$Rn), IIC_VLD4dup,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;

// ...with double-spaced registers (not used for codegen):
def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
  : NLdSt<1, 0b10, 0b1111, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
          "$Rn.addr = $wb", []> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLD4DupInstruction";
}

def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }

def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;

} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1

let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

// Classes for VST* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VSTQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
class VSTQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, QQPR:$src), itin,
                "$addr.addr = $wb">;
class VSTQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
                "$addr.addr = $wb">;

class VSTQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb">;

// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VST1Q<bits<4> op7_4, string Dt>
  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def VST1d8  : VST1D<{0,0,0,?}, "8">;
def VST1d16 : VST1D<{0,1,0,?}, "16">;
def VST1d32 : VST1D<{1,0,0,?}, "32">;
def VST1d64 : VST1D<{1,1,0,?}, "64">;

def VST1q8  : VST1Q<{0,0,?,?}, "8">;
def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;

// ...with address register writeback:
// NOTE(review): the VST1 writeback variants use IIC_VLD1u/IIC_VLD1x2u (load
// itineraries); this matches the text as written but looks like it may be an
// intentional reuse — confirm against the scheduling model.
multiclass VST1DWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
                        IIC_VLD1u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VST1QWB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
                        IIC_VLD1x2u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8">;
defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;

defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8">;
defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;

// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
          (ins addrmode6:$Rn, VecListThreeD:$Vd),
          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D3WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
                        IIC_VLD1x3u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
def VST1d16T : VST1D3<{0,1,0,?}, "16">;
def VST1d32T : VST1D3<{1,0,0,?}, "32">;
def VST1d64T : VST1D3<{1,1,0,?}, "64">;

defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8">;
defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;

def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;

// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
          (ins addrmode6:$Rn, VecListFourD:$Vd),
          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
          []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VST1D4WB<bits<4> op7_4, string Dt> {
  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                     (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
                     "vst1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
                        (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
                        IIC_VLD1x4u,
                        "vst1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
def VST1d64Q : VST1D4<{1,1,?,?}, "64">;

defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8">;
defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;

def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed    : VSTQQWBPseudo<IIC_VST1x4u>;
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;

// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin>
  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def VST2d8  : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;

def VST2q8  : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;

def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;

// ...with address register writeback:
1743multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, 1744 RegisterOperand VdTy> { 1745 def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1746 (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, 1747 "vst2", Dt, "$Vd, $Rn!", 1748 "$Rn.addr = $wb", []> { 1749 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 1750 let Inst{5-4} = Rn{5-4}; 1751 let DecoderMethod = "DecodeVLDST2Instruction"; 1752 } 1753 def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1754 (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, 1755 "vst2", Dt, "$Vd, $Rn, $Rm", 1756 "$Rn.addr = $wb", []> { 1757 let Inst{5-4} = Rn{5-4}; 1758 let DecoderMethod = "DecodeVLDST2Instruction"; 1759 } 1760} 1761multiclass VST2QWB<bits<4> op7_4, string Dt> { 1762 def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1763 (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, 1764 "vst2", Dt, "$Vd, $Rn!", 1765 "$Rn.addr = $wb", []> { 1766 let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
1767 let Inst{5-4} = Rn{5-4}; 1768 let DecoderMethod = "DecodeVLDST2Instruction"; 1769 } 1770 def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), 1771 (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), 1772 IIC_VLD1u, 1773 "vst2", Dt, "$Vd, $Rn, $Rm", 1774 "$Rn.addr = $wb", []> { 1775 let Inst{5-4} = Rn{5-4}; 1776 let DecoderMethod = "DecodeVLDST2Instruction"; 1777 } 1778} 1779 1780defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; 1781defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; 1782defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; 1783 1784defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; 1785defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; 1786defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; 1787 1788def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1789def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1790def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>; 1791def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1792def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1793def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>; 1794 1795// ...with double-spaced registers 1796def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; 1797def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; 1798def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; 1799defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; 1800defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; 1801defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; 1802 1803// VST3 : Vector Store (multiple 3-element structures) 1804class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> 1805 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1806 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, 1807 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { 1808 let Rm = 0b1111; 1809 let Inst{4} = Rn{4}; 1810 
let DecoderMethod = "DecodeVLDST3Instruction"; 1811} 1812 1813def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; 1814def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; 1815def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; 1816 1817def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>; 1818def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>; 1819def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>; 1820 1821// ...with address register writeback: 1822class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1823 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1824 (ins addrmode6:$Rn, am6offset:$Rm, 1825 DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, 1826 "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", 1827 "$Rn.addr = $wb", []> { 1828 let Inst{4} = Rn{4}; 1829 let DecoderMethod = "DecodeVLDST3Instruction"; 1830} 1831 1832def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; 1833def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; 1834def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; 1835 1836def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1837def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1838def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>; 1839 1840// ...with double-spaced registers: 1841def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; 1842def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; 1843def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; 1844def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; 1845def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; 1846def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; 1847 1848def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1849def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1850def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1851 1852// ...alternate versions to be allocated odd register numbers: 1853def VST3q8oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1854def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1855def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>; 1856 1857def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1858def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1859def 
VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>; 1860 1861// VST4 : Vector Store (multiple 4-element structures) 1862class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> 1863 : NLdSt<0, 0b00, op11_8, op7_4, (outs), 1864 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), 1865 IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", 1866 "", []> { 1867 let Rm = 0b1111; 1868 let Inst{5-4} = Rn{5-4}; 1869 let DecoderMethod = "DecodeVLDST4Instruction"; 1870} 1871 1872def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; 1873def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; 1874def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; 1875 1876def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>; 1877def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>; 1878def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>; 1879 1880// ...with address register writeback: 1881class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> 1882 : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), 1883 (ins addrmode6:$Rn, am6offset:$Rm, 1884 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, 1885 "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", 1886 "$Rn.addr = $wb", []> { 1887 let Inst{5-4} = Rn{5-4}; 1888 let DecoderMethod = "DecodeVLDST4Instruction"; 1889} 1890 1891def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; 1892def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; 1893def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; 1894 1895def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1896def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1897def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>; 1898 1899// ...with double-spaced registers: 1900def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; 1901def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; 1902def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; 1903def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; 1904def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; 1905def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; 1906 1907def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1908def VST4q16Pseudo_UPD : 
VSTQQQQWBPseudo<IIC_VST4u>; 1909def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1910 1911// ...alternate versions to be allocated odd register numbers: 1912def VST4q8oddPseudo : VSTQQQQPseudo<IIC_VST4>; 1913def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>; 1914def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>; 1915 1916def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1917def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1918def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; 1919 1920} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 1921 1922// Classes for VST*LN pseudo-instructions with multi-register operands. 1923// These are expanded to real instructions after register allocation. 1924class VSTQLNPseudo<InstrItinClass itin> 1925 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), 1926 itin, "">; 1927class VSTQLNWBPseudo<InstrItinClass itin> 1928 : PseudoNLdSt<(outs GPR:$wb), 1929 (ins addrmode6:$addr, am6offset:$offset, QPR:$src, 1930 nohash_imm:$lane), itin, "$addr.addr = $wb">; 1931class VSTQQLNPseudo<InstrItinClass itin> 1932 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), 1933 itin, "">; 1934class VSTQQLNWBPseudo<InstrItinClass itin> 1935 : PseudoNLdSt<(outs GPR:$wb), 1936 (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, 1937 nohash_imm:$lane), itin, "$addr.addr = $wb">; 1938class VSTQQQQLNPseudo<InstrItinClass itin> 1939 : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), 1940 itin, "">; 1941class VSTQQQQLNWBPseudo<InstrItinClass itin> 1942 : PseudoNLdSt<(outs GPR:$wb), 1943 (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, 1944 nohash_imm:$lane), itin, "$addr.addr = $wb">; 1945 1946// VST1LN : Vector Store (single element from one lane) 1947class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1948 PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> 1949 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 1950 (ins AddrMode:$Rn, DPR:$Vd, 
nohash_imm:$lane), 1951 IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", 1952 [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { 1953 let Rm = 0b1111; 1954 let DecoderMethod = "DecodeVST1LN"; 1955} 1956class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> 1957 : VSTQLNPseudo<IIC_VST1ln> { 1958 let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 1959 addrmode6:$addr)]; 1960} 1961 1962def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, 1963 NEONvgetlaneu, addrmode6> { 1964 let Inst{7-5} = lane{2-0}; 1965} 1966def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, 1967 NEONvgetlaneu, addrmode6> { 1968 let Inst{7-6} = lane{1-0}; 1969 let Inst{4} = Rn{4}; 1970} 1971 1972def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, 1973 addrmode6oneL32> { 1974 let Inst{7} = lane{0}; 1975 let Inst{5-4} = Rn{5-4}; 1976} 1977 1978def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>; 1979def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>; 1980def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>; 1981 1982def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), 1983 (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; 1984def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), 1985 (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; 1986 1987// ...with address register writeback: 1988class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, 1989 PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> 1990 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 1991 (ins AdrMode:$Rn, am6offset:$Rm, 1992 DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, 1993 "\\{$Vd[$lane]\\}, $Rn$Rm", 1994 "$Rn.addr = $wb", 1995 [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), 1996 AdrMode:$Rn, am6offset:$Rm))]> { 1997 let DecoderMethod = "DecodeVST1LN"; 1998} 1999class VST1QLNWBPseudo<ValueType Ty, 
PatFrag StoreOp, SDNode ExtractOp> 2000 : VSTQLNWBPseudo<IIC_VST1lnu> { 2001 let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), 2002 addrmode6:$addr, am6offset:$offset))]; 2003} 2004 2005def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, 2006 NEONvgetlaneu, addrmode6> { 2007 let Inst{7-5} = lane{2-0}; 2008} 2009def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, 2010 NEONvgetlaneu, addrmode6> { 2011 let Inst{7-6} = lane{1-0}; 2012 let Inst{4} = Rn{4}; 2013} 2014def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, 2015 extractelt, addrmode6oneL32> { 2016 let Inst{7} = lane{0}; 2017 let Inst{5-4} = Rn{5-4}; 2018} 2019 2020def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>; 2021def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>; 2022def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>; 2023 2024let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { 2025 2026// VST2LN : Vector Store (single 2-element structure from one lane) 2027class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2028 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2029 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), 2030 IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", 2031 "", []> { 2032 let Rm = 0b1111; 2033 let Inst{4} = Rn{4}; 2034 let DecoderMethod = "DecodeVST2LN"; 2035} 2036 2037def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { 2038 let Inst{7-5} = lane{2-0}; 2039} 2040def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { 2041 let Inst{7-6} = lane{1-0}; 2042} 2043def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { 2044 let Inst{7} = lane{0}; 2045} 2046 2047def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2048def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2049def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>; 2050 2051// ...with double-spaced registers: 2052def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, 
"16"> { 2053 let Inst{7-6} = lane{1-0}; 2054 let Inst{4} = Rn{4}; 2055} 2056def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { 2057 let Inst{7} = lane{0}; 2058 let Inst{4} = Rn{4}; 2059} 2060 2061def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>; 2062def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; 2063 2064// ...with address register writeback: 2065class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2066 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2067 (ins addrmode6:$Rn, am6offset:$Rm, 2068 DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, 2069 "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", 2070 "$Rn.addr = $wb", []> { 2071 let Inst{4} = Rn{4}; 2072 let DecoderMethod = "DecodeVST2LN"; 2073} 2074 2075def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { 2076 let Inst{7-5} = lane{2-0}; 2077} 2078def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { 2079 let Inst{7-6} = lane{1-0}; 2080} 2081def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { 2082 let Inst{7} = lane{0}; 2083} 2084 2085def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2086def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2087def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>; 2088 2089def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { 2090 let Inst{7-6} = lane{1-0}; 2091} 2092def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { 2093 let Inst{7} = lane{0}; 2094} 2095 2096def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>; 2097def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>; 2098 2099// VST3LN : Vector Store (single 3-element structure from one lane) 2100class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2101 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2102 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, 2103 nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, 2104 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { 2105 let Rm = 0b1111; 2106 let DecoderMethod = "DecodeVST3LN"; 2107} 2108 2109def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { 
2110 let Inst{7-5} = lane{2-0}; 2111} 2112def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { 2113 let Inst{7-6} = lane{1-0}; 2114} 2115def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { 2116 let Inst{7} = lane{0}; 2117} 2118 2119def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2120def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2121def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>; 2122 2123// ...with double-spaced registers: 2124def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { 2125 let Inst{7-6} = lane{1-0}; 2126} 2127def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { 2128 let Inst{7} = lane{0}; 2129} 2130 2131def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2132def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>; 2133 2134// ...with address register writeback: 2135class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2136 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2137 (ins addrmode6:$Rn, am6offset:$Rm, 2138 DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), 2139 IIC_VST3lnu, "vst3", Dt, 2140 "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", 2141 "$Rn.addr = $wb", []> { 2142 let DecoderMethod = "DecodeVST3LN"; 2143} 2144 2145def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { 2146 let Inst{7-5} = lane{2-0}; 2147} 2148def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { 2149 let Inst{7-6} = lane{1-0}; 2150} 2151def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { 2152 let Inst{7} = lane{0}; 2153} 2154 2155def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2156def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2157def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>; 2158 2159def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { 2160 let Inst{7-6} = lane{1-0}; 2161} 2162def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { 2163 let Inst{7} = lane{0}; 2164} 2165 2166def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>; 2167def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>; 2168 2169// VST4LN : Vector Store (single 
4-element structure from one lane) 2170class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt> 2171 : NLdStLn<1, 0b00, op11_8, op7_4, (outs), 2172 (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, 2173 nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, 2174 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", 2175 "", []> { 2176 let Rm = 0b1111; 2177 let Inst{4} = Rn{4}; 2178 let DecoderMethod = "DecodeVST4LN"; 2179} 2180 2181def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { 2182 let Inst{7-5} = lane{2-0}; 2183} 2184def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { 2185 let Inst{7-6} = lane{1-0}; 2186} 2187def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { 2188 let Inst{7} = lane{0}; 2189 let Inst{5} = Rn{5}; 2190} 2191 2192def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2193def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2194def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>; 2195 2196// ...with double-spaced registers: 2197def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { 2198 let Inst{7-6} = lane{1-0}; 2199} 2200def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { 2201 let Inst{7} = lane{0}; 2202 let Inst{5} = Rn{5}; 2203} 2204 2205def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>; 2206def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>; 2207 2208// ...with address register writeback: 2209class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> 2210 : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), 2211 (ins addrmode6:$Rn, am6offset:$Rm, 2212 DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), 2213 IIC_VST4lnu, "vst4", Dt, 2214 "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", 2215 "$Rn.addr = $wb", []> { 2216 let Inst{4} = Rn{4}; 2217 let DecoderMethod = "DecodeVST4LN"; 2218} 2219 2220def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { 2221 let Inst{7-5} = lane{2-0}; 2222} 2223def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { 2224 let Inst{7-6} = lane{1-0}; 2225} 2226def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, 
"32"> { 2227 let Inst{7} = lane{0}; 2228 let Inst{5} = Rn{5}; 2229} 2230 2231def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2232def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2233def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>; 2234 2235def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { 2236 let Inst{7-6} = lane{1-0}; 2237} 2238def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { 2239 let Inst{7} = lane{0}; 2240 let Inst{5} = Rn{5}; 2241} 2242 2243def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; 2244def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; 2245 2246} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 2247 2248// Use vld1/vst1 for unaligned f64 load / store 2249def : Pat<(f64 (hword_alignedload addrmode6:$addr)), 2250 (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; 2251def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), 2252 (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2253def : Pat<(f64 (byte_alignedload addrmode6:$addr)), 2254 (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; 2255def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), 2256 (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; 2257def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), 2258 (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; 2259def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), 2260 (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; 2261 2262// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64 2263// load / store if it's legal. 
// v2f64 loads/stores are selected to the VLD1/VST1 form whose element size
// matches the proven alignment of the access (64-bit aligned -> q64,
// 32-bit -> q32, and so on down to byte-aligned -> q8).
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
          (VLD1q64 addrmode6:$addr)>;
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
          (VLD1q32 addrmode6:$addr)>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q32 addrmode6:$addr, QPR:$value)>;
// The sub-word-aligned forms are restricted to little-endian targets
// (Requires<[IsLE]>); no equivalent patterns are provided here for BE.
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
          (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
          (VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
          (VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;

//===----------------------------------------------------------------------===//
// NEON pattern fragments
//===----------------------------------------------------------------------===//

// Extract D sub-registers of Q registers.
// Each DSubReg_*_reg SDNodeXForm maps an immediate lane index to the target
// constant naming the D sub-register that contains that lane.  The divisor
// is the number of elements of that width per 64-bit D register (8 x i8,
// 4 x i16, 2 x i32, 1 x f64), so lane/divisor is the subreg ordinal.
// The asserts check that dsub_0..dsub_7 are consecutive enumerators, which
// the "ARM::dsub_0 + n" arithmetic relies on.
def DSubReg_i8_reg  : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
  assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Extract S sub-registers of Q/D registers.  The S lane index maps directly
// onto consecutive ssub_* enumerators (asserted below).
def SSubReg_f32_reg : SDNodeXForm<imm, [{
  assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;

// Translate lane numbers from Q registers to D subregs: mask the Q-register
// lane index down to its position within a single D register (8/4/2 lanes
// per D register for i8/i16/i32).
def SubReg_i8_lane  : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
}]>;
def SubReg_i16_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
}]>;
def SubReg_i32_lane : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
}]>;

//===----------------------------------------------------------------------===//
// Instruction Classes
//===----------------------------------------------------------------------===//

// Basic 2-register operations: double- and quad-register.
// Basic 2-register operations: double- and quad-register.
// The op24_23..op4 template parameters feed the corresponding encoding bit
// fields of the N2V base class; the literal passed between op11_7 and op4
// (0 here, 1 in N2VQ) distinguishes the D- from the Q-register form.
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
// Quad-register counterpart of N2VD: same encoding skeleton, QPR operands.
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;

// Basic 2-register intrinsics, both double- and quad-register.
// Like N2VD/N2VQ, but the pattern applies an SDPatternOperator (intrinsic)
// and the itinerary is a parameter instead of being fixed.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as above, but not predicated.
// Same as above, but not predicated (built on N2Vnp).  The first N2Vnp
// argument is the bits<2> field that N2VQIntXnp below exposes as op19_18;
// these two classes hardwire it to 0b10.
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                 bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Same as N2VQIntXnp but with Vd as a src register.
class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
                  bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
          itin, OpcodeStr, Dt, ResTy, OpTy,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
  // The accumulator input is tied to the destination register.
  let Constraints = "$src = $Vd";
}

// Narrow 2-register operations.
// Narrow 2-register operations: Q-register source, D-register result.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyD, ValueType TyQ, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;

// Narrow 2-register intrinsics (same shape, SDPatternOperator pattern).
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;

// Long 2-register operations (currently only used for VMOVL):
// D-register source, Q-register result.
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;

// Long 2-register intrinsics.
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;

// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
// Shuffles write both registers, so both inputs are tied to the outputs.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
        OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
                  InstrItinClass itin, string OpcodeStr, string Dt>
  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
        "$src1 = $Vd, $src2 = $Vm", []>;

// Basic 3-register operations: double- and quad-register.
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VD but no data type.
class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// 3-register operation with a by-lane scalar second operand, 32-bit elements.
// NOTE(review): $Vm uses the restricted DPR_VFP2 class — presumably so the
// register number and lane index fit the scalar encoding; confirm against N3VLane32.
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType Ty, SDNode ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// Same, but for 16-bit elements ($Vm restricted to DPR_8).
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$Vn),
                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Quad-register form of N3VD.
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Same as N3VQ but no data type.
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
            InstrItinClass itin, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>{
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}
// Quad-register by-lane scalar form, 32-bit elements.
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}
// Quad-register by-lane scalar form, 16-bit elements.
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$Vn),
                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = 0;
}

// Basic 3-register intrinsics, both double- and quad-register.
// 3-register intrinsic, double-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Same, but not predicated (N3Vnp-based).
class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// 3-register intrinsic with a by-lane scalar operand, 32-bit elements.
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                           imm:$lane)))))]> {
  let isCommutable = 0;
}

// Same, but for 16-bit elements.
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (Ty DPR:$Vd),
              (Ty (IntOp (Ty DPR:$Vn),
                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
  let isCommutable = 0;
}
// Like N3VDInt, but the all-register operands appear in $Vm, $Vn order
// in both the assembly string and the pattern.
class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Quad-register form of N3VDInt.
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Quad-register form, not predicated.
class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                 bit op4, Format f, InstrItinClass itin, string OpcodeStr,
                 string Dt, ValueType ResTy, ValueType OpTy,
                 SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
          Dt, ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
                                       (OpTy QPR:$Vm))))]> {
  // The accumulator input is tied to the destination register.
  let Constraints = "$src = $Vd";
}

// Quad-register intrinsic with a by-lane scalar operand, 32-bit elements.
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// Same, but for 16-bit elements.
class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$Vn),
                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                 imm:$lane)))))]> {
  let isCommutable = 0;
}
// Like N3VQInt, but the all-register operands appear in $Vm, $Vn order
// in both the assembly string and the pattern.
class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
  let TwoOperandAliasConstraint = "$Vm = $Vd";
  let isCommutable = 0;
}

// Multiply-Add/Sub operations: double- and quad-register.
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;

// Multiply-Add/Sub with a by-lane scalar multiplicand, 32-bit elements.
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt,
                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
                                                     imm:$lane)))))))]>;
// Same, but for 16-bit elements.
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType Ty, SDNode MulOp, SDNode ShOp>
  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
        (outs DPR:$Vd),
        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (Ty DPR:$Vd),
              (Ty (ShOp (Ty DPR:$src1),
                        (Ty (MulOp DPR:$Vn,
                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
                                                     imm:$lane)))))))]>;

// Quad-register Multiply-Add/Sub.
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
                SDPatternOperator MulOp, SDPatternOperator OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
// Quad-register Multiply-Add/Sub with by-lane scalar, 32-bit elements.
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
                  SDPatternOperator MulOp, SDPatternOperator ShOp>
  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                              imm:$lane)))))))]>;
// Same, but for 16-bit elements.
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                    string OpcodeStr, string Dt,
                    ValueType ResTy, ValueType OpTy,
                    SDNode MulOp, SDNode ShOp>
  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (ShOp (ResTy QPR:$src1),
                           (ResTy (MulOp QPR:$Vn,
                                         (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                              imm:$lane)))))))]>;

// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
// Combines an intrinsic (IntOp) with an outer operation (OpNode) that also
// takes the tied accumulator $src1; double-register.
class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
// Quad-register form.
class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;

// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
                                     (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
                                     (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;

// Long Multiply-Add/Sub operations.
// Double-register multiply produces a quad-register accumulate.
class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                InstrItinClass itin, string OpcodeStr, string Dt,
                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (MulOp (TyD DPR:$Vn),
                                           (TyD DPR:$Vm)))))]>;
// Long Multiply-Add/Sub with a by-lane scalar multiplicand, 32-bit elements.
class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),
                                                 imm:$lane))))))]>;
// Long Multiply-Add/Sub with a by-lane scalar multiplicand, 16-bit elements.
class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set QPR:$Vd,
          (OpNode (TyQ QPR:$src1),
                  (TyQ (MulOp (TyD DPR:$Vn),
                              (TyD (NEONvduplane (TyD DPR_8:$Vm),
                                                 imm:$lane))))))]>;

// Long Intrinsic-Op vector operations with explicit extend (VABAL).
// The intrinsic result is extended (ExtOp) before being combined with the
// tied quad-register accumulator via OpNode.
class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                   SDNode OpNode>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
                               (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                                       (TyD DPR:$Vm)))))))]>;

// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd,
          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
// By-lane scalar variant, 32-bit elements.
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// By-lane scalar variant, 16-bit elements.
class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                   InstrItinClass itin, string OpcodeStr, string Dt,
                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd),
        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (ResTy QPR:$src1),
                            (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Narrowing 3-register intrinsics.
// Quad-register sources, double-register (narrowed) result.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
              SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operations.
// Double-register sources, quad-register (widened) result.
class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           InstrItinClass itin, string OpcodeStr, string Dt,
           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Long 3-register operation with a by-lane scalar operand, 32-bit elements.
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
// Same, but for 16-bit elements.
class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType TyQ, ValueType TyD, SDNode OpNode>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set QPR:$Vd,
          (TyQ (OpNode (TyD DPR:$Vn),
                       (TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;

// Long 3-register operations with explicitly extended operands.
// Both double-register operands are extended (ExtOp) before OpNode is applied.
class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
              bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics with explicit extend (VABDL).
// The double-register intrinsic result is extended to the quad destination.
class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
                 InstrItinClass itin, string OpcodeStr, string Dt,
                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
                 bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
                                               (TyD DPR:$Vm))))))]> {
  let isCommutable = Commutable;
}

// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
              InstrItinClass itin, string OpcodeStr, string Dt,
              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
  let isCommutable = Commutable;
}

// Same as above, but not predicated.
class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
                bit op4, InstrItinClass itin, string OpcodeStr,
                string Dt, ValueType ResTy, ValueType OpTy,
                SDPatternOperator IntOp, bit Commutable>
  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
          ResTy, OpTy, IntOp, Commutable,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;

// Long 3-register intrinsic with a by-lane scalar operand, 32-bit elements.
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
                                                imm:$lane)))))]>;
// Same, but for 16-bit elements.
class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
                  InstrItinClass itin, string OpcodeStr, string Dt,
                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
        [(set (ResTy QPR:$Vd),
              (ResTy (IntOp (OpTy DPR:$Vn),
                            (OpTy (NEONvduplane (OpTy DPR_8:$Vm),
                                                imm:$lane)))))]>;

// Wide 3-register operations.
// $Vn is a quad register; $Vm is a double register extended (ExtOp) to match.
class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
           SDNode OpNode, SDNode ExtOp, bit Commutable>
  : N3V<op24, op23, op21_20, op11_8, 0, op4,
        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
                               (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
  // All of these have a two-operand InstAlias.
  let TwoOperandAliasConstraint = "$Vn = $Vd";
  let isCommutable = Commutable;
}

// Pairwise long 2-register intrinsics, both double- and quad-register.
class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                bits<2> op17_16, bits<5> op11_7, bit op4,
                string OpcodeStr, string Dt,
                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

// Pairwise long 2-register accumulate intrinsics,
// both double- and quad-register.
// The destination register is also used as the first source operand register.
class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4,
                 string OpcodeStr, string Dt,
                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;

// Shift by immediate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
             Format f, InstrItinClass itin, Operand ImmTy,
             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
}

// Long shift by immediate.
// Double-register source shifted into a quad-register (widened) result.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Narrow shift by immediate.
// Quad-register source shifted into a double-register (narrowed) result.
class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
             InstrItinClass itin, string OpcodeStr, string Dt,
             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VImm<op24, op23, op11_8, op7, op6, op4,
           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
                                         (i32 imm:$SIMM))))]>;

// Shift right by immediate and accumulate,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (add DPR:$src1,
                                   (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, string OpcodeStr, string Dt,
                ValueType Ty, SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (add QPR:$src1,
                                   (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
}

// Shift by immediate and insert,
// both double- and quad-register.
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
                Operand ImmTy, Format f, string OpcodeStr, string Dt,
                ValueType Ty,SDNode ShOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
}

// Convert, with fractional bits immediate,
// both double- and quad-register.
class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 0, op4,
           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
              SDPatternOperator IntOp>
  : N2VImm<op24, op23, op11_8, op7, 1, op4,
           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;

//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//

// Abbreviations used in multiclass suffixes:
//   Q = quarter int (8 bit) elements
//   H = half int (16 bit) elements
//   S = single int (32 bit) elements
//   D = double int (64 bit) elements

// Neon 2-register vector operations and intrinsics.

// Neon 2-register comparisons (unary pattern, e.g. compare against zero).
// Source operand element sizes of 8, 16 and 32 bits; the integer size-0b10
// encoding is reused for the f32 variants with instruction bit 10 (F) forced
// to 1.
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op4, string opc, string Dt,
                       string asm, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>;
  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>;
  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>;
  // f32 comparison: result is an integer mask (v2i32).
  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }

  // 128-bit vector types.
  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "8"), asm, "",
                  [(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>;
  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "16"), asm, "",
                  [(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>;
  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, !strconcat(Dt, "32"), asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>;
  // f32 comparison: result is an integer mask (v4i32).
  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
                  opc, "f32", asm, "",
                  [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
    let Inst{10} = 1; // overwrite F = 1
  }
}


// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      InstrItinClass itinD, InstrItinClass itinQ,
                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}


// Neon Narrowing 2-register vector operations,
// source operand element sizes of 16, 32 and 64 bits:
// (the Dt suffix names the wider *source* element size)
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                    bits<5> op11_7, bit op6, bit op4,
                    InstrItinClass itin, string OpcodeStr, string Dt,
                    SDNode OpNode> {
  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "16"),
                   v8i8, v8i16, OpNode>;
  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "32"),
                   v4i16, v4i32, OpNode>;
  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                   itin, OpcodeStr, !strconcat(Dt, "64"),
                   v2i32, v2i64, OpNode>;
}

// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                       bits<5> op11_7, bit op6, bit op4,
                       InstrItinClass itin, string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp>;
  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp>;
  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
                      itin, OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp>;
}


// Neon Lengthening 2-register vector intrinsic (currently
// specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
                    string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}


// Neon 3-register vector operations.

// First with only element sizes of 8, 16 and 32 bits:
multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                   InstrItinClass itinD16, InstrItinClass itinD32,
                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                   string OpcodeStr, string Dt,
                   SDNode OpNode, bit Commutable = 0> {
  // 64-bit vector types.
  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i8, v8i8, OpNode, Commutable>;
  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i16, v4i16, OpNode, Commutable>;
  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i32, v2i32, OpNode, Commutable>;

  // 128-bit vector types.
  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v16i8, v16i8, OpNode, Commutable>;
  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v8i16, v8i16, OpNode, Commutable>;
  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v4i32, v4i32, OpNode, Commutable>;
}

// Scalar ("lane") forms; only 16- and 32-bit elements are supported.
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
                     v4i32, v2i32, ShOp>;
}

// ....then also with element size 64 bits:
multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itinD, InstrItinClass itinQ,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0>
  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
            OpcodeStr, Dt, OpNode, Commutable> {
  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v1i64, v1i64, OpNode, Commutable>;
  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
                   OpcodeStr, !strconcat(Dt, "64"),
                   v2i64, v2i64, OpNode, Commutable>;
}


// Neon 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                     InstrItinClass itinD16, InstrItinClass itinD32,
                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                     string OpcodeStr, string Dt,
                     SDPatternOperator IntOp, bit Commutable = 0> {
  // 64-bit vector types.
  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i16, v4i16, IntOp, Commutable>;
  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i32, v2i32, IntOp, Commutable>;

  // 128-bit vector types.
  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i16, v8i16, IntOp, Commutable>;
  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i32, v4i32, IntOp, Commutable>;
}
// Same as N3VInt_HS, but using the "Sh" (shift-style, reversed-operand)
// instruction classes; no Commutable flag.
multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v4i16, v4i16, IntOp>;
  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"),
                        v8i16, v8i16, IntOp>;
  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"),
                        v4i32, v4i32, IntOp>;
}

// Scalar ("lane") intrinsic forms; 16- and 32-bit elements only.
multiclass N3VIntSL_HS<bits<4> op11_8,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                      InstrItinClass itinD16, InstrItinClass itinD32,
                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
              OpcodeStr, Dt, IntOp, Commutable> {
  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i8, v8i8, IntOp, Commutable>;
  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v16i8, v16i8, IntOp, Commutable>;
}
// "Sh" variant of N3VInt_QHS.
multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt,
                        SDPatternOperator IntOp>
  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                OpcodeStr, Dt, IntOp> {
  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v8i8, v8i8, IntOp>;
  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"),
                        v16i8, v16i8, IntOp>;
}


// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                       InstrItinClass itinD16, InstrItinClass itinD32,
                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
               OpcodeStr, Dt, IntOp, Commutable> {
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v1i64, v1i64, IntOp, Commutable>;
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
// "Sh" variant of N3VInt_QHSD.
multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt,
                         SDPatternOperator IntOp>
  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
                 OpcodeStr, Dt, IntOp> {
  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v1i64, v1i64, IntOp>;
  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
                        OpcodeStr, !strconcat(Dt, "64"),
                        v2i64, v2i64, IntOp>;
}

// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0> {
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v8i8, v8i16, IntOp, Commutable>;
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v4i16, v4i32, IntOp, Commutable>;
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      OpcodeStr, !strconcat(Dt, "64"),
                      v2i32, v2i64, IntOp, Commutable>;
}


// Neon Long 3-register vector operations.

multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    InstrItinClass itin16, InstrItinClass itin32,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, bit Commutable = 0> {
  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, Commutable>;
  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, Commutable>;
  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, Commutable>;
}

// Long scalar ("lane") forms; 16- and 32-bit source elements only.
multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
                     InstrItinClass itin, string OpcodeStr, string Dt,
                     SDNode OpNode> {
  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}

// Long operations built from a plain OpNode applied to ExtOp-extended inputs.
multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"),
                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
}

// Neon Long 3-register vector intrinsics.

// First with only element sizes of 16 and 32 bits:
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      InstrItinClass itin16, InstrItinClass itin32,
                      string OpcodeStr, string Dt,
                      SDPatternOperator IntOp, bit Commutable = 0> {
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "16"),
                      v4i32, v4i16, IntOp, Commutable>;
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
                      OpcodeStr, !strconcat(Dt, "32"),
                      v2i64, v2i32, IntOp, Commutable>;
}

// Long scalar ("lane") intrinsic forms; 16- and 32-bit source elements only.
multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
                        InstrItinClass itin, string OpcodeStr, string Dt,
                        SDPatternOperator IntOp> {
  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
}

// ....then also with element size of 8 bits:
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itin16, InstrItinClass itin32,
                       string OpcodeStr, string Dt,
                       SDPatternOperator IntOp, bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
               IntOp, Commutable> {
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
                      OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}

// ....with explicit extend (VABDL).
// Long intrinsic applied to narrow operands, with the result widened by ExtOp.
multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                          InstrItinClass itin, string OpcodeStr, string Dt,
                          SDPatternOperator IntOp, SDNode ExtOp,
                          bit Commutable = 0> {
  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "8"),
                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "16"),
                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
                         OpcodeStr, !strconcat(Dt, "32"),
                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
}


// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                    string OpcodeStr, string Dt,
                    SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "8"),
                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "16"),
                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
                   OpcodeStr, !strconcat(Dt, "32"),
                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
}


// Neon Multiply-Op vector operations (OpNode combined with a mul),
// element sizes of 8, 16 and 32 bits:
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD16, InstrItinClass itinD32,
                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, string Dt, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
}

// Multiply-Op scalar ("lane") forms; 16- and 32-bit elements only.
multiclass N3VMulOpSL_HS<bits<4> op11_8,
                         InstrItinClass itinD16, InstrItinClass itinD32,
                         InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, string Dt, SDNode ShOp> {
  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
                            mul, ShOp>;
  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
                          mul, ShOp>;
}

// Neon Intrinsic-Op vector operations (OpNode combined with IntOp),
// element sizes of 8, 16 and 32 bits:
multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        InstrItinClass itinD, InstrItinClass itinQ,
                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
                        SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
}

// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       InstrItinClass itinD, InstrItinClass itinQ,
                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}


// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         InstrItinClass itin16, InstrItinClass itin32,
                         string OpcodeStr, string Dt, SDNode MulOp,
                         SDNode OpNode> {
  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}

// Long Multiply-Op scalar ("lane") forms; 16- and 32-bit source elements only.
multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
                          string Dt, SDNode MulOp, SDNode OpNode> {
  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
}


// Neon Long 3-argument intrinsics.
3654 3655// First with only element sizes of 16 and 32 bits: 3656multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, 3657 InstrItinClass itin16, InstrItinClass itin32, 3658 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3659 def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, 3660 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; 3661 def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, 3662 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3663} 3664 3665multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, 3666 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3667 def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, 3668 OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>; 3669 def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, 3670 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; 3671} 3672 3673// ....then also with element size of 8 bits: 3674multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3675 InstrItinClass itin16, InstrItinClass itin32, 3676 string OpcodeStr, string Dt, SDPatternOperator IntOp> 3677 : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { 3678 def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, 3679 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; 3680} 3681 3682// ....with explicit extend (VABAL). 
3683multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, 3684 InstrItinClass itin, string OpcodeStr, string Dt, 3685 SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> { 3686 def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin, 3687 OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, 3688 IntOp, ExtOp, OpNode>; 3689 def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin, 3690 OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, 3691 IntOp, ExtOp, OpNode>; 3692 def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin, 3693 OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, 3694 IntOp, ExtOp, OpNode>; 3695} 3696 3697 3698// Neon Pairwise long 2-register intrinsics, 3699// element sizes of 8, 16 and 32 bits: 3700multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3701 bits<5> op11_7, bit op4, 3702 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3703 // 64-bit vector types. 3704 def v8i8 : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3705 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3706 def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3707 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3708 def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3709 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 3710 3711 // 128-bit vector types. 
3712 def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3713 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 3714 def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3715 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 3716 def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3717 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 3718} 3719 3720 3721// Neon Pairwise long 2-register accumulate intrinsics, 3722// element sizes of 8, 16 and 32 bits: 3723multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, 3724 bits<5> op11_7, bit op4, 3725 string OpcodeStr, string Dt, SDPatternOperator IntOp> { 3726 // 64-bit vector types. 3727 def v8i8 : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3728 OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; 3729 def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3730 OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; 3731 def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3732 OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; 3733 3734 // 128-bit vector types. 3735 def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4, 3736 OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; 3737 def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4, 3738 OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; 3739 def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4, 3740 OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; 3741} 3742 3743 3744// Neon 2-register vector shift by immediate, 3745// with f of either N2RegVShLFrm or N2RegVShRFrm 3746// element sizes of 8, 16, 32 and 64 bits: 3747multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3748 InstrItinClass itin, string OpcodeStr, string Dt, 3749 SDNode OpNode> { 3750 // 64-bit vector types. 
3751 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3752 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 3753 let Inst{21-19} = 0b001; // imm6 = 001xxx 3754 } 3755 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3756 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 3757 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3758 } 3759 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3760 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 3761 let Inst{21} = 0b1; // imm6 = 1xxxxx 3762 } 3763 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 3764 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 3765 // imm6 = xxxxxx 3766 3767 // 128-bit vector types. 3768 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3769 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 3770 let Inst{21-19} = 0b001; // imm6 = 001xxx 3771 } 3772 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3773 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 3774 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3775 } 3776 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, 3777 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 3778 let Inst{21} = 0b1; // imm6 = 1xxxxx 3779 } 3780 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, 3781 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 3782 // imm6 = xxxxxx 3783} 3784multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3785 InstrItinClass itin, string OpcodeStr, string Dt, 3786 string baseOpc, SDNode OpNode> { 3787 // 64-bit vector types. 
3788 def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 3789 OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { 3790 let Inst{21-19} = 0b001; // imm6 = 001xxx 3791 } 3792 def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 3793 OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { 3794 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3795 } 3796 def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 3797 OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { 3798 let Inst{21} = 0b1; // imm6 = 1xxxxx 3799 } 3800 def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 3801 OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; 3802 // imm6 = xxxxxx 3803 3804 // 128-bit vector types. 3805 def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, 3806 OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { 3807 let Inst{21-19} = 0b001; // imm6 = 001xxx 3808 } 3809 def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, 3810 OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { 3811 let Inst{21-20} = 0b01; // imm6 = 01xxxx 3812 } 3813 def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, 3814 OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { 3815 let Inst{21} = 0b1; // imm6 = 1xxxxx 3816 } 3817 def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, 3818 OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; 3819 // imm6 = xxxxxx 3820} 3821 3822// Neon Shift-Accumulate vector operations, 3823// element sizes of 8, 16, 32 and 64 bits: 3824multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, 3825 string OpcodeStr, string Dt, SDNode ShOp> { 3826 // 64-bit vector types. 
  def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                       OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
                             // imm6 = xxxxxx
}

// Neon Shift-Insert vector operations,
//   with f of either N2RegVShLFrm or N2RegVShRFrm
//   element sizes of 8, 16, 32 and 64 bits:
// Left variant: plain i32imm shift amount, selected via the NEONvsli node.
multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                       N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
                             // imm6 = xxxxxx
}
// Right variant: shr_imm* shift amount, selected via the NEONvsri node.
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                          string OpcodeStr> {
  // 64-bit vector types.
  def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                       N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
                             // imm6 = xxxxxx

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
                             // imm6 = xxxxxx
}

// Neon Shift Long operations,
//   element sizes of 8, 16, 32 bits:
// Widens each element (D source -> Q destination); the imm1_* operand
// classes bound the shift by the source element width.
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                  OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
                OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

// Neon Shift Narrow operations,
//   element sizes of 16, 32, 64 bits:
// Narrows each element (Q source -> D destination).  Note the data-type
// suffix names the *source* element size ("16"/"32"/"64"), not the
// destination's.
multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
                      SDNode OpNode> {
  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                    OpcodeStr, !strconcat(Dt, "16"),
                    v8i8, v8i16, shr_imm8, OpNode> {
    let Inst{21-19} = 0b001; // imm6 = 001xxx
  }
  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "32"),
                     v4i16, v4i32, shr_imm16, OpNode> {
    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
  }
  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
                     OpcodeStr, !strconcat(Dt, "64"),
                     v2i32, v2i64, shr_imm32, OpNode> {
    let Inst{21} = 0b1;      // imm6 = 1xxxxx
  }
}

//===----------------------------------------------------------------------===//
// Instruction Definitions.
//===----------------------------------------------------------------------===//

// Vector Add Operations.

// VADD     : Vector Add (integer and floating-point)
defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
                         add, 1>;
def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                     v2f32, v2f32, fadd, 1>;
def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                     v4f32, v4f32, fadd, 1>;
// VADDL    : Vector Add Long (Q = D + D)
defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "s", add, sext, 1>;
defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vaddl", "u", add, zext, 1>;
// VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD    : Vector Halving Add
defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "s", int_arm_neon_vhadds, 1>;
defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD   : Vector Rounding Halving Add
defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD    : Vector Saturating Add
defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "s", int_arm_neon_vqadds, 1>;
defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
                            "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
                            int_arm_neon_vraddhn, 1>;

// VADDHN is defined with null_frag above; select it from the generic
// add + unsigned-shift-right + truncate idiom instead.
def : Pat<(v8i8  (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Multiply Operations.

// VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                     v2f32, v2f32, fmul, 1>;
def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                     v4f32, v4f32, fmul, 1>;
defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                       v2f32, fmul>;

// Fold a multiply by a duplicated Q-register lane into the by-scalar form,
// extracting the D subregister that actually holds the lane.
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                      (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
                      (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
                       (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
          (v4f32 (VMULslfq (v4f32 QPR:$src1),
                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>;


// A multiply by a scalar duplicated from an S register becomes a by-scalar
// multiply on lane 0 of that register.
def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfd DPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;
def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
          (VMULslfq QPR:$Rn,
            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
            (i32 0))>;


// VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
                          IIC_VMULi16Q, IIC_VMULi32Q,
                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
                            IIC_VMULi16Q, IIC_VMULi32Q,
                            "vqdmulh", "s", int_arm_neon_vqdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
                                       (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                            imm:$lane)))),
          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i16_reg imm:$lane))),
                                 (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
                                       (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                            imm:$lane)))),
          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                         (DSubReg_i32_reg imm:$lane))),
                                 (SubReg_i32_lane imm:$lane)))>;

// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
                              IIC_VMULi16Q, IIC_VMULi32Q,
                              "vqrdmulh", "s", int_arm_neon_vqrdmulh>;
def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
                                        (v8i16 (NEONvduplane (v8i16 QPR:$src2),
                                                             imm:$lane)))),
          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i16_reg imm:$lane))),
                                  (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
                                        (v4i32 (NEONvduplane (v4i32 QPR:$src2),
                                                             imm:$lane)))),
          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
                                          (DSubReg_i32_reg imm:$lane))),
                                  (SubReg_i32_lane imm:$lane)))>;

// VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "NEONData" in {
  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "s", NEONvmulls, 1>;
  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vmull", "u", NEONvmullu, 1>;
  def  VMULLp8  : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
                          v8i16, v8i8, int_arm_neon_vmullp, 1>;
  def  VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
                    Requires<[HasV8, HasCrypto]>;
}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;

// VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
                             "vqdmull", "s", int_arm_neon_vqdmull>;

// Vector Multiply-Accumulate and Multiply-Subtract Operations.

// VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                            v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                            v4f32, v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// Fold accumulate-of-(multiply by duplicated Q lane) into the by-scalar
// VMLA, extracting the D subregister that holds the lane.
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (add (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
                  (fmul_su (v4f32 QPR:$src2),
                         (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
                           (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "s", NEONvmulls, add>;
defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlal", "u", NEONvmullu, add>;

defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;

// VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlal", "s", null_frag>;
defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;

// VQDMLAL is defined with null_frag above; select it from the explicit
// saturating-add-of-vqdmull patterns.
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                     imm:$lane)))))),
          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
                    (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                     imm:$lane)))))),
          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// VMLS     : Vector Multiply Subtract (integer and floating-point)
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                            v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;
def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                            v4f32, v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON, UseFPVMLx]>;

// By-scalar folding patterns, mirroring the VMLA set above.
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                  (mul (v8i16 QPR:$src2),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i16_reg imm:$lane))),
                              (SubReg_i16_lane imm:$lane)))>;

def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
                  (mul (v4i32 QPR:$src2),
                       (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
                                      (DSubReg_i32_reg imm:$lane))),
                              (SubReg_i32_lane imm:$lane)))>;

def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
                 (fmul_su (v4f32 QPR:$src2),
                        (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
                                   (DSubReg_i32_reg imm:$lane))),
                           (SubReg_i32_lane imm:$lane)))>,
          Requires<[HasNEON, UseFPVMLx]>;

// VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
                              "vmlsl", "u", NEONvmullu, sub>;

defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;

// VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  :
N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
                            "vqdmlsl", "s", null_frag>;
// op11_8 is a bits<4> parameter: write the scalar form's opcode as the
// 4-bit literal 0b0111 (same zero-extended value as the old 3-bit 0b111)
// so its width matches the field and the sibling entries
// (VQDMLALsl = 0b0011, VMLSLsls = 0b0110).
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;

// VQDMLSL is defined with null_frag above; select it from the explicit
// saturating-subtract-of-vqdmull patterns.
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                                  (v4i16 DPR:$Vm))))),
          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                                  (v2i32 DPR:$Vm))))),
          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
                    (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
                                (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
                    (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
                                (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
                                                     imm:$lane)))))),
          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;

// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.

// VFMA     : Vector Fused Multiply Accumulate (floating-point)
def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
                          v2f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                          v4f32, fmul_su, fadd_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

// Fused Vector Multiply Subtract (floating-point)
def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                          v2f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                          v4f32, fmul_su, fsub_mlx>,
                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;

// Match @llvm.fma.* intrinsics.  Note the operand order swap: the intrinsic
// takes (Vn, Vm, addend) while the instruction takes the accumulator first.
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;
// fma with a negated multiplicand maps to the fused multiply-subtract.
def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
      Requires<[HasVFP4]>;
def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
      Requires<[HasVFP4]>;

// Vector Subtract Operations.

// VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
                         "vsub", "i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                     v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                     v4f32, v4f32, fsub, 0>;
// VSUBL    : Vector Subtract Long (Q = D - D)
defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "s", sub, sext, 0>;
defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                            "vsubl", "u", sub, zext, 0>;
// VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                            "vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
                            int_arm_neon_vrsubhn, 0>;

// VSUBHN is defined with null_frag above; select it from the generic
// sub + unsigned-shift-right + truncate idiom instead.
def : Pat<(v8i8  (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;

// Vector Comparisons.

// VCEQ     : Vector Compare Equal
defm VCEQ     : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                     NEONvceq, 1>;
def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                     NEONvceq, 1>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
                            "$Vd, $Vm, #0", NEONvceqz>;

// VCGE     : Vector Compare Greater Than or Equal
defm VCGEs    : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu    : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                     NEONvcge, 0>;
def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                     NEONvcge, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
                            "$Vd, $Vm, #0", NEONvcgez>;
defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
                            "$Vd, $Vm, #0", NEONvclez>;
}

// VCGT     : Vector Compare Greater Than
defm VCGTs    : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
defm VCGTu    : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
                        IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                     NEONvcgt, 0>;
def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                     NEONvcgt, 0>;

let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
                            "$Vd, $Vm, #0", NEONvcgtz>;
defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
                            "$Vd, $Vm, #0", NEONvcltz>;
}

// VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                        "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                        "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                        "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                        "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST     : Vector Test Bits
defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

// "vaclt"/"vacle" assembler mnemonics: encoded as VACGT/VACGE with the two
// source operands swapped.
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;

// Two-operand forms of the aliases above ($Vd doubles as a source).
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;

// Vector Bitwise Operations.

// Pattern fragments expressing bitwise NOT of a D or Q register as an XOR
// with an all-ones vector.
def vnotd : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v8i8  NEONimmAllOnesV)))>;
def vnotq : PatFrag<(ops node:$in),
                    (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;


// VAND     : Vector Bitwise AND
def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
                      v2i32, v2i32, and, 1>;
def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
                      v4i32, v4i32, and, 1>;

// VEOR     : Vector Bitwise Exclusive OR
def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
                      v2i32, v2i32, xor, 1>;
def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
                      v4i32, v4i32, xor, 1>;

// VORR     : Vector Bitwise OR
def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
                      v2i32, v2i32, or, 1>;
def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
                      v4i32, v4i32, or, 1>;

// VORR (immediate): ORs a modified-immediate splat into the destination
// ("$src = $Vd" ties the accumulator).
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}


// VBIC     : Vector Bitwise Bit Clear (AND NOT)
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
                                                (vnotd DPR:$Vm))))]>;
def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vbic", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
                                                (vnotq QPR:$Vm))))]>;
}

// VBIC (immediate): clears the bits of a modified-immediate splat in the
// destination.
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set DPR:$Vd,
                            (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
                          IIC_VMOVImm,
                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                          [(set QPR:$Vd,
                            (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
  let Inst{10-9} = SIMM{10-9};
}

// VORN     : Vector Bitwise OR NOT
def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
                                               (vnotd DPR:$Vm))))]>;
def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
                     "vorn", "$Vd, $Vn, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
                                               (vnotq QPR:$Vm))))]>;

// VMVN     : Vector Bitwise NOT (Immediate)

let isReMaterializable = 1 in {

def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmvn", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmvn", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}
}

// VMVN     : Vector Bitwise NOT
def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
                     "vmvn", "$Vd, $Vm", "",
                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;

// VBSL : Vector Bitwise Select
// Result = (Vn & src1) | (Vm & ~src1); src1 acts as the per-bit mask and is
// tied to the destination register.
def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VCNTiD,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set DPR:$Vd,
                           (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
// Map the target intrinsic onto VBSLd for every 64-bit element type.
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
          (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

// Match the open-coded select idiom (n & mask) | (m & ~mask) as VBSL.
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>,
        Requires<[HasNEON]>;

def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VCNTiQ,
                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     [(set QPR:$Vd,
                           (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;

// Map the target intrinsic onto VBSLq for every 128-bit element type.
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
                                    (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
          (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>,
        Requires<[HasNEON]>;

// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// NOTE(review): assembler/disassembler only — no selection pattern is given.
def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
// FIXME: This instruction's encoding MAY NOT BE correct.
// NOTE(review): like VBIF above, VBIT is assembler/disassembler only.
def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
                     N3RegFrm, IIC_VBINiD,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;
def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
                     N3RegFrm, IIC_VBINiQ,
                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
                     []>;

// VBIT/VBIF are not yet implemented.  The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
// inserts copies.

// Vector Absolute Differences.

// VABD : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vabd", "u", int_arm_neon_vabdu, 1>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

// VABA : Vector Absolute Difference and Accumulate
defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "s", int_arm_neon_vabds, add>;
defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                             "vaba", "u", int_arm_neon_vabdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;

// Vector Maximum and Minimum.

// VMAX : Vector Maximum
defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "s", int_arm_neon_vmaxs, 1>;
defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmax", "u", int_arm_neon_vmaxu, 1>;
def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmax", "f32",
                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmax", "f32",
                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;

// VMAXNM — ARMv8-only (IEEE 754-2008 maxNum semantics per the intrinsic);
// encoded in the v8 NEON decoder namespace.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMAXNMND  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
                            Requires<[HasV8, HasNEON]>;
  def VMAXNMNQ  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vmaxnm", "f32",
                            v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
                            Requires<[HasV8, HasNEON]>;
}

// VMIN : Vector Minimum
defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "s", int_arm_neon_vmins, 1>;
defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
                           "vmin", "u", int_arm_neon_vminu, 1>;
def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                        "vmin", "f32",
                        v2f32, v2f32, int_arm_neon_vmins, 1>;
def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                        "vmin", "f32",
                        v4f32, v4f32, int_arm_neon_vmins, 1>;

// VMINNM — ARMv8-only counterpart of VMAXNM.
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
  def VMINNMND  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v2f32, v2f32, int_arm_neon_vminnm, 1>,
                            Requires<[HasV8, HasNEON]>;
  def VMINNMNQ  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                            N3RegFrm, NoItinerary, "vminnm", "f32",
                            v4f32, v4f32, int_arm_neon_vminnm, 1>,
                            Requires<[HasV8, HasNEON]>;
}

// Vector Pairwise Operations.

// VPADD : Vector Pairwise Add (D registers only; not commutable)
def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i8",
                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i16",
                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
                        "vpadd", "i32",
                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                        IIC_VPBIND, "vpadd", "f32",
                        v2f32, v2f32, int_arm_neon_vpadd, 0>;

// VPADDL : Vector Pairwise Add Long
defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
                             int_arm_neon_vpaddls>;
defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
                             int_arm_neon_vpaddlu>;

// VPADAL : Vector Pairwise Add and Accumulate Long
defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
                              int_arm_neon_vpadals>;
defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
                              int_arm_neon_vpadalu>;

// VPMAX : Vector Pairwise Maximum (D registers only; not commutable)
def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;

// VPMIN : Vector Pairwise Minimum (D registers only; not commutable)
def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;

// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
4880 4881// VRECPE : Vector Reciprocal Estimate 4882def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4883 IIC_VUNAD, "vrecpe", "u32", 4884 v2i32, v2i32, int_arm_neon_vrecpe>; 4885def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, 4886 IIC_VUNAQ, "vrecpe", "u32", 4887 v4i32, v4i32, int_arm_neon_vrecpe>; 4888def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4889 IIC_VUNAD, "vrecpe", "f32", 4890 v2f32, v2f32, int_arm_neon_vrecpe>; 4891def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, 4892 IIC_VUNAQ, "vrecpe", "f32", 4893 v4f32, v4f32, int_arm_neon_vrecpe>; 4894 4895// VRECPS : Vector Reciprocal Step 4896def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4897 IIC_VRECSD, "vrecps", "f32", 4898 v2f32, v2f32, int_arm_neon_vrecps, 1>; 4899def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, 4900 IIC_VRECSQ, "vrecps", "f32", 4901 v4f32, v4f32, int_arm_neon_vrecps, 1>; 4902 4903// VRSQRTE : Vector Reciprocal Square Root Estimate 4904def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4905 IIC_VUNAD, "vrsqrte", "u32", 4906 v2i32, v2i32, int_arm_neon_vrsqrte>; 4907def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, 4908 IIC_VUNAQ, "vrsqrte", "u32", 4909 v4i32, v4i32, int_arm_neon_vrsqrte>; 4910def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4911 IIC_VUNAD, "vrsqrte", "f32", 4912 v2f32, v2f32, int_arm_neon_vrsqrte>; 4913def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, 4914 IIC_VUNAQ, "vrsqrte", "f32", 4915 v4f32, v4f32, int_arm_neon_vrsqrte>; 4916 4917// VRSQRTS : Vector Reciprocal Square Root Step 4918def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4919 IIC_VRECSD, "vrsqrts", "f32", 4920 v2f32, v2f32, int_arm_neon_vrsqrts, 1>; 4921def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, 4922 IIC_VRECSQ, "vrsqrts", "f32", 4923 v4f32, v4f32, int_arm_neon_vrsqrts, 1>; 4924 4925// Vector Shifts. 

// VSHL : Vector Shift (shift amount in a register; sign selects direction)
defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
                              IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                              "vshl", "s", int_arm_neon_vshifts>;
defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
                              IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
                              "vshl", "u", int_arm_neon_vshiftu>;

// VSHL : Vector Shift Left (Immediate)
defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
                            NEONvshrs>;
defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
                            NEONvshru>;

// VSHLL : Vector Shift Left Long
defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
// The shift amount equals the source element width, so bits 21-16 are a
// fixed opcode pattern rather than an encoded immediate; a dedicated
// decoder method recovers the implied shift count.
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
                ValueType OpTy, Operand ImmTy, SDNode OpNode>
  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
           ResTy, OpTy, ImmTy, OpNode> {
  let Inst{21-16} = op21_16;
  let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
                          v8i16, v8i8, imm8, NEONvshlli>;
def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
                          v4i32, v4i16, imm16, NEONvshlli>;
def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
                          v2i64, v2i32, imm32, NEONvshlli>;

// VSHRN : Vector Shift Right and Narrow
defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
                           NEONvshrn>;

// VRSHL : Vector Rounding Shift
defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "s", int_arm_neon_vrshifts>;
defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
                            NEONvrshrs>;
defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
                            NEONvrshru>;

// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
                           NEONvrshrn>;

// VQSHL : Vector Saturating Shift
defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "s", int_arm_neon_vqshifts>;
defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;

// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;

// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
                           NEONvqshrns>;
defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
                           NEONvqshrnu>;

// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
                           NEONvqshrnsu>;

// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqrshl", "s", int_arm_neon_vqrshifts>;
defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
                              IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
                              "vqrshl", "u", int_arm_neon_vqrshiftu>;

// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
                           NEONvqrshrns>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
                           NEONvqrshrnu>;

// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
                           NEONvqrshrnsu>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;

// VSLI : Vector Shift Left and Insert
defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;

// VSRI : Vector Shift Right and Insert
defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;

// Vector Absolute and Saturating Absolute.
5041 5042// VABS : Vector Absolute Value 5043defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, 5044 IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", 5045 int_arm_neon_vabs>; 5046def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 5047 "vabs", "f32", 5048 v2f32, v2f32, fabs>; 5049def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 5050 "vabs", "f32", 5051 v4f32, v4f32, fabs>; 5052 5053def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))), 5054 (v2i32 (bitconvert (v8i8 (add DPR:$src, 5055 (NEONvshrs DPR:$src, (i32 7))))))), 5056 (VABSv8i8 DPR:$src)>; 5057def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))), 5058 (v2i32 (bitconvert (v4i16 (add DPR:$src, 5059 (NEONvshrs DPR:$src, (i32 15))))))), 5060 (VABSv4i16 DPR:$src)>; 5061def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))), 5062 (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))), 5063 (VABSv2i32 DPR:$src)>; 5064def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))), 5065 (v4i32 (bitconvert (v16i8 (add QPR:$src, 5066 (NEONvshrs QPR:$src, (i32 7))))))), 5067 (VABSv16i8 QPR:$src)>; 5068def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))), 5069 (v4i32 (bitconvert (v8i16 (add QPR:$src, 5070 (NEONvshrs QPR:$src, (i32 15))))))), 5071 (VABSv8i16 QPR:$src)>; 5072def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))), 5073 (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))), 5074 (VABSv4i32 QPR:$src)>; 5075 5076def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>; 5077def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>; 5078 5079// VQABS : Vector Saturating Absolute Value 5080defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, 5081 IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", 5082 int_arm_neon_vqabs>; 5083 5084// Vector Negate. 

// Vector negate expressed as (0 - x), matched below onto VNEG.
def vnegd  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
def vnegq  : PatFrag<(ops node:$in),
                     (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;

class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;

// VNEG : Vector Negate (integer)
def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;

// VNEG : Vector Negate (floating-point)
def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;

// Extra element-type coverage for the (0 - x) fragments.
def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
def : Pat<(v16i8 (vnegq  QPR:$src)), (VNEGs8q QPR:$src)>;
def : Pat<(v8i16 (vnegq  QPR:$src)), (VNEGs16q QPR:$src)>;
def : Pat<(v4i32 (vnegq  QPR:$src)), (VNEGs32q QPR:$src)>;

// VQNEG : Vector Saturating Negate
defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
                           int_arm_neon_vqneg>;

// Vector Bit Counting Operations.

// VCLS : Vector Count Leading Sign Bits
defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
                           int_arm_neon_vcls>;
// VCLZ : Vector Count Leading Zeros
defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
                           ctlz>;
// VCNT : Vector Count One Bits (popcount, byte elements only)
def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiD, "vcnt", "8",
                        v8i8, v8i8, ctpop>;
def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
                        IIC_VCNTiQ, "vcnt", "8",
                        v16i8, v16i8, ctpop>;

// Vector Swap: both registers are read and written (tied in/out pairs).
def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;
def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
                     []>;

// Vector Move Operations.

// VMOV : Vector Move (Register)
// Assembler alias only: a register-to-register vmov is encoded as VORR with
// identical source operands.
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : NEONInstAlias<"vmov${p} $Vd, $Vm",
                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

// VMOV : Vector Move (Immediate)

let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
                         "vmov", "i8", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
                         "vmov", "i16", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{9} = SIMM{9};
}

def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
                         "vmov", "i32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
  let Inst{11-8} = SIMM{11-8};
}

def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
                         "vmov", "i64", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;

def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
                         "vmov", "f32", "$Vd, $SIMM", "",
                         [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable

// VMOV : Vector Get Lane (move scalar to ARM core register)
// The lane index is split across instruction bits via the "let Inst{...}"
// assignments; s8/s16 forms sign-extend, u8/u16 forms zero-extend.

def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
                                           imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
                                           imm:$lane))]>,
                          Requires<[HasNEON, HasFastVGETLNi32]> {
  let Inst{21} = lane{0};
}
// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane extracts: narrow to the containing D sub-register first.
def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
                           (DSubReg_i8_reg imm:$lane))),
                     (SubReg_i8_lane imm:$lane))>;
def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i16_reg imm:$lane))),
                     (SubReg_i16_lane imm:$lane))>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
                             (DSubReg_i32_reg imm:$lane))),
                     (SubReg_i32_lane imm:$lane))>,
      Requires<[HasNEON, HasFastVGETLNi32]>;
// On cores where VMOV-from-lane is slow, go through an S sub-register copy.
def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
          (COPY_TO_REGCLASS
            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
      Requires<[HasNEON, HasSlowVGETLNi32]>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
                          (SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;


// VMOV : Vector Set Lane (move ARM core register to scalar)

let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
                         (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
                         IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21}  = lane{2};
  let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{1};
  let Inst{6}  = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
                                           GPR:$R, imm:$lane))]> {
  let Inst{21} = lane{0};
}
}
// Q-register lane inserts: update the containing D sub-register in place.
def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
          (v16i8 (INSERT_SUBREG QPR:$src1,
                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
                                   (DSubReg_i8_reg imm:$lane))),
                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
                  (DSubReg_i8_reg imm:$lane)))>;
def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
          (v8i16 (INSERT_SUBREG QPR:$src1,
                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i16_reg imm:$lane))),
                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
                  (DSubReg_i16_reg imm:$lane)))>;
def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
          (v4i32 (INSERT_SUBREG QPR:$src1,
                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
                                     (DSubReg_i32_reg imm:$lane))),
                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
                  (DSubReg_i32_reg imm:$lane)))>;

def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;

// scalar_to_vector: place the scalar in lane 0, other lanes undefined.
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;

def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;

def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
                         dsub_0)>;

// VDUP : Vector Duplicate (from ARM core register to all elements)

class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set DPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
          IIC_VMOVIS, "vdup", Dt, "$V, $R",
          [(set QPR:$V, (Ty (NEONvdup (i32 GPR:$R))))]>;

def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
                Requires<[HasNEON, HasFastVDUP32]>;
def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;

// NEONvdup patterns for uarchs with fast VDUP.32.
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
      Requires<[HasNEON,HasFastVDUP32]>;
def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>;

// NEONvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
def : Pat<(v2i32 (NEONvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;
def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))),
          (VMOVDRR GPR:$R, GPR:$R)>,
      Requires<[HasNEON,HasSlowVDUP32]>;

// VDUP : Vector Duplicate Lane (from scalar to all elements)

class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType Ty, Operand IdxTy>
  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set DPR:$Vd, (Ty (NEONvduplane (Ty DPR:$Vm), imm:$lane)))]>;

class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
              ValueType ResTy, ValueType OpTy, Operand IdxTy>
  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
              [(set QPR:$Vd, (ResTy (NEONvduplane (OpTy DPR:$Vm),
                                      VectorIndex32:$lane)))]>;

// Inst{19-16} is partially specified depending on the element size.

def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}
def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
  bits<3> lane;
  let Inst{19-17} = lane{2-0};
}
def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
  bits<2> lane;
  let Inst{19-18} = lane{1-0};
}
def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
  bits<1> lane;
  let Inst{19} = lane{0};
}

def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32d DPR:$Vm, imm:$lane)>;

def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)),
          (VDUPLN32q DPR:$Vm, imm:$lane)>;

// Duplicating a lane of a Q register: pick the D subregister holding the
// lane, then duplicate within it.
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
                                  (DSubReg_i8_reg imm:$lane))),
                           (SubReg_i8_lane imm:$lane)))>;
def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i16_reg imm:$lane))),
                            (SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
                                    (DSubReg_i32_reg imm:$lane))),
                            (SubReg_i32_lane imm:$lane)))>;

// Pseudos for duplicating an f32 held in an S register; expanded later.
def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                          [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
                          [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;

// VMOVN : Vector Narrowing Move
defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
                         "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
                            "vqmovn", "s", int_arm_neon_vqmovns>;
defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
                            "vqmovn", "u", int_arm_neon_vqmovnu>;
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;

// Vector Conversions.

// VCVT : Vector Convert Between Floating-Point and Integers
def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v2i32, v2f32, fp_to_sint>;
def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v2i32, v2f32, fp_to_uint>;
def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v2f32, v2i32, sint_to_fp>;
def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v2f32, v2i32, uint_to_fp>;

def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
                     v4i32, v4f32, fp_to_sint>;
def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
                     v4i32, v4f32, fp_to_uint>;
def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
                     v4f32, v4i32, sint_to_fp>;
def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                     v4f32, v4i32, uint_to_fp>;

// VCVT{A, N, P, M} : ARMv8 directed-rounding conversions (signed and
// unsigned, D and Q forms).
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                    SDPatternOperator IntU> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v2i32, v2f32, IntS>,
             Requires<[HasV8, HasNEON]>;
    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
                       "s32.f32", v4i32, v4f32, IntS>,
             Requires<[HasV8, HasNEON]>;
    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v2i32, v2f32, IntU>,
             Requires<[HasV8, HasNEON]>;
    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
                       "u32.f32", v4i32, v4f32, IntU>,
             Requires<[HasV8, HasNEON]>;
  }
}

defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;

// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
}

let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}

// Assembly aliases: a fixed-point conversion with #0 fractional bits is the
// plain integer conversion.
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;

def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;


// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
                        IIC_VUNAQ, "vcvt", "f16.f32",
                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
                Requires<[HasNEON, HasFP16]>;
def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
                        IIC_VUNAQ, "vcvt", "f32.f16",
                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                Requires<[HasNEON, HasFP16]>;

// Vector Reverse.

// VREV64 : Vector Reverse elements within 64-bit doublewords

class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;

def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;

def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>;

// VREV32 : Vector Reverse elements within 32-bit words

class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;

def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;

def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;

// VREV16 : Vector Reverse elements within 16-bit halfwords

class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
        (ins DPR:$Vm), IIC_VMOVD,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
        (ins QPR:$Vm), IIC_VMOVQ,
        OpcodeStr, Dt, "$Vd, $Vm", "",
        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;

def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;

// Other Vector Shuffles.

// Aligned extractions: really just dropping registers

class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
  : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
        (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;

def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;

def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;

def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;

def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;

def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;


// VEXT : Vector Extract


// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd" in {
class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
                                     (Ty DPR:$Vm), imm:$index)))]> {
  bits<3> index;
  let Inst{11}   = 0b0;
  let Inst{10-8} = index{2-0};
}

// NOTE(review): unlike VEXTd, the 'immTy' parameter is not referenced here —
// the ins list hard-codes imm0_15:$index; per-size defs below constrain the
// encodable range via their Inst{} lets. Confirm this is intentional.
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
                                     (Ty QPR:$Vm), imm:$index)))]> {
  bits<4> index;
  let Inst{11-8} = index{3-0};
}
}

def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
  let Inst{10-8} = index{2-0};
}
def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
  let Inst{10-9} = index{1-0};
  let Inst{8}    = 0b0;
}
def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
  let Inst{10}  = index{0};
  let Inst{9-8} = 0b00;
}
def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
                           (v2f32 DPR:$Vm),
                           (i32 imm:$index))),
          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;

def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
  let Inst{11-8} = index{3-0};
}
def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
  let Inst{11-9} = index{2-0};
  let Inst{8}    = 0b0;
}
def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
  let Inst{11-10} = index{1-0};
  let Inst{9-8}   = 0b00;
}
def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
  let Inst{11}   = index{0};
  let Inst{10-8} = 0b000;
}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
                           (v4f32 QPR:$Vm),
                           (i32 imm:$index))),
          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;

// VTRN : Vector Transpose

def VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
def VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
def VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;

def VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
def VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
def VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;

// VUZP : Vector Unzip (Deinterleave)

def VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
def VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;

// VZIP : Vector Zip (Interleave)

def VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;

def VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
def VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;

// Vector Table Lookup and Table Extension.

// VTBL : Vector Table Lookup
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
        "vtbl", "8", "$Vd, $Vn, $Vm", "",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
        (ins VecListFourD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTB4,
        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBL3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;

// VTBX : Vector Table Extension
def VTBX1
  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
        NVTBLFrm, IIC_VTBX3,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
def VTBX4
  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
        "vtbx", "8", "$Vd, $Vn, $Vm",
        "$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1

def VTBX3Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX3, "$orig = $dst", []>;
def VTBX4Pseudo
  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
                IIC_VTBX4, "$orig = $dst", []>;
} // DecoderMethod = "DecodeTBLInstruction"

// VRINT : Vector Rounding (ARMv8 directed-rounding, D and Q forms, plus the
// two-type-suffix assembly aliases).
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
  let PostEncoderMethod = "NEONThumb2V8PostEncoder",
      DecoderNamespace = "v8NEON" in {
    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
                      !strconcat("vrint", op), "f32",
                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
      let Inst{9-7} = op9_7;
    }
  }

  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
                      (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
                      (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}

defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;

// Cryptography instructions
let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
    DecoderNamespace = "v8Crypto" in {
  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                 !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
                  !strconcat("aes", op), "8", v16i8, v16i8, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
              SDPatternOperator Int>
    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
                 SDPatternOperator Int>
    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
                  !strconcat("sha", op), "32", v4i32, v4i32, Int>,
      Requires<[HasV8, HasCrypto]>;
  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20,
                 SDPatternOperator Int>
    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
                 !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
      Requires<[HasV8, HasCrypto]>;
}

def AESD   : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
def AESE   : AES2Op<"e", 0, 0, int_arm_neon_aese>;
def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
def AESMC  : AES<"mc", 1, 0, int_arm_neon_aesmc>;

def SHA1H     : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
def SHA1SU1   : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
def SHA1C     : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
def SHA1M     : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
def SHA1P     : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
def SHA1SU0   : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
def SHA256H   : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
def SHA256H2  : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

// Scalar f32 ops done on the NEON pipeline: insert the scalar(s) into the
// low lane of an undef D register, run the D-form instruction, extract lane 0.
class N2VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSPat<SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0),
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
              (EXTRACT_SUBREG
               (v2f32 (COPY_TO_REGCLASS (Inst
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$acc, ssub_0),
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$a, ssub_0),
                 (INSERT_SUBREG
                   (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
                   SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;

def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
      Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
def : N2VSPat<arm_ftosi, VCVTf2sd>;
def : N2VSPat<arm_ftoui, VCVTf2ud>;
def : N2VSPat<arm_sitof, VCVTs2fd>;
def : N2VSPat<arm_uitof, VCVTu2fd>;

// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
      Requires<[HasNEON, DontUseVMOVSR]>;

//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//

// bit_convert — all 64-bit and 128-bit vector bitcasts are register-file
// no-ops.
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;

def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;

// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;

// Vector lengthening move with load, matching extending loads.

// extload, zextload and sextload for a standard lengthening load.
Example: 6050// Lengthen_Single<"8", "i16", "8"> = 6051// Pat<(v8i16 (extloadvi8 addrmode6:$addr)) 6052// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, 6053// (f64 (IMPLICIT_DEF)), (i32 0)))>; 6054multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> { 6055 let AddedComplexity = 10 in { 6056 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6057 (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)), 6058 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 6059 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>; 6060 6061 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6062 (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)), 6063 (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy) 6064 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>; 6065 6066 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6067 (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)), 6068 (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy) 6069 (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>; 6070 } 6071} 6072 6073// extload, zextload and sextload for a lengthening load which only uses 6074// half the lanes available. 
Example: 6075// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = 6076// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), 6077// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, 6078// (f64 (IMPLICIT_DEF)), (i32 0))), 6079// dsub_0)>; 6080multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, 6081 string InsnLanes, string InsnTy> { 6082 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6083 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 6084 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 6085 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6086 dsub_0)>; 6087 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6088 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 6089 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) 6090 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6091 dsub_0)>; 6092 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6093 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 6094 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) 6095 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6096 dsub_0)>; 6097} 6098 6099// extload, zextload and sextload for a lengthening load followed by another 6100// lengthening load, to quadruple the initial length. 
6101// 6102// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> = 6103// Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)) 6104// (EXTRACT_SUBREG (VMOVLuv4i32 6105// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, 6106// (f64 (IMPLICIT_DEF)), 6107// (i32 0))), 6108// dsub_0)), 6109// dsub_0)>; 6110multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, 6111 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 6112 string Insn2Ty> { 6113 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6114 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), 6115 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6116 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6117 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6118 dsub_0))>; 6119 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6120 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), 6121 (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6122 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6123 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6124 dsub_0))>; 6125 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6126 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), 6127 (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 6128 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 6129 (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6130 dsub_0))>; 6131} 6132 6133// extload, zextload and sextload for a lengthening load followed by another 6134// lengthening load, to quadruple the initial length, but which ends up only 6135// requiring half the available lanes (a 64-bit outcome instead of a 128-bit). 
6136// 6137// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> = 6138// Pat<(v2i32 (extloadvi8 addrmode6:$addr)) 6139// (EXTRACT_SUBREG (VMOVLuv4i32 6140// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr, 6141// (f64 (IMPLICIT_DEF)), (i32 0))), 6142// dsub_0)), 6143// dsub_0)>; 6144multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, 6145 string Insn1Lanes, string Insn1Ty, string Insn2Lanes, 6146 string Insn2Ty> { 6147 def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6148 (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), 6149 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6150 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6151 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6152 dsub_0)), 6153 dsub_0)>; 6154 def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6155 (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), 6156 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) 6157 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) 6158 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6159 dsub_0)), 6160 dsub_0)>; 6161 def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) 6162 (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), 6163 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) 6164 (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) 6165 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), 6166 dsub_0)), 6167 dsub_0)>; 6168} 6169 6170defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 6171defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 6172defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 6173 6174defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 6175defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 6176 6177// Double lengthening - v4i8 -> v4i16 -> v4i32 
6178defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; 6179// v2i8 -> v2i16 -> v2i32 6180defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; 6181// v2i16 -> v2i32 -> v2i64 6182defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; 6183 6184// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 6185def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), 6186 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 6187 (VLD1LNd16 addrmode6:$addr, 6188 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 6189def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), 6190 (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 6191 (VLD1LNd16 addrmode6:$addr, 6192 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 6193def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), 6194 (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 6195 (VLD1LNd16 addrmode6:$addr, 6196 (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; 6197 6198//===----------------------------------------------------------------------===// 6199// Assembler aliases 6200// 6201 6202def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", 6203 (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; 6204def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", 6205 (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; 6206 6207// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
6208defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 6209 (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6210defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", 6211 (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6212defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 6213 (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6214defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", 6215 (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6216defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 6217 (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6218defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", 6219 (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6220defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 6221 (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; 6222defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", 6223 (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; 6224// ... two-operand aliases 6225defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 6226 (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6227defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", 6228 (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6229defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 6230 (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6231defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", 6232 (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6233defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 6234 (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; 6235defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", 6236 (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; 6237 6238// VLD1 single-lane pseudo-instructions. These need special handling for 6239// the lane index that an InstAlias can't handle, so we use these instead. 
6240def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", 6241 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6242def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", 6243 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6244def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", 6245 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6246 6247def VLD1LNdWB_fixed_Asm_8 : 6248 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", 6249 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6250def VLD1LNdWB_fixed_Asm_16 : 6251 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", 6252 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6253def VLD1LNdWB_fixed_Asm_32 : 6254 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", 6255 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6256def VLD1LNdWB_register_Asm_8 : 6257 NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", 6258 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, 6259 rGPR:$Rm, pred:$p)>; 6260def VLD1LNdWB_register_Asm_16 : 6261 NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", 6262 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, 6263 rGPR:$Rm, pred:$p)>; 6264def VLD1LNdWB_register_Asm_32 : 6265 NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", 6266 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, 6267 rGPR:$Rm, pred:$p)>; 6268 6269 6270// VST1 single-lane pseudo-instructions. These need special handling for 6271// the lane index that an InstAlias can't handle, so we use these instead. 
6272def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", 6273 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6274def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", 6275 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6276def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", 6277 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6278 6279def VST1LNdWB_fixed_Asm_8 : 6280 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", 6281 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; 6282def VST1LNdWB_fixed_Asm_16 : 6283 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", 6284 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6285def VST1LNdWB_fixed_Asm_32 : 6286 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", 6287 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; 6288def VST1LNdWB_register_Asm_8 : 6289 NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", 6290 (ins VecListOneDByteIndexed:$list, addrmode6:$addr, 6291 rGPR:$Rm, pred:$p)>; 6292def VST1LNdWB_register_Asm_16 : 6293 NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", 6294 (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, 6295 rGPR:$Rm, pred:$p)>; 6296def VST1LNdWB_register_Asm_32 : 6297 NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", 6298 (ins VecListOneDWordIndexed:$list, addrmode6:$addr, 6299 rGPR:$Rm, pred:$p)>; 6300 6301// VLD2 single-lane pseudo-instructions. These need special handling for 6302// the lane index that an InstAlias can't handle, so we use these instead. 
// No-writeback forms. The q variants (even/odd D-register pairs) only exist
// for 16- and 32-bit elements.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Writeback with fixed post-increment: "$addr!".
def VLD2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback with register post-increment: "$addr, $Rm".
def VLD2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Writeback with fixed post-increment: "$addr!".
def VST2LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback with register post-increment: "$addr, $Rm".
def VST2LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
               (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr, $Rm",
               (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
               (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No-writeback forms; d = three consecutive D registers, q = every-other-one.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;

// Writeback with fixed post-increment: "$addr!".
def VLD3DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
// Writeback with register post-increment: "$addr, $Rm".
def VLD3DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No-writeback forms. The q variants only exist for 16- and 32-bit elements.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Writeback with fixed post-increment: "$addr!".
def VLD3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback with register post-increment: "$addr, $Rm".
def VLD3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

// Writeback with fixed post-increment: "$addr!".
def VLD3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback with register post-increment: "$addr, $Rm".
def VLD3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                    (ins VecListThreeD:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListThreeD:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListThreeD:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
                    (ins VecListThreeQ:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListThreeQ:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListThreeQ:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
// No-writeback forms (mirrors the VLD3 single-lane set above).
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

// Writeback with fixed post-increment: "$addr!".
def VST3LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
// Writeback with register post-increment: "$addr, $Rm".
def VST3LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
               (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
               (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
               (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;

// Writeback with fixed post-increment: "$addr!".
def VST3dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                    (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
                    (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
// Writeback with register post-increment: "$addr, $Rm".
def VST3dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                    (ins VecListThreeD:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListThreeD:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListThreeD:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
                    (ins VecListThreeQ:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
                    (ins VecListThreeQ:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
                    (ins VecListThreeQ:$list, addrmode6:$addr,
                         rGPR:$Rm, pred:$p)>;

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;

def VLD4DUPdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQAllLanes:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;

def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourD:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
               (ins VecListFourQ:$list, addrmode6:$addr,
                    rGPR:$Rm, pred:$p)>;

// VMOV/VMVN takes an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                          (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                          (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                          (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                          (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                          (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                          (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Rd, #-imm" can be handled via "vmvn".
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Alias for loading floating point immediates that aren't representable
// using the vmov.f32 encoding but the bitpattern is representable using
// the .i32 encoding.
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;