//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
def HasV8_0a : Predicate<"Subtarget->hasV8_0aOps()">,
               AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a : Predicate<"Subtarget->hasV8_8aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a : Predicate<"Subtarget->hasV8_9aOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a : Predicate<"Subtarget->hasV9_4aOps()">,
               AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
               AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
                 AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3 : Predicate<"Subtarget->hasEL3()">,
             AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH : Predicate<"Subtarget->hasVH()">,
            AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR : Predicate<"Subtarget->hasLOR()">,
             AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
               AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasJS : Predicate<"Subtarget->hasJS()">,
            AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
               AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
                   AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV : Predicate<"Subtarget->hasNV()">,
            AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
              AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT : Predicate<"Subtarget->hasDIT()">,
             AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
                   AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM : Predicate<"Subtarget->hasAM()">,
            AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
              AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
                 AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
               AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
                   AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
                 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
              AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
                AssemblerPredicateWithAll<(all_of FeatureCrypto), "crypto">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
             AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
              AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
             AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
             AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC : Predicate<"Subtarget->hasCSSC()">,
              AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
             AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
             AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
             AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
                  AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
             AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                           "fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
             AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
              AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1 : Predicate<"Subtarget->hasSVE2p1()">,
                AssemblerPredicate<(any_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
                 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
                 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
                  AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
                     AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16 : Predicate<"Subtarget->hasB16B16()">,
                AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME : Predicate<"Subtarget->hasSME()">,
             AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">,
                   AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16 : Predicate<"Subtarget->hasSMEF16F16()">,
                   AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">,
                   AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2 : Predicate<"Subtarget->hasSME2()">,
              AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">,
                AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// so they should be enabled if either feature has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVEorSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                                "sve2 or sme">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
// A subset of NEON instructions are legal in Streaming SVE execution mode,
// so they should be enabled if either feature has been specified.
def HasNEONorSME
    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                "neon or sme">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
              AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
                 AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
                   AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
            AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
                 AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
              AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
             AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
             AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
             AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
             AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
              AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
                    AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
            AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
              AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
              AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
              AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
                 AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
             AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
              AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB : Predicate<"Subtarget->hasCLRBHB()">,
                AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2 : Predicate<"Subtarget->hasSPECRES2()">,
                  AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE : Predicate<"Subtarget->hasITE()">,
             AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE : Predicate<"Subtarget->hasTHE()">,
             AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3 : Predicate<"Subtarget->hasRCPC3()">,
               AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
                AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
              AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">,
      AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                         "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def NotInStreamingSVEMode : Predicate<"!Subtarget->forceStreamingCompatibleSVE()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                 SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                      SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
    [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
    [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
    [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>,
     SDTCisVT<1, i32>, SDTCisVT<4, i32>]>;

def SDT_AArch64Brcond : SDTypeProfile<0, 3,
    [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                          SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel : SDTypeProfile<1, 4,
    [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
    [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisSameAs<1, 2>,
     SDTCisInt<3>, SDTCisInt<4>, SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
    [SDTCisVT<0, i32>, SDTCisFP<1>, SDTCisSameAs<1, 2>,
     SDTCisInt<3>, SDTCisInt<4>, SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def
SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                               SDTCisSameAs<0,1>,
                                               SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>,
                                             SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                   SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1

// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// number of operands (the variable)
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0, 1,
                                              [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                            [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                             SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                             SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/Zero extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}

defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  :
masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;

// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                  SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                   SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                  SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;

def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
                                  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                   SDNPVariadic]>;

def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                  SDNPVariadic]>;

def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                  [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                  [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                  [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag       : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
                                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc       : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >;
def AArch64sbc       : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag  :
SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, 614 [SDNPCommutative]>; 615def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; 616def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, 617 [SDNPCommutative]>; 618def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; 619def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; 620 621def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; 622def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; 623def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; 624 625def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; 626 627def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; 628def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp, 629 [SDNPHasChain]>; 630def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp, 631 [SDNPHasChain]>; 632def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs), 633 [(AArch64strict_fcmp node:$lhs, node:$rhs), 634 (AArch64fcmp node:$lhs, node:$rhs)]>; 635 636def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; 637def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; 638def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; 639def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; 640def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; 641def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>; 642 643def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>; 644 645def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; 646def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; 647def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; 648def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; 649def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; 650def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; 651 652def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; 653def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; 654def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; 655def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; 656def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; 657def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; 658def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>; 659 660def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; 661def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; 662def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; 663def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; 664 665def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; 666def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; 667def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; 668def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; 669def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; 670def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; 671def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; 672def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; 673def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; 
def AArch64vsri      : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                           (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;

def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                             [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                           SDTCisSameAs<1, 2>]>;
def AArch64pmull    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;

def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV",
SDT_AArch64UnaryVec>;
def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;

def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abdu node:$lhs, node:$rhs),
                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abds node:$lhs, node:$rhs),
                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp   : PatFrags<(ops node:$src),
                               [(AArch64saddlp_n node:$src),
                                (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp    : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

// Match an add node, and also treat an 'or' node as an 'add' if the or'ed
// operands have no common bits.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   if (N->getOpcode() == ISD::ADD)
     return true;
   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
     // Only handle G_ADD for now. FIXME. build capability to compute whether
     // operands of G_OR have common bits set or not.
     return MI.getOpcode() == TargetOpcode::G_ADD;
  }];
}

// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;

  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;

  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
  // optimizing. This allows us to selectively use patterns without impacting
  // SelectionDAG's behaviour.
  // FIXME: One day there will probably be a nicer way to check for this, but
  // today is not that day.
  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}

include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//

let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to empty list because we expect these instructions to simply get
// removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
                              Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
                            Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1

let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions.  When that changes, they can be
// removed, along with the AArch64Wrapper node.

let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
              Sched<[WriteLDAdr]>;

// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
                                                    tglobaladdr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrJT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
                                                    tjumptable:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrCP
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
                                                    tconstpool:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrBA
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
                                                    tblockaddress:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrTLS
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
                                                    tglobaltlsaddr:$low))]>,
      Sched<[WriteAdrAdr]>;
def MOVaddrEXT
    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
                                                    texternalsym:$low))]>,
      Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
                                                tglobaltlsaddr:$low))]>,
      Sched<[WriteAdr]>;

} // isReMaterializable, isCodeGenOnly

//===----------------------------------------------------------------------===//
// Pseudo instruction used by retguard
let isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
  let Size = 8 in {
    def RETGUARD_JMP_TRAP: Pseudo<(outs), (ins GPR64:$reg), []>;
  }
}

def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
          (LOADgot tglobaltlsaddr:$addr)>;

def : Pat<(AArch64LOADgot texternalsym:$addr),
          (LOADgot texternalsym:$addr)>;

def : Pat<(AArch64LOADgot tconstpool:$addr),
          (LOADgot tconstpool:$addr)>;

// In general these get lowered into a sequence of three 4-byte instructions.
// 32-bit jump table destination is actually only 2 instructions since we can
// use the table itself as a PC-relative base. But optimization occurs after
// branch relaxation so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
    isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                      Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
                     Sched<[]>;
}

// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
            Sched<[]>;

let hasSideEffects = 1, isCodeGenOnly = 1 in {
  def SpeculationSafeValueX
      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
  def SpeculationSafeValueW
      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}

// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
  // This gets lowered to a pair of 4-byte instructions.
  let Size = 8 in
  def SpeculationBarrierISBDSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
  // This gets lowered to a 4-byte instruction.
  let Size = 4 in
  def SpeculationBarrierSBEndBB
      : Pseudo<(outs), (ins), []>, Sched<[]>;
}

//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//

def HINT : HintI<"hint">;
def : InstAlias<"nop",  (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe",  (HINT 0b010)>;
def : InstAlias<"wfi",  (HINT 0b011)>;
def : InstAlias<"sev",  (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh",  (HINT 0b110)>;
def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti",  (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;

// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;

// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
let mayLoad = ?, mayStore = ?
in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
                     [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;

def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
                     [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;

def ISB : CRmSystemI<barrier_op, 0b110, "isb",
                     [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;

def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
  let CRm        = 0b0010;
  let Inst{12}   = 0;
  let Predicates = [HasTRACEV8_4];
}

def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
  let CRm{1-0}   = 0b11;
  let Inst{9-8}  = 0b10;
  let Predicates = [HasXS];
}

let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}

// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
  let Inst{31-8} = 0b110101010000100101110010;
  let Inst{7-5} = op2;
  let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ:  BRBEI<0b101, "\tinj">;

}

// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;

// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}

// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN       : SIMD_BFCVTN;
def BFCVTN2      : SIMD_BFCVTN2;

// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                    (v4bf16 (bitconvert
                      (v2i32 (AArch64duplane32
                        (v4i32 (bitconvert
                          (v8bf16 (insert_subvector undef,
                            (v4bf16 V64:$Rm),
                            (i64 0))))),
                        VectorIndexS:$idx)))))),
          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                             VectorIndexS:$idx)>;
}

let Predicates = [HasNEONorSME, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}

// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;

// sudot lane has a pattern where usdot is expected (there is no sudot).
// The second operand is used in the dup operation to repeat the indexed
// element.
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
                         string rhs_kind, RegisterOperand RegType,
                         ValueType AccumType, ValueType InputType>
      : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
                                        lhs_kind, rhs_kind, RegType, AccumType,
                                        InputType, null_frag> {
  let Pattern = [(set (AccumType RegType:$dst),
                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
                                 (InputType (bitconvert (AccumType
                                    (AArch64duplane32 (v4i32 V128:$Rm),
                                        VectorIndexS:$idx)))),
                                 (InputType RegType:$Rn))))];
}

multiclass SIMDSUDOTIndex {
  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
}

defm SUDOTlane : SIMDSUDOTIndex;

}

// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}

// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
def XAR       : CryptoRRRi6<"xar">;

class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
        (INST (VecTy
V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;

class EOR3_pattern<ValueType VecTy>
  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;

class BCAX_pattern<ValueType VecTy>
  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;

def : BCAX_pattern<v16i8>;
def : BCAX_pattern<v8i16>;
def : BCAX_pattern<v4i32>;
def : BCAX_pattern<v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;

def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;

def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;

def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;

def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;


} // HasSHA3

let Predicates = [HasSM4] in {
def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;

def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;

class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;

class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
        (INST
(v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>; 1237 1238class SM4_pattern<Instruction INST, Intrinsic OpNode> 1239 : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))), 1240 (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>; 1241 1242def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>; 1243def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>; 1244 1245def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>; 1246def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>; 1247def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>; 1248def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>; 1249 1250def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>; 1251def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>; 1252} // HasSM4 1253 1254let Predicates = [HasRCPC] in { 1255 // v8.3 Release Consistent Processor Consistent support, optional in v8.2. 1256 def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>; 1257 def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>; 1258 def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>; 1259 def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>; 1260} 1261 1262// v8.3a complex add and multiply-accumulate. No predicate here, that is done 1263// inside the multiclass as the FP16 versions need different predicates. 1264defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop, 1265 "fcmla", null_frag>; 1266defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd, 1267 "fcadd", null_frag>; 1268defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">; 1269 1270let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1271 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1272 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>; 1273 def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))), 1274 (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>; 1275 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1276 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>; 1277 def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), 1278 (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; 1279} 1280 1281let Predicates = [HasComplxNum, HasNEON] in { 1282 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1283 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; 1284 def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 1285 (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>; 1286 foreach Ty = [v4f32, v2f64] in { 1287 def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))), 1288 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>; 1289 def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))), 1290 (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>; 1291 } 1292} 1293 1294multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> { 1295 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1296 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>; 1297 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1298 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>; 1299 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1300 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>; 1301 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty 
Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))), 1302 (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; 1303} 1304 1305multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> { 1306 def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1307 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; 1308 def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1309 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; 1310 def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1311 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; 1312 def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), 1313 (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; 1314} 1315 1316 1317let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { 1318 defm : FCMLA_PATS<v4f16, V64>; 1319 defm : FCMLA_PATS<v8f16, V128>; 1320 1321 defm : FCMLA_LANE_PATS<v4f16, V64, 1322 (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>; 1323 defm : FCMLA_LANE_PATS<v8f16, V128, 1324 (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>; 1325} 1326let Predicates = [HasComplxNum, HasNEON] in { 1327 defm : FCMLA_PATS<v2f32, V64>; 1328 defm : FCMLA_PATS<v4f32, V128>; 1329 defm : FCMLA_PATS<v2f64, V128>; 1330 1331 defm : FCMLA_LANE_PATS<v4f32, V128, 1332 (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>; 1333} 1334 1335// v8.3a Pointer Authentication 1336// These instructions inhabit part of the hint space and so can be used for 1337// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is 1338// important for compatibility with other assemblers (e.g. GAS) when building 1339// software compatible with both CPUs that do or don't implement PA. 1340let Uses = [LR], Defs = [LR] in { 1341 def PACIAZ : SystemNoOperands<0b000, "hint\t#24">; 1342 def PACIBZ : SystemNoOperands<0b010, "hint\t#26">; 1343 let isAuthenticated = 1 in { 1344 def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">; 1345 def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">; 1346 } 1347} 1348let Uses = [LR, SP], Defs = [LR] in { 1349 def PACIASP : SystemNoOperands<0b001, "hint\t#25">; 1350 def PACIBSP : SystemNoOperands<0b011, "hint\t#27">; 1351 let isAuthenticated = 1 in { 1352 def AUTIASP : SystemNoOperands<0b101, "hint\t#29">; 1353 def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">; 1354 } 1355} 1356let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in { 1357 def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">; 1358 def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">; 1359 let isAuthenticated = 1 in { 1360 def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">; 1361 def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">; 1362 } 1363} 1364 1365let Uses = [LR], Defs = [LR], CRm = 0b0000 in { 1366 def XPACLRI : SystemNoOperands<0b111, "hint\t#7">; 1367} 1368 1369// In order to be able to write readable assembly, LLVM should accept assembly 1370// inputs that use pointer authentication mnemonics, even with PA disabled. 1371// However, in order to be compatible with other assemblers (e.g. GAS), LLVM 1372// should not emit these mnemonics unless PA is enabled. 
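// Note: the trailing "0" on the aliases below is the InstAlias emit priority;
// it marks them as accepted by the parser but never preferred by the printer,
// so the plain HINT forms are emitted. The HasPAuth block further down
// re-declares the same aliases with priority "1" so that the PAuth mnemonics
// are printed when the feature is available.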
1373def : InstAlias<"paciaz", (PACIAZ), 0>; 1374def : InstAlias<"pacibz", (PACIBZ), 0>; 1375def : InstAlias<"autiaz", (AUTIAZ), 0>; 1376def : InstAlias<"autibz", (AUTIBZ), 0>; 1377def : InstAlias<"paciasp", (PACIASP), 0>; 1378def : InstAlias<"pacibsp", (PACIBSP), 0>; 1379def : InstAlias<"autiasp", (AUTIASP), 0>; 1380def : InstAlias<"autibsp", (AUTIBSP), 0>; 1381def : InstAlias<"pacia1716", (PACIA1716), 0>; 1382def : InstAlias<"pacib1716", (PACIB1716), 0>; 1383def : InstAlias<"autia1716", (AUTIA1716), 0>; 1384def : InstAlias<"autib1716", (AUTIB1716), 0>; 1385def : InstAlias<"xpaclri", (XPACLRI), 0>; 1386 1387// These pointer authentication instructions require armv8.3a 1388let Predicates = [HasPAuth] in { 1389 1390 // When PA is enabled, a better mnemonic should be emitted. 1391 def : InstAlias<"paciaz", (PACIAZ), 1>; 1392 def : InstAlias<"pacibz", (PACIBZ), 1>; 1393 def : InstAlias<"autiaz", (AUTIAZ), 1>; 1394 def : InstAlias<"autibz", (AUTIBZ), 1>; 1395 def : InstAlias<"paciasp", (PACIASP), 1>; 1396 def : InstAlias<"pacibsp", (PACIBSP), 1>; 1397 def : InstAlias<"autiasp", (AUTIASP), 1>; 1398 def : InstAlias<"autibsp", (AUTIBSP), 1>; 1399 def : InstAlias<"pacia1716", (PACIA1716), 1>; 1400 def : InstAlias<"pacib1716", (PACIB1716), 1>; 1401 def : InstAlias<"autia1716", (AUTIA1716), 1>; 1402 def : InstAlias<"autib1716", (AUTIB1716), 1>; 1403 def : InstAlias<"xpaclri", (XPACLRI), 1>; 1404 1405 multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm, 1406 SDPatternOperator op> { 1407 def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>; 1408 def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>; 1409 def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>; 1410 def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>; 1411 def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>; 1412 def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>; 1413 def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>; 1414 def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>; 1415 } 1416 1417 defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>; 1418 defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>; 1419 1420 def XPACI : ClearAuth<0, "xpaci">; 1421 def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>; 1422 def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>; 1423 1424 def XPACD : ClearAuth<1, "xpacd">; 1425 def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>; 1426 def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>; 1427 1428 def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>; 1429 1430 // Combined Instructions 1431 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1432 def BRAA : AuthBranchTwoOperands<0, 0, "braa">; 1433 def BRAB : AuthBranchTwoOperands<0, 1, "brab">; 1434 } 1435 let isCall = 1, Defs = [LR], Uses = [SP] in { 1436 def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">; 1437 def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">; 1438 } 1439 1440 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { 1441 def BRAAZ : AuthOneOperand<0b000, 0, "braaz">; 1442 def BRABZ : AuthOneOperand<0b000, 1, "brabz">; 1443 } 1444 let isCall = 1, Defs = [LR], Uses = [SP] in { 1445 def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">; 1446 def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">; 1447 } 1448 1449 let isReturn = 1, isTerminator = 1, isBarrier = 1 in { 1450 def RETAA : AuthReturn<0b010, 0, "retaa">; 1451 
def RETAB : AuthReturn<0b010, 1, "retab">; 1452 def ERETAA : AuthReturn<0b100, 0, "eretaa">; 1453 def ERETAB : AuthReturn<0b100, 1, "eretab">; 1454 } 1455 1456 defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>; 1457 defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>; 1458 1459} 1460 1461// v8.3a floating point conversion for javascript 1462let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in 1463def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32, 1464 "fjcvtzs", 1465 [(set GPR32:$Rd, 1466 (int_aarch64_fjcvtzs FPR64:$Rn))]> { 1467 let Inst{31} = 0; 1468} // HasJS, HasFPARMv8 1469 1470// v8.4 Flag manipulation instructions 1471let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in { 1472def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> { 1473 let Inst{20-5} = 0b0000001000000000; 1474} 1475def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">; 1476def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">; 1477def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif", 1478 "{\t$Rn, $imm, $mask}">; 1479} // HasFlagM 1480 1481// v8.5 flag manipulation instructions 1482let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in { 1483 1484def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> { 1485 let Inst{18-16} = 0b000; 1486 let Inst{11-8} = 0b0000; 1487 let Unpredictable{11-8} = 0b1111; 1488 let Inst{7-5} = 0b001; 1489} 1490 1491def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> { 1492 let Inst{18-16} = 0b000; 1493 let Inst{11-8} = 0b0000; 1494 let Unpredictable{11-8} = 0b1111; 1495 let Inst{7-5} = 0b010; 1496} 1497} // HasAltNZCV 1498 1499 1500// Armv8.5-A speculation barrier 1501def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> { 1502 let Inst{20-5} = 0b0001100110000111; 1503 let Unpredictable{11-8} = 0b1111; 1504 let Predicates = [HasSB]; 1505 let hasSideEffects = 1; 1506} 1507 1508def : InstAlias<"clrex", (CLREX 0xf)>; 1509def : InstAlias<"isb", (ISB 0xf)>; 1510def : InstAlias<"ssbb", (DSB 0)>; 1511def : InstAlias<"pssbb", (DSB 4)>; 1512def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>; 1513 1514def MRS : MRSI; 1515def MSR : MSRI; 1516def MSRpstateImm1 : MSRpstateImm0_1; 1517def MSRpstateImm4 : MSRpstateImm0_15; 1518 1519def : Pat<(AArch64mrs imm:$id), 1520 (MRS imm:$id)>; 1521 1522// The thread pointer (on Linux, at least, where this has been implemented) is 1523// TPIDR_EL0. 1524def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), 1525 [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; 1526 1527// This gets lowered into a 24-byte instruction sequence 1528let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { 1529def KCFI_CHECK : Pseudo< 1530 (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>; 1531} 1532 1533let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in { 1534def HWASAN_CHECK_MEMACCESS : Pseudo< 1535 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1536 [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1537 Sched<[]>; 1538} 1539 1540let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in { 1541def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo< 1542 (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo), 1543 [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>, 1544 Sched<[]>; 1545} 1546 1547// The virtual cycle counter register is CNTVCT_EL0. 
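// 0xdf02 below is the packed MRS operand encoding of CNTVCT_EL0
// (op0=3, op1=3, CRn=14, CRm=0, op2=2).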
1548def : Pat<(readcyclecounter), (MRS 0xdf02)>; 1549 1550// FPCR register 1551let Uses = [FPCR] in 1552def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins), 1553 [(set GPR64:$dst, (int_aarch64_get_fpcr))]>, 1554 PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>, 1555 Sched<[WriteSys]>; 1556let Defs = [FPCR] in 1557def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val), 1558 [(int_aarch64_set_fpcr i64:$val)]>, 1559 PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>, 1560 Sched<[WriteSys]>; 1561 1562// Generic system instructions 1563def SYSxt : SystemXtI<0, "sys">; 1564def SYSLxt : SystemLXtI<1, "sysl">; 1565 1566def : InstAlias<"sys $op1, $Cn, $Cm, $op2", 1567 (SYSxt imm0_7:$op1, sys_cr_op:$Cn, 1568 sys_cr_op:$Cm, imm0_7:$op2, XZR)>; 1569 1570 1571let Predicates = [HasTME] in { 1572 1573def TSTART : TMSystemI<0b0000, "tstart", 1574 [(set GPR64:$Rt, (int_aarch64_tstart))]>; 1575 1576def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>; 1577 1578def TCANCEL : TMSystemException<0b011, "tcancel", 1579 [(int_aarch64_tcancel timm64_0_65535:$imm)]>; 1580 1581def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> { 1582 let mayLoad = 0; 1583 let mayStore = 0; 1584} 1585} // HasTME 1586 1587//===----------------------------------------------------------------------===// 1588// Move immediate instructions. 1589//===----------------------------------------------------------------------===// 1590 1591defm MOVK : InsertImmediate<0b11, "movk">; 1592defm MOVN : MoveImmediate<0b00, "movn">; 1593 1594let PostEncoderMethod = "fixMOVZ" in 1595defm MOVZ : MoveImmediate<0b10, "movz">; 1596 1597// First group of aliases covers an implicit "lsl #0". 1598def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>; 1599def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>; 1600def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1601def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1602def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>; 1603def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>; 1604 1605// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax. 
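// For example, "movz x0, #:abs_g3:sym" materializes bits [63:48] of sym's
// absolute address; the remaining 16-bit chunks are normally filled in with
// movk using the :abs_g2_nc:/:abs_g1_nc:/:abs_g0_nc: specifiers.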
1606def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1607def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1608def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1609def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1610 1611def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>; 1612def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>; 1613def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>; 1614def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>; 1615 1616def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>; 1617def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>; 1618def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>; 1619def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>; 1620 1621def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1622def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1623 1624def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>; 1625def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>; 1626 1627def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>; 1628def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>; 1629 1630// Final group of aliases covers true "mov $Rd, $imm" cases. 1631multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR, 1632 int width, int shift> { 1633 def _asmoperand : AsmOperandClass { 1634 let Name = basename # width # "_lsl" # shift # "MovAlias"; 1635 let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " 1636 # shift # ">"; 1637 let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; 1638 } 1639 1640 def _movimm : Operand<i32> { 1641 let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand"); 1642 } 1643 1644 def : InstAlias<"mov $Rd, $imm", 1645 (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>; 1646} 1647 1648defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; 1649defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; 1650 1651defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; 1652defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; 1653defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; 1654defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; 1655 1656defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; 1657defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; 1658 1659defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; 1660defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; 1661defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; 1662defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; 1663 1664let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, 1665 isAsCheapAsAMove = 1 in { 1666// FIXME: The following pseudo instructions are only needed because remat 1667// cannot handle multiple instructions. When that changes, we can select 1668// directly to the real instructions and get rid of these pseudos. 
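// After register allocation, MOVi32imm/MOVi64imm are expanded (see
// AArch64ExpandPseudoInsts.cpp) into a short equivalent sequence of
// MOVZ/MOVN plus MOVK instructions, or a single ORR with a logical immediate
// when the value permits it.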
1669 1670def MOVi32imm 1671 : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), 1672 [(set GPR32:$dst, imm:$src)]>, 1673 Sched<[WriteImm]>; 1674def MOVi64imm 1675 : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), 1676 [(set GPR64:$dst, imm:$src)]>, 1677 Sched<[WriteImm]>; 1678} // isReMaterializable, isCodeGenOnly 1679 1680// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the 1681// eventual expansion code fewer bits to worry about getting right. Marshalling 1682// the types is a little tricky though: 1683def i64imm_32bit : ImmLeaf<i64, [{ 1684 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 1685}]>; 1686 1687def s64imm_32bit : ImmLeaf<i64, [{ 1688 int64_t Imm64 = static_cast<int64_t>(Imm); 1689 return Imm64 >= std::numeric_limits<int32_t>::min() && 1690 Imm64 <= std::numeric_limits<int32_t>::max(); 1691}]>; 1692 1693def trunc_imm : SDNodeXForm<imm, [{ 1694 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32); 1695}]>; 1696 1697def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">, 1698 GISDNodeXFormEquiv<trunc_imm>; 1699 1700let Predicates = [OptimizedGISelOrOtherSelector] in { 1701// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless 1702// copies. 1703def : Pat<(i64 i64imm_32bit:$src), 1704 (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; 1705} 1706 1707// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model). 1708def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 1709return CurDAG->getTargetConstant( 1710 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 1711}]>; 1712 1713def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 1714return CurDAG->getTargetConstant( 1715 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 1716}]>; 1717 1718 1719def : Pat<(f32 fpimm:$in), 1720 (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>; 1721def : Pat<(f64 fpimm:$in), 1722 (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>; 1723 1724 1725// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK 1726// sequences. 1727def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, 1728 tglobaladdr:$g1, tglobaladdr:$g0), 1729 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0), 1730 tglobaladdr:$g1, 16), 1731 tglobaladdr:$g2, 32), 1732 tglobaladdr:$g3, 48)>; 1733 1734def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, 1735 tblockaddress:$g1, tblockaddress:$g0), 1736 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0), 1737 tblockaddress:$g1, 16), 1738 tblockaddress:$g2, 32), 1739 tblockaddress:$g3, 48)>; 1740 1741def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, 1742 tconstpool:$g1, tconstpool:$g0), 1743 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0), 1744 tconstpool:$g1, 16), 1745 tconstpool:$g2, 32), 1746 tconstpool:$g3, 48)>; 1747 1748def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, 1749 tjumptable:$g1, tjumptable:$g0), 1750 (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0), 1751 tjumptable:$g1, 16), 1752 tjumptable:$g2, 32), 1753 tjumptable:$g3, 48)>; 1754 1755 1756//===----------------------------------------------------------------------===// 1757// Arithmetic instructions. 1758//===----------------------------------------------------------------------===// 1759 1760// Add/subtract with carry. 
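// ADC/SBC consume the carry flag set by a preceding flag-setting operation;
// the "ngc"/"ngcs" aliases below are simply SBC/SBCS with WZR/XZR as the
// first source operand (negate-with-carry).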
1761defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; 1762defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; 1763 1764def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; 1765def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; 1766def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; 1767def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; 1768 1769// Add/subtract 1770defm ADD : AddSub<0, "add", "sub", add>; 1771defm SUB : AddSub<1, "sub", "add">; 1772 1773def : InstAlias<"mov $dst, $src", 1774 (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; 1775def : InstAlias<"mov $dst, $src", 1776 (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; 1777def : InstAlias<"mov $dst, $src", 1778 (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; 1779def : InstAlias<"mov $dst, $src", 1780 (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; 1781 1782defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">; 1783defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">; 1784 1785def copyFromSP: PatLeaf<(i64 GPR64:$src), [{ 1786 return N->getOpcode() == ISD::CopyFromReg && 1787 cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP; 1788}]>; 1789 1790// Use SUBS instead of SUB to enable CSE between SUBS and SUB. 1791def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), 1792 (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; 1793def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), 1794 (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; 1795def : Pat<(sub GPR32:$Rn, GPR32:$Rm), 1796 (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; 1797def : Pat<(sub GPR64:$Rn, GPR64:$Rm), 1798 (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; 1799def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), 1800 (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; 1801def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), 1802 (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; 1803let AddedComplexity = 1 in { 1804def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3), 1805 (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>; 1806def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3), 1807 (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>; 1808def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)), 1809 (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>; 1810} 1811 1812// Because of the immediate format for add/sub-imm instructions, the 1813// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1814// These patterns capture that transformation. 1815let AddedComplexity = 1 in { 1816def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1817 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1818def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1819 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1820def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1821 (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1822def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1823 (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1824} 1825 1826// Because of the immediate format for add/sub-imm instructions, the 1827// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 1828// These patterns capture that transformation. 
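// For example, (AArch64add_flag x, -5) below becomes SUBS x, #5 and
// (AArch64sub_flag x, -5) becomes ADDS x, #5, mirroring the plain add/sub
// patterns above.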
1829let AddedComplexity = 1 in { 1830def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1831 (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1832def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1833 (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1834def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), 1835 (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; 1836def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), 1837 (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; 1838} 1839 1840def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1841def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1842def : InstAlias<"neg $dst, $src$shift", 1843 (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1844def : InstAlias<"neg $dst, $src$shift", 1845 (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1846 1847def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; 1848def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; 1849def : InstAlias<"negs $dst, $src$shift", 1850 (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; 1851def : InstAlias<"negs $dst, $src$shift", 1852 (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; 1853 1854 1855// Unsigned/Signed divide 1856defm UDIV : Div<0, "udiv", udiv>; 1857defm SDIV : Div<1, "sdiv", sdiv>; 1858 1859def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; 1860def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; 1861def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; 1862def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; 1863 1864// Variable shift 1865defm ASRV : Shift<0b10, "asr", sra>; 1866defm LSLV : Shift<0b00, "lsl", shl>; 1867defm LSRV : Shift<0b01, "lsr", srl>; 1868defm RORV : Shift<0b11, "ror", rotr>; 1869 1870def : ShiftAlias<"asrv", ASRVWr, GPR32>; 1871def : ShiftAlias<"asrv", ASRVXr, GPR64>; 1872def : ShiftAlias<"lslv", LSLVWr, GPR32>; 1873def : ShiftAlias<"lslv", LSLVXr, GPR64>; 1874def : ShiftAlias<"lsrv", LSRVWr, GPR32>; 1875def : ShiftAlias<"lsrv", LSRVXr, GPR64>; 1876def : ShiftAlias<"rorv", RORVWr, GPR32>; 1877def : ShiftAlias<"rorv", RORVXr, GPR64>; 1878 1879// Multiply-add 1880let AddedComplexity = 5 in { 1881defm MADD : MulAccum<0, "madd">; 1882defm MSUB : MulAccum<1, "msub">; 1883 1884def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), 1885 (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1886def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), 1887 (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1888 1889def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), 1890 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1891def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), 1892 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1893def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)), 1894 (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; 1895def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)), 1896 (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; 1897} // AddedComplexity = 5 1898 1899let AddedComplexity = 5 in { 1900def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; 1901def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; 1902def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; 1903def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; 1904 1905def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))), 1906 (SMADDLrrr 
(EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1907def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))), 1908 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1909def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), 1910 (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1911def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))), 1912 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1913def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))), 1914 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1915def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), 1916 (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1917 1918def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), 1919 (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1920def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), 1921 (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; 1922 1923def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))), 1924 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1925def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))), 1926 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1927def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))), 1928 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1929 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1930 1931def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 1932 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1933def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 1934 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1935def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))), 1936 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1937 (MOVi32imm (trunc_imm imm:$C)), XZR)>; 1938 1939def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)), 1940 (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1941def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)), 1942 (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1943def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)), 1944 GPR64:$Ra)), 1945 (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1946 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1947 1948def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))), 1949 (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1950def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))), 1951 (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1952def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32), 1953 (s64imm_32bit:$C)))), 1954 (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)), 1955 (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>; 1956 1957def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)), 1958 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1959def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))), 1960 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1961 1962def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)), 1963 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1964def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)), 1965 (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1966 1967def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 1968 
(SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1969def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 1970 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1971 1972def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))), 1973 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1974def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))), 1975 (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1976 1977def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)), 1978 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1979def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))), 1980 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1981 1982def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)), 1983 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1984def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)), 1985 (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1986 1987def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))), 1988 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>; 1989def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 1990 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>; 1991 1992def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))), 1993 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>; 1994def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))), 1995 (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>; 1996} // AddedComplexity = 5 1997 1998def : MulAccumWAlias<"mul", MADDWrrr>; 1999def : MulAccumXAlias<"mul", MADDXrrr>; 2000def : MulAccumWAlias<"mneg", MSUBWrrr>; 2001def : MulAccumXAlias<"mneg", MSUBXrrr>; 2002def : WideMulAccumAlias<"smull", SMADDLrrr>; 2003def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; 2004def : WideMulAccumAlias<"umull", UMADDLrrr>; 2005def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; 2006 2007// Multiply-high 2008def SMULHrr : MulHi<0b010, "smulh", mulhs>; 2009def UMULHrr : MulHi<0b110, "umulh", mulhu>; 2010 2011// CRC32 2012def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; 2013def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; 2014def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; 2015def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; 2016 2017def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; 2018def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; 2019def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; 2020def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; 2021 2022// v8.1 atomic CAS 2023defm CAS : CompareAndSwap<0, 0, "">; 2024defm CASA : CompareAndSwap<1, 0, "a">; 2025defm CASL : CompareAndSwap<0, 1, "l">; 2026defm CASAL : CompareAndSwap<1, 1, "al">; 2027 2028// v8.1 atomic CASP 2029defm CASP : CompareAndSwapPair<0, 0, "">; 2030defm CASPA : CompareAndSwapPair<1, 0, "a">; 2031defm CASPL : CompareAndSwapPair<0, 1, "l">; 2032defm CASPAL : CompareAndSwapPair<1, 1, "al">; 2033 2034// v8.1 atomic SWP 2035defm SWP : Swap<0, 0, "">; 2036defm SWPA : Swap<1, 0, "a">; 2037defm SWPL : Swap<0, 1, "l">; 2038defm SWPAL : Swap<1, 1, "al">; 2039 2040// v8.1 atomic LD<OP>(register). 
Performs load and then ST<OP>(register) 2041defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; 2042defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; 2043defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; 2044defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; 2045 2046defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; 2047defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; 2048defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; 2049defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; 2050 2051defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; 2052defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; 2053defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; 2054defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; 2055 2056defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; 2057defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; 2058defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; 2059defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; 2060 2061defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; 2062defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; 2063defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; 2064defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; 2065 2066defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; 2067defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; 2068defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; 2069defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; 2070 2071defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; 2072defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; 2073defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; 2074defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; 2075 2076defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; 2077defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; 2078defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">; 2079defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; 2080 2081// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR" 2082defm : STOPregister<"stadd","LDADD">; // STADDx 2083defm : STOPregister<"stclr","LDCLR">; // STCLRx 2084defm : STOPregister<"steor","LDEOR">; // STEORx 2085defm : STOPregister<"stset","LDSET">; // STSETx 2086defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx 2087defm : STOPregister<"stsmin","LDSMIN">;// STSMINx 2088defm : STOPregister<"stumax","LDUMAX">;// STUMAXx 2089defm : STOPregister<"stumin","LDUMIN">;// STUMINx 2090 2091// v8.5 Memory Tagging Extension 2092let Predicates = [HasMTE] in { 2093 2094def IRG : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg", 2095 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>; 2096 2097def GMI : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi", 2098 int_aarch64_gmi, GPR64sp>, Sched<[]> { 2099 let isNotDuplicable = 1; 2100} 2101def ADDG : AddSubG<0, "addg", null_frag>; 2102def SUBG : AddSubG<1, "subg", null_frag>; 2103 2104def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>; 2105 2106def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>; 2107def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{ 2108 let Defs = [NZCV]; 2109} 2110 2111def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>; 2112 2113def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">; 2114 2115def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4), 2116 (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>; 2117def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2118 (LDG GPR64:$Rt, 
GPR64sp:$Rn, simm9s16:$offset)>; 2119 2120def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>; 2121 2122def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]", 2123 (outs GPR64:$Rt), (ins GPR64sp:$Rn)>; 2124def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]", 2125 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>; 2126def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]", 2127 (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> { 2128 let Inst{23} = 0; 2129} 2130 2131defm STG : MemTagStore<0b00, "stg">; 2132defm STZG : MemTagStore<0b01, "stzg">; 2133defm ST2G : MemTagStore<0b10, "st2g">; 2134defm STZ2G : MemTagStore<0b11, "stz2g">; 2135 2136def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2137 (STGOffset $Rn, $Rm, $imm)>; 2138def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2139 (STZGOffset $Rn, $Rm, $imm)>; 2140def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2141 (ST2GOffset $Rn, $Rm, $imm)>; 2142def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), 2143 (STZ2GOffset $Rn, $Rm, $imm)>; 2144 2145defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; 2146def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; 2147def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; 2148 2149def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), 2150 (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; 2151 2152def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), 2153 (STGPi $Rt, $Rt2, $Rn, $imm)>; 2154 2155def IRGstack 2156 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, 2157 Sched<[]>; 2158def TAGPstack 2159 : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, 2160 Sched<[]>; 2161 2162// Explicit SP in the first operand prevents ShrinkWrap optimization 2163// from leaving this instruction out of the stack frame. When IRGstack 2164// is transformed into IRG, this operand is replaced with the actual 2165// register / expression for the tagged base pointer of the current function. 2166def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; 2167 2168// Large STG to be expanded into a loop. $sz is the size, $Rn is start address. 2169// $Rn_wback is one past the end of the range. $Rm is the loop counter. 2170let isCodeGenOnly=1, mayStore=1 in { 2171def STGloop_wback 2172 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2173 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2174 Sched<[WriteAdr, WriteST]>; 2175 2176def STZGloop_wback 2177 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), 2178 [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, 2179 Sched<[WriteAdr, WriteST]>; 2180 2181// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn. 2182// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back). 
2183def STGloop 2184 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2185 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2186 Sched<[WriteAdr, WriteST]>; 2187 2188def STZGloop 2189 : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn), 2190 [], "@earlyclobber $Rn2,@earlyclobber $Rm" >, 2191 Sched<[WriteAdr, WriteST]>; 2192} 2193 2194} // Predicates = [HasMTE] 2195 2196//===----------------------------------------------------------------------===// 2197// Logical instructions. 2198//===----------------------------------------------------------------------===// 2199 2200// (immediate) 2201defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; 2202defm AND : LogicalImm<0b00, "and", and, "bic">; 2203defm EOR : LogicalImm<0b10, "eor", xor, "eon">; 2204defm ORR : LogicalImm<0b01, "orr", or, "orn">; 2205 2206// FIXME: these aliases *are* canonical sometimes (when movz can't be 2207// used). Actually, it seems to be working right now, but putting logical_immXX 2208// here is a bit dodgy on the AsmParser side too. 2209def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, 2210 logical_imm32:$imm), 0>; 2211def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, 2212 logical_imm64:$imm), 0>; 2213 2214 2215// (register) 2216defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; 2217defm BICS : LogicalRegS<0b11, 1, "bics", 2218 BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; 2219defm AND : LogicalReg<0b00, 0, "and", and>; 2220defm BIC : LogicalReg<0b00, 1, "bic", 2221 BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>; 2222defm EON : LogicalReg<0b10, 1, "eon", 2223 BinOpFrag<(not (xor node:$LHS, node:$RHS))>>; 2224defm EOR : LogicalReg<0b10, 0, "eor", xor>; 2225defm ORN : LogicalReg<0b01, 1, "orn", 2226 BinOpFrag<(or node:$LHS, (not node:$RHS))>>; 2227defm ORR : LogicalReg<0b01, 0, "orr", or>; 2228 2229def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; 2230def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; 2231 2232def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; 2233def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; 2234 2235def : InstAlias<"mvn $Wd, $Wm$sh", 2236 (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; 2237def : InstAlias<"mvn $Xd, $Xm$sh", 2238 (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; 2239 2240def : InstAlias<"tst $src1, $src2", 2241 (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; 2242def : InstAlias<"tst $src1, $src2", 2243 (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; 2244 2245def : InstAlias<"tst $src1, $src2", 2246 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; 2247def : InstAlias<"tst $src1, $src2", 2248 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; 2249 2250def : InstAlias<"tst $src1, $src2$sh", 2251 (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; 2252def : InstAlias<"tst $src1, $src2$sh", 2253 (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; 2254 2255 2256def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; 2257def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; 2258 2259 2260//===----------------------------------------------------------------------===// 2261// One operand data processing instructions. 
//===----------------------------------------------------------------------===//

defm CLS : OneOperandData<0b000101, "cls">;
defm CLZ : OneOperandData<0b000100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000000, "rbit", bitreverse>;

def REV16Wr : OneWRegData<0b000001, "rev16",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;

def : Pat<(cttz GPR32:$Rn),
          (CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
          (CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
                    (i32 1))),
          (CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
                    (i64 1))),
          (CLSXr GPR64:$Rn)>;
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;

// Unlike the other one operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
def REVWr   : OneWRegData<0b000010, "rev", bswap>;
def REVXr   : OneXRegData<0b000011, "rev", bswap>;
def REV32Xr : OneXRegData<0b000010, "rev32",
                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;

def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;

// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;

// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;

def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
          (REV16Xr GPR64:$Rn)>;

//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;

def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;

//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
2325//===----------------------------------------------------------------------===// 2326let hasSideEffects = 0 in { 2327defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">; 2328defm SBFM : BitfieldImm<0b00, "sbfm">; 2329defm UBFM : BitfieldImm<0b10, "ubfm">; 2330} 2331 2332def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2333 uint64_t enc = (32 - N->getZExtValue()) & 0x1f; 2334 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2335}]>; 2336 2337def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2338 uint64_t enc = 31 - N->getZExtValue(); 2339 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2340}]>; 2341 2342// min(7, 31 - shift_amt) 2343def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2344 uint64_t enc = 31 - N->getZExtValue(); 2345 enc = enc > 7 ? 7 : enc; 2346 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2347}]>; 2348 2349// min(15, 31 - shift_amt) 2350def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2351 uint64_t enc = 31 - N->getZExtValue(); 2352 enc = enc > 15 ? 15 : enc; 2353 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2354}]>; 2355 2356def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{ 2357 uint64_t enc = (64 - N->getZExtValue()) & 0x3f; 2358 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2359}]>; 2360 2361def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{ 2362 uint64_t enc = 63 - N->getZExtValue(); 2363 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2364}]>; 2365 2366// min(7, 63 - shift_amt) 2367def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{ 2368 uint64_t enc = 63 - N->getZExtValue(); 2369 enc = enc > 7 ? 7 : enc; 2370 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2371}]>; 2372 2373// min(15, 63 - shift_amt) 2374def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{ 2375 uint64_t enc = 63 - N->getZExtValue(); 2376 enc = enc > 15 ? 15 : enc; 2377 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2378}]>; 2379 2380// min(31, 63 - shift_amt) 2381def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{ 2382 uint64_t enc = 63 - N->getZExtValue(); 2383 enc = enc > 31 ? 
31 : enc; 2384 return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64); 2385}]>; 2386 2387def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), 2388 (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 2389 (i64 (i32shift_b imm0_31:$imm)))>; 2390def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), 2391 (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 2392 (i64 (i64shift_b imm0_63:$imm)))>; 2393 2394let AddedComplexity = 10 in { 2395def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), 2396 (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2397def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), 2398 (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2399} 2400 2401def : InstAlias<"asr $dst, $src, $shift", 2402 (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2403def : InstAlias<"asr $dst, $src, $shift", 2404 (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2405def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2406def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2407def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2408def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2409def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2410 2411def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), 2412 (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; 2413def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), 2414 (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; 2415 2416def : InstAlias<"lsr $dst, $src, $shift", 2417 (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; 2418def : InstAlias<"lsr $dst, $src, $shift", 2419 (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; 2420def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; 2421def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; 2422def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; 2423def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; 2424def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; 2425 2426//===----------------------------------------------------------------------===// 2427// Conditional comparison instructions. 2428//===----------------------------------------------------------------------===// 2429defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>; 2430defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>; 2431 2432//===----------------------------------------------------------------------===// 2433// Conditional select instructions. 
2434//===----------------------------------------------------------------------===// 2435defm CSEL : CondSelect<0, 0b00, "csel">; 2436 2437def inc : PatFrag<(ops node:$in), (add node:$in, 1)>; 2438defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>; 2439defm CSINV : CondSelectOp<1, 0b00, "csinv", not>; 2440defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>; 2441 2442def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2443 (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2444def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2445 (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2446def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2447 (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2448def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2449 (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2450def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV), 2451 (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>; 2452def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV), 2453 (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>; 2454 2455def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV), 2456 (CSINCWr WZR, WZR, (i32 imm:$cc))>; 2457def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV), 2458 (CSINCXr XZR, XZR, (i32 imm:$cc))>; 2459def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV), 2460 (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2461def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV), 2462 (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2463def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV), 2464 (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2465def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV), 2466 (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2467def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV), 2468 (CSINVWr WZR, WZR, (i32 imm:$cc))>; 2469def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV), 2470 (CSINVXr XZR, XZR, (i32 imm:$cc))>; 2471def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV), 2472 (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>; 2473def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV), 2474 (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>; 2475def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV), 2476 (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2477def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV), 2478 (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>; 2479 2480def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2481 (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>; 2482def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2483 (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>; 2484 2485def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2486 (CSINCWr GPR32:$val, WZR, imm:$cc)>; 2487def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)), 2488 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2489def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))), 2490 (CSINCXr GPR64:$val, XZR, imm:$cc)>; 2491 2492def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)), 2493 (CSELWr WZR, GPR32:$val, imm:$cc)>; 2494def : Pat<(and (topbitsallzero64:$val), (AArch64csel 
                                  (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;
def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
          (CSELXr XZR, GPR64:$val, imm:$cc)>;

// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
def : InstAlias<"cset $dst, $cc",
                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"csetm $dst, $cc",
                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;

def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;

//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel,
               [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0

def ADRP : ADRI<1, "adrp", adrplabel,
               [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1

// page address of a constant pool entry, block address
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//

let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1

// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;

let isCall = 1, Defs = [LR], Uses = [SP] in {
  def BLR : BranchReg<0b0001, "blr", []>;
  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
                Sched<[WriteBrReg]>,
                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
                     Sched<[WriteBrReg]>;
  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
                Sched<[WriteBrReg]>;
} // isCall

def : Pat<(AArch64call GPR64:$Rn),
          (BLR GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
          (BLRNoIP GPR64noip:$Rn)>,
      Requires<[SLSBLRMitigation]>;

def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

def : Pat<(AArch64call_bti GPR64:$Rn),
          (BLR_BTI GPR64:$Rn)>,
      Requires<[NoSLSBLRMitigation]>;

let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch

// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
                   Sched<[WriteBrReg]> {
  let isTerminator = 1;
  let isBarrier = 1;
  let isReturn = 1;
}

// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
  let AsmString = ".tlsdesccall $sym";
}

// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}

// Pseudo instruction to tell the streamer to emit a 'G' character into the
// augmentation string.
def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}

// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16,
    isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
    : Pseudo<(outs), (ins i64imm:$sym),
             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
          (TLSDESC_CALLSEQ texternalsym:$sym)>;

//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;

// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;

//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
2642//===----------------------------------------------------------------------===// 2643defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; 2644defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; 2645 2646//===----------------------------------------------------------------------===// 2647// Test-bit-and-branch instructions. 2648//===----------------------------------------------------------------------===// 2649defm TBZ : TestBranch<0, "tbz", AArch64tbz>; 2650defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; 2651 2652//===----------------------------------------------------------------------===// 2653// Unconditional branch (immediate) instructions. 2654//===----------------------------------------------------------------------===// 2655let isBranch = 1, isTerminator = 1, isBarrier = 1 in { 2656def B : BranchImm<0, "b", [(br bb:$addr)]>; 2657} // isBranch, isTerminator, isBarrier 2658 2659let isCall = 1, Defs = [LR], Uses = [SP] in { 2660def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; 2661} // isCall 2662def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; 2663 2664//===----------------------------------------------------------------------===// 2665// Exception generation instructions. 2666//===----------------------------------------------------------------------===// 2667let isTrap = 1 in { 2668def BRK : ExceptionGeneration<0b001, 0b00, "brk", 2669 [(int_aarch64_break timm32_0_65535:$imm)]>; 2670} 2671def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; 2672def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; 2673def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>; 2674def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; 2675def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; 2676def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>; 2677def SVC : ExceptionGeneration<0b000, 0b01, "svc">; 2678 2679// DCPSn defaults to an immediate operand of zero if unspecified. 2680def : InstAlias<"dcps1", (DCPS1 0)>; 2681def : InstAlias<"dcps2", (DCPS2 0)>; 2682def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>; 2683 2684def UDF : UDFType<0, "udf">; 2685 2686//===----------------------------------------------------------------------===// 2687// Load instructions. 
2688//===----------------------------------------------------------------------===// 2689 2690// Pair (indexed, offset) 2691defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">; 2692defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">; 2693defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">; 2694defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">; 2695defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">; 2696 2697defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2698 2699// Pair (pre-indexed) 2700def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">; 2701def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">; 2702def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; 2703def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; 2704def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; 2705 2706def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2707 2708// Pair (post-indexed) 2709def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">; 2710def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">; 2711def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">; 2712def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">; 2713def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">; 2714 2715def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">; 2716 2717 2718// Pair (no allocate) 2719defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">; 2720defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">; 2721defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">; 2722defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">; 2723defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">; 2724 2725def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)), 2726 (LDPXi GPR64sp:$Rn, simm7s8:$offset)>; 2727 2728def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)), 2729 (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>; 2730//--- 2731// (register offset) 2732//--- 2733 2734// Integer 2735defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; 2736defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; 2737defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; 2738defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; 2739 2740// Floating-point 2741defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>; 2742defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>; 2743defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>; 2744defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>; 2745defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>; 2746 2747// Load sign-extended half-word 2748defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; 2749defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; 2750 2751// Load sign-extended byte 2752defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; 2753defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; 2754 2755// Load sign-extended word 2756defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; 2757 2758// Pre-fetch. 2759defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; 2760 2761// For regular load, we do not have any alignment requirement. 2762// Thus, it is safe to directly map the vector loads with interesting 2763// addressing modes. 
2764// FIXME: We could do the same for bitconvert to floating point vectors. 2765multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop, 2766 ValueType ScalTy, ValueType VecTy, 2767 Instruction LOADW, Instruction LOADX, 2768 SubRegIndex sub> { 2769 def : Pat<(VecTy (scalar_to_vector (ScalTy 2770 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), 2771 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), 2772 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), 2773 sub)>; 2774 2775 def : Pat<(VecTy (scalar_to_vector (ScalTy 2776 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), 2777 (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), 2778 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), 2779 sub)>; 2780} 2781 2782let AddedComplexity = 10 in { 2783defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>; 2784defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>; 2785 2786defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>; 2787defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>; 2788 2789defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>; 2790defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>; 2791 2792defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>; 2793defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>; 2794 2795defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>; 2796defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>; 2797 2798defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>; 2799 2800defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>; 2801 2802 2803def : Pat <(v1i64 (scalar_to_vector (i64 2804 (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, 2805 ro_Wextend64:$extend))))), 2806 (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; 2807 2808def : Pat <(v1i64 (scalar_to_vector (i64 2809 (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, 2810 ro_Xextend64:$extend))))), 2811 (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; 2812} 2813 2814// Match all load 64 bits width whose type is compatible with FPR64 2815multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy, 2816 Instruction LOADW, Instruction LOADX> { 2817 2818 def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), 2819 (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 2820 2821 def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), 2822 (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 2823} 2824 2825let AddedComplexity = 10 in { 2826let Predicates = [IsLE] in { 2827 // We must do vector loads with LD1 in big-endian. 2828 defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>; 2829 defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>; 2830 defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>; 2831 defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>; 2832 defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>; 2833 defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>; 2834} 2835 2836defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>; 2837defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>; 2838 2839// Match all load 128 bits width whose type is compatible with FPR128 2840let Predicates = [IsLE] in { 2841 // We must do vector loads with LD1 in big-endian. 
  defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}


// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
[(set (f128 FPR128Op:$Rt), 2929 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>; 2930 2931// bf16 load pattern 2932def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 2933 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 2934 2935// For regular load, we do not have any alignment requirement. 2936// Thus, it is safe to directly map the vector loads with interesting 2937// addressing modes. 2938// FIXME: We could do the same for bitconvert to floating point vectors. 2939def : Pat <(v8i8 (scalar_to_vector (i32 2940 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 2941 (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 2942 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 2943def : Pat <(v16i8 (scalar_to_vector (i32 2944 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 2945 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 2946 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; 2947def : Pat <(v4i16 (scalar_to_vector (i32 2948 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 2949 (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 2950 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 2951def : Pat <(v8i16 (scalar_to_vector (i32 2952 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 2953 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 2954 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; 2955def : Pat <(v2i32 (scalar_to_vector (i32 2956 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 2957 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 2958 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 2959def : Pat <(v4i32 (scalar_to_vector (i32 2960 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 2961 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 2962 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; 2963def : Pat <(v1i64 (scalar_to_vector (i64 2964 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 2965 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2966def : Pat <(v2i64 (scalar_to_vector (i64 2967 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), 2968 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 2969 (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; 2970 2971// Match all load 64 bits width whose type is compatible with FPR64 2972let Predicates = [IsLE] in { 2973 // We must use LD1 to perform vector loads in big-endian. 2974 def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2975 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2976 def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2977 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2978 def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2979 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2980 def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2981 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2982 def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2983 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2984 def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2985 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2986} 2987def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2988 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2989def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), 2990 (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; 2991 2992// Match all load 128 bits width whose type is compatible with FPR128 2993let Predicates = [IsLE] in { 2994 // We must use LD1 to perform vector loads in big-endian. 
2995 def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 2996 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 2997 def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 2998 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 2999 def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3000 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3001 def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3002 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3003 def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3004 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3005 def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3006 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3007 def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3008 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3009 def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3010 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3011} 3012def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), 3013 (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; 3014 3015defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", 3016 [(set GPR32:$Rt, 3017 (zextloadi16 (am_indexed16 GPR64sp:$Rn, 3018 uimm12s2:$offset)))]>; 3019defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", 3020 [(set GPR32:$Rt, 3021 (zextloadi8 (am_indexed8 GPR64sp:$Rn, 3022 uimm12s1:$offset)))]>; 3023// zextload -> i64 3024def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3025 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3026def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3027 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 3028 3029// zextloadi1 -> zextloadi8 3030def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3031 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 3032def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3033 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3034 3035// extload -> zextload 3036def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3037 (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; 3038def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3039 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 3040def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3041 (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; 3042def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 3043 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 3044def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), 3045 (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; 3046def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3047 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3048def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), 3049 (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; 3050 3051// load sign-extended half-word 3052defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", 3053 [(set GPR32:$Rt, 3054 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 3055 uimm12s2:$offset)))]>; 3056defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", 3057 [(set GPR64:$Rt, 3058 (sextloadi16 (am_indexed16 GPR64sp:$Rn, 3059 uimm12s2:$offset)))]>; 3060 3061// load sign-extended byte 
3062defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", 3063 [(set GPR32:$Rt, 3064 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 3065 uimm12s1:$offset)))]>; 3066defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", 3067 [(set GPR64:$Rt, 3068 (sextloadi8 (am_indexed8 GPR64sp:$Rn, 3069 uimm12s1:$offset)))]>; 3070 3071// load sign-extended word 3072defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", 3073 [(set GPR64:$Rt, 3074 (sextloadi32 (am_indexed32 GPR64sp:$Rn, 3075 uimm12s4:$offset)))]>; 3076 3077// load zero-extended word 3078def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), 3079 (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; 3080 3081// Pre-fetch. 3082def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm", 3083 [(AArch64Prefetch timm:$Rt, 3084 (am_indexed64 GPR64sp:$Rn, 3085 uimm12s8:$offset))]>; 3086 3087def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>; 3088 3089//--- 3090// (literal) 3091 3092def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{ 3093 if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) { 3094 const DataLayout &DL = MF->getDataLayout(); 3095 Align Align = G->getGlobal()->getPointerAlignment(DL); 3096 return Align >= 4 && G->getOffset() % 4 == 0; 3097 } 3098 if (auto *C = dyn_cast<ConstantPoolSDNode>(N)) 3099 return C->getAlign() >= 4 && C->getOffset() % 4 == 0; 3100 return false; 3101}]>; 3102 3103def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr", 3104 [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; 3105def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr", 3106 [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>; 3107def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr", 3108 [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; 3109def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr", 3110 [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; 3111def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr", 3112 [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>; 3113 3114// load sign-extended word 3115def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw", 3116 [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>; 3117 3118let AddedComplexity = 20 in { 3119def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))), 3120 (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>; 3121} 3122 3123// prefetch 3124def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>; 3125// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>; 3126 3127//--- 3128// (unscaled immediate) 3129defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur", 3130 [(set GPR64z:$Rt, 3131 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 3132defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur", 3133 [(set GPR32z:$Rt, 3134 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 3135defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur", 3136 [(set FPR8Op:$Rt, 3137 (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 3138defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur", 3139 [(set (f16 FPR16Op:$Rt), 3140 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3141defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur", 3142 [(set (f32 FPR32Op:$Rt), 3143 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 3144defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur", 3145 [(set (f64 FPR64Op:$Rt), 3146 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>; 3147defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur", 3148 [(set (f128 FPR128Op:$Rt), 3149 (load (am_unscaled128 
GPR64sp:$Rn, simm9:$offset)))]>; 3150 3151defm LDURHH 3152 : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh", 3153 [(set GPR32:$Rt, 3154 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3155defm LDURBB 3156 : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb", 3157 [(set GPR32:$Rt, 3158 (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3159 3160// Match all load 64 bits width whose type is compatible with FPR64 3161let Predicates = [IsLE] in { 3162 def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3163 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3164 def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3165 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3166 def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3167 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3168 def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3169 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3170 def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3171 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3172} 3173def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3174 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3175def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))), 3176 (LDURDi GPR64sp:$Rn, simm9:$offset)>; 3177 3178// Match all load 128 bits width whose type is compatible with FPR128 3179let Predicates = [IsLE] in { 3180 def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3181 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3182 def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3183 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3184 def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3185 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3186 def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3187 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3188 def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3189 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3190 def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3191 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3192 def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))), 3193 (LDURQi GPR64sp:$Rn, simm9:$offset)>; 3194} 3195 3196// anyext -> zext 3197def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3198 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 3199def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3200 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 3201def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3202 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 3203def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))), 3204 (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3205def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3206 (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3207def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3208 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3209def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3210 (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; 3211// unscaled zext 3212def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), 3213 (LDURHHi GPR64sp:$Rn, simm9:$offset)>; 3214def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), 3215 (LDURBBi GPR64sp:$Rn, simm9:$offset)>; 3216def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, 
                                           simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;


//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16
(am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; 3293 3294// load sign-extended byte 3295defm LDURSBW 3296 : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", 3297 [(set GPR32:$Rt, 3298 (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 3299defm LDURSBX 3300 : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", 3301 [(set GPR64:$Rt, 3302 (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; 3303 3304// load sign-extended word 3305defm LDURSW 3306 : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", 3307 [(set GPR64:$Rt, 3308 (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; 3309 3310// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 3311def : InstAlias<"ldrb $Rt, [$Rn, $offset]", 3312 (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3313def : InstAlias<"ldrh $Rt, [$Rn, $offset]", 3314 (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3315def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 3316 (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3317def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", 3318 (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3319def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 3320 (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3321def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", 3322 (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3323def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", 3324 (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3325 3326// Pre-fetch. 3327defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", 3328 [(AArch64Prefetch timm:$Rt, 3329 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 3330 3331//--- 3332// (unscaled immediate, unprivileged) 3333defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; 3334defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; 3335 3336defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; 3337defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; 3338 3339// load sign-extended half-word 3340defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; 3341defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; 3342 3343// load sign-extended byte 3344defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; 3345defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; 3346 3347// load sign-extended word 3348defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; 3349 3350//--- 3351// (immediate pre-indexed) 3352def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">; 3353def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">; 3354def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">; 3355def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">; 3356def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">; 3357def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">; 3358def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">; 3359 3360// load sign-extended half-word 3361def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">; 3362def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">; 3363 3364// load sign-extended byte 3365def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">; 3366def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">; 3367 3368// load zero-extended byte 3369def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">; 3370def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">; 3371 3372// load sign-extended word 3373def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">; 3374 3375//--- 3376// (immediate 
post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;


//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;


// Floating-point
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                   ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                   (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                   ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>;
}

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all store 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all store 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
3518 defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>; 3519 defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>; 3520 defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>; 3521 defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>; 3522 defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>; 3523 defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>; 3524 defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>; 3525 defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>; 3526} 3527} // AddedComplexity = 10 3528 3529// Match stores from lane 0 to the appropriate subreg's store. 3530multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop, 3531 ValueType VecTy, ValueType STy, 3532 SubRegIndex SubRegIdx, 3533 Instruction STRW, Instruction STRX> { 3534 3535 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), 3536 (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), 3537 (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 3538 GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; 3539 3540 def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)), 3541 (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), 3542 (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 3543 GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; 3544} 3545 3546let AddedComplexity = 19 in { 3547 defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>; 3548 defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>; 3549 defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>; 3550 defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>; 3551 defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>; 3552 defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>; 3553} 3554 3555//--- 3556// (unsigned immediate) 3557defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str", 3558 [(store GPR64z:$Rt, 3559 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 3560defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str", 3561 [(store GPR32z:$Rt, 3562 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 3563defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str", 3564 [(store FPR8Op:$Rt, 3565 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; 3566defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str", 3567 [(store (f16 FPR16Op:$Rt), 3568 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; 3569defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str", 3570 [(store (f32 FPR32Op:$Rt), 3571 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; 3572defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str", 3573 [(store (f64 FPR64Op:$Rt), 3574 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; 3575defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>; 3576 3577defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh", 3578 [(truncstorei16 GPR32z:$Rt, 3579 (am_indexed16 GPR64sp:$Rn, 3580 uimm12s2:$offset))]>; 3581defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb", 3582 [(truncstorei8 GPR32z:$Rt, 3583 (am_indexed8 GPR64sp:$Rn, 3584 uimm12s1:$offset))]>; 3585 3586// bf16 store pattern 3587def : Pat<(store (bf16 FPR16Op:$Rt), 3588 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 3589 (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>; 3590 3591let AddedComplexity = 10 in { 3592 3593// Match all store 64 bits width whose type is compatible with FPR64 3594def : Pat<(store (v1i64 FPR64:$Rt), 3595 (am_indexed64 GPR64sp:$Rn, 
uimm12s8:$offset)), 3596 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3597def : Pat<(store (v1f64 FPR64:$Rt), 3598 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3599 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3600 3601let Predicates = [IsLE] in { 3602 // We must use ST1 to store vectors in big-endian. 3603 def : Pat<(store (v2f32 FPR64:$Rt), 3604 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3605 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3606 def : Pat<(store (v8i8 FPR64:$Rt), 3607 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3608 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3609 def : Pat<(store (v4i16 FPR64:$Rt), 3610 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3611 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3612 def : Pat<(store (v2i32 FPR64:$Rt), 3613 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3614 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3615 def : Pat<(store (v4f16 FPR64:$Rt), 3616 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3617 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3618 def : Pat<(store (v4bf16 FPR64:$Rt), 3619 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), 3620 (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; 3621} 3622 3623// Match all store 128 bits width whose type is compatible with FPR128 3624def : Pat<(store (f128 FPR128:$Rt), 3625 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3626 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3627 3628let Predicates = [IsLE] in { 3629 // We must use ST1 to store vectors in big-endian. 3630 def : Pat<(store (v4f32 FPR128:$Rt), 3631 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3632 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3633 def : Pat<(store (v2f64 FPR128:$Rt), 3634 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3635 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3636 def : Pat<(store (v16i8 FPR128:$Rt), 3637 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3638 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3639 def : Pat<(store (v8i16 FPR128:$Rt), 3640 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3641 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3642 def : Pat<(store (v4i32 FPR128:$Rt), 3643 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3644 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3645 def : Pat<(store (v2i64 FPR128:$Rt), 3646 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3647 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3648 def : Pat<(store (v8f16 FPR128:$Rt), 3649 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3650 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3651 def : Pat<(store (v8bf16 FPR128:$Rt), 3652 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), 3653 (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; 3654} 3655 3656// truncstore i64 3657def : Pat<(truncstorei32 GPR64:$Rt, 3658 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), 3659 (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; 3660def : Pat<(truncstorei16 GPR64:$Rt, 3661 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), 3662 (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; 3663def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), 3664 (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; 3665 3666} // AddedComplexity = 10 3667 3668// Match stores from lane 0 to the appropriate subreg's store. 
3669multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop, 3670 ValueType VTy, ValueType STy, 3671 SubRegIndex SubRegIdx, Operand IndexType, 3672 Instruction STR> { 3673 def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)), 3674 (UIAddrMode GPR64sp:$Rn, IndexType:$offset)), 3675 (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx), 3676 GPR64sp:$Rn, IndexType:$offset)>; 3677} 3678 3679let AddedComplexity = 19 in { 3680 defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>; 3681 defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>; 3682 defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>; 3683 defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>; 3684 defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>; 3685 defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>; 3686} 3687 3688//--- 3689// (unscaled immediate) 3690defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur", 3691 [(store GPR64z:$Rt, 3692 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 3693defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur", 3694 [(store GPR32z:$Rt, 3695 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 3696defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur", 3697 [(store FPR8Op:$Rt, 3698 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 3699defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur", 3700 [(store (f16 FPR16Op:$Rt), 3701 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 3702defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur", 3703 [(store (f32 FPR32Op:$Rt), 3704 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; 3705defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur", 3706 [(store (f64 FPR64Op:$Rt), 3707 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; 3708defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur", 3709 [(store (f128 FPR128Op:$Rt), 3710 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; 3711defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh", 3712 [(truncstorei16 GPR32z:$Rt, 3713 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; 3714defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb", 3715 [(truncstorei8 GPR32z:$Rt, 3716 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; 3717 3718// Armv8.4 Weaker Release Consistency enhancements 3719// LDAPR & STLR with Immediate Offset instructions 3720let Predicates = [HasRCPC_IMMO] in { 3721defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>; 3722defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>; 3723defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>; 3724defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>; 3725defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>; 3726defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>; 3727defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>; 3728defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>; 3729defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>; 3730defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>; 3731defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>; 3732defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>; 3733defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>; 3734} 3735 3736// Match all store 64 bits width whose type is compatible with FPR64 3737def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, 
                                 simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all store 128 bits width whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10

// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;

// Match stores from lane 0 to the appropriate subreg's store.
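// The unscaled (STUR*) variants below reuse VecStoreLane0Pat, just with an
// unscaled addressing mode and a signed 9-bit byte offset, so e.g. lane 0 of
// a v4f32 stored at [x0, #-4] becomes roughly "stur s0, [x0, #-4]".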
3812multiclass VecStoreULane0Pat<SDPatternOperator StoreOp, 3813 ValueType VTy, ValueType STy, 3814 SubRegIndex SubRegIdx, Instruction STR> { 3815 defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>; 3816} 3817 3818let AddedComplexity = 19 in { 3819 defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>; 3820 defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>; 3821 defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>; 3822 defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>; 3823 defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>; 3824 defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>; 3825} 3826 3827//--- 3828// STR mnemonics fall back to STUR for negative or unaligned offsets. 3829def : InstAlias<"str $Rt, [$Rn, $offset]", 3830 (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3831def : InstAlias<"str $Rt, [$Rn, $offset]", 3832 (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3833def : InstAlias<"str $Rt, [$Rn, $offset]", 3834 (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3835def : InstAlias<"str $Rt, [$Rn, $offset]", 3836 (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3837def : InstAlias<"str $Rt, [$Rn, $offset]", 3838 (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; 3839def : InstAlias<"str $Rt, [$Rn, $offset]", 3840 (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; 3841def : InstAlias<"str $Rt, [$Rn, $offset]", 3842 (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; 3843 3844def : InstAlias<"strb $Rt, [$Rn, $offset]", 3845 (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; 3846def : InstAlias<"strh $Rt, [$Rn, $offset]", 3847 (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; 3848 3849//--- 3850// (unscaled immediate, unprivileged) 3851defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; 3852defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; 3853 3854defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; 3855defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; 3856 3857//--- 3858// (immediate pre-indexed) 3859def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>; 3860def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>; 3861def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>; 3862def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>; 3863def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>; 3864def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>; 3865def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>; 3866 3867def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>; 3868def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>; 3869 3870// truncstore i64 3871def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3872 (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3873 simm9:$off)>; 3874def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3875 (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3876 simm9:$off)>; 3877def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3878 (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3879 simm9:$off)>; 3880 3881def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3882 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3883def : 
Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3884 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3885def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3886 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3887def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3888 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3889def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3890 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3891def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3892 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3893def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3894 (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3895 3896def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3897 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3898def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3899 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3900def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3901 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3902def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3903 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3904def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3905 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3906def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3907 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3908def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3909 (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3910 3911//--- 3912// (immediate post-indexed) 3913def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>; 3914def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>; 3915def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>; 3916def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>; 3917def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>; 3918def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>; 3919def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>; 3920 3921def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>; 3922def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>; 3923 3924// truncstore i64 3925def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3926 (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3927 simm9:$off)>; 3928def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3929 (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3930 simm9:$off)>; 3931def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), 3932 (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, 3933 simm9:$off)>; 3934 3935def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off), 3936 (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>; 3937 3938def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3939 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3940def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3941 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3942def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3943 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3944def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3945 (STRDpost FPR64:$Rt, 
GPR64sp:$addr, simm9:$off)>; 3946def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3947 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3948def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3949 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3950def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3951 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3952def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), 3953 (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; 3954 3955def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3956 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3957def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3958 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3959def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3960 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3961def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3962 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3963def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3964 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3965def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3966 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3967def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3968 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3969def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), 3970 (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; 3971 3972//===----------------------------------------------------------------------===// 3973// Load/store exclusive instructions. 3974//===----------------------------------------------------------------------===// 3975 3976def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; 3977def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; 3978def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; 3979def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; 3980 3981def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; 3982def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; 3983def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; 3984def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; 3985 3986def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; 3987def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; 3988def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; 3989def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; 3990 3991def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; 3992def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; 3993def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; 3994def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; 3995 3996/* 3997Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn 3998of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an 3999alias for the case of immediate #0. This is because new STLR versions (from 4000LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not 4001appropriate anymore (it parses and discards the optional zero). This is not the 4002case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed, 4003and the immediate values are not inside the [] brackets and thus not accepted 4004by GPR64sp0 parser. 
4005*/ 4006def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW GPR32: $Rt, GPR64sp:$Rn)>; 4007def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX GPR64: $Rt, GPR64sp:$Rn)>; 4008def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32: $Rt, GPR64sp:$Rn)>; 4009def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32: $Rt, GPR64sp:$Rn)>; 4010 4011def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; 4012def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; 4013def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; 4014def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; 4015 4016def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; 4017def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; 4018def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; 4019def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; 4020 4021def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; 4022def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; 4023 4024def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; 4025def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; 4026 4027def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; 4028def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; 4029 4030def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; 4031def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; 4032 4033let Predicates = [HasLOR] in { 4034 // v8.1a "Limited Order Region" extension load-acquire instructions 4035 def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">; 4036 def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">; 4037 def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">; 4038 def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">; 4039 4040 // v8.1a "Limited Order Region" extension store-release instructions 4041 def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">; 4042 def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">; 4043 def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">; 4044 def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">; 4045 4046 // Aliases for when offset=0 4047 def STLLRW0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRW GPR32: $Rt, GPR64sp:$Rn)>; 4048 def STLLRX0 : InstAlias<"stllr\t$Rt, [$Rn, #0]", (STLLRX GPR64: $Rt, GPR64sp:$Rn)>; 4049 def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32: $Rt, GPR64sp:$Rn)>; 4050 def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32: $Rt, GPR64sp:$Rn)>; 4051} 4052 4053//===----------------------------------------------------------------------===// 4054// Scaled floating point to integer conversion instructions. 
4055//===----------------------------------------------------------------------===// 4056 4057defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; 4058defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; 4059defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; 4060defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; 4061defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; 4062defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; 4063defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; 4064defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; 4065defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4066defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4067defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; 4068defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; 4069 4070// AArch64's FCVT instructions saturate when out of range. 4071multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> { 4072 let Predicates = [HasFullFP16] in { 4073 def : Pat<(i32 (to_int_sat f16:$Rn, i32)), 4074 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4075 def : Pat<(i64 (to_int_sat f16:$Rn, i64)), 4076 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4077 } 4078 def : Pat<(i32 (to_int_sat f32:$Rn, i32)), 4079 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4080 def : Pat<(i64 (to_int_sat f32:$Rn, i64)), 4081 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4082 def : Pat<(i32 (to_int_sat f64:$Rn, i32)), 4083 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4084 def : Pat<(i64 (to_int_sat f64:$Rn, i64)), 4085 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4086 4087 let Predicates = [HasFullFP16] in { 4088 def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), 4089 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4090 def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), 4091 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4092 } 4093 def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), 4094 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4095 def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), 4096 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4097 def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), 4098 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4099 def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), 4100 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4101} 4102 4103defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">; 4104defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">; 4105 4106multiclass FPToIntegerIntPats<Intrinsic round, string INST> { 4107 let Predicates = [HasFullFP16] in { 4108 def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>; 4109 def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>; 4110 } 4111 def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>; 4112 def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>; 4113 def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>; 4114 def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>; 4115 4116 let Predicates = [HasFullFP16] in { 4117 def : Pat<(i32 (round (fmul f16:$Rn, 
fixedpoint_f16_i32:$scale))), 4118 (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; 4119 def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), 4120 (!cast<Instruction>(INST # SXHri) $Rn, $scale)>; 4121 } 4122 def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), 4123 (!cast<Instruction>(INST # SWSri) $Rn, $scale)>; 4124 def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), 4125 (!cast<Instruction>(INST # SXSri) $Rn, $scale)>; 4126 def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))), 4127 (!cast<Instruction>(INST # SWDri) $Rn, $scale)>; 4128 def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))), 4129 (!cast<Instruction>(INST # SXDri) $Rn, $scale)>; 4130} 4131 4132defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">; 4133defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">; 4134 4135multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> { 4136 def : Pat<(i32 (to_int (round f32:$Rn))), 4137 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4138 def : Pat<(i64 (to_int (round f32:$Rn))), 4139 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4140 def : Pat<(i32 (to_int (round f64:$Rn))), 4141 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4142 def : Pat<(i64 (to_int (round f64:$Rn))), 4143 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4144 4145 // These instructions saturate like fp_to_[su]int_sat. 4146 let Predicates = [HasFullFP16] in { 4147 def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), 4148 (!cast<Instruction>(INST # UWHr) f16:$Rn)>; 4149 def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), 4150 (!cast<Instruction>(INST # UXHr) f16:$Rn)>; 4151 } 4152 def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), 4153 (!cast<Instruction>(INST # UWSr) f32:$Rn)>; 4154 def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), 4155 (!cast<Instruction>(INST # UXSr) f32:$Rn)>; 4156 def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)), 4157 (!cast<Instruction>(INST # UWDr) f64:$Rn)>; 4158 def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), 4159 (!cast<Instruction>(INST # UXDr) f64:$Rn)>; 4160} 4161 4162defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">; 4163defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">; 4164defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">; 4165defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">; 4166defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">; 4167defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">; 4168defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">; 4169defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">; 4170 4171 4172 4173let Predicates = [HasFullFP16] in { 4174 def : Pat<(i32 (any_lround f16:$Rn)), 4175 (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>; 4176 def : Pat<(i64 (any_lround f16:$Rn)), 4177 (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; 4178 def : Pat<(i64 (any_llround f16:$Rn)), 4179 (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>; 4180} 4181def : Pat<(i32 (any_lround f32:$Rn)), 4182 (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>; 4183def : Pat<(i32 (any_lround f64:$Rn)), 4184 (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>; 4185def : Pat<(i64 (any_lround f32:$Rn)), 4186 (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>; 4187def : Pat<(i64 (any_lround f64:$Rn)), 4188 (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>; 4189def : Pat<(i64 (any_llround f32:$Rn)), 4190 (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>; 4191def : Pat<(i64 (any_llround f64:$Rn)), 4192 
(!cast<Instruction>(FCVTASUXDr) f64:$Rn)>; 4193 4194//===----------------------------------------------------------------------===// 4195// Scaled integer to floating point conversion instructions. 4196//===----------------------------------------------------------------------===// 4197 4198defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; 4199defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; 4200 4201//===----------------------------------------------------------------------===// 4202// Unscaled integer to floating point conversion instruction. 4203//===----------------------------------------------------------------------===// 4204 4205defm FMOV : UnscaledConversion<"fmov">; 4206 4207// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable 4208let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { 4209def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, 4210 Sched<[WriteF]>, Requires<[HasFullFP16]>; 4211def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, 4212 Sched<[WriteF]>; 4213def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, 4214 Sched<[WriteF]>; 4215} 4216// Similarly add aliases 4217def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>, 4218 Requires<[HasFullFP16]>; 4219def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>; 4220def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>; 4221 4222// Pattern for FP16 immediates 4223let Predicates = [HasFullFP16] in { 4224 def : Pat<(f16 fpimm:$in), 4225 (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>; 4226} 4227 4228//===----------------------------------------------------------------------===// 4229// Floating point conversion instruction. 4230//===----------------------------------------------------------------------===// 4231 4232defm FCVT : FPConversion<"fcvt">; 4233 4234//===----------------------------------------------------------------------===// 4235// Floating point single operand instructions. 4236//===----------------------------------------------------------------------===// 4237 4238defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>; 4239defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">; 4240defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>; 4241defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>; 4242defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>; 4243defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>; 4244defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>; 4245defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>; 4246 4247defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>; 4248defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>; 4249 4250let SchedRW = [WriteFDiv] in { 4251defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>; 4252} 4253 4254let Predicates = [HasFRInt3264] in { 4255 defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>; 4256 defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>; 4257 defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>; 4258 defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>; 4259} // HasFRInt3264 4260 4261// Emitting strict_lrint as two instructions is valid as any exceptions that 4262// occur will happen in exactly one of the instructions (e.g. 
if the input is 4263// not an integer the inexact exception will happen in the FRINTX but not then 4264// in the FCVTZS as the output of FRINTX is an integer). 4265let Predicates = [HasFullFP16] in { 4266 def : Pat<(i32 (any_lrint f16:$Rn)), 4267 (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; 4268 def : Pat<(i64 (any_lrint f16:$Rn)), 4269 (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; 4270 def : Pat<(i64 (any_llrint f16:$Rn)), 4271 (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>; 4272} 4273def : Pat<(i32 (any_lrint f32:$Rn)), 4274 (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; 4275def : Pat<(i32 (any_lrint f64:$Rn)), 4276 (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; 4277def : Pat<(i64 (any_lrint f32:$Rn)), 4278 (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; 4279def : Pat<(i64 (any_lrint f64:$Rn)), 4280 (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; 4281def : Pat<(i64 (any_llrint f32:$Rn)), 4282 (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>; 4283def : Pat<(i64 (any_llrint f64:$Rn)), 4284 (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>; 4285 4286//===----------------------------------------------------------------------===// 4287// Floating point two operand instructions. 4288//===----------------------------------------------------------------------===// 4289 4290defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>; 4291let SchedRW = [WriteFDiv] in { 4292defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>; 4293} 4294defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>; 4295defm FMAX : TwoOperandFPData<0b0100, "fmax", any_fmaximum>; 4296defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>; 4297defm FMIN : TwoOperandFPData<0b0101, "fmin", any_fminimum>; 4298let SchedRW = [WriteFMul] in { 4299defm FMUL : TwoOperandFPData<0b0000, "fmul", any_fmul>; 4300defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>; 4301} 4302defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>; 4303 4304// Match reassociated forms of FNMUL. 4305def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)), 4306 (FNMULHrr FPR16:$a, FPR16:$b)>, 4307 Requires<[HasFullFP16]>; 4308def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)), 4309 (FNMULSrr FPR32:$a, FPR32:$b)>; 4310def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)), 4311 (FNMULDrr FPR64:$a, FPR64:$b)>; 4312 4313def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4314 (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; 4315def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4316 (FMINDrr FPR64:$Rn, FPR64:$Rm)>; 4317def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4318 (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; 4319def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 4320 (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; 4321 4322//===----------------------------------------------------------------------===// 4323// Floating point three operand instructions. 4324//===----------------------------------------------------------------------===// 4325 4326defm FMADD : ThreeOperandFPData<0, 0, "fmadd", any_fma>; 4327defm FMSUB : ThreeOperandFPData<0, 1, "fmsub", 4328 TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >; 4329defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd", 4330 TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >; 4331defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub", 4332 TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >; 4333 4334// The following def pats catch the case where the LHS of an FMA is negated. 
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
// the NEON variant.

// Here we handle first "a + (-b)*c", which maps onto FMSUB:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// Now it's time for "(-a) + (-b)*c", which maps onto FNMADD:

let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//

defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

let Predicates = [HasFullFP16] in
def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;

// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
                      [(set (f128 FPR128:$Rd),
                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                         (i32 imm:$cond), NZCV))]> {
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}

//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
4403//===----------------------------------------------------------------------===// 4404let isPseudo = 1 in { 4405 def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>; 4406 def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4407 def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4408 def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4409 def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4410 def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4411 def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4412 def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4413 def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>; 4414 def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4415 def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>; 4416 def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>; 4417 def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>; 4418 def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>; 4419 def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4420 def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>; 4421 def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>; 4422 def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>; 4423} 4424 4425// Pseudo instructions for Windows EH 4426//===----------------------------------------------------------------------===// 4427let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, 4428 isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in { 4429 def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; 4430 let usesCustomInserter = 1 in 4431 def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, 4432 Sched<[]>; 4433} 4434 4435// Pseudo instructions for homogeneous prolog/epilog 4436let isPseudo = 1 in { 4437 // Save CSRs in order, {FPOffset} 4438 def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4439 // Restore CSRs in order 4440 def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>; 4441} 4442 4443//===----------------------------------------------------------------------===// 4444// Floating point immediate move. 4445//===----------------------------------------------------------------------===// 4446 4447let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 4448defm FMOV : FPMoveImmediate<"fmov">; 4449} 4450 4451//===----------------------------------------------------------------------===// 4452// Advanced SIMD two vector instructions. 4453//===----------------------------------------------------------------------===// 4454 4455defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", 4456 AArch64uabd>; 4457// Match UABDL in log2-shuffle patterns. 
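// Both shapes below compute an unsigned absolute difference widened to the
// next element size: the straightforward abs(sub(zext, zext)) form and the
// xor/ashr idiom that abs is sometimes expanded into. Either way a single
// UABDL (e.g. roughly "uabdl v0.8h, v1.8b, v2.8b") covers it.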
4458def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)), 4459 (zext (v8i8 V64:$opB))))), 4460 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4461def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4462 (v8i16 (add (sub (zext (v8i8 V64:$opA)), 4463 (zext (v8i8 V64:$opB))), 4464 (AArch64vashr v8i16:$src, (i32 15))))), 4465 (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; 4466def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4467 (zext (extract_high_v16i8 (v16i8 V128:$opB)))))), 4468 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4469def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), 4470 (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))), 4471 (zext (extract_high_v16i8 (v16i8 V128:$opB)))), 4472 (AArch64vashr v8i16:$src, (i32 15))))), 4473 (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; 4474def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)), 4475 (zext (v4i16 V64:$opB))))), 4476 (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; 4477def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))), 4478 (zext (extract_high_v8i16 (v8i16 V128:$opB)))))), 4479 (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; 4480def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)), 4481 (zext (v2i32 V64:$opB))))), 4482 (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; 4483def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))), 4484 (zext (extract_high_v4i32 (v4i32 V128:$opB)))))), 4485 (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; 4486 4487defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>; 4488defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>; 4489defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>; 4490defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>; 4491defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>; 4492defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>; 4493defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>; 4494defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>; 4495defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>; 4496defm FABS : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>; 4497 4498def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))), 4499 (CMLTv8i8rz V64:$Rn)>; 4500def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))), 4501 (CMLTv4i16rz V64:$Rn)>; 4502def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))), 4503 (CMLTv2i32rz V64:$Rn)>; 4504def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))), 4505 (CMLTv16i8rz V128:$Rn)>; 4506def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))), 4507 (CMLTv8i16rz V128:$Rn)>; 4508def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))), 4509 (CMLTv4i32rz V128:$Rn)>; 4510def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))), 4511 (CMLTv2i64rz V128:$Rn)>; 4512 4513defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 4514defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 4515defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 4516defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 4517defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 4518defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>; 4519defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>; 4520defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">; 4521def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))), 4522 (FCVTLv4i16 V64:$Rn)>; 4523def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp 
(extract_subvector (v8i16 V128:$Rn), 4524 (i64 4)))), 4525 (FCVTLv8i16 V128:$Rn)>; 4526def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>; 4527 4528def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>; 4529 4530defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>; 4531defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>; 4532defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>; 4533defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>; 4534defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">; 4535def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))), 4536 (FCVTNv4i16 V128:$Rn)>; 4537def : Pat<(concat_vectors V64:$Rd, 4538 (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))), 4539 (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4540def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>; 4541def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>; 4542def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))), 4543 (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; 4544defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>; 4545defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>; 4546defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn", 4547 int_aarch64_neon_fcvtxn>; 4548defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; 4549defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; 4550 4551// AArch64's FCVT instructions saturate when out of range. 4552multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> { 4553 let Predicates = [HasFullFP16] in { 4554 def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)), 4555 (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>; 4556 def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)), 4557 (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>; 4558 } 4559 def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)), 4560 (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>; 4561 def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)), 4562 (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>; 4563 def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)), 4564 (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>; 4565} 4566defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">; 4567defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">; 4568 4569def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>; 4570def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>; 4571def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>; 4572def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>; 4573def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>; 4574 4575def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>; 4576def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>; 4577def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>; 4578def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>; 4579def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>; 4580 4581defm FNEG : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>; 4582defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>; 4583defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", 
any_fround>; 4584defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>; 4585defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>; 4586defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>; 4587defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>; 4588defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>; 4589defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>; 4590 4591let Predicates = [HasFRInt3264] in { 4592 defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>; 4593 defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>; 4594 defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>; 4595 defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>; 4596} // HasFRInt3264 4597 4598defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>; 4599defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>; 4600defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg", 4601 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 4602defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>; 4603// Aliases for MVN -> NOT. 4604def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}", 4605 (NOTv8i8 V64:$Vd, V64:$Vn)>; 4606def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}", 4607 (NOTv16i8 V128:$Vd, V128:$Vn)>; 4608 4609def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4610def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4611def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4612def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4613def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>; 4614def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>; 4615 4616defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>; 4617defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>; 4618defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>; 4619defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>; 4620defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp", 4621 BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >; 4622defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>; 4623defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>; 4624defm SHLL : SIMDVectorLShiftLongBySizeBHS; 4625defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; 4626defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 4627defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>; 4628defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>; 4629defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>; 4630defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp", 4631 BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >; 4632defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>; 4633defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>; 4634defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>; 4635defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>; 4636defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>; 4637defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>; 4638defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>; 4639 4640def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; 4641def : Pat<(v4f16 
(AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; 4642def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>; 4643def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>; 4644def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; 4645def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; 4646def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>; 4647def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>; 4648def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>; 4649def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>; 4650 4651// Patterns for vector long shift (by element width). These need to match all 4652// three of zext, sext and anyext so it's easier to pull the patterns out of the 4653// definition. 4654multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> { 4655 def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)), 4656 (SHLLv8i8 V64:$Rn)>; 4657 def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)), 4658 (SHLLv16i8 V128:$Rn)>; 4659 def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)), 4660 (SHLLv4i16 V64:$Rn)>; 4661 def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)), 4662 (SHLLv8i16 V128:$Rn)>; 4663 def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)), 4664 (SHLLv2i32 V64:$Rn)>; 4665 def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)), 4666 (SHLLv4i32 V128:$Rn)>; 4667} 4668 4669defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>; 4670defm : SIMDVectorLShiftLongBySizeBHSPats<zext>; 4671defm : SIMDVectorLShiftLongBySizeBHSPats<sext>; 4672 4673// Constant vector values, used in the S/UQXTN patterns below. 4674def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>; 4675def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>; 4676def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>; 4677def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>; 4678def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>; 4679def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>; 4680 4681// trunc(umin(X, 255)) -> UQXTRN v8i8 4682def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))), 4683 (UQXTNv8i8 V128:$Vn)>; 4684// trunc(umin(X, 65535)) -> UQXTRN v4i16 4685def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))), 4686 (UQXTNv4i16 V128:$Vn)>; 4687// trunc(smin(smax(X, -128), 128)) -> SQXTRN 4688// with reversed min/max 4689def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), 4690 (v8i16 VImm7F)))), 4691 (SQXTNv8i8 V128:$Vn)>; 4692def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), 4693 (v8i16 VImm80)))), 4694 (SQXTNv8i8 V128:$Vn)>; 4695// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN 4696// with reversed min/max 4697def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), 4698 (v4i32 VImm7FFF)))), 4699 (SQXTNv4i16 V128:$Vn)>; 4700def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), 4701 (v4i32 VImm8000)))), 4702 (SQXTNv4i16 V128:$Vn)>; 4703 4704// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn) 4705// with reversed min/max 4706def : Pat<(v16i8 (concat_vectors 4707 (v8i8 V64:$Vd), 4708 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), 4709 (v8i16 VImm7F)))))), 4710 (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4711def : Pat<(v16i8 (concat_vectors 4712 (v8i8 V64:$Vd), 4713 
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), 4714 (v8i16 VImm80)))))), 4715 (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4716 4717// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn) 4718// with reversed min/max 4719def : Pat<(v8i16 (concat_vectors 4720 (v4i16 V64:$Vd), 4721 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), 4722 (v4i32 VImm7FFF)))))), 4723 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4724def : Pat<(v8i16 (concat_vectors 4725 (v4i16 V64:$Vd), 4726 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), 4727 (v4i32 VImm8000)))))), 4728 (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; 4729 4730//===----------------------------------------------------------------------===// 4731// Advanced SIMD three vector instructions. 4732//===----------------------------------------------------------------------===// 4733 4734defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>; 4735defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>; 4736defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>; 4737defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>; 4738defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; 4739defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; 4740defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; 4741defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; 4742foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in { 4743def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>; 4744} 4745defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; 4746let Predicates = [HasNEON] in { 4747foreach VT = [ v2f32, v4f32, v2f64 ] in 4748def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; 4749} 4750let Predicates = [HasNEON, HasFullFP16] in { 4751foreach VT = [ v4f16, v8f16 ] in 4752def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>; 4753} 4754defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; 4755defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; 4756defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>; 4757defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>; 4758defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; 4759defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; 4760defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; 4761defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>; 4762defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; 4763defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>; 4764defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; 4765defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>; 4766defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; 4767defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>; 4768defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; 4769defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>; 4770 4771// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the 4772// instruction expects the addend first, while the fma intrinsic puts it last. 
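// For example, "fmla v0.4s, v1.4s, v2.4s" computes v0 += v1 * v2: the addend
// is the tied destination register, whereas fma(a, b, c) takes the addend c
// as its last operand, hence the swapped node order in the TriOpFrag below.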
4773defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", 4774 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; 4775defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", 4776 TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 4777 4778defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; 4779defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>; 4780defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; 4781defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; 4782defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>; 4783 4784// MLA and MLS are generated in MachineCombine 4785defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>; 4786defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>; 4787 4788defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; 4789defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>; 4790defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", 4791 TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >; 4792defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>; 4793defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>; 4794defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; 4795defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; 4796defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; 4797defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; 4798defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; 4799defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; 4800defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; 4801defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; 4802defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>; 4803defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>; 4804defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>; 4805defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>; 4806defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>; 4807defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>; 4808defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; 4809defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", 4810 TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >; 4811defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>; 4812defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>; 4813defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; 4814defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; 4815defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; 4816defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; 4817defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>; 4818defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; 4819defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; 4820defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; 4821defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>; 4822defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>; 4823defm URSHL : 
              SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL   : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturating patterns, other than the intrinsic matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
"|cmls.16b\t$dst, $src1, $src2}", 4893 (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 4894def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" # 4895 "|cmls.4h\t$dst, $src1, $src2}", 4896 (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 4897def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" # 4898 "|cmls.8h\t$dst, $src1, $src2}", 4899 (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 4900def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" # 4901 "|cmls.2s\t$dst, $src1, $src2}", 4902 (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 4903def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" # 4904 "|cmls.4s\t$dst, $src1, $src2}", 4905 (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 4906def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" # 4907 "|cmls.2d\t$dst, $src1, $src2}", 4908 (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 4909 4910def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" # 4911 "|cmlo.8b\t$dst, $src1, $src2}", 4912 (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 4913def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" # 4914 "|cmlo.16b\t$dst, $src1, $src2}", 4915 (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 4916def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # 4917 "|cmlo.4h\t$dst, $src1, $src2}", 4918 (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 4919def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # 4920 "|cmlo.8h\t$dst, $src1, $src2}", 4921 (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 4922def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # 4923 "|cmlo.2s\t$dst, $src1, $src2}", 4924 (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 4925def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # 4926 "|cmlo.4s\t$dst, $src1, $src2}", 4927 (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 4928def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # 4929 "|cmlo.2d\t$dst, $src1, $src2}", 4930 (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 4931 4932def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # 4933 "|cmle.8b\t$dst, $src1, $src2}", 4934 (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 4935def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # 4936 "|cmle.16b\t$dst, $src1, $src2}", 4937 (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 4938def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # 4939 "|cmle.4h\t$dst, $src1, $src2}", 4940 (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 4941def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # 4942 "|cmle.8h\t$dst, $src1, $src2}", 4943 (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 4944def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # 4945 "|cmle.2s\t$dst, $src1, $src2}", 4946 (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 4947def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # 4948 "|cmle.4s\t$dst, $src1, $src2}", 4949 (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 4950def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # 4951 "|cmle.2d\t$dst, $src1, $src2}", 4952 (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 4953 4954def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # 4955 "|cmlt.8b\t$dst, $src1, $src2}", 4956 (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; 4957def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # 4958 "|cmlt.16b\t$dst, $src1, $src2}", 4959 (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; 4960def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # 4961 "|cmlt.4h\t$dst, $src1, $src2}", 4962 (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; 4963def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # 4964 "|cmlt.8h\t$dst, $src1, $src2}", 4965 
(CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; 4966def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # 4967 "|cmlt.2s\t$dst, $src1, $src2}", 4968 (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; 4969def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # 4970 "|cmlt.4s\t$dst, $src1, $src2}", 4971 (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; 4972def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # 4973 "|cmlt.2d\t$dst, $src1, $src2}", 4974 (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; 4975 4976let Predicates = [HasNEON, HasFullFP16] in { 4977def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # 4978 "|fcmle.4h\t$dst, $src1, $src2}", 4979 (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 4980def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # 4981 "|fcmle.8h\t$dst, $src1, $src2}", 4982 (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 4983} 4984def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # 4985 "|fcmle.2s\t$dst, $src1, $src2}", 4986 (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 4987def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # 4988 "|fcmle.4s\t$dst, $src1, $src2}", 4989 (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 4990def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # 4991 "|fcmle.2d\t$dst, $src1, $src2}", 4992 (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 4993 4994let Predicates = [HasNEON, HasFullFP16] in { 4995def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # 4996 "|fcmlt.4h\t$dst, $src1, $src2}", 4997 (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 4998def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # 4999 "|fcmlt.8h\t$dst, $src1, $src2}", 5000 (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5001} 5002def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # 5003 "|fcmlt.2s\t$dst, $src1, $src2}", 5004 (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5005def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # 5006 "|fcmlt.4s\t$dst, $src1, $src2}", 5007 (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5008def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # 5009 "|fcmlt.2d\t$dst, $src1, $src2}", 5010 (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5011 5012let Predicates = [HasNEON, HasFullFP16] in { 5013def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # 5014 "|facle.4h\t$dst, $src1, $src2}", 5015 (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5016def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # 5017 "|facle.8h\t$dst, $src1, $src2}", 5018 (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5019} 5020def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # 5021 "|facle.2s\t$dst, $src1, $src2}", 5022 (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5023def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # 5024 "|facle.4s\t$dst, $src1, $src2}", 5025 (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5026def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # 5027 "|facle.2d\t$dst, $src1, $src2}", 5028 (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5029 5030let Predicates = [HasNEON, HasFullFP16] in { 5031def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # 5032 "|faclt.4h\t$dst, $src1, $src2}", 5033 (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; 5034def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # 5035 "|faclt.8h\t$dst, $src1, $src2}", 5036 (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; 5037} 5038def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # 5039 "|faclt.2s\t$dst, $src1, $src2}", 5040 (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; 5041def : 
InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # 5042 "|faclt.4s\t$dst, $src1, $src2}", 5043 (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; 5044def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # 5045 "|faclt.2d\t$dst, $src1, $src2}", 5046 (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; 5047 5048//===----------------------------------------------------------------------===// 5049// Advanced SIMD three scalar instructions. 5050//===----------------------------------------------------------------------===// 5051 5052defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; 5053defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>; 5054defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>; 5055defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; 5056defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; 5057defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; 5058defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; 5059defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; 5060def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), 5061 (FABD64 FPR64:$Rn, FPR64:$Rm)>; 5062let Predicates = [HasNEON, HasFullFP16] in { 5063def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>; 5064} 5065let Predicates = [HasNEON] in { 5066def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>; 5067def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>; 5068} 5069defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", 5070 int_aarch64_neon_facge>; 5071defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", 5072 int_aarch64_neon_facgt>; 5073defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; 5074defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; 5075defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; 5076defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>; 5077defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>; 5078defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>; 5079defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; 5080defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; 5081defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 5082defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>; 5083defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>; 5084defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>; 5085defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>; 5086defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>; 5087defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; 5088defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>; 5089defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>; 5090defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>; 5091defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>; 5092defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>; 5093defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>; 5094let Predicates = [HasRDM] in { 5095 defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">; 5096 defm 
SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">; 5097 def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn), 5098 (i32 FPR32:$Rm))), 5099 (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5100 def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn), 5101 (i32 FPR32:$Rm))), 5102 (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5103} 5104 5105def : InstAlias<"cmls $dst, $src1, $src2", 5106 (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5107def : InstAlias<"cmle $dst, $src1, $src2", 5108 (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5109def : InstAlias<"cmlo $dst, $src1, $src2", 5110 (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5111def : InstAlias<"cmlt $dst, $src1, $src2", 5112 (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5113def : InstAlias<"fcmle $dst, $src1, $src2", 5114 (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5115def : InstAlias<"fcmle $dst, $src1, $src2", 5116 (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5117def : InstAlias<"fcmlt $dst, $src1, $src2", 5118 (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5119def : InstAlias<"fcmlt $dst, $src1, $src2", 5120 (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5121def : InstAlias<"facle $dst, $src1, $src2", 5122 (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5123def : InstAlias<"facle $dst, $src1, $src2", 5124 (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5125def : InstAlias<"faclt $dst, $src1, $src2", 5126 (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>; 5127def : InstAlias<"faclt $dst, $src1, $src2", 5128 (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>; 5129 5130//===----------------------------------------------------------------------===// 5131// Advanced SIMD three scalar instructions (mixed operands). 5132//===----------------------------------------------------------------------===// 5133defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", 5134 int_aarch64_neon_sqdmulls_scalar>; 5135defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; 5136defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; 5137 5138def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd), 5139 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 5140 (i32 FPR32:$Rm))))), 5141 (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5142def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd), 5143 (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 5144 (i32 FPR32:$Rm))))), 5145 (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; 5146 5147//===----------------------------------------------------------------------===// 5148// Advanced SIMD two scalar instructions. 
5149//===----------------------------------------------------------------------===// 5150 5151defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs, [HasNoCSSC]>; 5152defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>; 5153defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; 5154defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; 5155defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; 5156defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; 5157defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; 5158defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; 5159defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; 5160defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; 5161defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; 5162defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; 5163defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; 5164defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; 5165defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; 5166defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; 5167defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; 5168defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; 5169defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; 5170def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; 5171defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; 5172defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; 5173defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorSME>; 5174defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorSME>; 5175defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorSME>; 5176defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", 5177 UnOpFrag<(sub immAllZerosV, node:$LHS)> >; 5178defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; 5179defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; 5180defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; 5181defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; 5182defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; 5183defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", 5184 int_aarch64_neon_suqadd>; 5185defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; 5186defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; 5187defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", 5188 int_aarch64_neon_usqadd>; 5189 5190def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))), 5191 (CMLTv1i64rz V64:$Rn)>; 5192 5193def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))), 5194 (FCVTASv1i64 FPR64:$Rn)>; 5195def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))), 5196 (FCVTAUv1i64 FPR64:$Rn)>; 5197def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))), 5198 (FCVTMSv1i64 FPR64:$Rn)>; 5199def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))), 5200 (FCVTMUv1i64 FPR64:$Rn)>; 5201def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))), 5202 (FCVTNSv1i64 FPR64:$Rn)>; 5203def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))), 5204 (FCVTNUv1i64 FPR64:$Rn)>; 5205def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))), 5206 (FCVTPSv1i64 FPR64:$Rn)>; 5207def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))), 5208 (FCVTPUv1i64 
FPR64:$Rn)>; 5209def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))), 5210 (FCVTZSv1i64 FPR64:$Rn)>; 5211def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))), 5212 (FCVTZUv1i64 FPR64:$Rn)>; 5213 5214def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))), 5215 (FRECPEv1f16 FPR16:$Rn)>; 5216def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))), 5217 (FRECPEv1i32 FPR32:$Rn)>; 5218def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))), 5219 (FRECPEv1i64 FPR64:$Rn)>; 5220def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))), 5221 (FRECPEv1i64 FPR64:$Rn)>; 5222 5223def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))), 5224 (FRECPEv1i32 FPR32:$Rn)>; 5225def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))), 5226 (FRECPEv2f32 V64:$Rn)>; 5227def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))), 5228 (FRECPEv4f32 FPR128:$Rn)>; 5229def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))), 5230 (FRECPEv1i64 FPR64:$Rn)>; 5231def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))), 5232 (FRECPEv1i64 FPR64:$Rn)>; 5233def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))), 5234 (FRECPEv2f64 FPR128:$Rn)>; 5235 5236def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))), 5237 (FRECPS32 FPR32:$Rn, FPR32:$Rm)>; 5238def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 5239 (FRECPSv2f32 V64:$Rn, V64:$Rm)>; 5240def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), 5241 (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>; 5242def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))), 5243 (FRECPS64 FPR64:$Rn, FPR64:$Rm)>; 5244def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), 5245 (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>; 5246 5247def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))), 5248 (FRECPXv1f16 FPR16:$Rn)>; 5249def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))), 5250 (FRECPXv1i32 FPR32:$Rn)>; 5251def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))), 5252 (FRECPXv1i64 FPR64:$Rn)>; 5253 5254def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))), 5255 (FRSQRTEv1f16 FPR16:$Rn)>; 5256def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))), 5257 (FRSQRTEv1i32 FPR32:$Rn)>; 5258def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))), 5259 (FRSQRTEv1i64 FPR64:$Rn)>; 5260def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))), 5261 (FRSQRTEv1i64 FPR64:$Rn)>; 5262 5263def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))), 5264 (FRSQRTEv1i32 FPR32:$Rn)>; 5265def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))), 5266 (FRSQRTEv2f32 V64:$Rn)>; 5267def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))), 5268 (FRSQRTEv4f32 FPR128:$Rn)>; 5269def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))), 5270 (FRSQRTEv1i64 FPR64:$Rn)>; 5271def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))), 5272 (FRSQRTEv1i64 FPR64:$Rn)>; 5273def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))), 5274 (FRSQRTEv2f64 FPR128:$Rn)>; 5275 5276def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))), 5277 (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>; 5278def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))), 5279 (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>; 5280def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))), 5281 (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>; 5282def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))), 5283 (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>; 5284def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))), 5285 (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>; 5286 5287// Some float -> int -> float conversion 
// patterns for which we want to keep the int values in FP registers using the
// corresponding NEON instructions to avoid more costly int <-> fp register
// transfers.
let Predicates = [HasNEON] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}

// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                  (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                  sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                  (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                  sub))>;
}

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits are handled in target specific dag combine:
// performIntToFpCombine.
// A 64-bit integer to 32-bit floating point conversion is not possible with
// UCVTF on floating point registers (both source and destination must have
// the same size).

// Here are the patterns for 8, 16, 32, and 64-bits to double.
// 8-bits -> double.
5358defm : UIntToFPROLoadPat<f64, i32, zextloadi8, 5359 UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>; 5360def : Pat <(f64 (uint_to_fp (i32 5361 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), 5362 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5363 (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>; 5364def : Pat <(f64 (uint_to_fp (i32 5365 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))), 5366 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5367 (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>; 5368// 16-bits -> double. 5369defm : UIntToFPROLoadPat<f64, i32, zextloadi16, 5370 UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>; 5371def : Pat <(f64 (uint_to_fp (i32 5372 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), 5373 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5374 (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>; 5375def : Pat <(f64 (uint_to_fp (i32 5376 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), 5377 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5378 (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>; 5379// 32-bits -> double. 5380defm : UIntToFPROLoadPat<f64, i32, load, 5381 UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>; 5382def : Pat <(f64 (uint_to_fp (i32 5383 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), 5384 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5385 (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>; 5386def : Pat <(f64 (uint_to_fp (i32 5387 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))), 5388 (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 5389 (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>; 5390// 64-bits -> double are handled in target specific dag combine: 5391// performIntToFpCombine. 5392} // let Predicates = [HasNEON] 5393 5394//===----------------------------------------------------------------------===// 5395// Advanced SIMD three different-sized vector instructions. 
5396//===----------------------------------------------------------------------===// 5397 5398defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>; 5399defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>; 5400defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>; 5401defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>; 5402defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>; 5403defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", 5404 AArch64sabd>; 5405defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", 5406 AArch64sabd>; 5407defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", 5408 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; 5409defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", 5410 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; 5411defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", 5412 TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 5413defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", 5414 TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 5415defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>; 5416defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", 5417 int_aarch64_neon_sqadd>; 5418defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", 5419 int_aarch64_neon_sqsub>; 5420defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", 5421 int_aarch64_neon_sqdmull>; 5422defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", 5423 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; 5424defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", 5425 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; 5426defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", 5427 AArch64uabd>; 5428defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", 5429 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>; 5430defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", 5431 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>; 5432defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", 5433 TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 5434defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", 5435 TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 5436defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>; 5437defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", 5438 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>; 5439defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", 5440 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>; 5441 5442// Additional patterns for [SU]ML[AS]L 5443multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode, 5444 Instruction INST8B, Instruction INST4H, Instruction INST2S> { 5445 def : Pat<(v4i16 (opnode 5446 V64:$Ra, 5447 (v4i16 (extract_subvector 5448 (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)), 5449 (i64 0))))), 5450 (EXTRACT_SUBREG (v8i16 (INST8B 5451 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub), 5452 V64:$Rn, V64:$Rm)), dsub)>; 5453 def : Pat<(v2i32 (opnode 5454 V64:$Ra, 5455 (v2i32 (extract_subvector 5456 (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)), 5457 (i64 0))))), 5458 (EXTRACT_SUBREG (v4i32 (INST4H 5459 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub), 5460 V64:$Rn, V64:$Rm)), dsub)>; 5461 def : Pat<(v1i64 (opnode 5462 V64:$Ra, 5463 (v1i64 (extract_subvector 5464 (vecopnode (v2i32 
V64:$Rn),(v2i32 V64:$Rm)), 5465 (i64 0))))), 5466 (EXTRACT_SUBREG (v2i64 (INST2S 5467 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub), 5468 V64:$Rn, V64:$Rm)), dsub)>; 5469} 5470 5471defm : Neon_mul_acc_widen_patterns<add, AArch64umull, 5472 UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>; 5473defm : Neon_mul_acc_widen_patterns<add, AArch64smull, 5474 SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>; 5475defm : Neon_mul_acc_widen_patterns<sub, AArch64umull, 5476 UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>; 5477defm : Neon_mul_acc_widen_patterns<sub, AArch64smull, 5478 SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>; 5479 5480// CodeGen patterns for addhn and subhn instructions, which can actually be 5481// written in LLVM IR without too much difficulty. 5482 5483// Prioritize ADDHN and SUBHN over UZP2. 5484let AddedComplexity = 10 in { 5485 5486// ADDHN 5487def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), 5488 (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 5489def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5490 (i32 16))))), 5491 (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 5492def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5493 (i32 32))))), 5494 (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 5495def : Pat<(concat_vectors (v8i8 V64:$Rd), 5496 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5497 (i32 8))))), 5498 (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5499 V128:$Rn, V128:$Rm)>; 5500def : Pat<(concat_vectors (v4i16 V64:$Rd), 5501 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5502 (i32 16))))), 5503 (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5504 V128:$Rn, V128:$Rm)>; 5505def : Pat<(concat_vectors (v2i32 V64:$Rd), 5506 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), 5507 (i32 32))))), 5508 (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5509 V128:$Rn, V128:$Rm)>; 5510 5511// SUBHN 5512def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), 5513 (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; 5514def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5515 (i32 16))))), 5516 (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; 5517def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5518 (i32 32))))), 5519 (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; 5520def : Pat<(concat_vectors (v8i8 V64:$Rd), 5521 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5522 (i32 8))))), 5523 (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5524 V128:$Rn, V128:$Rm)>; 5525def : Pat<(concat_vectors (v4i16 V64:$Rd), 5526 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5527 (i32 16))))), 5528 (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5529 V128:$Rn, V128:$Rm)>; 5530def : Pat<(concat_vectors (v2i32 V64:$Rd), 5531 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), 5532 (i32 32))))), 5533 (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), 5534 V128:$Rn, V128:$Rm)>; 5535 5536} // AddedComplexity = 10 5537 5538//---------------------------------------------------------------------------- 5539// AdvSIMD bitwise extract from vector instruction. 
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
  // 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
          (UZP1v4i32 V128:$Vn, V128:$Vm)>;

def : Pat<(v16i8 (concat_vectors
                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
                  (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
                  (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
                  (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
5617//---------------------------------------------------------------------------- 5618// AdvSIMD TBL/TBX instructions 5619//---------------------------------------------------------------------------- 5620 5621defm TBL : SIMDTableLookup< 0, "tbl">; 5622defm TBX : SIMDTableLookupTied<1, "tbx">; 5623 5624def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 5625 (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; 5626def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 5627 (TBLv16i8One V128:$Ri, V128:$Rn)>; 5628 5629def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd), 5630 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), 5631 (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; 5632def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd), 5633 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), 5634 (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; 5635 5636 5637//---------------------------------------------------------------------------- 5638// AdvSIMD scalar DUP instruction 5639//---------------------------------------------------------------------------- 5640 5641defm DUP : SIMDScalarDUP<"mov">; 5642 5643//---------------------------------------------------------------------------- 5644// AdvSIMD scalar pairwise instructions 5645//---------------------------------------------------------------------------- 5646 5647defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; 5648defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; 5649defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; 5650defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; 5651defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; 5652defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; 5653 5654// Only the lower half of the result of the inner FADDP is used in the patterns 5655// below, so the second operand does not matter. Re-use the first input 5656// operand, so no additional dependencies need to be introduced. 
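// (The scalar FADDPv2i16p/FADDPv2i32p/FADDPv2i64p forms add the two lowest
// lanes of their source register, which is why the vector FADDPs below only
// need to funnel the full sum into those lanes.)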
5657let Predicates = [HasFullFP16] in { 5658def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))), 5659 (FADDPv2i16p 5660 (EXTRACT_SUBREG 5661 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn), 5662 dsub))>; 5663def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))), 5664 (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>; 5665} 5666def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))), 5667 (FADDPv2i32p 5668 (EXTRACT_SUBREG 5669 (FADDPv4f32 V128:$Rn, V128:$Rn), 5670 dsub))>; 5671def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))), 5672 (FADDPv2i32p V64:$Rn)>; 5673def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))), 5674 (FADDPv2i64p V128:$Rn)>; 5675 5676def : Pat<(v2i64 (AArch64saddv V128:$Rn)), 5677 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; 5678def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), 5679 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; 5680def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))), 5681 (FADDPv2i32p V64:$Rn)>; 5682def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))), 5683 (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; 5684def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))), 5685 (FADDPv2i64p V128:$Rn)>; 5686def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))), 5687 (FMAXNMPv2i32p V64:$Rn)>; 5688def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))), 5689 (FMAXNMPv2i64p V128:$Rn)>; 5690def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))), 5691 (FMAXPv2i32p V64:$Rn)>; 5692def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))), 5693 (FMAXPv2i64p V128:$Rn)>; 5694def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))), 5695 (FMINNMPv2i32p V64:$Rn)>; 5696def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))), 5697 (FMINNMPv2i64p V128:$Rn)>; 5698def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))), 5699 (FMINPv2i32p V64:$Rn)>; 5700def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))), 5701 (FMINPv2i64p V128:$Rn)>; 5702 5703//---------------------------------------------------------------------------- 5704// AdvSIMD INS/DUP instructions 5705//---------------------------------------------------------------------------- 5706 5707def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>; 5708def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>; 5709def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>; 5710def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>; 5711def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>; 5712def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>; 5713def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>; 5714 5715def DUPv2i64lane : SIMDDup64FromElement; 5716def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; 5717def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; 5718def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; 5719def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; 5720def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; 5721def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; 5722 5723// DUP from a 64-bit register to a 64-bit register is just a copy 5724def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))), 5725 (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>; 5726def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))), 5727 (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>; 5728 5729def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))), 5730 (v2f32 (DUPv2i32lane 5731 
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 5732 (i64 0)))>; 5733def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))), 5734 (v4f32 (DUPv4i32lane 5735 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), 5736 (i64 0)))>; 5737def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))), 5738 (v2f64 (DUPv2i64lane 5739 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), 5740 (i64 0)))>; 5741def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))), 5742 (v4f16 (DUPv4i16lane 5743 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5744 (i64 0)))>; 5745def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))), 5746 (v4bf16 (DUPv4i16lane 5747 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5748 (i64 0)))>; 5749def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))), 5750 (v8f16 (DUPv8i16lane 5751 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5752 (i64 0)))>; 5753def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))), 5754 (v8bf16 (DUPv8i16lane 5755 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub), 5756 (i64 0)))>; 5757 5758def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), 5759 (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; 5760def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)), 5761 (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; 5762 5763def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), 5764 (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>; 5765def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)), 5766 (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>; 5767 5768def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 5769 (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; 5770def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), 5771 (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; 5772def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), 5773 (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; 5774 5775// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane 5776// instruction even if the types don't match: we just have to remap the lane 5777// carefully. N.b. this trick only applies to truncations. 
5778def VecIndex_x2 : SDNodeXForm<imm, [{ 5779 return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64); 5780}]>; 5781def VecIndex_x4 : SDNodeXForm<imm, [{ 5782 return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64); 5783}]>; 5784def VecIndex_x8 : SDNodeXForm<imm, [{ 5785 return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64); 5786}]>; 5787 5788multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT, 5789 ValueType Src128VT, ValueType ScalVT, 5790 Instruction DUP, SDNodeXForm IdxXFORM> { 5791 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn), 5792 imm:$idx)))), 5793 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 5794 5795 def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn), 5796 imm:$idx)))), 5797 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 5798} 5799 5800defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>; 5801defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>; 5802defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>; 5803 5804defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>; 5805defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>; 5806defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>; 5807 5808multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP, 5809 SDNodeXForm IdxXFORM> { 5810 def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), 5811 imm:$idx))))), 5812 (DUP V128:$Rn, (IdxXFORM imm:$idx))>; 5813 5814 def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), 5815 imm:$idx))))), 5816 (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; 5817} 5818 5819defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>; 5820defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>; 5821defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>; 5822 5823defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>; 5824defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>; 5825defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>; 5826 5827// SMOV and UMOV definitions, with some extra patterns for convenience 5828defm SMOV : SMov; 5829defm UMOV : UMov; 5830 5831def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 5832 (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; 5833def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), 5834 (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; 5835def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 5836 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 5837def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 5838 (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; 5839def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), 5840 (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; 5841def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), 5842 (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; 5843 5844def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), 5845 VectorIndexB:$idx)))), i8), 5846 (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; 5847def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), 5848 VectorIndexH:$idx)))), i16), 5849 (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; 5850 5851// Extracting i8 or i16 elements 
will have the zero-extend transformed to 5852// an 'and' mask by type legalization since neither i8 nor i16 are legal types 5853// for AArch64. Match these patterns here since UMOV already zeroes out the high 5854// bits of the destination register. 5855def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), 5856 (i32 0xff)), 5857 (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; 5858def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), 5859 (i32 0xffff)), 5860 (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; 5861 5862def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), 5863 VectorIndexB:$idx)))), (i64 0xff))), 5864 (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>; 5865def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), 5866 VectorIndexH:$idx)))), (i64 0xffff))), 5867 (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>; 5868 5869defm INS : SIMDIns; 5870 5871def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), 5872 (SUBREG_TO_REG (i32 0), 5873 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 5874def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), 5875 (SUBREG_TO_REG (i32 0), 5876 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 5877 5878// The top bits will be zero from the FMOVWSr 5879def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))), 5880 (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>; 5881 5882def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), 5883 (SUBREG_TO_REG (i32 0), 5884 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 5885def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), 5886 (SUBREG_TO_REG (i32 0), 5887 (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>; 5888 5889def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), 5890 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5891def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), 5892 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5893 5894def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 5895 (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5896def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 5897 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5898 5899def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), 5900 (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), 5901 (i32 FPR32:$Rn), ssub))>; 5902def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), 5903 (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 5904 (i32 FPR32:$Rn), ssub))>; 5905 5906def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), 5907 (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), 5908 (i64 FPR64:$Rn), dsub))>; 5909 5910def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), 5911 (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5912def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), 5913 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5914 5915def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 5916 (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5917def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))), 5918 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; 5919 5920def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), 5921 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 5922def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), 5923 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; 5924 5925def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), 5926 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; 5927 5928def : Pat<(v4f16 
(vector_insert (v4f16 V64:$Rn), 5929 (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))), 5930 (EXTRACT_SUBREG 5931 (INSvi16lane 5932 (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), 5933 VectorIndexS:$imm, 5934 (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 5935 (i64 0)), 5936 dsub)>; 5937 5938def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0), 5939 (i64 VectorIndexH:$imm)), 5940 (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>; 5941def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0), 5942 (i64 VectorIndexS:$imm)), 5943 (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>; 5944def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), 5945 (i64 VectorIndexD:$imm)), 5946 (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>; 5947 5948def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn), 5949 (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))), 5950 (INSvi16lane 5951 V128:$Rn, VectorIndexH:$imm, 5952 (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 5953 (i64 0))>; 5954 5955def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn), 5956 (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))), 5957 (EXTRACT_SUBREG 5958 (INSvi16lane 5959 (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)), 5960 VectorIndexS:$imm, 5961 (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 5962 (i64 0)), 5963 dsub)>; 5964 5965def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn), 5966 (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))), 5967 (INSvi16lane 5968 V128:$Rn, VectorIndexH:$imm, 5969 (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)), 5970 (i64 0))>; 5971 5972def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), 5973 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 5974 (EXTRACT_SUBREG 5975 (INSvi32lane 5976 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), 5977 VectorIndexS:$imm, 5978 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 5979 (i64 0)), 5980 dsub)>; 5981def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), 5982 (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), 5983 (INSvi32lane 5984 V128:$Rn, VectorIndexS:$imm, 5985 (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), 5986 (i64 0))>; 5987def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), 5988 (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), 5989 (INSvi64lane 5990 V128:$Rn, VectorIndexD:$imm, 5991 (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), 5992 (i64 0))>; 5993 5994// Copy an element at a constant index in one vector into a constant indexed 5995// element of another. 
5996// FIXME refactor to a shared class/dev parameterized on vector type, vector 5997// index type and INS extension 5998def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane 5999 (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), 6000 VectorIndexB:$idx2)), 6001 (v16i8 (INSvi8lane 6002 V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) 6003 )>; 6004def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane 6005 (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), 6006 VectorIndexH:$idx2)), 6007 (v8i16 (INSvi16lane 6008 V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) 6009 )>; 6010def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane 6011 (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), 6012 VectorIndexS:$idx2)), 6013 (v4i32 (INSvi32lane 6014 V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) 6015 )>; 6016def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane 6017 (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), 6018 VectorIndexD:$idx2)), 6019 (v2i64 (INSvi64lane 6020 V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) 6021 )>; 6022 6023multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, 6024 ValueType VTScal, Instruction INS> { 6025 def : Pat<(VT128 (vector_insert V128:$src, 6026 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 6027 imm:$Immd)), 6028 (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; 6029 6030 def : Pat<(VT128 (vector_insert V128:$src, 6031 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 6032 imm:$Immd)), 6033 (INS V128:$src, imm:$Immd, 6034 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; 6035 6036 def : Pat<(VT64 (vector_insert V64:$src, 6037 (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), 6038 imm:$Immd)), 6039 (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), 6040 imm:$Immd, V128:$Rn, imm:$Immn), 6041 dsub)>; 6042 6043 def : Pat<(VT64 (vector_insert V64:$src, 6044 (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), 6045 imm:$Immd)), 6046 (EXTRACT_SUBREG 6047 (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, 6048 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), 6049 dsub)>; 6050} 6051 6052defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>; 6053defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>; 6054defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>; 6055defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>; 6056 6057// Insert from bitcast 6058// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0) 6059def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)), 6060 (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>; 6061def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)), 6062 (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>; 6063 6064// bitcast of an extract 6065// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane)) 6066def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))), 6067 (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>; 6068def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))), 6069 (EXTRACT_SUBREG V128:$src, ssub)>; 6070def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))), 6071 (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>; 6072def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))), 6073 (EXTRACT_SUBREG 
V128:$src, dsub)>; 6074 6075// Floating point vector extractions are codegen'd as either a sequence of 6076// subregister extractions, or a MOV (aka DUP here) if 6077// the lane number is anything other than zero. 6078def : Pat<(vector_extract (v2f64 V128:$Rn), 0), 6079 (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>; 6080def : Pat<(vector_extract (v4f32 V128:$Rn), 0), 6081 (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>; 6082def : Pat<(vector_extract (v8f16 V128:$Rn), 0), 6083 (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>; 6084def : Pat<(vector_extract (v8bf16 V128:$Rn), 0), 6085 (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>; 6086 6087 6088def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx), 6089 (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>; 6090def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx), 6091 (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>; 6092def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx), 6093 (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; 6094def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx), 6095 (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>; 6096 6097// All concat_vectors operations are canonicalised to act on i64 vectors for 6098// AArch64. In the general case we need an instruction, which had just as well be 6099// INS. 6100class ConcatPat<ValueType DstTy, ValueType SrcTy> 6101 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)), 6102 (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1, 6103 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>; 6104 6105def : ConcatPat<v2i64, v1i64>; 6106def : ConcatPat<v2f64, v1f64>; 6107def : ConcatPat<v4i32, v2i32>; 6108def : ConcatPat<v4f32, v2f32>; 6109def : ConcatPat<v8i16, v4i16>; 6110def : ConcatPat<v8f16, v4f16>; 6111def : ConcatPat<v8bf16, v4bf16>; 6112def : ConcatPat<v16i8, v8i8>; 6113 6114// If the high lanes are undef, though, we can just ignore them: 6115class ConcatUndefPat<ValueType DstTy, ValueType SrcTy> 6116 : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)), 6117 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>; 6118 6119def : ConcatUndefPat<v2i64, v1i64>; 6120def : ConcatUndefPat<v2f64, v1f64>; 6121def : ConcatUndefPat<v4i32, v2i32>; 6122def : ConcatUndefPat<v4f32, v2f32>; 6123def : ConcatUndefPat<v8i16, v4i16>; 6124def : ConcatUndefPat<v16i8, v8i8>; 6125 6126//---------------------------------------------------------------------------- 6127// AdvSIMD across lanes instructions 6128//---------------------------------------------------------------------------- 6129 6130defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">; 6131defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">; 6132defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">; 6133defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; 6134defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; 6135defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; 6136defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; 6137defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; 6138defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; 6139defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; 6140defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; 6141 6142multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> { 6143 // Patterns for addv(addlp(x)) ==> addlv 6144 def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef, 6145 (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))), 6146 (i64 0))), (i64 0))), 6147 
                (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                    (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;

// Patterns for across-vector intrinsics that have a node equivalent which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;

// If none did, fall back to the explicit patterns, consuming the vector_extract.
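// For example, (i32 (vector_extract (v16i8 (AArch64saddv V128:$Rn)), (i64 0)))
// is matched below by issuing the across-lanes instruction (e.g. ADDVv16i8v),
// inserting its B-register result into an undef vector at bsub, and reading
// the i32 back out through ssub.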
6190def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)), 6191 (i64 0)), (i64 0))), 6192 (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), 6193 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), 6194 bsub), ssub)>; 6195def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))), 6196 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6197 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), 6198 bsub), ssub)>; 6199def : Pat<(i32 (vector_extract (insert_subvector undef, 6200 (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))), 6201 (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), 6202 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), 6203 hsub), ssub)>; 6204def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))), 6205 (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), 6206 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), 6207 hsub), ssub)>; 6208def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))), 6209 (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), 6210 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), 6211 ssub), ssub)>; 6212 6213} 6214 6215multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, 6216 SDPatternOperator opNode> 6217 : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { 6218// If there is a sign extension after this intrinsic, consume it as smov already 6219// performed it 6220def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, 6221 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)), 6222 (i32 (SMOVvi8to32 6223 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6224 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 6225 (i64 0)))>; 6226def : Pat<(i32 (sext_inreg (i32 (vector_extract 6227 (opNode (v16i8 V128:$Rn)), (i64 0))), i8)), 6228 (i32 (SMOVvi8to32 6229 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6230 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 6231 (i64 0)))>; 6232def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef, 6233 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)), 6234 (i32 (SMOVvi16to32 6235 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6236 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 6237 (i64 0)))>; 6238def : Pat<(i32 (sext_inreg (i32 (vector_extract 6239 (opNode (v8i16 V128:$Rn)), (i64 0))), i16)), 6240 (i32 (SMOVvi16to32 6241 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6242 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 6243 (i64 0)))>; 6244} 6245 6246multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, 6247 SDPatternOperator opNode> 6248 : SIMDAcrossLanesIntrinsic<baseOpc, opNode> { 6249// If there is a masking operation keeping only what has been actually 6250// generated, consume it. 
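// e.g. the (and ..., 0xff) around the i32 result of UMAXVv16i8v is redundant:
// the across-lanes instruction writes a B register and zeroes the rest of the
// destination V register, so the value read back through ssub already has its
// high bits clear.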
6251def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, 6252 (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)), 6253 (i32 (EXTRACT_SUBREG 6254 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6255 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), 6256 ssub))>; 6257def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))), 6258 maski8_or_more)), 6259 (i32 (EXTRACT_SUBREG 6260 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6261 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), 6262 ssub))>; 6263def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef, 6264 (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)), 6265 (i32 (EXTRACT_SUBREG 6266 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6267 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), 6268 ssub))>; 6269def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), 6270 maski16_or_more)), 6271 (i32 (EXTRACT_SUBREG 6272 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6273 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), 6274 ssub))>; 6275} 6276 6277defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; 6278// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 6279def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), 6280 (ADDPv2i32 V64:$Rn, V64:$Rn)>; 6281 6282defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>; 6283// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 6284def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))), 6285 (ADDPv2i32 V64:$Rn, V64:$Rn)>; 6286 6287defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>; 6288def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))), 6289 (SMAXPv2i32 V64:$Rn, V64:$Rn)>; 6290 6291defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>; 6292def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))), 6293 (SMINPv2i32 V64:$Rn, V64:$Rn)>; 6294 6295defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>; 6296def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))), 6297 (UMAXPv2i32 V64:$Rn, V64:$Rn)>; 6298 6299defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>; 6300def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))), 6301 (UMINPv2i32 V64:$Rn, V64:$Rn)>; 6302 6303multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> { 6304 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 6305 (i32 (SMOVvi16to32 6306 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6307 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 6308 (i64 0)))>; 6309def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 6310 (i32 (SMOVvi16to32 6311 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6312 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 6313 (i64 0)))>; 6314 6315def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 6316 (i32 (EXTRACT_SUBREG 6317 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6318 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 6319 ssub))>; 6320def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 6321 (i32 (EXTRACT_SUBREG 6322 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6323 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 6324 ssub))>; 6325 6326def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 6327 (i64 (EXTRACT_SUBREG 6328 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6329 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 6330 dsub))>; 6331} 6332 6333multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc, 6334 Intrinsic intOp> { 6335 def : Pat<(i32 (intOp (v8i8 V64:$Rn))), 6336 (i32 (EXTRACT_SUBREG 
6337 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6338 (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), 6339 ssub))>; 6340def : Pat<(i32 (intOp (v16i8 V128:$Rn))), 6341 (i32 (EXTRACT_SUBREG 6342 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6343 (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), 6344 ssub))>; 6345 6346def : Pat<(i32 (intOp (v4i16 V64:$Rn))), 6347 (i32 (EXTRACT_SUBREG 6348 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6349 (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), 6350 ssub))>; 6351def : Pat<(i32 (intOp (v8i16 V128:$Rn))), 6352 (i32 (EXTRACT_SUBREG 6353 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6354 (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), 6355 ssub))>; 6356 6357def : Pat<(i64 (intOp (v4i32 V128:$Rn))), 6358 (i64 (EXTRACT_SUBREG 6359 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6360 (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), 6361 dsub))>; 6362} 6363 6364defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>; 6365defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>; 6366 6367// The vaddlv_s32 intrinsic gets mapped to SADDLP. 6368def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))), 6369 (i64 (EXTRACT_SUBREG 6370 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6371 (SADDLPv2i32_v1i64 V64:$Rn), dsub), 6372 dsub))>; 6373// The vaddlv_u32 intrinsic gets mapped to UADDLP. 6374def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))), 6375 (i64 (EXTRACT_SUBREG 6376 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), 6377 (UADDLPv2i32_v1i64 V64:$Rn), dsub), 6378 dsub))>; 6379 6380//------------------------------------------------------------------------------ 6381// AdvSIMD modified immediate instructions 6382//------------------------------------------------------------------------------ 6383 6384// AdvSIMD BIC 6385defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>; 6386// AdvSIMD ORR 6387defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>; 6388 6389def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6390def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6391def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6392def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6393 6394def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6395def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6396def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6397def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6398 6399def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6400def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6401def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6402def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6403 6404def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>; 6405def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>; 6406def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>; 6407def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>; 6408 6409// AdvSIMD FMOV 6410def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, 6411 "fmov", ".2d", 6412 [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6413def FMOVv2f32_ns 
: SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, 6414 "fmov", ".2s", 6415 [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6416def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, 6417 "fmov", ".4s", 6418 [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6419let Predicates = [HasNEON, HasFullFP16] in { 6420def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, 6421 "fmov", ".4h", 6422 [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6423def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, 6424 "fmov", ".8h", 6425 [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; 6426} // Predicates = [HasNEON, HasFullFP16] 6427 6428// AdvSIMD MOVI 6429 6430// EDIT byte mask: scalar 6431let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6432def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", 6433 [(set FPR64:$Rd, simdimmtype10:$imm8)]>; 6434// The movi_edit node has the immediate value already encoded, so we use 6435// a plain imm0_255 here. 6436def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)), 6437 (MOVID imm0_255:$shift)>; 6438 6439// EDIT byte mask: 2d 6440 6441// The movi_edit node has the immediate value already encoded, so we use 6442// a plain imm0_255 in the pattern 6443let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6444def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, 6445 simdimmtype10, 6446 "movi", ".2d", 6447 [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; 6448 6449def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6450def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6451def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6452def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; 6453 6454def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6455def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6456def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6457def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; 6458 6459// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the 6460// extract is free and this gives better MachineCSE results. 6461def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6462def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6463def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6464def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>; 6465 6466def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6467def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6468def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6469def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>; 6470 6471// EDIT per word & halfword: 2s, 4h, 4s, & 8h 6472let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6473defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; 6474 6475let Predicates = [HasNEON] in { 6476 // Using the MOVI to materialize fp constants. 
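  // For example, f32 2.0 has the bit pattern 0x40000000 = (0x40 << 24), so it
  // can be built as "movi v0.2s, #0x40, lsl #24" and read back through ssub.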
6477 def : Pat<(f32 fpimm32SIMDModImmType4:$in), 6478 (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in), 6479 (i32 24)), 6480 ssub)>; 6481} 6482 6483def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6484def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6485def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6486def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6487 6488def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6489def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6490def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6491def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6492 6493def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6494 (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; 6495def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6496 (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; 6497def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6498 (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; 6499def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), 6500 (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; 6501 6502let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 6503// EDIT per word: 2s & 4s with MSL shifter 6504def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", 6505 [(set (v2i32 V64:$Rd), 6506 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6507def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", 6508 [(set (v4i32 V128:$Rd), 6509 (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6510 6511// Per byte: 8b & 16b 6512def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, 6513 "movi", ".8b", 6514 [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; 6515 6516def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, 6517 "movi", ".16b", 6518 [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; 6519} 6520 6521// AdvSIMD MVNI 6522 6523// EDIT per word & halfword: 2s, 4h, 4s, & 8h 6524let isReMaterializable = 1, isAsCheapAsAMove = 1 in 6525defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; 6526 6527def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6528def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6529def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6530def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6531 6532def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; 6533def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; 6534def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; 6535def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; 6536 6537def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6538 (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; 6539def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6540 (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; 6541def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6542 (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; 6543def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), 6544 (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; 6545 6546// 
EDIT per word: 2s & 4s with MSL shifter 6547let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 6548def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", 6549 [(set (v2i32 V64:$Rd), 6550 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6551def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", 6552 [(set (v4i32 V128:$Rd), 6553 (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; 6554} 6555 6556//---------------------------------------------------------------------------- 6557// AdvSIMD indexed element 6558//---------------------------------------------------------------------------- 6559 6560let hasSideEffects = 0 in { 6561 defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; 6562 defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; 6563} 6564 6565// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the 6566// instruction expects the addend first, while the intrinsic expects it last. 6567 6568// On the other hand, there are quite a few valid combinatorial options due to 6569// the commutativity of multiplication and the fact that (-x) * y = x * (-y). 6570defm : SIMDFPIndexedTiedPatterns<"FMLA", 6571 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; 6572defm : SIMDFPIndexedTiedPatterns<"FMLA", 6573 TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; 6574 6575defm : SIMDFPIndexedTiedPatterns<"FMLS", 6576 TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; 6577defm : SIMDFPIndexedTiedPatterns<"FMLS", 6578 TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; 6579defm : SIMDFPIndexedTiedPatterns<"FMLS", 6580 TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; 6581defm : SIMDFPIndexedTiedPatterns<"FMLS", 6582 TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; 6583 6584multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> { 6585 // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit 6586 // and DUP scalar. 6587 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 6588 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 6589 VectorIndexS:$idx))), 6590 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; 6591 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 6592 (v2f32 (AArch64duplane32 6593 (v4f32 (insert_subvector undef, 6594 (v2f32 (fneg V64:$Rm)), 6595 (i64 0))), 6596 VectorIndexS:$idx)))), 6597 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 6598 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 6599 VectorIndexS:$idx)>; 6600 def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), 6601 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 6602 (FMLSv2i32_indexed V64:$Rd, V64:$Rn, 6603 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 6604 6605 // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit 6606 // and DUP scalar. 
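  // Roughly, (any_fma (v4f32 V128:$Rn), (AArch64duplane32 (fneg V128:$Rm), 1),
  // (v4f32 V128:$Rd)) selects to "fmls v0.4s, v1.4s, v2.s[1]".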
6607 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 6608 (AArch64duplane32 (v4f32 (fneg V128:$Rm)), 6609 VectorIndexS:$idx))), 6610 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, 6611 VectorIndexS:$idx)>; 6612 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 6613 (v4f32 (AArch64duplane32 6614 (v4f32 (insert_subvector undef, 6615 (v2f32 (fneg V64:$Rm)), 6616 (i64 0))), 6617 VectorIndexS:$idx)))), 6618 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 6619 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), 6620 VectorIndexS:$idx)>; 6621 def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), 6622 (AArch64dup (f32 (fneg FPR32Op:$Rm))))), 6623 (FMLSv4i32_indexed V128:$Rd, V128:$Rn, 6624 (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; 6625 6626 // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar 6627 // (DUPLANE from 64-bit would be trivial). 6628 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 6629 (AArch64duplane64 (v2f64 (fneg V128:$Rm)), 6630 VectorIndexD:$idx))), 6631 (FMLSv2i64_indexed 6632 V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; 6633 def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), 6634 (AArch64dup (f64 (fneg FPR64Op:$Rm))))), 6635 (FMLSv2i64_indexed V128:$Rd, V128:$Rn, 6636 (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; 6637 6638 // 2 variants for 32-bit scalar version: extract from .2s or from .4s 6639 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 6640 (vector_extract (v4f32 (fneg V128:$Rm)), 6641 VectorIndexS:$idx))), 6642 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 6643 V128:$Rm, VectorIndexS:$idx)>; 6644 def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), 6645 (vector_extract (v4f32 (insert_subvector undef, 6646 (v2f32 (fneg V64:$Rm)), 6647 (i64 0))), 6648 VectorIndexS:$idx))), 6649 (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, 6650 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; 6651 6652 // 1 variant for 64-bit scalar version: extract from .1d or from .2d 6653 def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), 6654 (vector_extract (v2f64 (fneg V128:$Rm)), 6655 VectorIndexS:$idx))), 6656 (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, 6657 V128:$Rm, VectorIndexS:$idx)>; 6658} 6659 6660defm : FMLSIndexedAfterNegPatterns< 6661 TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; 6662defm : FMLSIndexedAfterNegPatterns< 6663 TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >; 6664 6665defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; 6666defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>; 6667 6668def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 6669 (FMULv2i32_indexed V64:$Rn, 6670 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 6671 (i64 0))>; 6672def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), 6673 (FMULv4i32_indexed V128:$Rn, 6674 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), 6675 (i64 0))>; 6676def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), 6677 (FMULv2i64_indexed V128:$Rn, 6678 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), 6679 (i64 0))>; 6680 6681defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; 6682defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; 6683 6684defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane, 6685 int_aarch64_neon_sqdmulh_laneq>; 6686defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane, 6687 int_aarch64_neon_sqrdmulh_laneq>; 6688 6689// Generated by 
MachineCombine 6690defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>; 6691defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>; 6692 6693defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; 6694defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", 6695 TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 6696defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", 6697 TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>; 6698defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>; 6699defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", 6700 int_aarch64_neon_sqadd>; 6701defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", 6702 int_aarch64_neon_sqsub>; 6703defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah", 6704 int_aarch64_neon_sqrdmlah>; 6705defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh", 6706 int_aarch64_neon_sqrdmlsh>; 6707defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; 6708defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", 6709 TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 6710defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", 6711 TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>; 6712defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>; 6713 6714// A scalar sqdmull with the second operand being a vector lane can be 6715// handled directly with the indexed instruction encoding. 6716def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), 6717 (vector_extract (v4i32 V128:$Vm), 6718 VectorIndexS:$idx)), 6719 (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; 6720 6721//---------------------------------------------------------------------------- 6722// AdvSIMD scalar shift instructions 6723//---------------------------------------------------------------------------- 6724defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; 6725defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; 6726defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; 6727defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; 6728// Codegen patterns for the above. We don't put these directly on the 6729// instructions because TableGen's type inference can't handle the truth. 6730// Having the same base pattern for fp <--> int totally freaks it out. 
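// For example, (int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, (i32 16)) is the
// fixed-point conversion "fcvtzs s0, s0, #16" (f32 to i32 with 16 fractional
// bits); it is matched by the first pattern below.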
6731def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), 6732 (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; 6733def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), 6734 (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; 6735def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), 6736 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 6737def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), 6738 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 6739def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), 6740 vecshiftR64:$imm)), 6741 (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; 6742def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), 6743 vecshiftR64:$imm)), 6744 (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; 6745def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), 6746 (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 6747def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 6748 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 6749def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), 6750 vecshiftR64:$imm)), 6751 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 6752def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), 6753 (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 6754def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), 6755 vecshiftR64:$imm)), 6756 (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; 6757def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), 6758 (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; 6759 6760// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported. 6761 6762def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)), 6763 (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 6764def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)), 6765 (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 6766def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), 6767 (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>; 6768def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp 6769 (and FPR32:$Rn, (i32 65535)), 6770 vecshiftR16:$imm)), 6771 (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 6772def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)), 6773 (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>; 6774def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)), 6775 (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>; 6776def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)), 6777 (i32 (INSERT_SUBREG 6778 (i32 (IMPLICIT_DEF)), 6779 (FCVTZSh FPR16:$Rn, vecshiftR32:$imm), 6780 hsub))>; 6781def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)), 6782 (i64 (INSERT_SUBREG 6783 (i64 (IMPLICIT_DEF)), 6784 (FCVTZSh FPR16:$Rn, vecshiftR64:$imm), 6785 hsub))>; 6786def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)), 6787 (i32 (INSERT_SUBREG 6788 (i32 (IMPLICIT_DEF)), 6789 (FCVTZUh FPR16:$Rn, vecshiftR32:$imm), 6790 hsub))>; 6791def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)), 6792 (i64 (INSERT_SUBREG 6793 (i64 (IMPLICIT_DEF)), 6794 (FCVTZUh FPR16:$Rn, vecshiftR64:$imm), 6795 hsub))>; 6796def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))), 6797 (i32 (INSERT_SUBREG 6798 (i32 (IMPLICIT_DEF)), 6799 (FACGE16 FPR16:$Rn, FPR16:$Rm), 6800 hsub))>; 
6801def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))), 6802 (i32 (INSERT_SUBREG 6803 (i32 (IMPLICIT_DEF)), 6804 (FACGT16 FPR16:$Rn, FPR16:$Rm), 6805 hsub))>; 6806 6807defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; 6808defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; 6809defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", 6810 int_aarch64_neon_sqrshrn>; 6811defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", 6812 int_aarch64_neon_sqrshrun>; 6813defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 6814defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 6815defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", 6816 int_aarch64_neon_sqshrn>; 6817defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", 6818 int_aarch64_neon_sqshrun>; 6819defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; 6820defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; 6821defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", 6822 TriOpFrag<(add node:$LHS, 6823 (AArch64srshri node:$MHS, node:$RHS))>>; 6824defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; 6825defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", 6826 TriOpFrag<(add_and_or_is_add node:$LHS, 6827 (AArch64vashr node:$MHS, node:$RHS))>>; 6828defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", 6829 int_aarch64_neon_uqrshrn>; 6830defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 6831defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", 6832 int_aarch64_neon_uqshrn>; 6833defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; 6834defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", 6835 TriOpFrag<(add node:$LHS, 6836 (AArch64urshri node:$MHS, node:$RHS))>>; 6837defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; 6838defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", 6839 TriOpFrag<(add_and_or_is_add node:$LHS, 6840 (AArch64vlshr node:$MHS, node:$RHS))>>; 6841 6842//---------------------------------------------------------------------------- 6843// AdvSIMD vector shift instructions 6844//---------------------------------------------------------------------------- 6845defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; 6846defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; 6847defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", 6848 int_aarch64_neon_vcvtfxs2fp>; 6849defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", 6850 BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>; 6851defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; 6852defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", 6853 BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; 6854defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>; 6855def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 6856 (i32 vecshiftL64:$imm))), 6857 (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; 6858defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", 6859 int_aarch64_neon_sqrshrn>; 6860defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", 6861 int_aarch64_neon_sqrshrun>; 6862defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; 6863defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; 6864defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", 6865 int_aarch64_neon_sqshrn>; 6866defm SQSHRUN : 
SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", 6867 int_aarch64_neon_sqshrun>; 6868defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>; 6869def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), 6870 (i32 vecshiftR64:$imm))), 6871 (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; 6872defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; 6873defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", 6874 TriOpFrag<(add node:$LHS, 6875 (AArch64srshri node:$MHS, node:$RHS))> >; 6876defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", 6877 BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; 6878 6879defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; 6880defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", 6881 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; 6882defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", 6883 int_aarch64_neon_vcvtfxu2fp>; 6884defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", 6885 int_aarch64_neon_uqrshrn>; 6886defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; 6887defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", 6888 int_aarch64_neon_uqshrn>; 6889defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>; 6890defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", 6891 TriOpFrag<(add node:$LHS, 6892 (AArch64urshri node:$MHS, node:$RHS))> >; 6893defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", 6894 BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>; 6895defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; 6896defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", 6897 TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; 6898 6899// RADDHN patterns for when RSHRN shifts by half the size of the vector element 6900def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))), 6901 (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; 6902def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))), 6903 (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; 6904let AddedComplexity = 5 in 6905def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))), 6906 (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; 6907 6908// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element 6909def : Pat<(v16i8 (concat_vectors 6910 (v8i8 V64:$Vd), 6911 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))), 6912 (RADDHNv8i16_v16i8 6913 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 6914 (v8i16 (MOVIv2d_ns (i32 0))))>; 6915def : Pat<(v8i16 (concat_vectors 6916 (v4i16 V64:$Vd), 6917 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))), 6918 (RADDHNv4i32_v8i16 6919 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 6920 (v4i32 (MOVIv2d_ns (i32 0))))>; 6921let AddedComplexity = 5 in 6922def : Pat<(v4i32 (concat_vectors 6923 (v2i32 V64:$Vd), 6924 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))), 6925 (RADDHNv2i64_v4i32 6926 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, 6927 (v2i64 (MOVIv2d_ns (i32 0))))>; 6928 6929// SHRN patterns for when a logical right shift was used instead of arithmetic 6930// (the 
immediate guarantees no sign bits actually end up in the result, so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;

def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
                                                      vecshiftR16Narrow:$imm)))),
          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
                                                      vecshiftR32Narrow:$imm)))),
          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                      vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;

// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128-bit source register.
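// e.g. (v8i16 (zext (v8i8 (extract_subvector (v16i8 V128:$Rn), (i64 8)))))
// becomes a single "ushll2 v0.8h, v0.16b, #0" (USHLLv16i8_shift).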
6967def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 6968 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 6969def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 6970 (USHLLv16i8_shift V128:$Rn, (i32 0))>; 6971def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), 6972 (SSHLLv16i8_shift V128:$Rn, (i32 0))>; 6973def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 6974 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 6975def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 6976 (USHLLv8i16_shift V128:$Rn, (i32 0))>; 6977def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), 6978 (SSHLLv8i16_shift V128:$Rn, (i32 0))>; 6979def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 6980 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 6981def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 6982 (USHLLv4i32_shift V128:$Rn, (i32 0))>; 6983def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), 6984 (SSHLLv4i32_shift V128:$Rn, (i32 0))>; 6985 6986// Vector shift sxtl aliases 6987def : InstAlias<"sxtl.8h $dst, $src1", 6988 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 6989def : InstAlias<"sxtl $dst.8h, $src1.8b", 6990 (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 6991def : InstAlias<"sxtl.4s $dst, $src1", 6992 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 6993def : InstAlias<"sxtl $dst.4s, $src1.4h", 6994 (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 6995def : InstAlias<"sxtl.2d $dst, $src1", 6996 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 6997def : InstAlias<"sxtl $dst.2d, $src1.2s", 6998 (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 6999 7000// Vector shift sxtl2 aliases 7001def : InstAlias<"sxtl2.8h $dst, $src1", 7002 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7003def : InstAlias<"sxtl2 $dst.8h, $src1.16b", 7004 (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7005def : InstAlias<"sxtl2.4s $dst, $src1", 7006 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7007def : InstAlias<"sxtl2 $dst.4s, $src1.8h", 7008 (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7009def : InstAlias<"sxtl2.2d $dst, $src1", 7010 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7011def : InstAlias<"sxtl2 $dst.2d, $src1.4s", 7012 (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7013 7014// Vector shift uxtl aliases 7015def : InstAlias<"uxtl.8h $dst, $src1", 7016 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7017def : InstAlias<"uxtl $dst.8h, $src1.8b", 7018 (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; 7019def : InstAlias<"uxtl.4s $dst, $src1", 7020 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7021def : InstAlias<"uxtl $dst.4s, $src1.4h", 7022 (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; 7023def : InstAlias<"uxtl.2d $dst, $src1", 7024 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7025def : InstAlias<"uxtl $dst.2d, $src1.2s", 7026 (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; 7027 7028// Vector shift uxtl2 aliases 7029def : InstAlias<"uxtl2.8h $dst, $src1", 7030 (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7031def : InstAlias<"uxtl2 $dst.8h, $src1.16b", 7032 (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; 7033def : InstAlias<"uxtl2.4s $dst, $src1", 7034 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7035def : InstAlias<"uxtl2 $dst.4s, $src1.8h", 7036 (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; 7037def : InstAlias<"uxtl2.2d $dst, $src1", 7038 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7039def : InstAlias<"uxtl2 $dst.2d, $src1.4s", 7040 (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; 7041 7042// If an 
integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthening) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bit -> float. 2 size steps up.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                          (SSHLLv4i16_shift
                            (f64
                              (EXTRACT_SUBREG
                                (SSHLLv8i8_shift
                                  (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                                 INST,
                                                 bsub),
                                  0),
                                dsub)),
                            0),
                          ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;

// 16-bit -> float. 1 size step up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
                          (SSHLLv4i16_shift
                            (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                           INST,
                                           hsub),
                            0),
                          ssub)))>,
    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;

def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// 32-bit to 32-bit is handled in the target-specific dag combine:
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with SCVTF on
// floating point registers (both source and destination must have the
// same size).

// Here are the patterns for 8, 16, 32, and 64 bits to double.
// 8-bit -> double. 3 size steps up: give up.
// 16-bit -> double. 2 size steps up.
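// Roughly: ldr h0, [x0]; sshll v0.4s, v0.4h, #0; sshll v0.2d, v0.2s, #0;
// scvtf d0, d0. The value never has to leave the FP/SIMD unit.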
7110class SExtLoadi16CVTf64Pat<dag addrmode, dag INST> 7111 : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))), 7112 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7113 (SSHLLv2i32_shift 7114 (f64 7115 (EXTRACT_SUBREG 7116 (SSHLLv4i16_shift 7117 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7118 INST, 7119 hsub), 7120 0), 7121 dsub)), 7122 0), 7123 dsub)))>, 7124 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7125 7126def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), 7127 (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; 7128def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext), 7129 (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>; 7130def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), 7131 (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>; 7132def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset), 7133 (LDURHi GPR64sp:$Rn, simm9:$offset)>; 7134// 32-bits -> double. 1 size step-up. 7135class SExtLoadi32CVTf64Pat<dag addrmode, dag INST> 7136 : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), 7137 (SCVTFv1i64 (f64 (EXTRACT_SUBREG 7138 (SSHLLv2i32_shift 7139 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), 7140 INST, 7141 ssub), 7142 0), 7143 dsub)))>, 7144 Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; 7145 7146def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), 7147 (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; 7148def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), 7149 (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; 7150def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), 7151 (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; 7152def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), 7153 (LDURSi GPR64sp:$Rn, simm9:$offset)>; 7154 7155// 64-bits -> double are handled in target specific dag combine: 7156// performIntToFpCombine. 
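// As an illustration of the trade-off above (assuming NotForCodeSize holds),
// a sign-extending byte load feeding an f32 conversion becomes
//   ldr b0, [x0]; sshll v0.8h, v0.8b, #0; sshll v0.4s, v0.4h, #0; scvtf s0, s0
// instead of the shorter ldrsb w8, [x0]; scvtf s0, w8 sequence.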
7157 7158 7159//---------------------------------------------------------------------------- 7160// AdvSIMD Load-Store Structure 7161//---------------------------------------------------------------------------- 7162defm LD1 : SIMDLd1Multiple<"ld1">; 7163defm LD2 : SIMDLd2Multiple<"ld2">; 7164defm LD3 : SIMDLd3Multiple<"ld3">; 7165defm LD4 : SIMDLd4Multiple<"ld4">; 7166 7167defm ST1 : SIMDSt1Multiple<"st1">; 7168defm ST2 : SIMDSt2Multiple<"st2">; 7169defm ST3 : SIMDSt3Multiple<"st3">; 7170defm ST4 : SIMDSt4Multiple<"st4">; 7171 7172class Ld1Pat<ValueType ty, Instruction INST> 7173 : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; 7174 7175def : Ld1Pat<v16i8, LD1Onev16b>; 7176def : Ld1Pat<v8i16, LD1Onev8h>; 7177def : Ld1Pat<v4i32, LD1Onev4s>; 7178def : Ld1Pat<v2i64, LD1Onev2d>; 7179def : Ld1Pat<v8i8, LD1Onev8b>; 7180def : Ld1Pat<v4i16, LD1Onev4h>; 7181def : Ld1Pat<v2i32, LD1Onev2s>; 7182def : Ld1Pat<v1i64, LD1Onev1d>; 7183 7184class St1Pat<ValueType ty, Instruction INST> 7185 : Pat<(store ty:$Vt, GPR64sp:$Rn), 7186 (INST ty:$Vt, GPR64sp:$Rn)>; 7187 7188def : St1Pat<v16i8, ST1Onev16b>; 7189def : St1Pat<v8i16, ST1Onev8h>; 7190def : St1Pat<v4i32, ST1Onev4s>; 7191def : St1Pat<v2i64, ST1Onev2d>; 7192def : St1Pat<v8i8, ST1Onev8b>; 7193def : St1Pat<v4i16, ST1Onev4h>; 7194def : St1Pat<v2i32, ST1Onev2s>; 7195def : St1Pat<v1i64, ST1Onev1d>; 7196 7197//--- 7198// Single-element 7199//--- 7200 7201defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; 7202defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; 7203defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; 7204defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; 7205let mayLoad = 1, hasSideEffects = 0 in { 7206defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; 7207defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; 7208defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; 7209defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; 7210defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; 7211defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; 7212defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; 7213defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; 7214defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; 7215defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; 7216defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; 7217defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; 7218defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; 7219defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; 7220defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; 7221defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; 7222} 7223 7224def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7225 (LD1Rv8b GPR64sp:$Rn)>; 7226def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), 7227 (LD1Rv16b GPR64sp:$Rn)>; 7228def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7229 (LD1Rv4h GPR64sp:$Rn)>; 7230def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), 7231 (LD1Rv8h GPR64sp:$Rn)>; 7232def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7233 (LD1Rv2s GPR64sp:$Rn)>; 7234def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), 7235 (LD1Rv4s GPR64sp:$Rn)>; 
7236def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7237 (LD1Rv2d GPR64sp:$Rn)>; 7238def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), 7239 (LD1Rv1d GPR64sp:$Rn)>; 7240// Grab the floating point version too 7241def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7242 (LD1Rv2s GPR64sp:$Rn)>; 7243def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), 7244 (LD1Rv4s GPR64sp:$Rn)>; 7245def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7246 (LD1Rv2d GPR64sp:$Rn)>; 7247def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), 7248 (LD1Rv1d GPR64sp:$Rn)>; 7249def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7250 (LD1Rv4h GPR64sp:$Rn)>; 7251def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))), 7252 (LD1Rv8h GPR64sp:$Rn)>; 7253def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7254 (LD1Rv4h GPR64sp:$Rn)>; 7255def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), 7256 (LD1Rv8h GPR64sp:$Rn)>; 7257 7258class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex, 7259 ValueType VTy, ValueType STy, Instruction LD1> 7260 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7261 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7262 (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; 7263 7264def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>; 7265def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>; 7266def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>; 7267def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>; 7268def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>; 7269def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>; 7270def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>; 7271def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>; 7272 7273// Generate LD1 for extload if memory type does not match the 7274// destination type, for example: 7275// 7276// (v4i32 (insert_vector_elt (load anyext from i8) idx)) 7277// 7278// In this case, the index must be adjusted to match LD1 type. 7279// 7280class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand 7281 VecIndex, ValueType VTy, ValueType STy, 7282 Instruction LD1, SDNodeXForm IdxOp> 7283 : Pat<(vector_insert (VTy VecListOne128:$Rd), 7284 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7285 (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; 7286 7287def VectorIndexStoH : SDNodeXForm<imm, [{ 7288 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7289}]>; 7290def VectorIndexStoB : SDNodeXForm<imm, [{ 7291 return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64); 7292}]>; 7293def VectorIndexHtoB : SDNodeXForm<imm, [{ 7294 return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64); 7295}]>; 7296 7297def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>; 7298def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>; 7299def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>; 7300 7301// Same as above, but the first element is populated using 7302// scalar_to_vector + insert_subvector instead of insert_vector_elt. 
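// e.g. (v4i16 (scalar_to_vector (i32 (extloadi8 GPR64sp:$Rn)))) still selects
// "ld1 { v0.b }[0], [x0]" via the Ld1Lane128FirstElm patterns below.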
7303let Predicates = [NotInStreamingSVEMode] in { 7304 class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy, 7305 SDPatternOperator ExtLoad, Instruction LD1> 7306 : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))), 7307 (ResultTy (EXTRACT_SUBREG 7308 (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>; 7309 7310 def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>; 7311 def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>; 7312 def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>; 7313} 7314class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex, 7315 ValueType VTy, ValueType STy, Instruction LD1> 7316 : Pat<(vector_insert (VTy VecListOne64:$Rd), 7317 (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), 7318 (EXTRACT_SUBREG 7319 (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), 7320 VecIndex:$idx, GPR64sp:$Rn), 7321 dsub)>; 7322 7323def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>; 7324def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>; 7325def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>; 7326def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>; 7327def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>; 7328def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>; 7329 7330 7331defm LD1 : SIMDLdSt1SingleAliases<"ld1">; 7332defm LD2 : SIMDLdSt2SingleAliases<"ld2">; 7333defm LD3 : SIMDLdSt3SingleAliases<"ld3">; 7334defm LD4 : SIMDLdSt4SingleAliases<"ld4">; 7335 7336// Stores 7337defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; 7338defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; 7339defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; 7340defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; 7341 7342let AddedComplexity = 19 in 7343class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex, 7344 ValueType VTy, ValueType STy, Instruction ST1> 7345 : Pat<(scalar_store 7346 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7347 GPR64sp:$Rn), 7348 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; 7349 7350def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>; 7351def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>; 7352def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>; 7353def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>; 7354def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>; 7355def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>; 7356def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>; 7357def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>; 7358 7359let AddedComplexity = 19 in 7360class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7361 ValueType VTy, ValueType STy, Instruction ST1> 7362 : Pat<(scalar_store 7363 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7364 GPR64sp:$Rn), 7365 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7366 VecIndex:$idx, GPR64sp:$Rn)>; 7367 7368def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>; 7369def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>; 7370def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>; 7371def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>; 7372def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>; 7373def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>; 7374 7375multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex, 7376 ValueType 
VTy, ValueType STy, Instruction ST1, 7377 int offset> { 7378 def : Pat<(scalar_store 7379 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7380 GPR64sp:$Rn, offset), 7381 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7382 VecIndex:$idx, GPR64sp:$Rn, XZR)>; 7383 7384 def : Pat<(scalar_store 7385 (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), 7386 GPR64sp:$Rn, GPR64:$Rm), 7387 (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), 7388 VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 7389} 7390 7391defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>; 7392defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST, 7393 2>; 7394defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>; 7395defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>; 7396defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>; 7397defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>; 7398defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>; 7399defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>; 7400 7401multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex, 7402 ValueType VTy, ValueType STy, Instruction ST1, 7403 int offset> { 7404 def : Pat<(scalar_store 7405 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7406 GPR64sp:$Rn, offset), 7407 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; 7408 7409 def : Pat<(scalar_store 7410 (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), 7411 GPR64sp:$Rn, GPR64:$Rm), 7412 (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>; 7413} 7414 7415defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST, 7416 1>; 7417defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST, 7418 2>; 7419defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>; 7420defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>; 7421defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>; 7422defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>; 7423defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>; 7424defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>; 7425 7426let mayStore = 1, hasSideEffects = 0 in { 7427defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; 7428defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; 7429defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; 7430defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; 7431defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; 7432defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; 7433defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; 7434defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; 7435defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; 7436defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; 7437defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; 7438defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; 7439} 7440 7441defm ST1 : SIMDLdSt1SingleAliases<"st1">; 7442defm ST2 : SIMDLdSt2SingleAliases<"st2">; 7443defm ST3 : SIMDLdSt3SingleAliases<"st3">; 7444defm 
ST4 : SIMDLdSt4SingleAliases<"st4">;

//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------

let Predicates = [HasAES] in {
def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}

// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                 Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
                  Sched<[WriteVq]>;
}

// Only use constrained versions of AES(I)MC instructions if they are paired with
// AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
                                             (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

def : Pat<(v16i8 (int_aarch64_crypto_aesimc
            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
                                            (v16i8 V128:$src2))))),
          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
                                              (v16i8 V128:$src2)))))>,
          Requires<[HasFuseAES]>;

let Predicates = [HasSHA2] in {
def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2", int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;

def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
}

//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.

// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;

// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;

// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
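// For illustration (an assumed example, not from the original source): the
// first pattern below selects
//   sxtw x0, w0        // the alias of SBFM x0, x0, #0, #31
// for (i64 (sext GPR32)), after first inserting the 32-bit value into the
// low half of an IMPLICIT_DEF 64-bit register.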
7513def : Pat<(i64 (sext GPR32:$src)), 7514 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; 7515def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; 7516def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; 7517def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; 7518def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; 7519def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; 7520def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; 7521def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; 7522 7523def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), 7524 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 7525 (i64 (i32shift_sext_i8 imm0_31:$imm)))>; 7526def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), 7527 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 7528 (i64 (i64shift_sext_i8 imm0_63:$imm)))>; 7529 7530def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), 7531 (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), 7532 (i64 (i32shift_sext_i16 imm0_31:$imm)))>; 7533def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), 7534 (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), 7535 (i64 (i64shift_sext_i16 imm0_63:$imm)))>; 7536 7537def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), 7538 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7539 (i64 (i64shift_a imm0_63:$imm)), 7540 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 7541 7542def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)), 7543 (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7544 (i64 (i64shift_a imm0_63:$imm)), 7545 (i64 (i64shift_sext_i32 imm0_63:$imm)))>; 7546 7547// sra patterns have an AddedComplexity of 10, so make sure we have a higher 7548// AddedComplexity for the following patterns since we want to match sext + sra 7549// patterns before we attempt to match a single sra node. 7550let AddedComplexity = 20 in { 7551// We support all sext + sra combinations which preserve at least one bit of the 7552// original value which is to be sign extended. E.g. we support shifts up to 7553// bitwidth-1 bits. 7554def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), 7555 (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; 7556def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), 7557 (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; 7558 7559def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), 7560 (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; 7561def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), 7562 (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; 7563 7564def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), 7565 (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), 7566 (i64 imm0_31:$imm), 31)>; 7567} // AddedComplexity = 20 7568 7569// To truncate, we can simply extract from a subregister. 7570def : Pat<(i32 (trunc GPR64sp:$src)), 7571 (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; 7572 7573// __builtin_trap() uses the BRK instruction on AArch64. 
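// For illustration (an assumed example, not from the original source), the
// patterns below emit:
//   brk #0x1       for (trap), i.e. __builtin_trap()
//   brk #0xf000    for (debugtrap), i.e. __builtin_debugtrap()
// and ubsantrap folds its 8-bit check kind into the immediate as
// ('U' << 8) | kind, so kind 0 becomes brk #0x5500.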
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;

def ubsan_trap_xform : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;

def ubsan_trap_imm : TImmLeaf<i32, [{
  return isUInt<8>(Imm);
}], ubsan_trap_xform>;

def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;

// Multiply high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2. Then shuffle the high parts of
// both results together.
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v16i8
           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
                            (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v8i16
           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
          (UZP2v4i32
           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
                             (EXTRACT_SUBREG V128:$Rm, dsub)),
           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;

// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
//   v0 = load v2i32
//   v1 = BITCAST v2i32 v0 to v4i16
//   store v4i16 v1
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
//   v2 = BITCAST v2i32 v1 to v4i16
//   v3 = REV v4i16 v2               (implicit)
//   store v4i16 v3
//
// But this is now broken - the value stored is different from the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
//   v2 = REV v2i32 v1
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16 v3
//   v5 = REV v4i16 v4               (implicit)
//   store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction.
This must be synthesized with an 7661// EXT instruction. 7662// 7663// Most bitconverts require some sort of conversion. The only exceptions are: 7664// a) Identity conversions - vNfX <-> vNiX 7665// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX 7666// 7667 7668// Natural vector casts (64 bit) 7669foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 7670 foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in 7671 def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))), 7672 (VT FPR64:$src)>; 7673 7674// Natural vector casts (128 bit) 7675foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7676 foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in 7677 def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))), 7678 (VT FPR128:$src)>; 7679 7680let Predicates = [IsLE] in { 7681def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7682def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7683def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7684def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7685def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7686def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7687 7688def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 7689 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7690def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 7691 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7692def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 7693 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7694def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 7695 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7696def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 7697 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7698def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 7699 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7700def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 7701 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7702} 7703let Predicates = [IsBE] in { 7704def : Pat<(v8i8 (bitconvert GPR64:$Xn)), 7705 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 7706def : Pat<(v4i16 (bitconvert GPR64:$Xn)), 7707 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 7708def : Pat<(v2i32 (bitconvert GPR64:$Xn)), 7709 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 7710def : Pat<(v4f16 (bitconvert GPR64:$Xn)), 7711 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 7712def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), 7713 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 7714def : Pat<(v2f32 (bitconvert GPR64:$Xn)), 7715 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; 7716 7717def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), 7718 (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 7719def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), 7720 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 7721def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), 7722 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 7723def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))), 7724 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 7725def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))), 7726 (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 7727def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), 7728 (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; 7729} 7730def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7731def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7732def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), 7733 (COPY_TO_REGCLASS V64:$Vn, 
GPR64)>; 7734def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), 7735 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7736def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), 7737 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7738def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; 7739 7740def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), 7741 (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; 7742def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), 7743 (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; 7744def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), 7745 (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; 7746def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), 7747 (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; 7748def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), 7749 (COPY_TO_REGCLASS V64:$Vn, GPR64)>; 7750 7751def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>; 7752def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>; 7753 7754let Predicates = [IsLE] in { 7755def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; 7756def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; 7757def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; 7758def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>; 7759def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>; 7760def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; 7761} 7762let Predicates = [IsBE] in { 7763def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), 7764 (v1i64 (REV64v2i32 FPR64:$src))>; 7765def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), 7766 (v1i64 (REV64v4i16 FPR64:$src))>; 7767def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), 7768 (v1i64 (REV64v8i8 FPR64:$src))>; 7769def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), 7770 (v1i64 (REV64v4i16 FPR64:$src))>; 7771def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), 7772 (v1i64 (REV64v4i16 FPR64:$src))>; 7773def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), 7774 (v1i64 (REV64v2i32 FPR64:$src))>; 7775} 7776def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; 7777def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; 7778 7779let Predicates = [IsLE] in { 7780def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; 7781def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; 7782def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; 7783def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; 7784def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; 7785def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>; 7786def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>; 7787} 7788let Predicates = [IsBE] in { 7789def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), 7790 (v2i32 (REV64v2i32 FPR64:$src))>; 7791def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), 7792 (v2i32 (REV32v4i16 FPR64:$src))>; 7793def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), 7794 (v2i32 (REV32v8i8 FPR64:$src))>; 7795def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), 7796 (v2i32 (REV64v2i32 FPR64:$src))>; 7797def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), 7798 (v2i32 (REV64v2i32 FPR64:$src))>; 7799def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), 7800 (v2i32 (REV32v4i16 FPR64:$src))>; 7801def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), 7802 (v2i32 (REV32v4i16 FPR64:$src))>; 7803} 7804def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; 7805 7806let Predicates = [IsLE] in { 7807def : Pat<(v4i16 
(bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; 7808def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; 7809def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; 7810def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; 7811def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; 7812def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; 7813} 7814let Predicates = [IsBE] in { 7815def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), 7816 (v4i16 (REV64v4i16 FPR64:$src))>; 7817def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), 7818 (v4i16 (REV32v4i16 FPR64:$src))>; 7819def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), 7820 (v4i16 (REV16v8i8 FPR64:$src))>; 7821def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), 7822 (v4i16 (REV64v4i16 FPR64:$src))>; 7823def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), 7824 (v4i16 (REV32v4i16 FPR64:$src))>; 7825def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), 7826 (v4i16 (REV64v4i16 FPR64:$src))>; 7827} 7828def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>; 7829def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>; 7830 7831let Predicates = [IsLE] in { 7832def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>; 7833def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>; 7834def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>; 7835def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>; 7836def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>; 7837def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>; 7838 7839def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>; 7840def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>; 7841def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>; 7842def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>; 7843def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>; 7844def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>; 7845} 7846let Predicates = [IsBE] in { 7847def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), 7848 (v4f16 (REV64v4i16 FPR64:$src))>; 7849def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), 7850 (v4f16 (REV32v4i16 FPR64:$src))>; 7851def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), 7852 (v4f16 (REV16v8i8 FPR64:$src))>; 7853def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), 7854 (v4f16 (REV64v4i16 FPR64:$src))>; 7855def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), 7856 (v4f16 (REV32v4i16 FPR64:$src))>; 7857def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), 7858 (v4f16 (REV64v4i16 FPR64:$src))>; 7859 7860def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), 7861 (v4bf16 (REV64v4i16 FPR64:$src))>; 7862def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), 7863 (v4bf16 (REV32v4i16 FPR64:$src))>; 7864def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), 7865 (v4bf16 (REV16v8i8 FPR64:$src))>; 7866def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), 7867 (v4bf16 (REV64v4i16 FPR64:$src))>; 7868def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), 7869 (v4bf16 (REV32v4i16 FPR64:$src))>; 7870def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), 7871 (v4bf16 (REV64v4i16 FPR64:$src))>; 7872} 7873def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>; 7874def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>; 7875 7876let Predicates = [IsLE] in { 7877def : Pat<(v8i8 
(bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; 7878def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; 7879def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; 7880def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; 7881def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; 7882def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; 7883def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>; 7884def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>; 7885} 7886let Predicates = [IsBE] in { 7887def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), 7888 (v8i8 (REV64v8i8 FPR64:$src))>; 7889def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), 7890 (v8i8 (REV32v8i8 FPR64:$src))>; 7891def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), 7892 (v8i8 (REV16v8i8 FPR64:$src))>; 7893def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), 7894 (v8i8 (REV64v8i8 FPR64:$src))>; 7895def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), 7896 (v8i8 (REV32v8i8 FPR64:$src))>; 7897def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), 7898 (v8i8 (REV64v8i8 FPR64:$src))>; 7899def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), 7900 (v8i8 (REV16v8i8 FPR64:$src))>; 7901def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), 7902 (v8i8 (REV16v8i8 FPR64:$src))>; 7903} 7904 7905let Predicates = [IsLE] in { 7906def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; 7907def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; 7908def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; 7909def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; 7910def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>; 7911def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>; 7912} 7913let Predicates = [IsBE] in { 7914def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), 7915 (f64 (REV64v2i32 FPR64:$src))>; 7916def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), 7917 (f64 (REV64v4i16 FPR64:$src))>; 7918def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), 7919 (f64 (REV64v2i32 FPR64:$src))>; 7920def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), 7921 (f64 (REV64v8i8 FPR64:$src))>; 7922def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), 7923 (f64 (REV64v4i16 FPR64:$src))>; 7924def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), 7925 (f64 (REV64v4i16 FPR64:$src))>; 7926} 7927def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; 7928def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; 7929 7930let Predicates = [IsLE] in { 7931def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; 7932def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; 7933def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; 7934def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; 7935def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>; 7936def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>; 7937} 7938let Predicates = [IsBE] in { 7939def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), 7940 (v1f64 (REV64v2i32 FPR64:$src))>; 7941def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), 7942 (v1f64 (REV64v4i16 FPR64:$src))>; 7943def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), 7944 (v1f64 (REV64v8i8 FPR64:$src))>; 7945def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), 7946 (v1f64 (REV64v2i32 FPR64:$src))>; 7947def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), 7948 (v1f64 (REV64v4i16 FPR64:$src))>; 7949def : 
Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), 7950 (v1f64 (REV64v4i16 FPR64:$src))>; 7951} 7952def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; 7953def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; 7954 7955let Predicates = [IsLE] in { 7956def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; 7957def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; 7958def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; 7959def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; 7960def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; 7961def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>; 7962def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>; 7963} 7964let Predicates = [IsBE] in { 7965def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), 7966 (v2f32 (REV64v2i32 FPR64:$src))>; 7967def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), 7968 (v2f32 (REV32v4i16 FPR64:$src))>; 7969def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), 7970 (v2f32 (REV32v8i8 FPR64:$src))>; 7971def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), 7972 (v2f32 (REV64v2i32 FPR64:$src))>; 7973def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), 7974 (v2f32 (REV64v2i32 FPR64:$src))>; 7975def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), 7976 (v2f32 (REV32v4i16 FPR64:$src))>; 7977def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), 7978 (v2f32 (REV32v4i16 FPR64:$src))>; 7979} 7980def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; 7981 7982let Predicates = [IsLE] in { 7983def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; 7984def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; 7985def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; 7986def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; 7987def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; 7988def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>; 7989def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>; 7990def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; 7991} 7992let Predicates = [IsBE] in { 7993def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), 7994 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 7995def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), 7996 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 7997 (REV64v4i32 FPR128:$src), (i32 8)))>; 7998def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), 7999 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8000 (REV64v8i16 FPR128:$src), (i32 8)))>; 8001def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), 8002 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8003 (REV64v8i16 FPR128:$src), (i32 8)))>; 8004def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), 8005 (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), 8006 (REV64v8i16 FPR128:$src), (i32 8)))>; 8007def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), 8008 (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; 8009def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), 8010 (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), 8011 (REV64v4i32 FPR128:$src), (i32 8)))>; 8012def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), 8013 (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), 8014 (REV64v16i8 FPR128:$src), (i32 8)))>; 8015} 8016 8017let Predicates = [IsLE] in { 8018def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; 8019def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 
FPR128:$src)>; 8020def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; 8021def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>; 8022def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>; 8023def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; 8024def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; 8025} 8026let Predicates = [IsBE] in { 8027def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), 8028 (v2f64 (EXTv16i8 FPR128:$src, 8029 FPR128:$src, (i32 8)))>; 8030def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), 8031 (v2f64 (REV64v4i32 FPR128:$src))>; 8032def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), 8033 (v2f64 (REV64v8i16 FPR128:$src))>; 8034def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), 8035 (v2f64 (REV64v8i16 FPR128:$src))>; 8036def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), 8037 (v2f64 (REV64v8i16 FPR128:$src))>; 8038def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), 8039 (v2f64 (REV64v16i8 FPR128:$src))>; 8040def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), 8041 (v2f64 (REV64v4i32 FPR128:$src))>; 8042} 8043def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; 8044 8045let Predicates = [IsLE] in { 8046def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; 8047def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; 8048def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>; 8049def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>; 8050def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; 8051def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; 8052def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; 8053} 8054let Predicates = [IsBE] in { 8055def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), 8056 (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), 8057 (REV64v4i32 FPR128:$src), (i32 8)))>; 8058def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), 8059 (v4f32 (REV32v8i16 FPR128:$src))>; 8060def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), 8061 (v4f32 (REV32v8i16 FPR128:$src))>; 8062def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), 8063 (v4f32 (REV32v8i16 FPR128:$src))>; 8064def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), 8065 (v4f32 (REV32v16i8 FPR128:$src))>; 8066def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), 8067 (v4f32 (REV64v4i32 FPR128:$src))>; 8068def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), 8069 (v4f32 (REV64v4i32 FPR128:$src))>; 8070} 8071def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; 8072 8073let Predicates = [IsLE] in { 8074def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; 8075def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; 8076def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; 8077def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; 8078def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; 8079def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>; 8080def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>; 8081} 8082let Predicates = [IsBE] in { 8083def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), 8084 (v2i64 (EXTv16i8 FPR128:$src, 8085 FPR128:$src, (i32 8)))>; 8086def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), 8087 (v2i64 (REV64v4i32 FPR128:$src))>; 8088def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), 
8089 (v2i64 (REV64v8i16 FPR128:$src))>; 8090def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), 8091 (v2i64 (REV64v16i8 FPR128:$src))>; 8092def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), 8093 (v2i64 (REV64v4i32 FPR128:$src))>; 8094def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), 8095 (v2i64 (REV64v8i16 FPR128:$src))>; 8096def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), 8097 (v2i64 (REV64v8i16 FPR128:$src))>; 8098} 8099def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; 8100 8101let Predicates = [IsLE] in { 8102def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; 8103def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; 8104def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; 8105def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; 8106def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; 8107def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>; 8108def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>; 8109} 8110let Predicates = [IsBE] in { 8111def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), 8112 (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), 8113 (REV64v4i32 FPR128:$src), 8114 (i32 8)))>; 8115def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), 8116 (v4i32 (REV64v4i32 FPR128:$src))>; 8117def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), 8118 (v4i32 (REV32v8i16 FPR128:$src))>; 8119def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), 8120 (v4i32 (REV32v16i8 FPR128:$src))>; 8121def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), 8122 (v4i32 (REV64v4i32 FPR128:$src))>; 8123def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), 8124 (v4i32 (REV32v8i16 FPR128:$src))>; 8125def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), 8126 (v4i32 (REV32v8i16 FPR128:$src))>; 8127} 8128def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; 8129 8130let Predicates = [IsLE] in { 8131def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; 8132def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; 8133def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; 8134def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; 8135def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; 8136def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; 8137} 8138let Predicates = [IsBE] in { 8139def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), 8140 (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8141 (REV64v8i16 FPR128:$src), 8142 (i32 8)))>; 8143def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), 8144 (v8i16 (REV64v8i16 FPR128:$src))>; 8145def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), 8146 (v8i16 (REV32v8i16 FPR128:$src))>; 8147def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), 8148 (v8i16 (REV16v16i8 FPR128:$src))>; 8149def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), 8150 (v8i16 (REV64v8i16 FPR128:$src))>; 8151def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), 8152 (v8i16 (REV32v8i16 FPR128:$src))>; 8153} 8154def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>; 8155def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>; 8156 8157let Predicates = [IsLE] in { 8158def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>; 8159def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>; 8160def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; 
8161def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; 8162def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; 8163def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; 8164 8165def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>; 8166def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>; 8167def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>; 8168def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>; 8169def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>; 8170def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>; 8171} 8172let Predicates = [IsBE] in { 8173def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), 8174 (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8175 (REV64v8i16 FPR128:$src), 8176 (i32 8)))>; 8177def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), 8178 (v8f16 (REV64v8i16 FPR128:$src))>; 8179def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), 8180 (v8f16 (REV32v8i16 FPR128:$src))>; 8181def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), 8182 (v8f16 (REV16v16i8 FPR128:$src))>; 8183def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), 8184 (v8f16 (REV64v8i16 FPR128:$src))>; 8185def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), 8186 (v8f16 (REV32v8i16 FPR128:$src))>; 8187 8188def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), 8189 (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src), 8190 (REV64v8i16 FPR128:$src), 8191 (i32 8)))>; 8192def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), 8193 (v8bf16 (REV64v8i16 FPR128:$src))>; 8194def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), 8195 (v8bf16 (REV32v8i16 FPR128:$src))>; 8196def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), 8197 (v8bf16 (REV16v16i8 FPR128:$src))>; 8198def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), 8199 (v8bf16 (REV64v8i16 FPR128:$src))>; 8200def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), 8201 (v8bf16 (REV32v8i16 FPR128:$src))>; 8202} 8203def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; 8204def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>; 8205 8206let Predicates = [IsLE] in { 8207def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; 8208def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; 8209def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; 8210def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; 8211def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; 8212def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; 8213def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>; 8214def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>; 8215} 8216let Predicates = [IsBE] in { 8217def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), 8218 (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), 8219 (REV64v16i8 FPR128:$src), 8220 (i32 8)))>; 8221def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), 8222 (v16i8 (REV64v16i8 FPR128:$src))>; 8223def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), 8224 (v16i8 (REV32v16i8 FPR128:$src))>; 8225def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), 8226 (v16i8 (REV16v16i8 FPR128:$src))>; 8227def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), 8228 (v16i8 (REV64v16i8 FPR128:$src))>; 8229def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), 8230 (v16i8 (REV32v16i8 FPR128:$src))>; 8231def : Pat<(v16i8 
(bitconvert (v8f16 FPR128:$src))), 8232 (v16i8 (REV16v16i8 FPR128:$src))>; 8233def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), 8234 (v16i8 (REV16v16i8 FPR128:$src))>; 8235} 8236 8237def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), 8238 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8239def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), 8240 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8241def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), 8242 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8243def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), 8244 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8245def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))), 8246 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8247def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), 8248 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8249def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), 8250 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8251def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), 8252 (EXTRACT_SUBREG V128:$Rn, dsub)>; 8253 8254def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), 8255 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8256def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), 8257 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8258def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), 8259 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8260def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), 8261 (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; 8262 8263// A 64-bit subvector insert to the first 128-bit vector position 8264// is a subregister copy that needs no instruction. 8265multiclass InsertSubvectorUndef<ValueType Ty> { 8266 def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)), 8267 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8268 def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)), 8269 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8270 def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)), 8271 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8272 def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)), 8273 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8274 def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)), 8275 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8276 def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)), 8277 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8278 def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)), 8279 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8280 def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)), 8281 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; 8282} 8283 8284defm : InsertSubvectorUndef<i32>; 8285defm : InsertSubvectorUndef<i64>; 8286 8287// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 8288// or v2f32. 8289def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), 8290 (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), 8291 (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; 8292def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), 8293 (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), 8294 (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; 8295 // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, 8296 // so we match on v4f32 here, not v2f32. This will also catch adding 8297 // the low two lanes of a true v4f32 vector. 
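// For illustration (an assumed example, not from the original source): adding
// lane 0 and lane 1 of a float vector, i.e.
//   (any_fadd (vector_extract v, 0), (vector_extract v, 1))
// is selected by the pattern below to a single scalar pairwise add,
//   faddp s0, v0.2s
// rather than two lane moves and a scalar fadd.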
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity = 15
} // Predicates = [IsLE]

// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  // Indirect tail-call limited to only use registers (x16 and x17) which are
  // allowed to tail-call a "BTI c" instruction.
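  // (For illustration, background that is assumed rather than taken from the
  // original source: with BTI enabled, an indirect branch through x16 or x17,
  // e.g.
  //   br x16
  // may land on a "BTI c" guard, while the same branch through another
  // register would require a "BTI j" or "BTI jc" landing pad, so the register
  // class used below keeps these indirect tail calls BTI-compatible.)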
8369 def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>, 8370 Sched<[WriteBrReg]>; 8371} 8372 8373def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)), 8374 (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>, 8375 Requires<[NotUseBTI]>; 8376def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)), 8377 (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>, 8378 Requires<[UseBTI]>; 8379def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)), 8380 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; 8381def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)), 8382 (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; 8383 8384def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>; 8385def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>; 8386 8387// Extracting lane zero is a special case where we can just use a plain 8388// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the 8389// rest of the compiler, especially the register allocator and copy propagation, 8390// to reason about, so is preferred when it's possible to use it. 8391let AddedComplexity = 10 in { 8392 def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>; 8393 def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>; 8394 def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>; 8395} 8396 8397// dot_v4i8 8398class mul_v4i8<SDPatternOperator ldop> : 8399 PatFrag<(ops node:$Rn, node:$Rm, node:$offset), 8400 (mul (ldop (add node:$Rn, node:$offset)), 8401 (ldop (add node:$Rm, node:$offset)))>; 8402class mulz_v4i8<SDPatternOperator ldop> : 8403 PatFrag<(ops node:$Rn, node:$Rm), 8404 (mul (ldop node:$Rn), (ldop node:$Rm))>; 8405 8406def load_v4i8 : 8407 OutPatFrag<(ops node:$R), 8408 (INSERT_SUBREG 8409 (v2i32 (IMPLICIT_DEF)), 8410 (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)), 8411 ssub)>; 8412 8413class dot_v4i8<Instruction DOT, SDPatternOperator ldop> : 8414 Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)), 8415 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)), 8416 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)), 8417 (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))), 8418 (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR), 8419 (load_v4i8 GPR64sp:$Rn), 8420 (load_v4i8 GPR64sp:$Rm))), 8421 sub_32)>, Requires<[HasDotProd]>; 8422 8423// dot_v8i8 8424class ee_v8i8<SDPatternOperator extend> : 8425 PatFrag<(ops node:$V, node:$K), 8426 (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>; 8427 8428class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> : 8429 PatFrag<(ops node:$M, node:$N, node:$K), 8430 (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)), 8431 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>; 8432 8433class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> : 8434 PatFrag<(ops node:$M, node:$N), 8435 (i32 (extractelt 8436 (v4i32 (AArch64uaddv 8437 (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)), 8438 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))), 8439 (i64 0)))>; 8440 8441// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm 8442def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>; 8443 8444class odot_v8i8<Instruction DOT> : 8445 OutPatFrag<(ops node:$Vm, node:$Vn), 8446 (EXTRACT_SUBREG 8447 (VADDV_32 8448 (i64 (DOT (DUPv2i32gpr WZR), 8449 (v8i8 node:$Vm), 8450 (v8i8 node:$Vn)))), 8451 sub_32)>; 8452 8453class dot_v8i8<Instruction DOT, 
SDPatternOperator mulop, 8454 SDPatternOperator extend> : 8455 Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn), 8456 (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>, 8457 Requires<[HasDotProd]>; 8458 8459// dot_v16i8 8460class ee_v16i8<SDPatternOperator extend> : 8461 PatFrag<(ops node:$V, node:$K1, node:$K2), 8462 (v4i16 (extract_subvector 8463 (v8i16 (extend 8464 (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>; 8465 8466class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> : 8467 PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2), 8468 (v4i32 8469 (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)), 8470 (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>; 8471 8472class idot_v16i8<SDPatternOperator m, SDPatternOperator x> : 8473 PatFrag<(ops node:$M, node:$N), 8474 (i32 (extractelt 8475 (v4i32 (AArch64uaddv 8476 (add 8477 (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)), 8478 (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))), 8479 (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)), 8480 (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))), 8481 (i64 0)))>; 8482 8483class odot_v16i8<Instruction DOT> : 8484 OutPatFrag<(ops node:$Vm, node:$Vn), 8485 (i32 (ADDVv4i32v 8486 (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>; 8487 8488class dot_v16i8<Instruction DOT, SDPatternOperator mulop, 8489 SDPatternOperator extend> : 8490 Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn), 8491 (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>, 8492 Requires<[HasDotProd]>; 8493 8494let AddedComplexity = 10 in { 8495 def : dot_v4i8<SDOTv8i8, sextloadi8>; 8496 def : dot_v4i8<UDOTv8i8, zextloadi8>; 8497 def : dot_v8i8<SDOTv8i8, AArch64smull, sext>; 8498 def : dot_v8i8<UDOTv8i8, AArch64umull, zext>; 8499 def : dot_v16i8<SDOTv16i8, AArch64smull, sext>; 8500 def : dot_v16i8<UDOTv16i8, AArch64umull, zext>; 8501 8502 // FIXME: add patterns to generate vector by element dot product. 8503 // FIXME: add SVE dot-product patterns. 8504} 8505 8506// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs, 8507// so that it can be used as input to inline asm, and vice versa. 
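// (For illustration, an assumed sketch rather than text from the original
// source: LS64_BUILD gathers eight 64-bit values into one GPR64x8 register
// tuple so that, for example, the st64b intrinsic can issue a single 64-byte
// store,
//   st64b x0, [x8]    // stores x0..x7 to the address in x8
// and LS64_EXTRACT pulls an individual 64-bit element back out of the tuple.)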

// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}

let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:  Store64BV<0b011, "st64bv">;
  def ST64BV0: Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}

let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;

    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;

    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;

    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
  }
}
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}
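
// The prologue/main/epilogue (P/M/E) triples defined above are intended to be
// emitted together. A rough sketch of an inlined FEAT_MOPS memcpy (register
// choice is arbitrary; illustration only, not a pattern from this file):
//
//   cpyfp [x0]!, [x1]!, x2!   // prologue: set up, copies an initial portion
//   cpyfm [x0]!, [x1]!, x2!   // main: bulk of the copy
//   cpyfe [x0]!, [x1]!, x2!   // epilogue: remaining tail
//
// The ISel pseudos below (Size = 12) stand in for one such three-instruction
// sequence until they are expanded.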

// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;

// MOPS operations always contain three 4-byte instructions
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                     (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                     [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb", (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb", (HINT 22), 1>;
}

//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW : ReadCheckWriteCompareAndSwap;

defm RCWCLR : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
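
// With FEAT_CSSC these scalar operations no longer need multi-instruction
// expansions. As a rough illustration (hypothetical source, not from this
// file), something like
//
//   int32_t clamp_lo(int32_t a, int32_t b) { return a < b ? a : b; }
//
// can be selected to a single "smin w0, w0, w1" instead of a compare+csel
// pair, and likewise ctpop/cttz/abs map to the CNT/CTZ/ABS GPR forms above.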

def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg); when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                             size  opc   opc2
  def STILPWpre:  BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:  BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:    BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:    BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;

  //                                        size  opc
  def STLRWpre:  BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:  BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64sp:$Rn), "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn), "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
$wback">; 8704} 8705 8706let Predicates = [HasRCPC3, HasNEON] in { 8707 // size opc regtype 8708 defm STLURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8 , (outs), (ins FPR8 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 8709 defm STLURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 8710 defm STLURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 8711 defm STLURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 8712 defm STLURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">; 8713 defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8 , (outs FPR8 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 8714 defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 8715 defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 8716 defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 8717 defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">; 8718 8719 // L 8720 def STL1: LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn) , "stl1", "">; 8721 def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">; 8722 8723 // Aliases for when offset=0 8724 def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>; 8725} 8726 8727//===----------------------------------------------------------------------===// 8728// 128-bit System Instructions (FEAT_SYSINSTR128) 8729//===----------------------------------------------------------------------===// 8730let Predicates = [HasD128] in { 8731 def SYSPxt : SystemPXtI<0, "sysp">; 8732 8733 def SYSPxt_XZR 8734 : BaseSystemI<0, (outs), 8735 (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair), 8736 "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">, 8737 Sched<[WriteSys]> 8738 { 8739 // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?) 8740 // and therefore autogenerates a decoder that builds an MC representation that has 4 fields 8741 // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one 8742 // extra for the XZR) because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc 8743 // is based off of the asm template (maybe) and therefore wants to print 5 operands. 8744 // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would 8745 // overlap with the main SYSP instruction. 

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // A custom decoder is needed here: TableGen sees only four encoded fields
    // and autogenerates a decoder (decodeToMCInst) that builds an MC
    // representation with four operands, but when printing we expect the MC
    // representation to have five operands (one extra for the XZR pair),
    // because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc
    // appears to be derived from the asm template and so wants to print five
    // operands. Adding a bits<5> xzr_pair field would avoid this, but without
    // a way to constrain it to 0b11111 here it would overlap with the main
    // SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//             31        22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS        1101010101 | 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR        1101010101 | 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc).

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs), (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}


include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"