// AArch64LegalizerInfo.cpp revision 353358
1//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// \file 9/// This file implements the targeting of the Machinelegalizer class for 10/// AArch64. 11/// \todo This should be generated by TableGen. 12//===----------------------------------------------------------------------===// 13 14#include "AArch64LegalizerInfo.h" 15#include "AArch64Subtarget.h" 16#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 17#include "llvm/CodeGen/MachineInstr.h" 18#include "llvm/CodeGen/MachineRegisterInfo.h" 19#include "llvm/CodeGen/TargetOpcodes.h" 20#include "llvm/CodeGen/ValueTypes.h" 21#include "llvm/IR/DerivedTypes.h" 22#include "llvm/IR/Type.h" 23 24#define DEBUG_TYPE "aarch64-legalinfo" 25 26using namespace llvm; 27using namespace LegalizeActions; 28using namespace LegalizeMutations; 29using namespace LegalityPredicates; 30 31AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) { 32 using namespace TargetOpcode; 33 const LLT p0 = LLT::pointer(0, 64); 34 const LLT s1 = LLT::scalar(1); 35 const LLT s8 = LLT::scalar(8); 36 const LLT s16 = LLT::scalar(16); 37 const LLT s32 = LLT::scalar(32); 38 const LLT s64 = LLT::scalar(64); 39 const LLT s128 = LLT::scalar(128); 40 const LLT s256 = LLT::scalar(256); 41 const LLT s512 = LLT::scalar(512); 42 const LLT v16s8 = LLT::vector(16, 8); 43 const LLT v8s8 = LLT::vector(8, 8); 44 const LLT v4s8 = LLT::vector(4, 8); 45 const LLT v8s16 = LLT::vector(8, 16); 46 const LLT v4s16 = LLT::vector(4, 16); 47 const LLT v2s16 = LLT::vector(2, 16); 48 const LLT v2s32 = LLT::vector(2, 32); 49 const LLT v4s32 = LLT::vector(4, 32); 50 const LLT v2s64 = LLT::vector(2, 64); 51 const LLT v2p0 = LLT::vector(2, p0); 52 
53 getActionDefinitionsBuilder(G_IMPLICIT_DEF) 54 .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64}) 55 .clampScalar(0, s1, s64) 56 .widenScalarToNextPow2(0, 8) 57 .fewerElementsIf( 58 [=](const LegalityQuery &Query) { 59 return Query.Types[0].isVector() && 60 (Query.Types[0].getElementType() != s64 || 61 Query.Types[0].getNumElements() != 2); 62 }, 63 [=](const LegalityQuery &Query) { 64 LLT EltTy = Query.Types[0].getElementType(); 65 if (EltTy == s64) 66 return std::make_pair(0, LLT::vector(2, 64)); 67 return std::make_pair(0, EltTy); 68 }); 69 70 getActionDefinitionsBuilder(G_PHI) 71 .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64}) 72 .clampScalar(0, s16, s64) 73 .widenScalarToNextPow2(0); 74 75 getActionDefinitionsBuilder(G_BSWAP) 76 .legalFor({s32, s64, v4s32, v2s32, v2s64}) 77 .clampScalar(0, s16, s64) 78 .widenScalarToNextPow2(0); 79 80 getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) 81 .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8}) 82 .clampScalar(0, s32, s64) 83 .widenScalarToNextPow2(0) 84 .clampNumElements(0, v2s32, v4s32) 85 .clampNumElements(0, v2s64, v2s64) 86 .moreElementsToNextPow2(0); 87 88 getActionDefinitionsBuilder(G_SHL) 89 .legalFor({{s32, s32}, {s64, s64}, 90 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}}) 91 .clampScalar(1, s32, s64) 92 .clampScalar(0, s32, s64) 93 .widenScalarToNextPow2(0) 94 .clampNumElements(0, v2s32, v4s32) 95 .clampNumElements(0, v2s64, v2s64) 96 .moreElementsToNextPow2(0) 97 .minScalarSameAs(1, 0); 98 99 getActionDefinitionsBuilder(G_GEP) 100 .legalFor({{p0, s64}}) 101 .clampScalar(1, s64, s64); 102 103 getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0}); 104 105 getActionDefinitionsBuilder({G_SDIV, G_UDIV}) 106 .legalFor({s32, s64}) 107 .clampScalar(0, s32, s64) 108 .widenScalarToNextPow2(0) 109 .scalarize(0); 110 111 getActionDefinitionsBuilder({G_LSHR, G_ASHR}) 112 .customIf([=](const LegalityQuery &Query) { 113 const auto &SrcTy = Query.Types[0]; 114 const auto &AmtTy = 
Query.Types[1]; 115 return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && 116 AmtTy.getSizeInBits() == 32; 117 }) 118 .legalFor( 119 {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}}) 120 .clampScalar(1, s32, s64) 121 .clampScalar(0, s32, s64) 122 .minScalarSameAs(1, 0); 123 124 getActionDefinitionsBuilder({G_SREM, G_UREM}) 125 .lowerFor({s1, s8, s16, s32, s64}); 126 127 getActionDefinitionsBuilder({G_SMULO, G_UMULO}) 128 .lowerFor({{s64, s1}}); 129 130 getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64}); 131 132 getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO}) 133 .legalFor({{s32, s1}, {s64, s1}}); 134 135 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG}) 136 .legalFor({s32, s64, v2s64, v4s32, v2s32}); 137 138 getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64}); 139 140 getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT, 141 G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND, 142 G_FNEARBYINT}) 143 // If we don't have full FP16 support, then scalarize the elements of 144 // vectors containing fp16 types. 145 .fewerElementsIf( 146 [=, &ST](const LegalityQuery &Query) { 147 const auto &Ty = Query.Types[0]; 148 return Ty.isVector() && Ty.getElementType() == s16 && 149 !ST.hasFullFP16(); 150 }, 151 [=](const LegalityQuery &Query) { return std::make_pair(0, s16); }) 152 // If we don't have full FP16 support, then widen s16 to s32 if we 153 // encounter it. 154 .widenScalarIf( 155 [=, &ST](const LegalityQuery &Query) { 156 return Query.Types[0] == s16 && !ST.hasFullFP16(); 157 }, 158 [=](const LegalityQuery &Query) { return std::make_pair(0, s32); }) 159 .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16}); 160 161 getActionDefinitionsBuilder( 162 {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW}) 163 // We need a call for these, so we always need to scalarize. 
164 .scalarize(0) 165 // Regardless of FP16 support, widen 16-bit elements to 32-bits. 166 .minScalar(0, s32) 167 .libcallFor({s32, s64, v2s32, v4s32, v2s64}); 168 169 getActionDefinitionsBuilder(G_INSERT) 170 .unsupportedIf([=](const LegalityQuery &Query) { 171 return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits(); 172 }) 173 .legalIf([=](const LegalityQuery &Query) { 174 const LLT &Ty0 = Query.Types[0]; 175 const LLT &Ty1 = Query.Types[1]; 176 if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0) 177 return false; 178 return isPowerOf2_32(Ty1.getSizeInBits()) && 179 (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8); 180 }) 181 .clampScalar(0, s32, s64) 182 .widenScalarToNextPow2(0) 183 .maxScalarIf(typeInSet(0, {s32}), 1, s16) 184 .maxScalarIf(typeInSet(0, {s64}), 1, s32) 185 .widenScalarToNextPow2(1); 186 187 getActionDefinitionsBuilder(G_EXTRACT) 188 .unsupportedIf([=](const LegalityQuery &Query) { 189 return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits(); 190 }) 191 .legalIf([=](const LegalityQuery &Query) { 192 const LLT &Ty0 = Query.Types[0]; 193 const LLT &Ty1 = Query.Types[1]; 194 if (Ty1 != s32 && Ty1 != s64) 195 return false; 196 if (Ty1 == p0) 197 return true; 198 return isPowerOf2_32(Ty0.getSizeInBits()) && 199 (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8); 200 }) 201 .clampScalar(1, s32, s64) 202 .widenScalarToNextPow2(1) 203 .maxScalarIf(typeInSet(1, {s32}), 0, s16) 204 .maxScalarIf(typeInSet(1, {s64}), 0, s32) 205 .widenScalarToNextPow2(0); 206 207 getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) 208 .legalForTypesWithMemDesc({{s32, p0, 8, 8}, 209 {s32, p0, 16, 8}, 210 {s32, p0, 32, 8}, 211 {s64, p0, 8, 2}, 212 {s64, p0, 16, 2}, 213 {s64, p0, 32, 4}, 214 {s64, p0, 64, 8}, 215 {p0, p0, 64, 8}, 216 {v2s32, p0, 64, 8}}) 217 .clampScalar(0, s32, s64) 218 .widenScalarToNextPow2(0) 219 // TODO: We could support sum-of-pow2's but the lowering code doesn't know 220 // how to do that yet. 
221 .unsupportedIfMemSizeNotPow2() 222 // Lower anything left over into G_*EXT and G_LOAD 223 .lower(); 224 225 auto IsPtrVecPred = [=](const LegalityQuery &Query) { 226 const LLT &ValTy = Query.Types[0]; 227 if (!ValTy.isVector()) 228 return false; 229 const LLT EltTy = ValTy.getElementType(); 230 return EltTy.isPointer() && EltTy.getAddressSpace() == 0; 231 }; 232 233 getActionDefinitionsBuilder(G_LOAD) 234 .legalForTypesWithMemDesc({{s8, p0, 8, 8}, 235 {s16, p0, 16, 8}, 236 {s32, p0, 32, 8}, 237 {s64, p0, 64, 8}, 238 {p0, p0, 64, 8}, 239 {v8s8, p0, 64, 8}, 240 {v16s8, p0, 128, 8}, 241 {v4s16, p0, 64, 8}, 242 {v8s16, p0, 128, 8}, 243 {v2s32, p0, 64, 8}, 244 {v4s32, p0, 128, 8}, 245 {v2s64, p0, 128, 8}}) 246 // These extends are also legal 247 .legalForTypesWithMemDesc({{s32, p0, 8, 8}, 248 {s32, p0, 16, 8}}) 249 .clampScalar(0, s8, s64) 250 .widenScalarToNextPow2(0) 251 // TODO: We could support sum-of-pow2's but the lowering code doesn't know 252 // how to do that yet. 253 .unsupportedIfMemSizeNotPow2() 254 // Lower any any-extending loads left into G_ANYEXT and G_LOAD 255 .lowerIf([=](const LegalityQuery &Query) { 256 return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; 257 }) 258 .clampMaxNumElements(0, s32, 2) 259 .clampMaxNumElements(0, s64, 1) 260 .customIf(IsPtrVecPred); 261 262 getActionDefinitionsBuilder(G_STORE) 263 .legalForTypesWithMemDesc({{s8, p0, 8, 8}, 264 {s16, p0, 16, 8}, 265 {s32, p0, 32, 8}, 266 {s64, p0, 64, 8}, 267 {p0, p0, 64, 8}, 268 {v16s8, p0, 128, 8}, 269 {v4s16, p0, 64, 8}, 270 {v8s16, p0, 128, 8}, 271 {v2s32, p0, 64, 8}, 272 {v4s32, p0, 128, 8}, 273 {v2s64, p0, 128, 8}}) 274 .clampScalar(0, s8, s64) 275 .widenScalarToNextPow2(0) 276 // TODO: We could support sum-of-pow2's but the lowering code doesn't know 277 // how to do that yet. 
278 .unsupportedIfMemSizeNotPow2() 279 .lowerIf([=](const LegalityQuery &Query) { 280 return Query.Types[0].isScalar() && 281 Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits; 282 }) 283 .clampMaxNumElements(0, s32, 2) 284 .clampMaxNumElements(0, s64, 1) 285 .customIf(IsPtrVecPred); 286 287 // Constants 288 getActionDefinitionsBuilder(G_CONSTANT) 289 .legalFor({p0, s8, s16, s32, s64}) 290 .clampScalar(0, s8, s64) 291 .widenScalarToNextPow2(0); 292 getActionDefinitionsBuilder(G_FCONSTANT) 293 .legalFor({s32, s64}) 294 .clampScalar(0, s32, s64); 295 296 getActionDefinitionsBuilder(G_ICMP) 297 .legalFor({{s32, s32}, 298 {s32, s64}, 299 {s32, p0}, 300 {v4s32, v4s32}, 301 {v2s32, v2s32}, 302 {v2s64, v2s64}, 303 {v2s64, v2p0}, 304 {v4s16, v4s16}, 305 {v8s16, v8s16}, 306 {v8s8, v8s8}, 307 {v16s8, v16s8}}) 308 .clampScalar(0, s32, s32) 309 .clampScalar(1, s32, s64) 310 .minScalarEltSameAsIf( 311 [=](const LegalityQuery &Query) { 312 const LLT &Ty = Query.Types[0]; 313 const LLT &SrcTy = Query.Types[1]; 314 return Ty.isVector() && !SrcTy.getElementType().isPointer() && 315 Ty.getElementType() != SrcTy.getElementType(); 316 }, 317 0, 1) 318 .minScalarOrEltIf( 319 [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; }, 320 1, s32) 321 .minScalarOrEltIf( 322 [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0, 323 s64) 324 .widenScalarOrEltToNextPow2(1); 325 326 getActionDefinitionsBuilder(G_FCMP) 327 .legalFor({{s32, s32}, {s32, s64}}) 328 .clampScalar(0, s32, s32) 329 .clampScalar(1, s32, s64) 330 .widenScalarToNextPow2(1); 331 332 // Extensions 333 getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) 334 .legalIf([=](const LegalityQuery &Query) { 335 unsigned DstSize = Query.Types[0].getSizeInBits(); 336 337 // Make sure that we have something that will fit in a register, and 338 // make sure it's a power of 2. 
339 if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize)) 340 return false; 341 342 const LLT &SrcTy = Query.Types[1]; 343 344 // Special case for s1. 345 if (SrcTy == s1) 346 return true; 347 348 // Make sure we fit in a register otherwise. Don't bother checking that 349 // the source type is below 128 bits. We shouldn't be allowing anything 350 // through which is wider than the destination in the first place. 351 unsigned SrcSize = SrcTy.getSizeInBits(); 352 if (SrcSize < 8 || !isPowerOf2_32(SrcSize)) 353 return false; 354 355 return true; 356 }); 357 358 getActionDefinitionsBuilder(G_TRUNC).alwaysLegal(); 359 360 // FP conversions 361 getActionDefinitionsBuilder(G_FPTRUNC).legalFor( 362 {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}}); 363 getActionDefinitionsBuilder(G_FPEXT).legalFor( 364 {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}}); 365 366 // Conversions 367 getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) 368 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) 369 .clampScalar(0, s32, s64) 370 .widenScalarToNextPow2(0) 371 .clampScalar(1, s32, s64) 372 .widenScalarToNextPow2(1); 373 374 getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) 375 .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32}) 376 .clampScalar(1, s32, s64) 377 .widenScalarToNextPow2(1) 378 .clampScalar(0, s32, s64) 379 .widenScalarToNextPow2(0); 380 381 // Control-flow 382 getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32}); 383 getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0}); 384 385 // Select 386 // FIXME: We can probably do a bit better than just scalarizing vector 387 // selects. 
388 getActionDefinitionsBuilder(G_SELECT) 389 .legalFor({{s32, s1}, {s64, s1}, {p0, s1}}) 390 .clampScalar(0, s32, s64) 391 .widenScalarToNextPow2(0) 392 .scalarize(0); 393 394 // Pointer-handling 395 getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0}); 396 getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0}); 397 398 getActionDefinitionsBuilder(G_PTRTOINT) 399 .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0}) 400 .maxScalar(0, s64) 401 .widenScalarToNextPow2(0, /*Min*/ 8); 402 403 getActionDefinitionsBuilder(G_INTTOPTR) 404 .unsupportedIf([&](const LegalityQuery &Query) { 405 return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits(); 406 }) 407 .legalFor({{p0, s64}}); 408 409 // Casts for 32 and 64-bit width type are just copies. 410 // Same for 128-bit width type, except they are on the FPR bank. 411 getActionDefinitionsBuilder(G_BITCAST) 412 // FIXME: This is wrong since G_BITCAST is not allowed to change the 413 // number of bits but it's what the previous code described and fixing 414 // it breaks tests. 415 .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8, 416 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64, 417 v2p0}); 418 419 getActionDefinitionsBuilder(G_VASTART).legalFor({p0}); 420 421 // va_list must be a pointer, but most sized types are pretty easy to handle 422 // as the destination. 
423 getActionDefinitionsBuilder(G_VAARG) 424 .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0}) 425 .clampScalar(0, s8, s64) 426 .widenScalarToNextPow2(0, /*Min*/ 8); 427 428 if (ST.hasLSE()) { 429 getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) 430 .lowerIf(all( 431 typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0), 432 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic))); 433 434 getActionDefinitionsBuilder( 435 {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, 436 G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, 437 G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG}) 438 .legalIf(all( 439 typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0), 440 atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic))); 441 } 442 443 getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); 444 445 // Merge/Unmerge 446 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { 447 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; 448 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; 449 450 auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) { 451 const LLT &Ty = Query.Types[TypeIdx]; 452 if (Ty.isVector()) { 453 const LLT &EltTy = Ty.getElementType(); 454 if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64) 455 return true; 456 if (!isPowerOf2_32(EltTy.getSizeInBits())) 457 return true; 458 } 459 return false; 460 }; 461 462 // FIXME: This rule is horrible, but specifies the same as what we had 463 // before with the particularly strange definitions removed (e.g. 464 // s8 = G_MERGE_VALUES s32, s32). 465 // Part of the complexity comes from these ops being extremely flexible. For 466 // example, you can build/decompose vectors with it, concatenate vectors, 467 // etc. and in addition to this you can also bitcast with it at the same 468 // time. We've been considering breaking it up into multiple ops to make it 469 // more manageable throughout the backend. 
470 getActionDefinitionsBuilder(Op) 471 // Break up vectors with weird elements into scalars 472 .fewerElementsIf( 473 [=](const LegalityQuery &Query) { return notValidElt(Query, 0); }, 474 scalarize(0)) 475 .fewerElementsIf( 476 [=](const LegalityQuery &Query) { return notValidElt(Query, 1); }, 477 scalarize(1)) 478 // Clamp the big scalar to s8-s512 and make it either a power of 2, 192, 479 // or 384. 480 .clampScalar(BigTyIdx, s8, s512) 481 .widenScalarIf( 482 [=](const LegalityQuery &Query) { 483 const LLT &Ty = Query.Types[BigTyIdx]; 484 return !isPowerOf2_32(Ty.getSizeInBits()) && 485 Ty.getSizeInBits() % 64 != 0; 486 }, 487 [=](const LegalityQuery &Query) { 488 // Pick the next power of 2, or a multiple of 64 over 128. 489 // Whichever is smaller. 490 const LLT &Ty = Query.Types[BigTyIdx]; 491 unsigned NewSizeInBits = 1 492 << Log2_32_Ceil(Ty.getSizeInBits() + 1); 493 if (NewSizeInBits >= 256) { 494 unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1); 495 if (RoundedTo < NewSizeInBits) 496 NewSizeInBits = RoundedTo; 497 } 498 return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); 499 }) 500 // Clamp the little scalar to s8-s256 and make it a power of 2. It's not 501 // worth considering the multiples of 64 since 2*192 and 2*384 are not 502 // valid. 503 .clampScalar(LitTyIdx, s8, s256) 504 .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8) 505 // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384, 506 // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>. 507 // At this point it's simple enough to accept the legal types. 508 .legalIf([=](const LegalityQuery &Query) { 509 const LLT &BigTy = Query.Types[BigTyIdx]; 510 const LLT &LitTy = Query.Types[LitTyIdx]; 511 if (BigTy.isVector() && BigTy.getSizeInBits() < 32) 512 return false; 513 if (LitTy.isVector() && LitTy.getSizeInBits() < 32) 514 return false; 515 return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0; 516 }) 517 // Any vectors left are the wrong size. Scalarize them. 
518 .scalarize(0) 519 .scalarize(1); 520 } 521 522 getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT) 523 .unsupportedIf([=](const LegalityQuery &Query) { 524 const LLT &EltTy = Query.Types[1].getElementType(); 525 return Query.Types[0] != EltTy; 526 }) 527 .minScalar(2, s64) 528 .legalIf([=](const LegalityQuery &Query) { 529 const LLT &VecTy = Query.Types[1]; 530 return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 || 531 VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32; 532 }); 533 534 getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) 535 .legalIf([=](const LegalityQuery &Query) { 536 const LLT &VecTy = Query.Types[0]; 537 // TODO: Support s8 and s16 538 return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64; 539 }); 540 541 getActionDefinitionsBuilder(G_BUILD_VECTOR) 542 .legalFor({{v4s16, s16}, 543 {v8s16, s16}, 544 {v2s32, s32}, 545 {v4s32, s32}, 546 {v2p0, p0}, 547 {v2s64, s64}}) 548 .clampNumElements(0, v4s32, v4s32) 549 .clampNumElements(0, v2s64, v2s64) 550 551 // Deal with larger scalar types, which will be implicitly truncated. 552 .legalIf([=](const LegalityQuery &Query) { 553 return Query.Types[0].getScalarSizeInBits() < 554 Query.Types[1].getSizeInBits(); 555 }) 556 .minScalarSameAs(1, 0); 557 558 getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct( 559 {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32}) 560 .scalarize(1); 561 562 getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) 563 .legalIf([=](const LegalityQuery &Query) { 564 const LLT &DstTy = Query.Types[0]; 565 const LLT &SrcTy = Query.Types[1]; 566 // For now just support the TBL2 variant which needs the source vectors 567 // to be the same size as the dest. 
568 if (DstTy != SrcTy) 569 return false; 570 for (auto &Ty : {v2s32, v4s32, v2s64}) { 571 if (DstTy == Ty) 572 return true; 573 } 574 return false; 575 }) 576 // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we 577 // just want those lowered into G_BUILD_VECTOR 578 .lowerIf([=](const LegalityQuery &Query) { 579 return !Query.Types[1].isVector(); 580 }) 581 .clampNumElements(0, v4s32, v4s32) 582 .clampNumElements(0, v2s64, v2s64); 583 584 getActionDefinitionsBuilder(G_CONCAT_VECTORS) 585 .legalFor({{v4s32, v2s32}, {v8s16, v4s16}}); 586 587 getActionDefinitionsBuilder(G_JUMP_TABLE) 588 .legalFor({{p0}, {s64}}); 589 590 getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) { 591 return Query.Types[0] == p0 && Query.Types[1] == s64; 592 }); 593 594 computeTables(); 595 verify(*ST.getInstrInfo()); 596} 597 598bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI, 599 MachineRegisterInfo &MRI, 600 MachineIRBuilder &MIRBuilder, 601 GISelChangeObserver &Observer) const { 602 switch (MI.getOpcode()) { 603 default: 604 // No idea what to do. 605 return false; 606 case TargetOpcode::G_VAARG: 607 return legalizeVaArg(MI, MRI, MIRBuilder); 608 case TargetOpcode::G_LOAD: 609 case TargetOpcode::G_STORE: 610 return legalizeLoadStore(MI, MRI, MIRBuilder, Observer); 611 case TargetOpcode::G_SHL: 612 case TargetOpcode::G_ASHR: 613 case TargetOpcode::G_LSHR: 614 return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer); 615 } 616 617 llvm_unreachable("expected switch to return"); 618} 619 620bool AArch64LegalizerInfo::legalizeShlAshrLshr( 621 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, 622 GISelChangeObserver &Observer) const { 623 assert(MI.getOpcode() == TargetOpcode::G_ASHR || 624 MI.getOpcode() == TargetOpcode::G_LSHR || 625 MI.getOpcode() == TargetOpcode::G_SHL); 626 // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the 627 // imported patterns can select it later. 
Either way, it will be legal. 628 Register AmtReg = MI.getOperand(2).getReg(); 629 auto *CstMI = MRI.getVRegDef(AmtReg); 630 assert(CstMI && "expected to find a vreg def"); 631 if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT) 632 return true; 633 // Check the shift amount is in range for an immediate form. 634 unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue(); 635 if (Amount > 31) 636 return true; // This will have to remain a register variant. 637 assert(MRI.getType(AmtReg).getSizeInBits() == 32); 638 MIRBuilder.setInstr(MI); 639 auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg); 640 MI.getOperand(2).setReg(ExtCst.getReg(0)); 641 return true; 642} 643 644bool AArch64LegalizerInfo::legalizeLoadStore( 645 MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, 646 GISelChangeObserver &Observer) const { 647 assert(MI.getOpcode() == TargetOpcode::G_STORE || 648 MI.getOpcode() == TargetOpcode::G_LOAD); 649 // Here we just try to handle vector loads/stores where our value type might 650 // have pointer elements, which the SelectionDAG importer can't handle. To 651 // allow the existing patterns for s64 to fire for p0, we just try to bitcast 652 // the value to use s64 types. 653 654 // Custom legalization requires the instruction, if not deleted, must be fully 655 // legalized. In order to allow further legalization of the inst, we create 656 // a new instruction and erase the existing one. 
657 658 unsigned ValReg = MI.getOperand(0).getReg(); 659 const LLT ValTy = MRI.getType(ValReg); 660 661 if (!ValTy.isVector() || !ValTy.getElementType().isPointer() || 662 ValTy.getElementType().getAddressSpace() != 0) { 663 LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store"); 664 return false; 665 } 666 667 MIRBuilder.setInstr(MI); 668 unsigned PtrSize = ValTy.getElementType().getSizeInBits(); 669 const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize); 670 auto &MMO = **MI.memoperands_begin(); 671 if (MI.getOpcode() == TargetOpcode::G_STORE) { 672 auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg}); 673 MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO); 674 } else { 675 unsigned NewReg = MRI.createGenericVirtualRegister(NewTy); 676 auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO); 677 MIRBuilder.buildBitcast({ValReg}, {NewLoad}); 678 } 679 MI.eraseFromParent(); 680 return true; 681} 682 683bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, 684 MachineRegisterInfo &MRI, 685 MachineIRBuilder &MIRBuilder) const { 686 MIRBuilder.setInstr(MI); 687 MachineFunction &MF = MIRBuilder.getMF(); 688 unsigned Align = MI.getOperand(2).getImm(); 689 Register Dst = MI.getOperand(0).getReg(); 690 Register ListPtr = MI.getOperand(1).getReg(); 691 692 LLT PtrTy = MRI.getType(ListPtr); 693 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); 694 695 const unsigned PtrSize = PtrTy.getSizeInBits() / 8; 696 Register List = MRI.createGenericVirtualRegister(PtrTy); 697 MIRBuilder.buildLoad( 698 List, ListPtr, 699 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, 700 PtrSize, /* Align = */ PtrSize)); 701 702 Register DstPtr; 703 if (Align > PtrSize) { 704 // Realign the list to the actual required alignment. 
705 auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1); 706 707 auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0)); 708 709 DstPtr = MRI.createGenericVirtualRegister(PtrTy); 710 MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align)); 711 } else 712 DstPtr = List; 713 714 uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8; 715 MIRBuilder.buildLoad( 716 Dst, DstPtr, 717 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad, 718 ValSize, std::max(Align, PtrSize))); 719 720 auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize)); 721 722 auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0)); 723 724 MIRBuilder.buildStore( 725 NewList, ListPtr, 726 *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, 727 PtrSize, /* Align = */ PtrSize)); 728 729 MI.eraseFromParent(); 730 return true; 731} 732