AArch64LegalizerInfo.cpp revision 353358
//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

/// Build the GlobalISel legalization rules for every generic opcode the
/// AArch64 backend handles. Note that within each rule set below the rules
/// are matched in the order they are added, so earlier clauses take
/// precedence over later ones.
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  // Commonly used scalar, pointer and vector low-level types (LLTs)
  // referenced by the rules below.
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      // Break down any illegal vector into scalars, except <2 x s64> which is
      // legal; s64-element vectors are rebuilt as <2 x s64>.
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_GEP)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      // Scalar 32-bit shifts with a 32-bit amount go through custom
      // legalization (see legalizeShlAshrLshr) so constant amounts can be
      // promoted to s64 for the imported selection patterns.
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor(
          {{s32, s32}, {s32, s64}, {s64, s64}, {v2s32, v2s32}, {v4s32, v4s32}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}});

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64)
          return false;
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  // Each entry is {value type, pointer type, memory size in bits, alignment}
  // per legalForTypesWithMemDesc.
  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();

  // Matches vector value types whose elements are pointers in address space
  // 0; those go through custom legalization (see legalizeLoadStore).
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Truncating stores (value wider than memory) are lowered.
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      // Vector compares produce a mask with the same element width as the
      // (non-pointer) source elements.
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf([=](const LegalityQuery &Query) {
        unsigned DstSize = Query.Types[0].getSizeInBits();

        // Make sure that we have something that will fit in a register, and
        // make sure it's a power of 2.
        if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
          return false;

        const LLT &SrcTy = Query.Types[1];

        // Special case for s1.
        if (SrcTy == s1)
          return true;

        // Make sure we fit in a register otherwise. Don't bother checking that
        // the source type is below 128 bits. We shouldn't be allowing anything
        // through which is wider than the destination in the first place.
        unsigned SrcSize = SrcTy.getSizeInBits();
        if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
          return false;

        return true;
      });

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    // For G_MERGE_VALUES the wide type is the result (index 0); for
    // G_UNMERGE_VALUES it is the source (index 1).
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    // True when the indexed type is a vector whose element width is not a
    // power of 2 in [8, 64].
    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits = 1
                  << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  // Freeze the rule tables and check every opcode has a complete rule set.
  computeTables();
  verify(*ST.getInstrInfo());
}

bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  // Dispatch to the per-opcode custom legalizer selected by the rules in the
  // constructor.
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

/// Custom legalization for 32-bit G_SHL/G_ASHR/G_LSHR: if the shift amount is
/// a small-enough G_CONSTANT, zero-extend it to s64 so the imported
/// SelectionDAG immediate patterns can select it. Always leaves the
/// instruction legal and returns true.
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  // Widen only the constant amount operand to s64; the shifted value itself
  // keeps its 32-bit type.
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

/// Custom legalization for vector G_LOAD/G_STORE whose element type is a
/// pointer (p0): bitcast the value to/from an equivalent integer-element
/// vector so the existing s64-based selection patterns can fire.
bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, must be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  unsigned ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  // Only <N x p0> (address space 0) values are handled here; anything else
  // should not have been routed to custom legalization (see IsPtrVecPred).
  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Store: bitcast <N x p0> to <N x sPtrSize> and store the integers.
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    // Load: load the integers, then bitcast back to the pointer vector.
    unsigned NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

/// Expand G_VAARG into explicit va_list pointer arithmetic: load the current
/// argument pointer, realign it if the requested alignment exceeds the slot
/// size, load the value, then store the advanced pointer back into the
/// va_list.
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  // One va_list slot is a pointer-sized unit.
  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  // Load the current head of the va_list.
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  // If the argument requires stricter alignment than a va_list slot, round
  // the loaded pointer up: DstPtr = (List + Align - 1) & ~(Align - 1).
  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildGEP(PtrTy, List, AlignMinus1.getReg(0));

    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    // Clear the low log2(Align) bits to complete the round-up.
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  // Load the actual argument value from the (possibly realigned) slot.
  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  // Advance past the argument, rounded up to a whole number of slots, and
  // store the new head back into the va_list.
  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildGEP(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}