//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the MachineLegalizer class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64LegalizerInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

#define DEBUG_TYPE "aarch64-legalinfo"

using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;

/// Populate the legality rule tables for AArch64.
///
/// Each generic opcode gets a chain of rule clauses via
/// getActionDefinitionsBuilder(); the clauses are consulted in the order they
/// are written here, so reordering them can change which action the legalizer
/// picks for a given type combination. The constructor finishes by calling
/// computeTables() and verify().
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) {
  using namespace TargetOpcode;
  // Shorthands for the LLTs used throughout the rules below.
  const LLT p0 = LLT::pointer(0, 64);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  const LLT s128 = LLT::scalar(128);
  const LLT s256 = LLT::scalar(256);
  const LLT s512 = LLT::scalar(512);
  const LLT v16s8 = LLT::vector(16, 8);
  const LLT v8s8 = LLT::vector(8, 8);
  const LLT v4s8 = LLT::vector(4, 8);
  const LLT v8s16 = LLT::vector(8, 16);
  const LLT v4s16 = LLT::vector(4, 16);
  const LLT v2s16 = LLT::vector(2, 16);
  const LLT v2s32 = LLT::vector(2, 32);
  const LLT v4s32 = LLT::vector(4, 32);
  const LLT v2s64 = LLT::vector(2, 64);
  const LLT v2p0 = LLT::vector(2, p0);

  // FIXME: support subtargets which have neon/fp-armv8 disabled.
  if (!ST.hasNEON() || !ST.hasFPARMv8()) {
    computeTables();
    return;
  }

  getActionDefinitionsBuilder(G_IMPLICIT_DEF)
      .legalFor({p0, s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s1, s64)
      .widenScalarToNextPow2(0, 8)
      .fewerElementsIf(
          [=](const LegalityQuery &Query) {
            return Query.Types[0].isVector() &&
                   (Query.Types[0].getElementType() != s64 ||
                    Query.Types[0].getNumElements() != 2);
          },
          [=](const LegalityQuery &Query) {
            // Break any vector other than <2 x s64> down; <N x s64> becomes
            // <2 x s64>, everything else becomes its element type.
            LLT EltTy = Query.Types[0].getElementType();
            if (EltTy == s64)
              return std::make_pair(0, LLT::vector(2, 64));
            return std::make_pair(0, EltTy);
          });

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, s16, s32, s64, v2s32, v4s32, v2s64})
      .clampScalar(0, s16, s64)
      .widenScalarToNextPow2(0);

  getActionDefinitionsBuilder(G_BSWAP)
      .legalFor({s32, s64, v4s32, v2s32, v2s64})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Integer arithmetic and bitwise operations.
  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
      .legalFor({s32, s64, v2s32, v4s32, v2s64, v8s16, v16s8})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0);

  getActionDefinitionsBuilder(G_SHL)
      .legalFor({{s32, s32}, {s64, s64},
                 {v2s32, v2s32}, {v4s32, v4s32}, {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampNumElements(0, v2s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)
      .moreElementsToNextPow2(0)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_PTR_ADD)
      .legalFor({{p0, s64}})
      .clampScalar(1, s64, s64);

  getActionDefinitionsBuilder(G_PTR_MASK).legalFor({p0});

  getActionDefinitionsBuilder({G_SDIV, G_UDIV})
      .legalFor({s32, s64})
      .libcallFor({s128})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  getActionDefinitionsBuilder({G_LSHR, G_ASHR})
      // 32-bit shifts with a G_CONSTANT amount go through custom legalization
      // (legalizeShlAshrLshr) so the amount can be promoted to s64 for the
      // imported selection patterns.
      .customIf([=](const LegalityQuery &Query) {
        const auto &SrcTy = Query.Types[0];
        const auto &AmtTy = Query.Types[1];
        return !SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
               AmtTy.getSizeInBits() == 32;
      })
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s64, s64},
                 {v2s32, v2s32},
                 {v4s32, v4s32},
                 {v2s64, v2s64}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s64)
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder({G_SREM, G_UREM})
      .lowerFor({s1, s8, s16, s32, s64});

  getActionDefinitionsBuilder({G_SMULO, G_UMULO})
      .lowerFor({{s64, s1}});

  getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});

  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
      .legalFor({{s32, s1}, {s64, s1}})
      .minScalar(0, s32);

  // Floating-point arithmetic.
  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
      .legalFor({s32, s64, v2s64, v4s32, v2s32});

  getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});

  getActionDefinitionsBuilder({G_FCEIL, G_FABS, G_FSQRT, G_FFLOOR, G_FRINT,
                               G_FMA, G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
                               G_FNEARBYINT})
      // If we don't have full FP16 support, then scalarize the elements of
      // vectors containing fp16 types.
      .fewerElementsIf(
          [=, &ST](const LegalityQuery &Query) {
            const auto &Ty = Query.Types[0];
            return Ty.isVector() && Ty.getElementType() == s16 &&
                   !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
      // If we don't have full FP16 support, then widen s16 to s32 if we
      // encounter it.
      .widenScalarIf(
          [=, &ST](const LegalityQuery &Query) {
            return Query.Types[0] == s16 && !ST.hasFullFP16();
          },
          [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
      .legalFor({s16, s32, s64, v2s32, v4s32, v2s64, v2s16, v4s16, v8s16});

  getActionDefinitionsBuilder(
      {G_FCOS, G_FSIN, G_FLOG10, G_FLOG, G_FLOG2, G_FEXP, G_FEXP2, G_FPOW})
      // We need a call for these, so we always need to scalarize.
      .scalarize(0)
      // Regardless of FP16 support, widen 16-bit elements to 32-bits.
      .minScalar(0, s32)
      .libcallFor({s32, s64, v2s32, v4s32, v2s64});

  getActionDefinitionsBuilder(G_INSERT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        // The inserted value must be strictly smaller than the container.
        return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
          return false;
        return isPowerOf2_32(Ty1.getSizeInBits()) &&
               (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
      })
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .maxScalarIf(typeInSet(0, {s32}), 1, s16)
      .maxScalarIf(typeInSet(0, {s64}), 1, s32)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder(G_EXTRACT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        // The extracted value must be strictly smaller than the source.
        return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
      })
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &Ty0 = Query.Types[0];
        const LLT &Ty1 = Query.Types[1];
        if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
          return false;
        // NOTE(review): this branch is unreachable — p0 was already rejected
        // by the check above. Looks like leftover code; confirm intent.
        if (Ty1 == p0)
          return true;
        return isPowerOf2_32(Ty0.getSizeInBits()) &&
               (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
      })
      .clampScalar(1, s32, s128)
      .widenScalarToNextPow2(1)
      .maxScalarIf(typeInSet(1, {s32}), 0, s16)
      .maxScalarIf(typeInSet(1, {s64}), 0, s32)
      .widenScalarToNextPow2(0);

  // Extending loads. Mem-desc tuples are {result, pointer, mem-size-in-bits,
  // min-alignment-in-bytes}.
  getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 8, 2},
                                 {s64, p0, 16, 2},
                                 {s64, p0, 32, 4},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {v2s32, p0, 64, 8}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      // TODO: We could support sum-of-pow2's but the lowering code doesn't know
      // how to do that yet.
      .unsupportedIfMemSizeNotPow2()
      // Lower anything left over into G_*EXT and G_LOAD
      .lower();

  // True for vectors whose elements are pointers into address space 0; such
  // loads/stores get custom legalization (legalizeLoadStore) which bitcasts
  // them to integer-element vectors.
  auto IsPtrVecPred = [=](const LegalityQuery &Query) {
    const LLT &ValTy = Query.Types[0];
    if (!ValTy.isVector())
      return false;
    const LLT EltTy = ValTy.getElementType();
    return EltTy.isPointer() && EltTy.getAddressSpace() == 0;
  };

  getActionDefinitionsBuilder(G_LOAD)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v8s8, p0, 64, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      // These extends are also legal
      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
                                 {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  getActionDefinitionsBuilder(G_STORE)
      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
                                 {s16, p0, 16, 8},
                                 {s32, p0, 8, 8},
                                 {s32, p0, 16, 8},
                                 {s32, p0, 32, 8},
                                 {s64, p0, 64, 8},
                                 {p0, p0, 64, 8},
                                 {s128, p0, 128, 8},
                                 {v16s8, p0, 128, 8},
                                 {v4s16, p0, 64, 8},
                                 {v8s16, p0, 128, 8},
                                 {v2s32, p0, 64, 8},
                                 {v4s32, p0, 128, 8},
                                 {v2s64, p0, 128, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      // Lower truncating stores whose value size doesn't match the mem size.
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .clampMaxNumElements(0, s32, 2)
      .clampMaxNumElements(0, s64, 1)
      .customIf(IsPtrVecPred);

  // Constants
  getActionDefinitionsBuilder(G_CONSTANT)
      .legalFor({p0, s8, s16, s32, s64})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0);
  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalFor({s32, s64})
      .clampScalar(0, s32, s64);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{s32, s32},
                 {s32, s64},
                 {s32, p0},
                 {v4s32, v4s32},
                 {v2s32, v2s32},
                 {v2s64, v2s64},
                 {v2s64, v2p0},
                 {v4s16, v4s16},
                 {v8s16, v8s16},
                 {v8s8, v8s8},
                 {v16s8, v16s8}})
      .clampScalar(1, s32, s64)
      .clampScalar(0, s32, s32)
      // For vector compares, the result element type must match the source
      // element type (pointer sources excepted).
      .minScalarEltSameAsIf(
          [=](const LegalityQuery &Query) {
            const LLT &Ty = Query.Types[0];
            const LLT &SrcTy = Query.Types[1];
            return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2s16; },
          1, s32)
      .minScalarOrEltIf(
          [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
          s64)
      .widenScalarOrEltToNextPow2(1);

  getActionDefinitionsBuilder(G_FCMP)
      .legalFor({{s32, s32}, {s32, s64}})
      .clampScalar(0, s32, s32)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
    unsigned DstSize = Query.Types[0].getSizeInBits();

    if (DstSize == 128 && !Query.Types[0].isVector())
      return false; // Extending to a scalar s128 needs narrowing.

    // Make sure that we have something that will fit in a register, and
    // make sure it's a power of 2.
    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
      return false;

    const LLT &SrcTy = Query.Types[1];

    // Special case for s1.
    if (SrcTy == s1)
      return true;

    // Make sure we fit in a register otherwise. Don't bother checking that
    // the source type is below 128 bits. We shouldn't be allowing anything
    // through which is wider than the destination in the first place.
    unsigned SrcSize = SrcTy.getSizeInBits();
    if (SrcSize < 8 || !isPowerOf2_32(SrcSize))
      return false;

    return true;
  };
  getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
      .legalIf(ExtLegalFunc)
      .clampScalar(0, s64, s64); // Just for s128, others are handled above.

  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();

  getActionDefinitionsBuilder(G_SEXT_INREG).lower();

  // FP conversions
  getActionDefinitionsBuilder(G_FPTRUNC).legalFor(
      {{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}});
  getActionDefinitionsBuilder(G_FPEXT).legalFor(
      {{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}});

  // Conversions
  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1);

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
      .clampScalar(1, s32, s64)
      .widenScalarToNextPow2(1)
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0);

  // Control-flow
  getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  // Select
  // FIXME: We can probably do a bit better than just scalarizing vector
  // selects.
  getActionDefinitionsBuilder(G_SELECT)
      .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
      .clampScalar(0, s32, s64)
      .widenScalarToNextPow2(0)
      .scalarize(0);

  // Pointer-handling
  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
  getActionDefinitionsBuilder(G_GLOBAL_VALUE).legalFor({p0});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
      .maxScalar(0, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .unsupportedIf([&](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
      })
      .legalFor({{p0, s64}});

  // Casts for 32 and 64-bit width type are just copies.
  // Same for 128-bit width type, except they are on the FPR bank.
  getActionDefinitionsBuilder(G_BITCAST)
      // FIXME: This is wrong since G_BITCAST is not allowed to change the
      // number of bits but it's what the previous code described and fixing
      // it breaks tests.
      .legalForCartesianProduct({s1, s8, s16, s32, s64, s128, v16s8, v8s8, v4s8,
                                 v8s16, v4s16, v2s16, v4s32, v2s32, v2s64,
                                 v2p0});

  getActionDefinitionsBuilder(G_VASTART).legalFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      .customForCartesianProduct({s8, s16, s32, s64, p0}, {p0})
      .clampScalar(0, s8, s64)
      .widenScalarToNextPow2(0, /*Min*/ 8);

  if (ST.hasLSE()) {
    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
        .lowerIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));

    getActionDefinitionsBuilder(
        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
        .legalIf(all(
            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
  }

  getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;

    // True when TypeIdx is a vector whose element type is too small, too big,
    // or not a power of 2 to be handled directly.
    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
      const LLT &Ty = Query.Types[TypeIdx];
      if (Ty.isVector()) {
        const LLT &EltTy = Ty.getElementType();
        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
          return true;
        if (!isPowerOf2_32(EltTy.getSizeInBits()))
          return true;
      }
      return false;
    };

    // FIXME: This rule is horrible, but specifies the same as what we had
    // before with the particularly strange definitions removed (e.g.
    // s8 = G_MERGE_VALUES s32, s32).
    // Part of the complexity comes from these ops being extremely flexible. For
    // example, you can build/decompose vectors with it, concatenate vectors,
    // etc. and in addition to this you can also bitcast with it at the same
    // time. We've been considering breaking it up into multiple ops to make it
    // more manageable throughout the backend.
    getActionDefinitionsBuilder(Op)
        // Break up vectors with weird elements into scalars
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
            scalarize(0))
        .fewerElementsIf(
            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
            scalarize(1))
        // Clamp the big scalar to s8-s512 and make it either a power of 2, 192,
        // or 384.
        .clampScalar(BigTyIdx, s8, s512)
        .widenScalarIf(
            [=](const LegalityQuery &Query) {
              const LLT &Ty = Query.Types[BigTyIdx];
              return !isPowerOf2_32(Ty.getSizeInBits()) &&
                     Ty.getSizeInBits() % 64 != 0;
            },
            [=](const LegalityQuery &Query) {
              // Pick the next power of 2, or a multiple of 64 over 128.
              // Whichever is smaller.
              const LLT &Ty = Query.Types[BigTyIdx];
              unsigned NewSizeInBits = 1
                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
              if (NewSizeInBits >= 256) {
                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
                if (RoundedTo < NewSizeInBits)
                  NewSizeInBits = RoundedTo;
              }
              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
            })
        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
        // worth considering the multiples of 64 since 2*192 and 2*384 are not
        // valid.
        .clampScalar(LitTyIdx, s8, s256)
        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
        // At this point it's simple enough to accept the legal types.
        .legalIf([=](const LegalityQuery &Query) {
          const LLT &BigTy = Query.Types[BigTyIdx];
          const LLT &LitTy = Query.Types[LitTyIdx];
          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
            return false;
          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
            return false;
          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
        })
        // Any vectors left are the wrong size. Scalarize them.
        .scalarize(0)
        .scalarize(1);
  }

  getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
      .unsupportedIf([=](const LegalityQuery &Query) {
        // The result type must match the vector's element type.
        const LLT &EltTy = Query.Types[1].getElementType();
        return Query.Types[0] != EltTy;
      })
      .minScalar(2, s64)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[1];
        return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32;
      });

  getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &VecTy = Query.Types[0];
        // TODO: Support s8 and s16
        return VecTy == v2s32 || VecTy == v4s32 || VecTy == v2s64;
      });

  getActionDefinitionsBuilder(G_BUILD_VECTOR)
      .legalFor({{v4s16, s16},
                 {v8s16, s16},
                 {v2s32, s32},
                 {v4s32, s32},
                 {v2p0, p0},
                 {v2s64, s64}})
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64)

      // Deal with larger scalar types, which will be implicitly truncated.
      .legalIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getScalarSizeInBits() <
               Query.Types[1].getSizeInBits();
      })
      .minScalarSameAs(1, 0);

  getActionDefinitionsBuilder(G_CTLZ).legalForCartesianProduct(
      {s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
      .scalarize(1);

  getActionDefinitionsBuilder(G_SHUFFLE_VECTOR)
      .legalIf([=](const LegalityQuery &Query) {
        const LLT &DstTy = Query.Types[0];
        const LLT &SrcTy = Query.Types[1];
        // For now just support the TBL2 variant which needs the source vectors
        // to be the same size as the dest.
        if (DstTy != SrcTy)
          return false;
        for (auto &Ty : {v2s32, v4s32, v2s64}) {
          if (DstTy == Ty)
            return true;
        }
        return false;
      })
      // G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
      // just want those lowered into G_BUILD_VECTOR
      .lowerIf([=](const LegalityQuery &Query) {
        return !Query.Types[1].isVector();
      })
      .clampNumElements(0, v4s32, v4s32)
      .clampNumElements(0, v2s64, v2s64);

  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
      .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});

  getActionDefinitionsBuilder(G_JUMP_TABLE)
      .legalFor({{p0}, {s64}});

  getActionDefinitionsBuilder(G_BRJT).legalIf([=](const LegalityQuery &Query) {
    return Query.Types[0] == p0 && Query.Types[1] == s64;
  });

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  computeTables();
  verify(*ST.getInstrInfo());
}

/// Dispatch a Custom-marked instruction to the matching helper.
///
/// Returns false for opcodes this target has no custom handling for, which
/// reports the legalization as failed.
bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
                                          MachineRegisterInfo &MRI,
                                          MachineIRBuilder &MIRBuilder,
                                          GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_VAARG:
    return legalizeVaArg(MI, MRI, MIRBuilder);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
    return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
  }

  llvm_unreachable("expected switch to return");
}

/// Legalize intrinsic calls that need target intervention. The mem* family is
/// turned into libcalls via createMemLibcall; everything else is accepted
/// as-is (returns true without modifying MI).
bool AArch64LegalizerInfo::legalizeIntrinsic(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    MachineIRBuilder &MIRBuilder) const {
  switch (MI.getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memset:
  case Intrinsic::memmove:
    if (createMemLibcall(MIRBuilder, MRI, MI) ==
        LegalizerHelper::UnableToLegalize)
      return false;
    // The libcall fully replaces the intrinsic; drop the original.
    MI.eraseFromParent();
    return true;
  default:
    break;
  }
  return true;
}

/// Custom-legalize 32-bit G_SHL/G_ASHR/G_LSHR (see the customIf rule in the
/// constructor): when the shift amount is a small G_CONSTANT, zero-extend it
/// to s64 in place so the imported SelectionDAG patterns can match the
/// immediate form. Returns true in every reachable path — either form is
/// legal.
bool AArch64LegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto *CstMI = MRI.getVRegDef(AmtReg);
  assert(CstMI && "expected to find a vreg def");
  if (CstMI->getOpcode() != TargetOpcode::G_CONSTANT)
    return true;
  // Check the shift amount is in range for an immediate form.
  unsigned Amount = CstMI->getOperand(1).getCImm()->getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  assert(MRI.getType(AmtReg).getSizeInBits() == 32);
  MIRBuilder.setInstr(MI);
  auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg);
  // Rewrite the shift in place to consume the widened amount.
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  return true;
}

/// Custom-legalize loads/stores of vectors of address-space-0 pointers (see
/// IsPtrVecPred in the constructor) by bitcasting the value to an
/// integer-element vector of the same total size, so the existing s64
/// selection patterns apply.
bool AArch64LegalizerInfo::legalizeLoadStore(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_STORE ||
         MI.getOpcode() == TargetOpcode::G_LOAD);
  // Here we just try to handle vector loads/stores where our value type might
  // have pointer elements, which the SelectionDAG importer can't handle. To
  // allow the existing patterns for s64 to fire for p0, we just try to bitcast
  // the value to use s64 types.

  // Custom legalization requires the instruction, if not deleted, must be fully
  // legalized. In order to allow further legalization of the inst, we create
  // a new instruction and erase the existing one.

  Register ValReg = MI.getOperand(0).getReg();
  const LLT ValTy = MRI.getType(ValReg);

  if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
      ValTy.getElementType().getAddressSpace() != 0) {
    // Should not happen: only IsPtrVecPred-matching types are routed here.
    LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
    return false;
  }

  MIRBuilder.setInstr(MI);
  unsigned PtrSize = ValTy.getElementType().getSizeInBits();
  const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize);
  // Reuse the original instruction's memory operand on the replacement.
  auto &MMO = **MI.memoperands_begin();
  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    auto Bitcast = MIRBuilder.buildBitcast({NewTy}, {ValReg});
    MIRBuilder.buildStore(Bitcast.getReg(0), MI.getOperand(1).getReg(), MMO);
  } else {
    Register NewReg = MRI.createGenericVirtualRegister(NewTy);
    auto NewLoad = MIRBuilder.buildLoad(NewReg, MI.getOperand(1).getReg(), MMO);
    MIRBuilder.buildBitcast({ValReg}, {NewLoad});
  }
  MI.eraseFromParent();
  return true;
}

/// Custom-legalize G_VAARG for the AAPCS va_list, which is a plain pointer:
/// load the current list pointer, realign it if the requested alignment
/// exceeds the pointer size, load the value from it, then store back the list
/// pointer advanced by the value size rounded up to a pointer-size multiple.
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);
  MachineFunction &MF = MIRBuilder.getMF();
  unsigned Align = MI.getOperand(2).getImm();
  Register Dst = MI.getOperand(0).getReg();
  Register ListPtr = MI.getOperand(1).getReg();

  LLT PtrTy = MRI.getType(ListPtr);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
  Register List = MRI.createGenericVirtualRegister(PtrTy);
  // Load the current position in the va_list.
  MIRBuilder.buildLoad(
      List, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               PtrSize, /* Align = */ PtrSize));

  Register DstPtr;
  if (Align > PtrSize) {
    // Realign the list to the actual required alignment.
    auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);

    auto ListTmp = MIRBuilder.buildPtrAdd(PtrTy, List, AlignMinus1.getReg(0));

    // Round up to the alignment by adding Align-1 then masking off the low
    // log2(Align) bits.
    DstPtr = MRI.createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
  } else
    DstPtr = List;

  uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
  // Load the argument value itself.
  MIRBuilder.buildLoad(
      Dst, DstPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
                               ValSize, std::max(Align, PtrSize)));

  // Advance the list pointer by the value size, rounded up to a multiple of
  // the pointer size.
  auto Size = MIRBuilder.buildConstant(IntPtrTy, alignTo(ValSize, PtrSize));

  auto NewList = MIRBuilder.buildPtrAdd(PtrTy, DstPtr, Size.getReg(0));

  MIRBuilder.buildStore(
      NewList, ListPtr,
      *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
                               PtrSize, /* Align = */ PtrSize));

  MI.eraseFromParent();
  return true;
}