//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

// Pull in the tblgen-erated bitset type used to record which selector
// predicates hold for the current subtarget/function.
#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  void preISelLower(MachineInstr &I) const;

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  void contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  // Fixed-width wrappers over selectAddrModeUnscaled; the suffix is the
  // access size in bits, the argument passed down is in bytes.
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Instructions that accept extend modifiers like UXTW expect the register
  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
  /// subregister copy if necessary. Return either ExtReg, or the result of the
  /// new copy.
  Register narrowExtendRegIfNeeded(Register ExtReg,
                                   MachineIRBuilder &MIB) const;
  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  // Custom operand renderers used by imported patterns; OpIdx selects the
  // source operand of MI to render (-1 by convention per the importer).
  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  // Cached subtarget objects; all initialized once in the constructor.
  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  // Per-function cache set by setupMF() (see comment there).
  bool ProduceNonFlagSettingCondBr = false;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
// The tblgen-erated predicate and temporary member initializers are spliced
// into the initializer list here.
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    // Note: types of 16 bits or fewer all land in FPR16 here.
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    // 32 bits is ambiguous between banks: GPR32 uses sub_32, FPR32 uses ssub.
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - all operands are not in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  // Walk all operands and reject the instruction as soon as one of them
  // violates a constraint; remember the bank of the previous register operand
  // so we can check that all operands share a single bank.
  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical registers operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    // GPR shifts come in separate W (32-bit) and X (64-bit) variants.
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a mean to setup initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a mean to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g, given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, Register SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  // Rewrite I's source to read the freshly built subregister copy.
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
644353358Sdimstatic std::pair<const TargetRegisterClass *, const TargetRegisterClass *> 645353358SdimgetRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, 646353358Sdim MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, 647353358Sdim const RegisterBankInfo &RBI) { 648360784Sdim Register DstReg = I.getOperand(0).getReg(); 649360784Sdim Register SrcReg = I.getOperand(1).getReg(); 650353358Sdim const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 651353358Sdim const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 652353358Sdim unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); 653353358Sdim unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); 654353358Sdim 655353358Sdim // Special casing for cross-bank copies of s1s. We can technically represent 656353358Sdim // a 1-bit value with any size of register. The minimum size for a GPR is 32 657353358Sdim // bits. So, we need to put the FPR on 32 bits as well. 658353358Sdim // 659353358Sdim // FIXME: I'm not sure if this case holds true outside of copies. If it does, 660353358Sdim // then we can pull it into the helpers that get the appropriate class for a 661353358Sdim // register bank. Or make a new helper that carries along some constraint 662353358Sdim // information. 
663353358Sdim if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) 664353358Sdim SrcSize = DstSize = 32; 665353358Sdim 666353358Sdim return {getMinClassForRegBank(SrcRegBank, SrcSize, true), 667353358Sdim getMinClassForRegBank(DstRegBank, DstSize, true)}; 668353358Sdim} 669353358Sdim 670311116Sdimstatic bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, 671311116Sdim MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, 672311116Sdim const RegisterBankInfo &RBI) { 673311116Sdim 674360784Sdim Register DstReg = I.getOperand(0).getReg(); 675360784Sdim Register SrcReg = I.getOperand(1).getReg(); 676353358Sdim const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 677353358Sdim const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 678329983Sdim 679353358Sdim // Find the correct register classes for the source and destination registers. 680353358Sdim const TargetRegisterClass *SrcRC; 681353358Sdim const TargetRegisterClass *DstRC; 682353358Sdim std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); 683311116Sdim 684353358Sdim if (!DstRC) { 685353358Sdim LLVM_DEBUG(dbgs() << "Unexpected dest size " 686353358Sdim << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'); 687329983Sdim return false; 688329983Sdim } 689329983Sdim 690353358Sdim // A couple helpers below, for making sure that the copy we produce is valid. 691353358Sdim 692353358Sdim // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want 693353358Sdim // to verify that the src and dst are the same size, since that's handled by 694353358Sdim // the SUBREG_TO_REG. 695353358Sdim bool KnownValid = false; 696353358Sdim 697353358Sdim // Returns true, or asserts if something we don't expect happens. Instead of 698353358Sdim // returning true, we return isValidCopy() to ensure that we verify the 699353358Sdim // result. 
700353358Sdim auto CheckCopy = [&]() { 701353358Sdim // If we have a bitcast or something, we can't have physical registers. 702360784Sdim assert((I.isCopy() || 703360784Sdim (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && 704360784Sdim !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && 705360784Sdim "No phys reg on generic operator!"); 706353358Sdim assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI)); 707353358Sdim (void)KnownValid; 708353358Sdim return true; 709353358Sdim }; 710353358Sdim 711353358Sdim // Is this a copy? If so, then we may need to insert a subregister copy, or 712353358Sdim // a SUBREG_TO_REG. 713353358Sdim if (I.isCopy()) { 714353358Sdim // Yes. Check if there's anything to fix up. 715329983Sdim if (!SrcRC) { 716353358Sdim LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n"); 717353358Sdim return false; 718311116Sdim } 719353358Sdim 720360784Sdim unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); 721360784Sdim unsigned DstSize = TRI.getRegSizeInBits(*DstRC); 722353358Sdim 723360784Sdim // If we're doing a cross-bank copy on different-sized registers, we need 724360784Sdim // to do a bit more work. 725360784Sdim if (SrcSize > DstSize) { 726360784Sdim // We're doing a cross-bank copy into a smaller register. We need a 727360784Sdim // subregister copy. First, get a register class that's on the same bank 728360784Sdim // as the destination, but the same size as the source. 729360784Sdim const TargetRegisterClass *SubregRC = 730360784Sdim getMinClassForRegBank(DstRegBank, SrcSize, true); 731360784Sdim assert(SubregRC && "Didn't get a register class for subreg?"); 732353358Sdim 733360784Sdim // Get the appropriate subregister for the destination. 
734360784Sdim unsigned SubReg = 0; 735360784Sdim if (!getSubRegForClass(DstRC, TRI, SubReg)) { 736360784Sdim LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n"); 737360784Sdim return false; 738353358Sdim } 739353358Sdim 740360784Sdim // Now, insert a subregister copy using the new register class. 741360784Sdim selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg); 742360784Sdim return CheckCopy(); 743360784Sdim } 744360784Sdim 745360784Sdim // Is this a cross-bank copy? 746360784Sdim if (DstRegBank.getID() != SrcRegBank.getID()) { 747360784Sdim if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 && 748360784Sdim SrcSize == 16) { 749353358Sdim // Special case for FPR16 to GPR32. 750353358Sdim // FIXME: This can probably be generalized like the above case. 751360784Sdim Register PromoteReg = 752353358Sdim MRI.createVirtualRegister(&AArch64::FPR32RegClass); 753353358Sdim BuildMI(*I.getParent(), I, I.getDebugLoc(), 754353358Sdim TII.get(AArch64::SUBREG_TO_REG), PromoteReg) 755353358Sdim .addImm(0) 756353358Sdim .addUse(SrcReg) 757353358Sdim .addImm(AArch64::hsub); 758353358Sdim MachineOperand &RegOp = I.getOperand(1); 759353358Sdim RegOp.setReg(PromoteReg); 760353358Sdim 761353358Sdim // Promise that the copy is implicitly validated by the SUBREG_TO_REG. 762353358Sdim KnownValid = true; 763353358Sdim } 764329983Sdim } 765353358Sdim 766353358Sdim // If the destination is a physical register, then there's nothing to 767353358Sdim // change, so we're done. 768360784Sdim if (Register::isPhysicalRegister(DstReg)) 769353358Sdim return CheckCopy(); 770311116Sdim } 771311116Sdim 772353358Sdim // No need to constrain SrcReg. It will get constrained when we hit another 773353358Sdim // of its use or its defs. Copies do not have constraints. 
774353358Sdim if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 775341825Sdim LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 776341825Sdim << " operand\n"); 777311116Sdim return false; 778311116Sdim } 779311116Sdim I.setDesc(TII.get(AArch64::COPY)); 780353358Sdim return CheckCopy(); 781311116Sdim} 782311116Sdim 783311116Sdimstatic unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { 784311116Sdim if (!DstTy.isScalar() || !SrcTy.isScalar()) 785311116Sdim return GenericOpc; 786311116Sdim 787311116Sdim const unsigned DstSize = DstTy.getSizeInBits(); 788311116Sdim const unsigned SrcSize = SrcTy.getSizeInBits(); 789311116Sdim 790311116Sdim switch (DstSize) { 791311116Sdim case 32: 792311116Sdim switch (SrcSize) { 793311116Sdim case 32: 794311116Sdim switch (GenericOpc) { 795311116Sdim case TargetOpcode::G_SITOFP: 796311116Sdim return AArch64::SCVTFUWSri; 797311116Sdim case TargetOpcode::G_UITOFP: 798311116Sdim return AArch64::UCVTFUWSri; 799311116Sdim case TargetOpcode::G_FPTOSI: 800311116Sdim return AArch64::FCVTZSUWSr; 801311116Sdim case TargetOpcode::G_FPTOUI: 802311116Sdim return AArch64::FCVTZUUWSr; 803311116Sdim default: 804311116Sdim return GenericOpc; 805311116Sdim } 806311116Sdim case 64: 807311116Sdim switch (GenericOpc) { 808311116Sdim case TargetOpcode::G_SITOFP: 809311116Sdim return AArch64::SCVTFUXSri; 810311116Sdim case TargetOpcode::G_UITOFP: 811311116Sdim return AArch64::UCVTFUXSri; 812311116Sdim case TargetOpcode::G_FPTOSI: 813311116Sdim return AArch64::FCVTZSUWDr; 814311116Sdim case TargetOpcode::G_FPTOUI: 815311116Sdim return AArch64::FCVTZUUWDr; 816311116Sdim default: 817311116Sdim return GenericOpc; 818311116Sdim } 819311116Sdim default: 820311116Sdim return GenericOpc; 821311116Sdim } 822311116Sdim case 64: 823311116Sdim switch (SrcSize) { 824311116Sdim case 32: 825311116Sdim switch (GenericOpc) { 826311116Sdim case TargetOpcode::G_SITOFP: 827311116Sdim return AArch64::SCVTFUWDri; 828311116Sdim 
case TargetOpcode::G_UITOFP: 829311116Sdim return AArch64::UCVTFUWDri; 830311116Sdim case TargetOpcode::G_FPTOSI: 831311116Sdim return AArch64::FCVTZSUXSr; 832311116Sdim case TargetOpcode::G_FPTOUI: 833311116Sdim return AArch64::FCVTZUUXSr; 834311116Sdim default: 835311116Sdim return GenericOpc; 836311116Sdim } 837311116Sdim case 64: 838311116Sdim switch (GenericOpc) { 839311116Sdim case TargetOpcode::G_SITOFP: 840311116Sdim return AArch64::SCVTFUXDri; 841311116Sdim case TargetOpcode::G_UITOFP: 842311116Sdim return AArch64::UCVTFUXDri; 843311116Sdim case TargetOpcode::G_FPTOSI: 844311116Sdim return AArch64::FCVTZSUXDr; 845311116Sdim case TargetOpcode::G_FPTOUI: 846311116Sdim return AArch64::FCVTZUUXDr; 847311116Sdim default: 848311116Sdim return GenericOpc; 849311116Sdim } 850311116Sdim default: 851311116Sdim return GenericOpc; 852311116Sdim } 853311116Sdim default: 854311116Sdim return GenericOpc; 855311116Sdim }; 856311116Sdim return GenericOpc; 857311116Sdim} 858311116Sdim 859353358Sdimstatic unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI, 860353358Sdim const RegisterBankInfo &RBI) { 861353358Sdim const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); 862353358Sdim bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 863353358Sdim AArch64::GPRRegBankID); 864353358Sdim LLT Ty = MRI.getType(I.getOperand(0).getReg()); 865353358Sdim if (Ty == LLT::scalar(32)) 866353358Sdim return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr; 867353358Sdim else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) 868353358Sdim return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr; 869353358Sdim return 0; 870353358Sdim} 871353358Sdim 872353358Sdim/// Helper function to select the opcode for a G_FCMP. 873353358Sdimstatic unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) { 874353358Sdim // If this is a compare against +0.0, then we don't have to explicitly 875353358Sdim // materialize a constant. 
876353358Sdim const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI); 877353358Sdim bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); 878353358Sdim unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 879353358Sdim if (OpSize != 32 && OpSize != 64) 880353358Sdim return 0; 881353358Sdim unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, 882353358Sdim {AArch64::FCMPSri, AArch64::FCMPDri}}; 883353358Sdim return CmpOpcTbl[ShouldUseImm][OpSize == 64]; 884353358Sdim} 885353358Sdim 886353358Sdim/// Returns true if \p P is an unsigned integer comparison predicate. 887353358Sdimstatic bool isUnsignedICMPPred(const CmpInst::Predicate P) { 888353358Sdim switch (P) { 889353358Sdim default: 890353358Sdim return false; 891353358Sdim case CmpInst::ICMP_UGT: 892353358Sdim case CmpInst::ICMP_UGE: 893353358Sdim case CmpInst::ICMP_ULT: 894353358Sdim case CmpInst::ICMP_ULE: 895353358Sdim return true; 896353358Sdim } 897353358Sdim} 898353358Sdim 899311116Sdimstatic AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { 900311116Sdim switch (P) { 901311116Sdim default: 902311116Sdim llvm_unreachable("Unknown condition code!"); 903311116Sdim case CmpInst::ICMP_NE: 904311116Sdim return AArch64CC::NE; 905311116Sdim case CmpInst::ICMP_EQ: 906311116Sdim return AArch64CC::EQ; 907311116Sdim case CmpInst::ICMP_SGT: 908311116Sdim return AArch64CC::GT; 909311116Sdim case CmpInst::ICMP_SGE: 910311116Sdim return AArch64CC::GE; 911311116Sdim case CmpInst::ICMP_SLT: 912311116Sdim return AArch64CC::LT; 913311116Sdim case CmpInst::ICMP_SLE: 914311116Sdim return AArch64CC::LE; 915311116Sdim case CmpInst::ICMP_UGT: 916311116Sdim return AArch64CC::HI; 917311116Sdim case CmpInst::ICMP_UGE: 918311116Sdim return AArch64CC::HS; 919311116Sdim case CmpInst::ICMP_ULT: 920311116Sdim return AArch64CC::LO; 921311116Sdim case CmpInst::ICMP_ULE: 922311116Sdim return AArch64CC::LS; 923311116Sdim } 924311116Sdim} 
925311116Sdim 926311116Sdimstatic void changeFCMPPredToAArch64CC(CmpInst::Predicate P, 927311116Sdim AArch64CC::CondCode &CondCode, 928311116Sdim AArch64CC::CondCode &CondCode2) { 929311116Sdim CondCode2 = AArch64CC::AL; 930311116Sdim switch (P) { 931311116Sdim default: 932311116Sdim llvm_unreachable("Unknown FP condition!"); 933311116Sdim case CmpInst::FCMP_OEQ: 934311116Sdim CondCode = AArch64CC::EQ; 935311116Sdim break; 936311116Sdim case CmpInst::FCMP_OGT: 937311116Sdim CondCode = AArch64CC::GT; 938311116Sdim break; 939311116Sdim case CmpInst::FCMP_OGE: 940311116Sdim CondCode = AArch64CC::GE; 941311116Sdim break; 942311116Sdim case CmpInst::FCMP_OLT: 943311116Sdim CondCode = AArch64CC::MI; 944311116Sdim break; 945311116Sdim case CmpInst::FCMP_OLE: 946311116Sdim CondCode = AArch64CC::LS; 947311116Sdim break; 948311116Sdim case CmpInst::FCMP_ONE: 949311116Sdim CondCode = AArch64CC::MI; 950311116Sdim CondCode2 = AArch64CC::GT; 951311116Sdim break; 952311116Sdim case CmpInst::FCMP_ORD: 953311116Sdim CondCode = AArch64CC::VC; 954311116Sdim break; 955311116Sdim case CmpInst::FCMP_UNO: 956311116Sdim CondCode = AArch64CC::VS; 957311116Sdim break; 958311116Sdim case CmpInst::FCMP_UEQ: 959311116Sdim CondCode = AArch64CC::EQ; 960311116Sdim CondCode2 = AArch64CC::VS; 961311116Sdim break; 962311116Sdim case CmpInst::FCMP_UGT: 963311116Sdim CondCode = AArch64CC::HI; 964311116Sdim break; 965311116Sdim case CmpInst::FCMP_UGE: 966311116Sdim CondCode = AArch64CC::PL; 967311116Sdim break; 968311116Sdim case CmpInst::FCMP_ULT: 969311116Sdim CondCode = AArch64CC::LT; 970311116Sdim break; 971311116Sdim case CmpInst::FCMP_ULE: 972311116Sdim CondCode = AArch64CC::LE; 973311116Sdim break; 974311116Sdim case CmpInst::FCMP_UNE: 975311116Sdim CondCode = AArch64CC::NE; 976311116Sdim break; 977311116Sdim } 978311116Sdim} 979311116Sdim 980321369Sdimbool AArch64InstructionSelector::selectCompareBranch( 981321369Sdim MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 
982321369Sdim 983353358Sdim const Register CondReg = I.getOperand(0).getReg(); 984321369Sdim MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 985321369Sdim MachineInstr *CCMI = MRI.getVRegDef(CondReg); 986327952Sdim if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) 987327952Sdim CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg()); 988321369Sdim if (CCMI->getOpcode() != TargetOpcode::G_ICMP) 989321369Sdim return false; 990321369Sdim 991353358Sdim Register LHS = CCMI->getOperand(2).getReg(); 992353358Sdim Register RHS = CCMI->getOperand(3).getReg(); 993353358Sdim auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); 994353358Sdim if (!VRegAndVal) 995321369Sdim std::swap(RHS, LHS); 996321369Sdim 997353358Sdim VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); 998353358Sdim if (!VRegAndVal || VRegAndVal->Value != 0) { 999353358Sdim MachineIRBuilder MIB(I); 1000353358Sdim // If we can't select a CBZ then emit a cmp + Bcc. 1001353358Sdim if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3), 1002353358Sdim CCMI->getOperand(1), MIB)) 1003353358Sdim return false; 1004353358Sdim const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( 1005353358Sdim (CmpInst::Predicate)CCMI->getOperand(1).getPredicate()); 1006353358Sdim MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); 1007353358Sdim I.eraseFromParent(); 1008353358Sdim return true; 1009353358Sdim } 1010321369Sdim 1011321369Sdim const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI); 1012321369Sdim if (RB.getID() != AArch64::GPRRegBankID) 1013321369Sdim return false; 1014321369Sdim 1015321369Sdim const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); 1016321369Sdim if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ) 1017321369Sdim return false; 1018321369Sdim 1019321369Sdim const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits(); 1020321369Sdim unsigned CBOpc = 0; 1021321369Sdim if (CmpWidth <= 32) 1022321369Sdim CBOpc = (Pred == CmpInst::ICMP_EQ ? 
AArch64::CBZW : AArch64::CBNZW); 1023321369Sdim else if (CmpWidth == 64) 1024321369Sdim CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX); 1025321369Sdim else 1026321369Sdim return false; 1027321369Sdim 1028341825Sdim BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) 1029341825Sdim .addUse(LHS) 1030341825Sdim .addMBB(DestMBB) 1031341825Sdim .constrainAllUses(TII, TRI, RBI); 1032321369Sdim 1033321369Sdim I.eraseFromParent(); 1034321369Sdim return true; 1035321369Sdim} 1036321369Sdim 1037360784Sdim/// Returns the element immediate value of a vector shift operand if found. 1038360784Sdim/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. 1039360784Sdimstatic Optional<int64_t> getVectorShiftImm(Register Reg, 1040360784Sdim MachineRegisterInfo &MRI) { 1041360784Sdim assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); 1042360784Sdim MachineInstr *OpMI = MRI.getVRegDef(Reg); 1043360784Sdim assert(OpMI && "Expected to find a vreg def for vector shift operand"); 1044360784Sdim if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR) 1045360784Sdim return None; 1046360784Sdim 1047360784Sdim // Check all operands are identical immediates. 1048360784Sdim int64_t ImmVal = 0; 1049360784Sdim for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) { 1050360784Sdim auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); 1051360784Sdim if (!VRegAndVal) 1052360784Sdim return None; 1053360784Sdim 1054360784Sdim if (Idx == 1) 1055360784Sdim ImmVal = VRegAndVal->Value; 1056360784Sdim if (ImmVal != VRegAndVal->Value) 1057360784Sdim return None; 1058360784Sdim } 1059360784Sdim 1060360784Sdim return ImmVal; 1061360784Sdim} 1062360784Sdim 1063360784Sdim/// Matches and returns the shift immediate value for a SHL instruction given 1064360784Sdim/// a shift operand. 
1065360784Sdimstatic Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { 1066360784Sdim Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); 1067360784Sdim if (!ShiftImm) 1068360784Sdim return None; 1069360784Sdim // Check the immediate is in range for a SHL. 1070360784Sdim int64_t Imm = *ShiftImm; 1071360784Sdim if (Imm < 0) 1072360784Sdim return None; 1073360784Sdim switch (SrcTy.getElementType().getSizeInBits()) { 1074360784Sdim default: 1075360784Sdim LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); 1076360784Sdim return None; 1077360784Sdim case 8: 1078360784Sdim if (Imm > 7) 1079360784Sdim return None; 1080360784Sdim break; 1081360784Sdim case 16: 1082360784Sdim if (Imm > 15) 1083360784Sdim return None; 1084360784Sdim break; 1085360784Sdim case 32: 1086360784Sdim if (Imm > 31) 1087360784Sdim return None; 1088360784Sdim break; 1089360784Sdim case 64: 1090360784Sdim if (Imm > 63) 1091360784Sdim return None; 1092360784Sdim break; 1093360784Sdim } 1094360784Sdim return Imm; 1095360784Sdim} 1096360784Sdim 1097353358Sdimbool AArch64InstructionSelector::selectVectorSHL( 1098353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 1099353358Sdim assert(I.getOpcode() == TargetOpcode::G_SHL); 1100353358Sdim Register DstReg = I.getOperand(0).getReg(); 1101353358Sdim const LLT Ty = MRI.getType(DstReg); 1102353358Sdim Register Src1Reg = I.getOperand(1).getReg(); 1103353358Sdim Register Src2Reg = I.getOperand(2).getReg(); 1104353358Sdim 1105353358Sdim if (!Ty.isVector()) 1106353358Sdim return false; 1107353358Sdim 1108360784Sdim // Check if we have a vector of constants on RHS that we can select as the 1109360784Sdim // immediate form. 1110360784Sdim Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); 1111360784Sdim 1112353358Sdim unsigned Opc = 0; 1113360784Sdim if (Ty == LLT::vector(2, 64)) { 1114360784Sdim Opc = ImmVal ? 
AArch64::SHLv2i64_shift : AArch64::USHLv2i64; 1115360784Sdim } else if (Ty == LLT::vector(4, 32)) { 1116360784Sdim Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; 1117353358Sdim } else if (Ty == LLT::vector(2, 32)) { 1118360784Sdim Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; 1119353358Sdim } else { 1120353358Sdim LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); 1121353358Sdim return false; 1122353358Sdim } 1123353358Sdim 1124353358Sdim MachineIRBuilder MIB(I); 1125360784Sdim auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); 1126360784Sdim if (ImmVal) 1127360784Sdim Shl.addImm(*ImmVal); 1128360784Sdim else 1129360784Sdim Shl.addUse(Src2Reg); 1130360784Sdim constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); 1131353358Sdim I.eraseFromParent(); 1132353358Sdim return true; 1133353358Sdim} 1134353358Sdim 1135353358Sdimbool AArch64InstructionSelector::selectVectorASHR( 1136353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 1137353358Sdim assert(I.getOpcode() == TargetOpcode::G_ASHR); 1138353358Sdim Register DstReg = I.getOperand(0).getReg(); 1139353358Sdim const LLT Ty = MRI.getType(DstReg); 1140353358Sdim Register Src1Reg = I.getOperand(1).getReg(); 1141353358Sdim Register Src2Reg = I.getOperand(2).getReg(); 1142353358Sdim 1143353358Sdim if (!Ty.isVector()) 1144353358Sdim return false; 1145353358Sdim 1146353358Sdim // There is not a shift right register instruction, but the shift left 1147353358Sdim // register instruction takes a signed value, where negative numbers specify a 1148353358Sdim // right shift. 
1149353358Sdim 1150353358Sdim unsigned Opc = 0; 1151353358Sdim unsigned NegOpc = 0; 1152353358Sdim const TargetRegisterClass *RC = nullptr; 1153360784Sdim if (Ty == LLT::vector(2, 64)) { 1154360784Sdim Opc = AArch64::SSHLv2i64; 1155360784Sdim NegOpc = AArch64::NEGv2i64; 1156360784Sdim RC = &AArch64::FPR128RegClass; 1157360784Sdim } else if (Ty == LLT::vector(4, 32)) { 1158353358Sdim Opc = AArch64::SSHLv4i32; 1159353358Sdim NegOpc = AArch64::NEGv4i32; 1160353358Sdim RC = &AArch64::FPR128RegClass; 1161353358Sdim } else if (Ty == LLT::vector(2, 32)) { 1162353358Sdim Opc = AArch64::SSHLv2i32; 1163353358Sdim NegOpc = AArch64::NEGv2i32; 1164353358Sdim RC = &AArch64::FPR64RegClass; 1165353358Sdim } else { 1166353358Sdim LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); 1167353358Sdim return false; 1168353358Sdim } 1169353358Sdim 1170353358Sdim MachineIRBuilder MIB(I); 1171353358Sdim auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); 1172353358Sdim constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); 1173353358Sdim auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); 1174353358Sdim constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); 1175353358Sdim I.eraseFromParent(); 1176353358Sdim return true; 1177353358Sdim} 1178353358Sdim 1179321369Sdimbool AArch64InstructionSelector::selectVaStartAAPCS( 1180321369Sdim MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1181321369Sdim return false; 1182321369Sdim} 1183321369Sdim 1184321369Sdimbool AArch64InstructionSelector::selectVaStartDarwin( 1185321369Sdim MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1186321369Sdim AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); 1187353358Sdim Register ListReg = I.getOperand(0).getReg(); 1188321369Sdim 1189353358Sdim Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1190321369Sdim 1191321369Sdim auto MIB = 1192321369Sdim BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) 
1193321369Sdim .addDef(ArgsAddrReg) 1194321369Sdim .addFrameIndex(FuncInfo->getVarArgsStackIndex()) 1195321369Sdim .addImm(0) 1196321369Sdim .addImm(0); 1197321369Sdim 1198321369Sdim constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1199321369Sdim 1200321369Sdim MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) 1201321369Sdim .addUse(ArgsAddrReg) 1202321369Sdim .addUse(ListReg) 1203321369Sdim .addImm(0) 1204321369Sdim .addMemOperand(*I.memoperands_begin()); 1205321369Sdim 1206321369Sdim constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1207321369Sdim I.eraseFromParent(); 1208321369Sdim return true; 1209321369Sdim} 1210321369Sdim 1211341825Sdimvoid AArch64InstructionSelector::materializeLargeCMVal( 1212360784Sdim MachineInstr &I, const Value *V, unsigned OpFlags) const { 1213341825Sdim MachineBasicBlock &MBB = *I.getParent(); 1214341825Sdim MachineFunction &MF = *MBB.getParent(); 1215341825Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1216341825Sdim MachineIRBuilder MIB(I); 1217341825Sdim 1218344779Sdim auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); 1219341825Sdim MovZ->addOperand(MF, I.getOperand(1)); 1220341825Sdim MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | 1221341825Sdim AArch64II::MO_NC); 1222341825Sdim MovZ->addOperand(MF, MachineOperand::CreateImm(0)); 1223341825Sdim constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); 1224341825Sdim 1225353358Sdim auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, 1226353358Sdim Register ForceDstReg) { 1227353358Sdim Register DstReg = ForceDstReg 1228341825Sdim ? 
ForceDstReg 1229341825Sdim : MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1230341825Sdim auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); 1231341825Sdim if (auto *GV = dyn_cast<GlobalValue>(V)) { 1232341825Sdim MovI->addOperand(MF, MachineOperand::CreateGA( 1233341825Sdim GV, MovZ->getOperand(1).getOffset(), Flags)); 1234341825Sdim } else { 1235341825Sdim MovI->addOperand( 1236341825Sdim MF, MachineOperand::CreateBA(cast<BlockAddress>(V), 1237341825Sdim MovZ->getOperand(1).getOffset(), Flags)); 1238341825Sdim } 1239341825Sdim MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); 1240341825Sdim constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); 1241341825Sdim return DstReg; 1242341825Sdim }; 1243353358Sdim Register DstReg = BuildMovK(MovZ.getReg(0), 1244341825Sdim AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); 1245341825Sdim DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); 1246341825Sdim BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); 1247341825Sdim return; 1248341825Sdim} 1249341825Sdim 1250353358Sdimvoid AArch64InstructionSelector::preISelLower(MachineInstr &I) const { 1251353358Sdim MachineBasicBlock &MBB = *I.getParent(); 1252353358Sdim MachineFunction &MF = *MBB.getParent(); 1253353358Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1254353358Sdim 1255353358Sdim switch (I.getOpcode()) { 1256353358Sdim case TargetOpcode::G_SHL: 1257353358Sdim case TargetOpcode::G_ASHR: 1258353358Sdim case TargetOpcode::G_LSHR: { 1259353358Sdim // These shifts are legalized to have 64 bit shift amounts because we want 1260353358Sdim // to take advantage of the existing imported selection patterns that assume 1261353358Sdim // the immediates are s64s. 
However, if the shifted type is 32 bits and for 1262353358Sdim // some reason we receive input GMIR that has an s64 shift amount that's not 1263353358Sdim // a G_CONSTANT, insert a truncate so that we can still select the s32 1264353358Sdim // register-register variant. 1265360784Sdim Register SrcReg = I.getOperand(1).getReg(); 1266360784Sdim Register ShiftReg = I.getOperand(2).getReg(); 1267353358Sdim const LLT ShiftTy = MRI.getType(ShiftReg); 1268353358Sdim const LLT SrcTy = MRI.getType(SrcReg); 1269353358Sdim if (SrcTy.isVector()) 1270353358Sdim return; 1271353358Sdim assert(!ShiftTy.isVector() && "unexpected vector shift ty"); 1272353358Sdim if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64) 1273353358Sdim return; 1274353358Sdim auto *AmtMI = MRI.getVRegDef(ShiftReg); 1275353358Sdim assert(AmtMI && "could not find a vreg definition for shift amount"); 1276353358Sdim if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) { 1277353358Sdim // Insert a subregister copy to implement a 64->32 trunc 1278353358Sdim MachineIRBuilder MIB(I); 1279353358Sdim auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) 1280353358Sdim .addReg(ShiftReg, 0, AArch64::sub_32); 1281353358Sdim MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1282353358Sdim I.getOperand(2).setReg(Trunc.getReg(0)); 1283353358Sdim } 1284353358Sdim return; 1285353358Sdim } 1286360784Sdim case TargetOpcode::G_STORE: 1287360784Sdim contractCrossBankCopyIntoStore(I, MRI); 1288360784Sdim return; 1289353358Sdim default: 1290353358Sdim return; 1291353358Sdim } 1292353358Sdim} 1293353358Sdim 1294353358Sdimbool AArch64InstructionSelector::earlySelectSHL( 1295353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 1296353358Sdim // We try to match the immediate variant of LSL, which is actually an alias 1297353358Sdim // for a special case of UBFM. Otherwise, we fall back to the imported 1298353358Sdim // selector which will match the register variant. 
1299353358Sdim assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); 1300353358Sdim const auto &MO = I.getOperand(2); 1301353358Sdim auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI); 1302353358Sdim if (!VRegAndVal) 1303353358Sdim return false; 1304353358Sdim 1305353358Sdim const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1306353358Sdim if (DstTy.isVector()) 1307353358Sdim return false; 1308353358Sdim bool Is64Bit = DstTy.getSizeInBits() == 64; 1309353358Sdim auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); 1310353358Sdim auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); 1311353358Sdim MachineIRBuilder MIB(I); 1312353358Sdim 1313353358Sdim if (!Imm1Fn || !Imm2Fn) 1314353358Sdim return false; 1315353358Sdim 1316353358Sdim auto NewI = 1317353358Sdim MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, 1318353358Sdim {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); 1319353358Sdim 1320353358Sdim for (auto &RenderFn : *Imm1Fn) 1321353358Sdim RenderFn(NewI); 1322353358Sdim for (auto &RenderFn : *Imm2Fn) 1323353358Sdim RenderFn(NewI); 1324353358Sdim 1325353358Sdim I.eraseFromParent(); 1326353358Sdim return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 1327353358Sdim} 1328353358Sdim 1329360784Sdimvoid AArch64InstructionSelector::contractCrossBankCopyIntoStore( 1330360784Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 1331360784Sdim assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); 1332360784Sdim // If we're storing a scalar, it doesn't matter what register bank that 1333360784Sdim // scalar is on. All that matters is the size. 1334360784Sdim // 1335360784Sdim // So, if we see something like this (with a 32-bit scalar as an example): 1336360784Sdim // 1337360784Sdim // %x:gpr(s32) = ... something ... 
1338360784Sdim // %y:fpr(s32) = COPY %x:gpr(s32) 1339360784Sdim // G_STORE %y:fpr(s32) 1340360784Sdim // 1341360784Sdim // We can fix this up into something like this: 1342360784Sdim // 1343360784Sdim // G_STORE %x:gpr(s32) 1344360784Sdim // 1345360784Sdim // And then continue the selection process normally. 1346360784Sdim MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI); 1347360784Sdim if (!Def) 1348360784Sdim return; 1349360784Sdim Register DefDstReg = Def->getOperand(0).getReg(); 1350360784Sdim LLT DefDstTy = MRI.getType(DefDstReg); 1351360784Sdim Register StoreSrcReg = I.getOperand(0).getReg(); 1352360784Sdim LLT StoreSrcTy = MRI.getType(StoreSrcReg); 1353360784Sdim 1354360784Sdim // If we get something strange like a physical register, then we shouldn't 1355360784Sdim // go any further. 1356360784Sdim if (!DefDstTy.isValid()) 1357360784Sdim return; 1358360784Sdim 1359360784Sdim // Are the source and dst types the same size? 1360360784Sdim if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) 1361360784Sdim return; 1362360784Sdim 1363360784Sdim if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == 1364360784Sdim RBI.getRegBank(DefDstReg, MRI, TRI)) 1365360784Sdim return; 1366360784Sdim 1367360784Sdim // We have a cross-bank copy, which is entering a store. Let's fold it. 
1368360784Sdim I.getOperand(0).setReg(DefDstReg); 1369360784Sdim} 1370360784Sdim 1371353358Sdimbool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { 1372353358Sdim assert(I.getParent() && "Instruction should be in a basic block!"); 1373353358Sdim assert(I.getParent()->getParent() && "Instruction should be in a function!"); 1374353358Sdim 1375353358Sdim MachineBasicBlock &MBB = *I.getParent(); 1376353358Sdim MachineFunction &MF = *MBB.getParent(); 1377353358Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1378353358Sdim 1379353358Sdim switch (I.getOpcode()) { 1380353358Sdim case TargetOpcode::G_SHL: 1381353358Sdim return earlySelectSHL(I, MRI); 1382360784Sdim case TargetOpcode::G_CONSTANT: { 1383360784Sdim bool IsZero = false; 1384360784Sdim if (I.getOperand(1).isCImm()) 1385360784Sdim IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; 1386360784Sdim else if (I.getOperand(1).isImm()) 1387360784Sdim IsZero = I.getOperand(1).getImm() == 0; 1388360784Sdim 1389360784Sdim if (!IsZero) 1390360784Sdim return false; 1391360784Sdim 1392360784Sdim Register DefReg = I.getOperand(0).getReg(); 1393360784Sdim LLT Ty = MRI.getType(DefReg); 1394360784Sdim if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32)) 1395360784Sdim return false; 1396360784Sdim 1397360784Sdim if (Ty == LLT::scalar(64)) { 1398360784Sdim I.getOperand(1).ChangeToRegister(AArch64::XZR, false); 1399360784Sdim RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 1400360784Sdim } else { 1401360784Sdim I.getOperand(1).ChangeToRegister(AArch64::WZR, false); 1402360784Sdim RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); 1403360784Sdim } 1404360784Sdim I.setDesc(TII.get(TargetOpcode::COPY)); 1405360784Sdim return true; 1406360784Sdim } 1407353358Sdim default: 1408353358Sdim return false; 1409353358Sdim } 1410353358Sdim} 1411353358Sdim 1412360784Sdimbool AArch64InstructionSelector::select(MachineInstr &I) { 1413311116Sdim assert(I.getParent() && "Instruction should be 
in a basic block!"); 1414311116Sdim assert(I.getParent()->getParent() && "Instruction should be in a function!"); 1415311116Sdim 1416311116Sdim MachineBasicBlock &MBB = *I.getParent(); 1417311116Sdim MachineFunction &MF = *MBB.getParent(); 1418311116Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1419311116Sdim 1420311116Sdim unsigned Opcode = I.getOpcode(); 1421327952Sdim // G_PHI requires same handling as PHI 1422327952Sdim if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) { 1423311116Sdim // Certain non-generic instructions also need some special handling. 1424311116Sdim 1425311116Sdim if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 1426311116Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1427311116Sdim 1428327952Sdim if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { 1429353358Sdim const Register DefReg = I.getOperand(0).getReg(); 1430311116Sdim const LLT DefTy = MRI.getType(DefReg); 1431311116Sdim 1432353358Sdim const RegClassOrRegBank &RegClassOrBank = 1433353358Sdim MRI.getRegClassOrRegBank(DefReg); 1434311116Sdim 1435353358Sdim const TargetRegisterClass *DefRC 1436353358Sdim = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 1437353358Sdim if (!DefRC) { 1438353358Sdim if (!DefTy.isValid()) { 1439353358Sdim LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 1440353358Sdim return false; 1441353358Sdim } 1442353358Sdim const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 1443353358Sdim DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); 1444311116Sdim if (!DefRC) { 1445353358Sdim LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 1446353358Sdim return false; 1447311116Sdim } 1448311116Sdim } 1449353358Sdim 1450327952Sdim I.setDesc(TII.get(TargetOpcode::PHI)); 1451311116Sdim 1452311116Sdim return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 1453311116Sdim } 1454311116Sdim 1455311116Sdim if (I.isCopy()) 1456311116Sdim return selectCopy(I, TII, MRI, TRI, 
RBI); 1457311116Sdim 1458311116Sdim return true; 1459311116Sdim } 1460311116Sdim 1461311116Sdim 1462311116Sdim if (I.getNumOperands() != I.getNumExplicitOperands()) { 1463341825Sdim LLVM_DEBUG( 1464341825Sdim dbgs() << "Generic instruction has unexpected implicit operands\n"); 1465311116Sdim return false; 1466311116Sdim } 1467311116Sdim 1468353358Sdim // Try to do some lowering before we start instruction selecting. These 1469353358Sdim // lowerings are purely transformations on the input G_MIR and so selection 1470353358Sdim // must continue after any modification of the instruction. 1471353358Sdim preISelLower(I); 1472353358Sdim 1473353358Sdim // There may be patterns where the importer can't deal with them optimally, 1474353358Sdim // but does select it to a suboptimal sequence so our custom C++ selection 1475353358Sdim // code later never has a chance to work on it. Therefore, we have an early 1476353358Sdim // selection attempt here to give priority to certain selection routines 1477353358Sdim // over the imported ones. 1478353358Sdim if (earlySelect(I)) 1479353358Sdim return true; 1480353358Sdim 1481360784Sdim if (selectImpl(I, *CoverageInfo)) 1482311116Sdim return true; 1483311116Sdim 1484311116Sdim LLT Ty = 1485311116Sdim I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; 1486311116Sdim 1487353358Sdim MachineIRBuilder MIB(I); 1488353358Sdim 1489311116Sdim switch (Opcode) { 1490311116Sdim case TargetOpcode::G_BRCOND: { 1491311116Sdim if (Ty.getSizeInBits() > 32) { 1492311116Sdim // We shouldn't need this on AArch64, but it would be implemented as an 1493311116Sdim // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the 1494311116Sdim // bit being tested is < 32. 
1495341825Sdim LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty 1496341825Sdim << ", expected at most 32-bits"); 1497311116Sdim return false; 1498311116Sdim } 1499311116Sdim 1500353358Sdim const Register CondReg = I.getOperand(0).getReg(); 1501311116Sdim MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1502311116Sdim 1503344779Sdim // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1504344779Sdim // instructions will not be produced, as they are conditional branch 1505344779Sdim // instructions that do not set flags. 1506344779Sdim bool ProduceNonFlagSettingCondBr = 1507344779Sdim !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); 1508344779Sdim if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI)) 1509321369Sdim return true; 1510321369Sdim 1511344779Sdim if (ProduceNonFlagSettingCondBr) { 1512344779Sdim auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW)) 1513344779Sdim .addUse(CondReg) 1514344779Sdim .addImm(/*bit offset=*/0) 1515344779Sdim .addMBB(DestMBB); 1516311116Sdim 1517344779Sdim I.eraseFromParent(); 1518344779Sdim return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); 1519344779Sdim } else { 1520344779Sdim auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) 1521344779Sdim .addDef(AArch64::WZR) 1522344779Sdim .addUse(CondReg) 1523344779Sdim .addImm(1); 1524344779Sdim constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI); 1525344779Sdim auto Bcc = 1526344779Sdim BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc)) 1527344779Sdim .addImm(AArch64CC::EQ) 1528344779Sdim .addMBB(DestMBB); 1529344779Sdim 1530344779Sdim I.eraseFromParent(); 1531344779Sdim return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI); 1532344779Sdim } 1533311116Sdim } 1534311116Sdim 1535321369Sdim case TargetOpcode::G_BRINDIRECT: { 1536321369Sdim I.setDesc(TII.get(AArch64::BR)); 1537321369Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 
1538321369Sdim } 1539321369Sdim 1540353358Sdim case TargetOpcode::G_BRJT: 1541353358Sdim return selectBrJT(I, MRI); 1542353358Sdim 1543353358Sdim case TargetOpcode::G_BSWAP: { 1544353358Sdim // Handle vector types for G_BSWAP directly. 1545353358Sdim Register DstReg = I.getOperand(0).getReg(); 1546353358Sdim LLT DstTy = MRI.getType(DstReg); 1547353358Sdim 1548353358Sdim // We should only get vector types here; everything else is handled by the 1549353358Sdim // importer right now. 1550353358Sdim if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { 1551353358Sdim LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n"); 1552353358Sdim return false; 1553353358Sdim } 1554353358Sdim 1555353358Sdim // Only handle 4 and 2 element vectors for now. 1556353358Sdim // TODO: 16-bit elements. 1557353358Sdim unsigned NumElts = DstTy.getNumElements(); 1558353358Sdim if (NumElts != 4 && NumElts != 2) { 1559353358Sdim LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n"); 1560353358Sdim return false; 1561353358Sdim } 1562353358Sdim 1563353358Sdim // Choose the correct opcode for the supported types. Right now, that's 1564353358Sdim // v2s32, v4s32, and v2s64. 1565353358Sdim unsigned Opc = 0; 1566353358Sdim unsigned EltSize = DstTy.getElementType().getSizeInBits(); 1567353358Sdim if (EltSize == 32) 1568353358Sdim Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8 1569353358Sdim : AArch64::REV32v16i8; 1570353358Sdim else if (EltSize == 64) 1571353358Sdim Opc = AArch64::REV64v16i8; 1572353358Sdim 1573353358Sdim // We should always get something by the time we get here... 
1574353358Sdim assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); 1575353358Sdim 1576353358Sdim I.setDesc(TII.get(Opc)); 1577353358Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1578353358Sdim } 1579353358Sdim 1580311116Sdim case TargetOpcode::G_FCONSTANT: 1581311116Sdim case TargetOpcode::G_CONSTANT: { 1582311116Sdim const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; 1583311116Sdim 1584353358Sdim const LLT s8 = LLT::scalar(8); 1585353358Sdim const LLT s16 = LLT::scalar(16); 1586311116Sdim const LLT s32 = LLT::scalar(32); 1587311116Sdim const LLT s64 = LLT::scalar(64); 1588311116Sdim const LLT p0 = LLT::pointer(0, 64); 1589311116Sdim 1590353358Sdim const Register DefReg = I.getOperand(0).getReg(); 1591311116Sdim const LLT DefTy = MRI.getType(DefReg); 1592311116Sdim const unsigned DefSize = DefTy.getSizeInBits(); 1593311116Sdim const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 1594311116Sdim 1595311116Sdim // FIXME: Redundant check, but even less readable when factored out. 1596311116Sdim if (isFP) { 1597311116Sdim if (Ty != s32 && Ty != s64) { 1598341825Sdim LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 1599341825Sdim << " constant, expected: " << s32 << " or " << s64 1600341825Sdim << '\n'); 1601311116Sdim return false; 1602311116Sdim } 1603311116Sdim 1604311116Sdim if (RB.getID() != AArch64::FPRRegBankID) { 1605341825Sdim LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 1606341825Sdim << " constant on bank: " << RB 1607341825Sdim << ", expected: FPR\n"); 1608311116Sdim return false; 1609311116Sdim } 1610327952Sdim 1611327952Sdim // The case when we have 0.0 is covered by tablegen. Reject it here so we 1612327952Sdim // can be sure tablegen works correctly and isn't rescued by this code. 1613327952Sdim if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0)) 1614327952Sdim return false; 1615311116Sdim } else { 1616327952Sdim // s32 and s64 are covered by tablegen. 
1617353358Sdim if (Ty != p0 && Ty != s8 && Ty != s16) { 1618341825Sdim LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 1619341825Sdim << " constant, expected: " << s32 << ", " << s64 1620341825Sdim << ", or " << p0 << '\n'); 1621311116Sdim return false; 1622311116Sdim } 1623311116Sdim 1624311116Sdim if (RB.getID() != AArch64::GPRRegBankID) { 1625341825Sdim LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 1626341825Sdim << " constant on bank: " << RB 1627341825Sdim << ", expected: GPR\n"); 1628311116Sdim return false; 1629311116Sdim } 1630311116Sdim } 1631311116Sdim 1632353358Sdim // We allow G_CONSTANT of types < 32b. 1633311116Sdim const unsigned MovOpc = 1634353358Sdim DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; 1635311116Sdim 1636311116Sdim if (isFP) { 1637353358Sdim // Either emit a FMOV, or emit a copy to emit a normal mov. 1638311116Sdim const TargetRegisterClass &GPRRC = 1639311116Sdim DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; 1640311116Sdim const TargetRegisterClass &FPRRC = 1641311116Sdim DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass; 1642311116Sdim 1643353358Sdim // Can we use a FMOV instruction to represent the immediate? 1644353358Sdim if (emitFMovForFConstant(I, MRI)) 1645353358Sdim return true; 1646353358Sdim 1647353358Sdim // Nope. Emit a copy and use a normal mov instead. 
1648353358Sdim const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC); 1649311116Sdim MachineOperand &RegOp = I.getOperand(0); 1650311116Sdim RegOp.setReg(DefGPRReg); 1651353358Sdim MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 1652353358Sdim MIB.buildCopy({DefReg}, {DefGPRReg}); 1653311116Sdim 1654311116Sdim if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { 1655341825Sdim LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); 1656311116Sdim return false; 1657311116Sdim } 1658311116Sdim 1659311116Sdim MachineOperand &ImmOp = I.getOperand(1); 1660311116Sdim // FIXME: Is going through int64_t always correct? 1661311116Sdim ImmOp.ChangeToImmediate( 1662311116Sdim ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 1663321369Sdim } else if (I.getOperand(1).isCImm()) { 1664311116Sdim uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 1665311116Sdim I.getOperand(1).ChangeToImmediate(Val); 1666321369Sdim } else if (I.getOperand(1).isImm()) { 1667321369Sdim uint64_t Val = I.getOperand(1).getImm(); 1668321369Sdim I.getOperand(1).ChangeToImmediate(Val); 1669311116Sdim } 1670311116Sdim 1671353358Sdim I.setDesc(TII.get(MovOpc)); 1672311116Sdim constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1673311116Sdim return true; 1674311116Sdim } 1675327952Sdim case TargetOpcode::G_EXTRACT: { 1676360784Sdim Register DstReg = I.getOperand(0).getReg(); 1677360784Sdim Register SrcReg = I.getOperand(1).getReg(); 1678360784Sdim LLT SrcTy = MRI.getType(SrcReg); 1679360784Sdim LLT DstTy = MRI.getType(DstReg); 1680330384Sdim (void)DstTy; 1681329983Sdim unsigned SrcSize = SrcTy.getSizeInBits(); 1682311116Sdim 1683360784Sdim if (SrcTy.getSizeInBits() > 64) { 1684360784Sdim // This should be an extract of an s128, which is like a vector extract. 1685360784Sdim if (SrcTy.getSizeInBits() != 128) 1686360784Sdim return false; 1687360784Sdim // Only support extracting 64 bits from an s128 at the moment. 
1688360784Sdim if (DstTy.getSizeInBits() != 64) 1689360784Sdim return false; 1690360784Sdim 1691360784Sdim const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 1692360784Sdim const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 1693360784Sdim // Check we have the right regbank always. 1694360784Sdim assert(SrcRB.getID() == AArch64::FPRRegBankID && 1695360784Sdim DstRB.getID() == AArch64::FPRRegBankID && 1696360784Sdim "Wrong extract regbank!"); 1697360784Sdim (void)SrcRB; 1698360784Sdim 1699360784Sdim // Emit the same code as a vector extract. 1700360784Sdim // Offset must be a multiple of 64. 1701360784Sdim unsigned Offset = I.getOperand(2).getImm(); 1702360784Sdim if (Offset % 64 != 0) 1703360784Sdim return false; 1704360784Sdim unsigned LaneIdx = Offset / 64; 1705360784Sdim MachineIRBuilder MIB(I); 1706360784Sdim MachineInstr *Extract = emitExtractVectorElt( 1707360784Sdim DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 1708360784Sdim if (!Extract) 1709360784Sdim return false; 1710360784Sdim I.eraseFromParent(); 1711360784Sdim return true; 1712360784Sdim } 1713360784Sdim 1714329983Sdim I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); 1715327952Sdim MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 1716327952Sdim Ty.getSizeInBits() - 1); 1717327952Sdim 1718329983Sdim if (SrcSize < 64) { 1719329983Sdim assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 1720329983Sdim "unexpected G_EXTRACT types"); 1721329983Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1722329983Sdim } 1723329983Sdim 1724360784Sdim DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 1725353358Sdim MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 1726353358Sdim MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 1727353358Sdim .addReg(DstReg, 0, AArch64::sub_32); 1728327952Sdim RBI.constrainGenericRegister(I.getOperand(0).getReg(), 1729327952Sdim AArch64::GPR32RegClass, MRI); 1730327952Sdim I.getOperand(0).setReg(DstReg); 1731327952Sdim 1732327952Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1733327952Sdim } 1734327952Sdim 1735327952Sdim case TargetOpcode::G_INSERT: { 1736327952Sdim LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 1737329983Sdim LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1738329983Sdim unsigned DstSize = DstTy.getSizeInBits(); 1739327952Sdim // Larger inserts are vectors, same-size ones should be something else by 1740327952Sdim // now (split up or turned into COPYs). 1741327952Sdim if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 1742327952Sdim return false; 1743327952Sdim 1744329983Sdim I.setDesc(TII.get(DstSize == 64 ? 
AArch64::BFMXri : AArch64::BFMWri)); 1745327952Sdim unsigned LSB = I.getOperand(3).getImm(); 1746327952Sdim unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 1747329983Sdim I.getOperand(3).setImm((DstSize - LSB) % DstSize); 1748327952Sdim MachineInstrBuilder(MF, I).addImm(Width - 1); 1749327952Sdim 1750329983Sdim if (DstSize < 64) { 1751329983Sdim assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 1752329983Sdim "unexpected G_INSERT types"); 1753329983Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1754329983Sdim } 1755329983Sdim 1756353358Sdim Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 1757327952Sdim BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 1758327952Sdim TII.get(AArch64::SUBREG_TO_REG)) 1759327952Sdim .addDef(SrcReg) 1760327952Sdim .addImm(0) 1761327952Sdim .addUse(I.getOperand(2).getReg()) 1762327952Sdim .addImm(AArch64::sub_32); 1763327952Sdim RBI.constrainGenericRegister(I.getOperand(2).getReg(), 1764327952Sdim AArch64::GPR32RegClass, MRI); 1765327952Sdim I.getOperand(2).setReg(SrcReg); 1766327952Sdim 1767327952Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1768327952Sdim } 1769311116Sdim case TargetOpcode::G_FRAME_INDEX: { 1770311116Sdim // allocas and G_FRAME_INDEX are only supported in addrspace(0). 1771311116Sdim if (Ty != LLT::pointer(0, 64)) { 1772341825Sdim LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 1773341825Sdim << ", expected: " << LLT::pointer(0, 64) << '\n'); 1774311116Sdim return false; 1775311116Sdim } 1776311116Sdim I.setDesc(TII.get(AArch64::ADDXri)); 1777311116Sdim 1778311116Sdim // MOs for a #0 shifted immediate. 
1779311116Sdim I.addOperand(MachineOperand::CreateImm(0)); 1780311116Sdim I.addOperand(MachineOperand::CreateImm(0)); 1781311116Sdim 1782311116Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1783311116Sdim } 1784311116Sdim 1785311116Sdim case TargetOpcode::G_GLOBAL_VALUE: { 1786311116Sdim auto GV = I.getOperand(1).getGlobal(); 1787360784Sdim if (GV->isThreadLocal()) 1788360784Sdim return selectTLSGlobalValue(I, MRI); 1789360784Sdim 1790360784Sdim unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 1791311116Sdim if (OpFlags & AArch64II::MO_GOT) { 1792311116Sdim I.setDesc(TII.get(AArch64::LOADgot)); 1793311116Sdim I.getOperand(1).setTargetFlags(OpFlags); 1794328381Sdim } else if (TM.getCodeModel() == CodeModel::Large) { 1795328381Sdim // Materialize the global using movz/movk instructions. 1796341825Sdim materializeLargeCMVal(I, GV, OpFlags); 1797328381Sdim I.eraseFromParent(); 1798328381Sdim return true; 1799344779Sdim } else if (TM.getCodeModel() == CodeModel::Tiny) { 1800344779Sdim I.setDesc(TII.get(AArch64::ADR)); 1801344779Sdim I.getOperand(1).setTargetFlags(OpFlags); 1802311116Sdim } else { 1803311116Sdim I.setDesc(TII.get(AArch64::MOVaddr)); 1804311116Sdim I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 1805311116Sdim MachineInstrBuilder MIB(MF, I); 1806311116Sdim MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 1807311116Sdim OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 1808311116Sdim } 1809311116Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1810311116Sdim } 1811311116Sdim 1812353358Sdim case TargetOpcode::G_ZEXTLOAD: 1813311116Sdim case TargetOpcode::G_LOAD: 1814311116Sdim case TargetOpcode::G_STORE: { 1815353358Sdim bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 1816353358Sdim MachineIRBuilder MIB(I); 1817353358Sdim 1818311116Sdim LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); 1819311116Sdim 1820311116Sdim if (PtrTy != LLT::pointer(0, 64)) { 1821341825Sdim LLVM_DEBUG(dbgs() 
<< "Load/Store pointer has type: " << PtrTy 1822341825Sdim << ", expected: " << LLT::pointer(0, 64) << '\n'); 1823311116Sdim return false; 1824311116Sdim } 1825311116Sdim 1826321369Sdim auto &MemOp = **I.memoperands_begin(); 1827360784Sdim if (MemOp.isAtomic()) { 1828360784Sdim // For now we just support s8 acquire loads to be able to compile stack 1829360784Sdim // protector code. 1830360784Sdim if (MemOp.getOrdering() == AtomicOrdering::Acquire && 1831360784Sdim MemOp.getSize() == 1) { 1832360784Sdim I.setDesc(TII.get(AArch64::LDARB)); 1833360784Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1834360784Sdim } 1835360784Sdim LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); 1836321369Sdim return false; 1837321369Sdim } 1838341825Sdim unsigned MemSizeInBits = MemOp.getSize() * 8; 1839321369Sdim 1840353358Sdim const Register PtrReg = I.getOperand(1).getReg(); 1841311116Sdim#ifndef NDEBUG 1842321369Sdim const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); 1843311116Sdim // Sanity-check the pointer register. 1844311116Sdim assert(PtrRB.getID() == AArch64::GPRRegBankID && 1845311116Sdim "Load/Store pointer operand isn't a GPR"); 1846311116Sdim assert(MRI.getType(PtrReg).isPointer() && 1847311116Sdim "Load/Store pointer operand isn't a pointer"); 1848311116Sdim#endif 1849311116Sdim 1850353358Sdim const Register ValReg = I.getOperand(0).getReg(); 1851311116Sdim const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); 1852311116Sdim 1853311116Sdim const unsigned NewOpc = 1854341825Sdim selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); 1855311116Sdim if (NewOpc == I.getOpcode()) 1856311116Sdim return false; 1857311116Sdim 1858311116Sdim I.setDesc(TII.get(NewOpc)); 1859311116Sdim 1860321369Sdim uint64_t Offset = 0; 1861321369Sdim auto *PtrMI = MRI.getVRegDef(PtrReg); 1862321369Sdim 1863321369Sdim // Try to fold a GEP into our unsigned immediate addressing mode. 
1864360784Sdim if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) { 1865321369Sdim if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) { 1866321369Sdim int64_t Imm = *COff; 1867341825Sdim const unsigned Size = MemSizeInBits / 8; 1868321369Sdim const unsigned Scale = Log2_32(Size); 1869321369Sdim if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { 1870360784Sdim Register Ptr2Reg = PtrMI->getOperand(1).getReg(); 1871321369Sdim I.getOperand(1).setReg(Ptr2Reg); 1872321369Sdim PtrMI = MRI.getVRegDef(Ptr2Reg); 1873321369Sdim Offset = Imm / Size; 1874321369Sdim } 1875321369Sdim } 1876321369Sdim } 1877321369Sdim 1878321369Sdim // If we haven't folded anything into our addressing mode yet, try to fold 1879321369Sdim // a frame index into the base+offset. 1880321369Sdim if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX) 1881321369Sdim I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex()); 1882321369Sdim 1883321369Sdim I.addOperand(MachineOperand::CreateImm(Offset)); 1884321369Sdim 1885321369Sdim // If we're storing a 0, use WZR/XZR. 1886321369Sdim if (auto CVal = getConstantVRegVal(ValReg, MRI)) { 1887321369Sdim if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) { 1888321369Sdim if (I.getOpcode() == AArch64::STRWui) 1889321369Sdim I.getOperand(0).setReg(AArch64::WZR); 1890321369Sdim else if (I.getOpcode() == AArch64::STRXui) 1891321369Sdim I.getOperand(0).setReg(AArch64::XZR); 1892321369Sdim } 1893321369Sdim } 1894321369Sdim 1895353358Sdim if (IsZExtLoad) { 1896353358Sdim // The zextload from a smaller type to i32 should be handled by the importer. 1897353358Sdim if (MRI.getType(ValReg).getSizeInBits() != 64) 1898353358Sdim return false; 1899353358Sdim // If we have a ZEXTLOAD then change the load's type to be a narrower reg 1900353358Sdim //and zero_extend with SUBREG_TO_REG. 
1901353358Sdim Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 1902353358Sdim Register DstReg = I.getOperand(0).getReg(); 1903353358Sdim I.getOperand(0).setReg(LdReg); 1904353358Sdim 1905353358Sdim MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 1906353358Sdim MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) 1907353358Sdim .addImm(0) 1908353358Sdim .addUse(LdReg) 1909353358Sdim .addImm(AArch64::sub_32); 1910353358Sdim constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1911353358Sdim return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, 1912353358Sdim MRI); 1913353358Sdim } 1914311116Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1915311116Sdim } 1916311116Sdim 1917321369Sdim case TargetOpcode::G_SMULH: 1918321369Sdim case TargetOpcode::G_UMULH: { 1919311116Sdim // Reject the various things we don't support yet. 1920311116Sdim if (unsupportedBinOp(I, RBI, MRI, TRI)) 1921311116Sdim return false; 1922311116Sdim 1923353358Sdim const Register DefReg = I.getOperand(0).getReg(); 1924311116Sdim const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 1925311116Sdim 1926311116Sdim if (RB.getID() != AArch64::GPRRegBankID) { 1927341825Sdim LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); 1928311116Sdim return false; 1929311116Sdim } 1930311116Sdim 1931321369Sdim if (Ty != LLT::scalar(64)) { 1932341825Sdim LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty 1933341825Sdim << ", expected: " << LLT::scalar(64) << '\n'); 1934311116Sdim return false; 1935311116Sdim } 1936311116Sdim 1937321369Sdim unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr 1938321369Sdim : AArch64::UMULHrr; 1939311116Sdim I.setDesc(TII.get(NewOpc)); 1940311116Sdim 1941311116Sdim // Now that we selected an opcode, we need to constrain the register 1942311116Sdim // operands to use appropriate classes. 
1943311116Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1944311116Sdim } 1945311116Sdim case TargetOpcode::G_FADD: 1946311116Sdim case TargetOpcode::G_FSUB: 1947311116Sdim case TargetOpcode::G_FMUL: 1948311116Sdim case TargetOpcode::G_FDIV: 1949311116Sdim 1950353358Sdim case TargetOpcode::G_ASHR: 1951353358Sdim if (MRI.getType(I.getOperand(0).getReg()).isVector()) 1952353358Sdim return selectVectorASHR(I, MRI); 1953353358Sdim LLVM_FALLTHROUGH; 1954353358Sdim case TargetOpcode::G_SHL: 1955353358Sdim if (Opcode == TargetOpcode::G_SHL && 1956353358Sdim MRI.getType(I.getOperand(0).getReg()).isVector()) 1957353358Sdim return selectVectorSHL(I, MRI); 1958353358Sdim LLVM_FALLTHROUGH; 1959311116Sdim case TargetOpcode::G_OR: 1960360784Sdim case TargetOpcode::G_LSHR: { 1961311116Sdim // Reject the various things we don't support yet. 1962311116Sdim if (unsupportedBinOp(I, RBI, MRI, TRI)) 1963311116Sdim return false; 1964311116Sdim 1965311116Sdim const unsigned OpSize = Ty.getSizeInBits(); 1966311116Sdim 1967353358Sdim const Register DefReg = I.getOperand(0).getReg(); 1968311116Sdim const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 1969311116Sdim 1970311116Sdim const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); 1971311116Sdim if (NewOpc == I.getOpcode()) 1972311116Sdim return false; 1973311116Sdim 1974311116Sdim I.setDesc(TII.get(NewOpc)); 1975311116Sdim // FIXME: Should the type be always reset in setDesc? 1976311116Sdim 1977311116Sdim // Now that we selected an opcode, we need to constrain the register 1978311116Sdim // operands to use appropriate classes. 
1979311116Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1980311116Sdim } 1981311116Sdim 1982360784Sdim case TargetOpcode::G_PTR_ADD: { 1983360784Sdim MachineIRBuilder MIRBuilder(I); 1984360784Sdim emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), 1985360784Sdim MIRBuilder); 1986360784Sdim I.eraseFromParent(); 1987360784Sdim return true; 1988360784Sdim } 1989353358Sdim case TargetOpcode::G_UADDO: { 1990353358Sdim // TODO: Support other types. 1991353358Sdim unsigned OpSize = Ty.getSizeInBits(); 1992353358Sdim if (OpSize != 32 && OpSize != 64) { 1993353358Sdim LLVM_DEBUG( 1994353358Sdim dbgs() 1995353358Sdim << "G_UADDO currently only supported for 32 and 64 b types.\n"); 1996353358Sdim return false; 1997353358Sdim } 1998353358Sdim 1999353358Sdim // TODO: Support vectors. 2000353358Sdim if (Ty.isVector()) { 2001353358Sdim LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n"); 2002353358Sdim return false; 2003353358Sdim } 2004353358Sdim 2005353358Sdim // Add and set the set condition flag. 2006353358Sdim unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr; 2007353358Sdim MachineIRBuilder MIRBuilder(I); 2008353358Sdim auto AddsMI = MIRBuilder.buildInstr( 2009353358Sdim AddsOpc, {I.getOperand(0).getReg()}, 2010353358Sdim {I.getOperand(2).getReg(), I.getOperand(3).getReg()}); 2011353358Sdim constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI); 2012353358Sdim 2013353358Sdim // Now, put the overflow result in the register given by the first operand 2014353358Sdim // to the G_UADDO. CSINC increments the result when the predicate is false, 2015353358Sdim // so to get the increment when it's true, we need to use the inverse. In 2016353358Sdim // this case, we want to increment when carry is set. 
2017353358Sdim auto CsetMI = MIRBuilder 2018353358Sdim .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, 2019353358Sdim {Register(AArch64::WZR), Register(AArch64::WZR)}) 2020353358Sdim .addImm(getInvertedCondCode(AArch64CC::HS)); 2021353358Sdim constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); 2022353358Sdim I.eraseFromParent(); 2023353358Sdim return true; 2024353358Sdim } 2025353358Sdim 2026321369Sdim case TargetOpcode::G_PTR_MASK: { 2027321369Sdim uint64_t Align = I.getOperand(2).getImm(); 2028321369Sdim if (Align >= 64 || Align == 0) 2029321369Sdim return false; 2030321369Sdim 2031321369Sdim uint64_t Mask = ~((1ULL << Align) - 1); 2032321369Sdim I.setDesc(TII.get(AArch64::ANDXri)); 2033321369Sdim I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64)); 2034321369Sdim 2035321369Sdim return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2036321369Sdim } 2037311116Sdim case TargetOpcode::G_PTRTOINT: 2038311116Sdim case TargetOpcode::G_TRUNC: { 2039311116Sdim const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2040311116Sdim const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 2041311116Sdim 2042353358Sdim const Register DstReg = I.getOperand(0).getReg(); 2043353358Sdim const Register SrcReg = I.getOperand(1).getReg(); 2044311116Sdim 2045311116Sdim const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2046311116Sdim const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2047311116Sdim 2048311116Sdim if (DstRB.getID() != SrcRB.getID()) { 2049341825Sdim LLVM_DEBUG( 2050341825Sdim dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 2051311116Sdim return false; 2052311116Sdim } 2053311116Sdim 2054311116Sdim if (DstRB.getID() == AArch64::GPRRegBankID) { 2055311116Sdim const TargetRegisterClass *DstRC = 2056311116Sdim getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2057311116Sdim if (!DstRC) 2058311116Sdim return false; 2059311116Sdim 2060311116Sdim const TargetRegisterClass *SrcRC = 2061311116Sdim 
getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); 2062311116Sdim if (!SrcRC) 2063311116Sdim return false; 2064311116Sdim 2065311116Sdim if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 2066311116Sdim !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 2067341825Sdim LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 2068311116Sdim return false; 2069311116Sdim } 2070311116Sdim 2071311116Sdim if (DstRC == SrcRC) { 2072311116Sdim // Nothing to be done 2073321369Sdim } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && 2074321369Sdim SrcTy == LLT::scalar(64)) { 2075321369Sdim llvm_unreachable("TableGen can import this case"); 2076321369Sdim return false; 2077311116Sdim } else if (DstRC == &AArch64::GPR32RegClass && 2078311116Sdim SrcRC == &AArch64::GPR64RegClass) { 2079311116Sdim I.getOperand(1).setSubReg(AArch64::sub_32); 2080311116Sdim } else { 2081341825Sdim LLVM_DEBUG( 2082341825Sdim dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 2083311116Sdim return false; 2084311116Sdim } 2085311116Sdim 2086311116Sdim I.setDesc(TII.get(TargetOpcode::COPY)); 2087311116Sdim return true; 2088311116Sdim } else if (DstRB.getID() == AArch64::FPRRegBankID) { 2089311116Sdim if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) { 2090311116Sdim I.setDesc(TII.get(AArch64::XTNv4i16)); 2091311116Sdim constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2092311116Sdim return true; 2093311116Sdim } 2094360784Sdim 2095360784Sdim if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 2096360784Sdim MachineIRBuilder MIB(I); 2097360784Sdim MachineInstr *Extract = emitExtractVectorElt( 2098360784Sdim DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 2099360784Sdim if (!Extract) 2100360784Sdim return false; 2101360784Sdim I.eraseFromParent(); 2102360784Sdim return true; 2103360784Sdim } 2104311116Sdim } 2105311116Sdim 2106311116Sdim return false; 2107311116Sdim } 2108311116Sdim 2109311116Sdim case 
TargetOpcode::G_ANYEXT: { 2110353358Sdim const Register DstReg = I.getOperand(0).getReg(); 2111353358Sdim const Register SrcReg = I.getOperand(1).getReg(); 2112311116Sdim 2113311116Sdim const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 2114311116Sdim if (RBDst.getID() != AArch64::GPRRegBankID) { 2115341825Sdim LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 2116341825Sdim << ", expected: GPR\n"); 2117311116Sdim return false; 2118311116Sdim } 2119311116Sdim 2120311116Sdim const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 2121311116Sdim if (RBSrc.getID() != AArch64::GPRRegBankID) { 2122341825Sdim LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc 2123341825Sdim << ", expected: GPR\n"); 2124311116Sdim return false; 2125311116Sdim } 2126311116Sdim 2127311116Sdim const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 2128311116Sdim 2129311116Sdim if (DstSize == 0) { 2130341825Sdim LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); 2131311116Sdim return false; 2132311116Sdim } 2133311116Sdim 2134311116Sdim if (DstSize != 64 && DstSize > 32) { 2135341825Sdim LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize 2136341825Sdim << ", expected: 32 or 64\n"); 2137311116Sdim return false; 2138311116Sdim } 2139311116Sdim // At this point G_ANYEXT is just like a plain COPY, but we need 2140311116Sdim // to explicitly form the 64-bit value if any. 
2141311116Sdim if (DstSize > 32) { 2142353358Sdim Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); 2143311116Sdim BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) 2144311116Sdim .addDef(ExtSrc) 2145311116Sdim .addImm(0) 2146311116Sdim .addUse(SrcReg) 2147311116Sdim .addImm(AArch64::sub_32); 2148311116Sdim I.getOperand(1).setReg(ExtSrc); 2149311116Sdim } 2150311116Sdim return selectCopy(I, TII, MRI, TRI, RBI); 2151311116Sdim } 2152311116Sdim 2153311116Sdim case TargetOpcode::G_ZEXT: 2154311116Sdim case TargetOpcode::G_SEXT: { 2155311116Sdim unsigned Opcode = I.getOpcode(); 2156360784Sdim const bool IsSigned = Opcode == TargetOpcode::G_SEXT; 2157353358Sdim const Register DefReg = I.getOperand(0).getReg(); 2158353358Sdim const Register SrcReg = I.getOperand(1).getReg(); 2159360784Sdim const LLT DstTy = MRI.getType(DefReg); 2160360784Sdim const LLT SrcTy = MRI.getType(SrcReg); 2161360784Sdim unsigned DstSize = DstTy.getSizeInBits(); 2162360784Sdim unsigned SrcSize = SrcTy.getSizeInBits(); 2163311116Sdim 2164360784Sdim if (DstTy.isVector()) 2165360784Sdim return false; // Should be handled by imported patterns. 2166360784Sdim 2167360784Sdim assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == 2168360784Sdim AArch64::GPRRegBankID && 2169360784Sdim "Unexpected ext regbank"); 2170360784Sdim 2171360784Sdim MachineIRBuilder MIB(I); 2172360784Sdim MachineInstr *ExtI; 2173360784Sdim 2174360784Sdim // First check if we're extending the result of a load which has a dest type 2175360784Sdim // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest 2176360784Sdim // GPR register on AArch64 and all loads which are smaller automatically 2177360784Sdim // zero-extend the upper bits. E.g. 
2178360784Sdim // %v(s8) = G_LOAD %p, :: (load 1) 2179360784Sdim // %v2(s32) = G_ZEXT %v(s8) 2180360784Sdim if (!IsSigned) { 2181360784Sdim auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 2182360784Sdim if (LoadMI && 2183360784Sdim RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) { 2184360784Sdim const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); 2185360784Sdim unsigned BytesLoaded = MemOp->getSize(); 2186360784Sdim if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) 2187360784Sdim return selectCopy(I, TII, MRI, TRI, RBI); 2188360784Sdim } 2189311116Sdim } 2190311116Sdim 2191360784Sdim if (DstSize == 64) { 2192311116Sdim // FIXME: Can we avoid manually doing this? 2193311116Sdim if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) { 2194341825Sdim LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) 2195341825Sdim << " operand\n"); 2196311116Sdim return false; 2197311116Sdim } 2198311116Sdim 2199360784Sdim auto SubregToReg = 2200360784Sdim MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {}) 2201360784Sdim .addImm(0) 2202360784Sdim .addUse(SrcReg) 2203360784Sdim .addImm(AArch64::sub_32); 2204311116Sdim 2205360784Sdim ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, 2206360784Sdim {DefReg}, {SubregToReg}) 2207360784Sdim .addImm(0) 2208360784Sdim .addImm(SrcSize - 1); 2209360784Sdim } else if (DstSize <= 32) { 2210360784Sdim ExtI = MIB.buildInstr(IsSigned ? 
AArch64::SBFMWri : AArch64::UBFMWri, 2211360784Sdim {DefReg}, {SrcReg}) 2212360784Sdim .addImm(0) 2213360784Sdim .addImm(SrcSize - 1); 2214311116Sdim } else { 2215311116Sdim return false; 2216311116Sdim } 2217311116Sdim 2218311116Sdim constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 2219311116Sdim I.eraseFromParent(); 2220311116Sdim return true; 2221311116Sdim } 2222311116Sdim 2223311116Sdim case TargetOpcode::G_SITOFP: 2224311116Sdim case TargetOpcode::G_UITOFP: 2225311116Sdim case TargetOpcode::G_FPTOSI: 2226311116Sdim case TargetOpcode::G_FPTOUI: { 2227311116Sdim const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), 2228311116Sdim SrcTy = MRI.getType(I.getOperand(1).getReg()); 2229311116Sdim const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); 2230311116Sdim if (NewOpc == Opcode) 2231311116Sdim return false; 2232311116Sdim 2233311116Sdim I.setDesc(TII.get(NewOpc)); 2234311116Sdim constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2235311116Sdim 2236311116Sdim return true; 2237311116Sdim } 2238311116Sdim 2239311116Sdim 2240311116Sdim case TargetOpcode::G_INTTOPTR: 2241327952Sdim // The importer is currently unable to import pointer types since they 2242327952Sdim // didn't exist in SelectionDAG. 2243311116Sdim return selectCopy(I, TII, MRI, TRI, RBI); 2244311116Sdim 2245327952Sdim case TargetOpcode::G_BITCAST: 2246327952Sdim // Imported SelectionDAG rules can handle every bitcast except those that 2247327952Sdim // bitcast from a type to the same type. Ideally, these shouldn't occur 2248353358Sdim // but we might not run an optimizer that deletes them. The other exception 2249353358Sdim // is bitcasts involving pointer types, as SelectionDAG has no knowledge 2250353358Sdim // of them. 
2251353358Sdim return selectCopy(I, TII, MRI, TRI, RBI); 2252311116Sdim 2253311116Sdim case TargetOpcode::G_SELECT: { 2254311116Sdim if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { 2255341825Sdim LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty 2256341825Sdim << ", expected: " << LLT::scalar(1) << '\n'); 2257311116Sdim return false; 2258311116Sdim } 2259311116Sdim 2260353358Sdim const Register CondReg = I.getOperand(1).getReg(); 2261353358Sdim const Register TReg = I.getOperand(2).getReg(); 2262353358Sdim const Register FReg = I.getOperand(3).getReg(); 2263311116Sdim 2264353358Sdim if (tryOptSelect(I)) 2265353358Sdim return true; 2266311116Sdim 2267353358Sdim Register CSelOpc = selectSelectOpc(I, MRI, RBI); 2268311116Sdim MachineInstr &TstMI = 2269311116Sdim *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) 2270311116Sdim .addDef(AArch64::WZR) 2271311116Sdim .addUse(CondReg) 2272311116Sdim .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2273311116Sdim 2274311116Sdim MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc)) 2275311116Sdim .addDef(I.getOperand(0).getReg()) 2276311116Sdim .addUse(TReg) 2277311116Sdim .addUse(FReg) 2278311116Sdim .addImm(AArch64CC::NE); 2279311116Sdim 2280311116Sdim constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI); 2281311116Sdim constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI); 2282311116Sdim 2283311116Sdim I.eraseFromParent(); 2284311116Sdim return true; 2285311116Sdim } 2286311116Sdim case TargetOpcode::G_ICMP: { 2287353358Sdim if (Ty.isVector()) 2288353358Sdim return selectVectorICmp(I, MRI); 2289353358Sdim 2290327952Sdim if (Ty != LLT::scalar(32)) { 2291341825Sdim LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty 2292341825Sdim << ", expected: " << LLT::scalar(32) << '\n'); 2293311116Sdim return false; 2294311116Sdim } 2295311116Sdim 2296353358Sdim MachineIRBuilder MIRBuilder(I); 2297353358Sdim if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), 
I.getOperand(1), 2298353358Sdim MIRBuilder)) 2299311116Sdim return false; 2300353358Sdim emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(), 2301353358Sdim MIRBuilder); 2302311116Sdim I.eraseFromParent(); 2303311116Sdim return true; 2304311116Sdim } 2305311116Sdim 2306311116Sdim case TargetOpcode::G_FCMP: { 2307327952Sdim if (Ty != LLT::scalar(32)) { 2308341825Sdim LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty 2309341825Sdim << ", expected: " << LLT::scalar(32) << '\n'); 2310311116Sdim return false; 2311311116Sdim } 2312311116Sdim 2313353358Sdim unsigned CmpOpc = selectFCMPOpc(I, MRI); 2314353358Sdim if (!CmpOpc) 2315311116Sdim return false; 2316311116Sdim 2317311116Sdim // FIXME: regbank 2318311116Sdim 2319311116Sdim AArch64CC::CondCode CC1, CC2; 2320311116Sdim changeFCMPPredToAArch64CC( 2321311116Sdim (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2); 2322311116Sdim 2323353358Sdim // Partially build the compare. Decide if we need to add a use for the 2324353358Sdim // third operand based off whether or not we're comparing against 0.0. 2325353358Sdim auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) 2326353358Sdim .addUse(I.getOperand(2).getReg()); 2327311116Sdim 2328353358Sdim // If we don't have an immediate compare, then we need to add a use of the 2329353358Sdim // register which wasn't used for the immediate. 2330353358Sdim // Note that the immediate will always be the last operand. 
2331353358Sdim if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) 2332353358Sdim CmpMI = CmpMI.addUse(I.getOperand(3).getReg()); 2333353358Sdim 2334353358Sdim const Register DefReg = I.getOperand(0).getReg(); 2335353358Sdim Register Def1Reg = DefReg; 2336311116Sdim if (CC2 != AArch64CC::AL) 2337311116Sdim Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2338311116Sdim 2339311116Sdim MachineInstr &CSetMI = 2340311116Sdim *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2341311116Sdim .addDef(Def1Reg) 2342311116Sdim .addUse(AArch64::WZR) 2343311116Sdim .addUse(AArch64::WZR) 2344321369Sdim .addImm(getInvertedCondCode(CC1)); 2345311116Sdim 2346311116Sdim if (CC2 != AArch64CC::AL) { 2347353358Sdim Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2348311116Sdim MachineInstr &CSet2MI = 2349311116Sdim *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2350311116Sdim .addDef(Def2Reg) 2351311116Sdim .addUse(AArch64::WZR) 2352311116Sdim .addUse(AArch64::WZR) 2353321369Sdim .addImm(getInvertedCondCode(CC2)); 2354311116Sdim MachineInstr &OrMI = 2355311116Sdim *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr)) 2356311116Sdim .addDef(DefReg) 2357311116Sdim .addUse(Def1Reg) 2358311116Sdim .addUse(Def2Reg); 2359311116Sdim constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI); 2360311116Sdim constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI); 2361311116Sdim } 2362353358Sdim constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 2363311116Sdim constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); 2364311116Sdim 2365311116Sdim I.eraseFromParent(); 2366311116Sdim return true; 2367311116Sdim } 2368321369Sdim case TargetOpcode::G_VASTART: 2369321369Sdim return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI) 2370321369Sdim : selectVaStartAAPCS(I, MF, MRI); 2371353358Sdim case TargetOpcode::G_INTRINSIC: 2372353358Sdim return selectIntrinsic(I, MRI); 2373341825Sdim case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 2374353358Sdim return selectIntrinsicWithSideEffects(I, MRI); 2375341825Sdim case TargetOpcode::G_IMPLICIT_DEF: { 2376321369Sdim I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 2377341825Sdim const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2378353358Sdim const Register DstReg = I.getOperand(0).getReg(); 2379341825Sdim const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2380341825Sdim const TargetRegisterClass *DstRC = 2381341825Sdim getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2382341825Sdim RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 2383321369Sdim return true; 2384311116Sdim } 2385341825Sdim case TargetOpcode::G_BLOCK_ADDR: { 2386341825Sdim if (TM.getCodeModel() == CodeModel::Large) { 2387341825Sdim materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); 2388341825Sdim I.eraseFromParent(); 2389341825Sdim return true; 2390341825Sdim } else { 2391341825Sdim I.setDesc(TII.get(AArch64::MOVaddrBA)); 2392341825Sdim auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), 2393341825Sdim I.getOperand(0).getReg()) 2394341825Sdim .addBlockAddress(I.getOperand(1).getBlockAddress(), 2395341825Sdim /* Offset */ 0, AArch64II::MO_PAGE) 2396341825Sdim .addBlockAddress( 2397341825Sdim I.getOperand(1).getBlockAddress(), /* Offset */ 0, 2398341825Sdim AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 2399341825Sdim I.eraseFromParent(); 2400341825Sdim return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 2401341825Sdim } 2402341825Sdim } 2403353358Sdim case TargetOpcode::G_INTRINSIC_TRUNC: 2404353358Sdim return selectIntrinsicTrunc(I, MRI); 2405353358Sdim case TargetOpcode::G_INTRINSIC_ROUND: 2406353358Sdim return selectIntrinsicRound(I, MRI); 2407344779Sdim case TargetOpcode::G_BUILD_VECTOR: 
// (Tail of AArch64InstructionSelector::select(): remaining vector/jump-table
// opcodes are dispatched to dedicated helper routines.)
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
    return selectMergeValues(I, MRI);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  }

  // Unhandled opcode: let selection fail so a fallback (if any) can run.
  return false;
}

/// Select a G_BRJT (jump-table branch). Emits a JumpTableDest32 pseudo that
/// computes the destination from the table address and index (also defining a
/// scratch register), followed by an indirect BR to the computed target.
/// The original G_BRJT is erased.
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register Index = I.getOperand(2).getReg();
  MachineIRBuilder MIB(I);

  // JumpTableDest32 defines both the branch target and a scratch register
  // used while expanding the pseudo.
  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
                 {JTAddr, Index})
      .addJumpTableIndex(JTI);

  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  return true;
}

/// Select a G_JUMP_TABLE (materialize the address of a jump table).
/// Emits a MOVaddrJT pseudo carrying the ADRP-page and page-offset relocation
/// flavors of the jump-table index; it is expanded to ADRP + ADD later.
bool AArch64InstructionSelector::selectJumpTable(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
  MachineIRBuilder MIB(I);
  auto MovMI =
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}

/// Select a TLS global-value access. Only handled for Mach-O targets here:
/// loads the TLS descriptor via LOADgot/LDRXui into X0 and calls the thunk
/// with BLR, then copies X0 into the destination register.
/// Returns false (selection fails) on non-Mach-O targets.
bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  if (!STI.isTargetMachO())
    return false;
  MachineFunction &MF = *I.getParent()->getParent();
  // The emitted BLR below is a call, so the frame must be call-ready.
  MF.getFrameInfo().setAdjustsStack(true);

  const GlobalValue &GV = *I.getOperand(1).getGlobal();
  MachineIRBuilder MIB(I);

  MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
      .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);

  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {Register(AArch64::X0)})
                  .addImm(0);

  // TLS calls preserve all registers except those that absolutely must be
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
  // silly).
  MIB.buildInstr(AArch64::BLR, {}, {Load})
      .addDef(AArch64::X0, RegState::Implicit)
      .addRegMask(TRI.getTLSCallPreservedMask());

  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
  RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
                               MRI);
  I.eraseFromParent();
  return true;
}

/// Select G_INTRINSIC_TRUNC (round toward zero) by picking the matching
/// FRINTZ opcode for the scalar or vector type, then re-using the generic
/// instruction in place via setDesc().
bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    // NOTE(review): `default:` falls through into the 16-bit case, so any
    // unhandled scalar size selects FRINTZHr — confirm this is intended.
    default:
    case 16:
      Opc = AArch64::FRINTZHr;
      break;
    case 32:
      Opc = AArch64::FRINTZSr;
      break;
    case 64:
      Opc = AArch64::FRINTZDr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTZv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTZv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTZv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f64;
      break;
// (Continuation of selectIntrinsicTrunc: close the vector-type switch and
// finish selection.)
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

/// Select G_INTRINSIC_ROUND (round to nearest, ties away from zero) by
/// picking the matching FRINTA opcode for the scalar or vector type, then
/// re-using the generic instruction in place via setDesc().
bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    // NOTE(review): `default:` falls through into the 16-bit case, so any
    // unhandled scalar size selects FRINTAHr — confirm this is intended.
    default:
    case 16:
      Opc = AArch64::FRINTAHr;
      break;
    case 32:
      Opc = AArch64::FRINTASr;
      break;
    case 64:
      Opc = AArch64::FRINTADr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTAv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTAv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTAv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

/// Select a vector G_ICMP. Maps the IR predicate onto the AArch64 SIMD
/// compare instructions (CMEQ/CMHI/CMHS/CMGT/CMGE families), commuting
/// operands for the "less" forms and negating the CMEQ result for "ne".
bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  unsigned NumElts = DstTy.getNumElements();

  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
  // Third index is cc opcode:
  // 0 == eq
  // 1 == ugt
  // 2 == uge
  // 3 == ult
  // 4 == ule
  // 5 == sgt
  // 6 == sge
  // 7 == slt
  // 8 == sle
  // ne is done by negating 'eq' result.

  // This table below assumes that for some comparisons the operands will be
  // commuted.
  // ult op == commute + ugt op
  // ule op == commute + uge op
  // slt op == commute + sgt op
  // sle op == commute + sge op
  unsigned PredIdx = 0;
  bool SwapOperands = false;
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  switch (Pred) {
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
    // NE shares the EQ opcode; the result is inverted below via NotOpc.
    PredIdx = 0;
    break;
  case CmpInst::ICMP_UGT:
    PredIdx = 1;
    break;
  case CmpInst::ICMP_UGE:
    PredIdx = 2;
    break;
  case CmpInst::ICMP_ULT:
    PredIdx = 3;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_ULE:
    PredIdx = 4;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SGT:
    PredIdx = 5;
    break;
  case CmpInst::ICMP_SGE:
    PredIdx = 6;
    break;
  case CmpInst::ICMP_SLT:
    PredIdx = 7;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SLE:
    PredIdx = 8;
    SwapOperands = true;
    break;
  default:
    llvm_unreachable("Unhandled icmp predicate");
    return false;
  }

  // This table obviously should be tablegen'd when we have our GISel native
  // tablegen selector.

  // OpcTable[element-size index][num-elements index][predicate index];
  // 0 entries mark type/predicate combinations with no single instruction.
  static const unsigned OpcTable[4][4][9] = {
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
      },
      {
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
      {
          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */},
          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
           0 /* invalid */}
      },
  };
  // Element sizes 8/16/32/64 map to indices 0..3; counts 2/4/8/16 likewise.
  unsigned EltIdx = Log2_32(SrcEltSize / 8);
  unsigned NumEltsIdx = Log2_32(NumElts / 2);
  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
  if (!Opc) {
    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
    return false;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  const TargetRegisterClass *SrcRC =
      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
  if (!SrcRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return false;
  }

  // For 'ne' we invert the CMEQ result with NOT; pick the 64/128-bit form.
  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
  if (SrcTy.getSizeInBits() == 128)
    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;

  if (SwapOperands)
    std::swap(SrcReg, Src2Reg);

  MachineIRBuilder MIB(I);
  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);

  // Invert if we had a 'ne' cc.
  if (NotOpc) {
    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  } else {
    MIB.buildCopy(DstReg, Cmp.getReg(0));
  }
  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
  I.eraseFromParent();
  return true;
}

/// Materialize a scalar value into lane 0 of a vector register of class
/// \p DstRC: builds an IMPLICIT_DEF of the vector and INSERT_SUBREGs the
/// scalar into the subregister chosen by \p EltSize (hsub/ssub/dsub).
/// Returns the INSERT_SUBREG instruction, or nullptr for unsupported sizes.
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
    unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
    MachineIRBuilder &MIRBuilder) const {
  auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});

  auto BuildFn = [&](unsigned SubregIndex) {
    auto Ins =
        MIRBuilder
            .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
            .addImm(SubregIndex);
    constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
// (Continuation of emitScalarToVector: finish the insert-helper lambda and
// dispatch on element size.)
    return &*Ins;
  };

  switch (EltSize) {
  case 16:
    return BuildFn(AArch64::hsub);
  case 32:
    return BuildFn(AArch64::ssub);
  case 64:
    return BuildFn(AArch64::dsub);
  default:
    // No subregister index for this element size.
    return nullptr;
  }
}

/// Select a two-operand scalar G_MERGE_VALUES. Handles two cases:
///  - two s64 sources merged into an s128, via lane inserts into an
///    IMPLICIT_DEF temporary;
///  - two 32-bit GPR sources merged into a 64-bit GPR, via SUBREG_TO_REG
///    zero-extends of both halves and a BFMXri to combine them.
/// Anything else (including >2 sources) is rejected.
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  // Only the two-source form (dst + 2 operands) is handled here.
  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    MachineIRBuilder MIB(I);
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI =
        emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                               .addDef(SubToRegDef)
                               .addImm(0)
                               .addUse(I.getOperand(1).getReg())
                               .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef2)
                                .addImm(0)
                                .addUse(I.getOperand(2).getReg())
                                .addImm(AArch64::sub_32)
                                ;
  // BFMXri with immr=32, imms=31 inserts the second value into the high half.
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

/// Choose the lane-copy (CPYi*) opcode and extract subregister index for a
/// vector element of \p EltSize bits, writing them through the out-params.
/// Returns false for unsupported element sizes.
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
                              const unsigned EltSize) {
  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  switch (EltSize) {
  case 16:
    CopyOpc = AArch64::CPYi16;
    ExtractSubReg = AArch64::hsub;
    break;
  case 32:
    CopyOpc = AArch64::CPYi32;
    ExtractSubReg = AArch64::ssub;
    break;
  case 64:
    CopyOpc = AArch64::CPYi64;
    ExtractSubReg = AArch64::dsub;
    break;
  default:
    // Unknown size, bail out.
    LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
    return false;
  }
  return true;
}

/// Emit code extracting lane \p LaneIdx of \p VecReg into a scalar register
/// (creating a destination vreg when \p DstReg is None). Picks the lane-copy
/// opcode/subreg from the scalar size and resolves register classes from the
/// banks of source and destination. (Definition continues beyond this chunk.)
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  const LLT &VecTy = MRI.getType(VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // The register that we're going to copy into.
  Register InsertReg = VecReg;
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);
  // If the lane index is 0, we just use a subregister COPY.
2933353358Sdim if (LaneIdx == 0) { 2934353358Sdim auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 2935353358Sdim .addReg(VecReg, 0, ExtractSubReg); 2936353358Sdim RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 2937353358Sdim return &*Copy; 2938353358Sdim } 2939353358Sdim 2940353358Sdim // Lane copies require 128-bit wide registers. If we're dealing with an 2941353358Sdim // unpacked vector, then we need to move up to that width. Insert an implicit 2942353358Sdim // def and a subregister insert to get us there. 2943353358Sdim if (VecTy.getSizeInBits() != 128) { 2944353358Sdim MachineInstr *ScalarToVector = emitScalarToVector( 2945353358Sdim VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 2946353358Sdim if (!ScalarToVector) 2947353358Sdim return nullptr; 2948353358Sdim InsertReg = ScalarToVector->getOperand(0).getReg(); 2949353358Sdim } 2950353358Sdim 2951353358Sdim MachineInstr *LaneCopyMI = 2952353358Sdim MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 2953353358Sdim constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 2954353358Sdim 2955353358Sdim // Make sure that we actually constrain the initial copy. 
2956353358Sdim RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 2957353358Sdim return LaneCopyMI; 2958353358Sdim} 2959353358Sdim 2960353358Sdimbool AArch64InstructionSelector::selectExtractElt( 2961344779Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 2962353358Sdim assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 2963353358Sdim "unexpected opcode!"); 2964353358Sdim Register DstReg = I.getOperand(0).getReg(); 2965353358Sdim const LLT NarrowTy = MRI.getType(DstReg); 2966353358Sdim const Register SrcReg = I.getOperand(1).getReg(); 2967353358Sdim const LLT WideTy = MRI.getType(SrcReg); 2968353358Sdim (void)WideTy; 2969353358Sdim assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 2970353358Sdim "source register size too small!"); 2971353358Sdim assert(NarrowTy.isScalar() && "cannot extract vector into vector!"); 2972353358Sdim 2973353358Sdim // Need the lane index to determine the correct copy opcode. 2974353358Sdim MachineOperand &LaneIdxOp = I.getOperand(2); 2975353358Sdim assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 2976353358Sdim 2977353358Sdim if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 2978353358Sdim LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 2979353358Sdim return false; 2980353358Sdim } 2981353358Sdim 2982353358Sdim // Find the index to extract from. 
2983353358Sdim auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 2984353358Sdim if (!VRegAndVal) 2985353358Sdim return false; 2986353358Sdim unsigned LaneIdx = VRegAndVal->Value; 2987353358Sdim 2988353358Sdim MachineIRBuilder MIRBuilder(I); 2989353358Sdim 2990353358Sdim const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2991353358Sdim MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 2992353358Sdim LaneIdx, MIRBuilder); 2993353358Sdim if (!Extract) 2994353358Sdim return false; 2995353358Sdim 2996353358Sdim I.eraseFromParent(); 2997353358Sdim return true; 2998353358Sdim} 2999353358Sdim 3000353358Sdimbool AArch64InstructionSelector::selectSplitVectorUnmerge( 3001353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 3002353358Sdim unsigned NumElts = I.getNumOperands() - 1; 3003353358Sdim Register SrcReg = I.getOperand(NumElts).getReg(); 3004353358Sdim const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3005353358Sdim const LLT SrcTy = MRI.getType(SrcReg); 3006353358Sdim 3007353358Sdim assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 3008353358Sdim if (SrcTy.getSizeInBits() > 128) { 3009353358Sdim LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 3010353358Sdim return false; 3011353358Sdim } 3012353358Sdim 3013353358Sdim MachineIRBuilder MIB(I); 3014353358Sdim 3015353358Sdim // We implement a split vector operation by treating the sub-vectors as 3016353358Sdim // scalars and extracting them. 
/// Select a G_UNMERGE_VALUES on the FPR bank: unpack each scalar lane of the
/// source vector into its destination register.
/// Vector-typed destinations are delegated to selectSplitVectorUnmerge.
bool AArch64InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumElts).getReg();
  const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
  const LLT WideTy = MRI.getType(SrcReg);
  (void)WideTy;
  assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
         "can only unmerge from vector or s128 types!");
  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  // Vector destinations take the split-unmerge path instead.
  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  MachineIRBuilder MIB(I);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the source
  // directly. Otherwise, we need to do a bit of setup with some subregister
  // inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
                   ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(MBB, I, I.getDebugLoc(),
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(AArch64::dsub);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
  Register CopyTo = I.getOperand(0).getReg();
  auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
                       .addReg(InsertRegs[0], 0, ExtractSubReg);
  constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
             .addUse(InsReg)
             .addImm(LaneIdx);
    constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}

/// Select a G_CONCAT_VECTORS by delegating to emitVectorConcat.
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineIRBuilder MIRBuilder(I);
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

/// Create (or reuse) a constant-pool entry for \p CPVal and return its index.
/// Falls back to the type's alloc size when the data layout reports no
/// preferred alignment.
unsigned
AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
  if (Align == 0)
    Align = MF.getDataLayout().getTypeAllocSize(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Align);
}
&MF) const { 3163353358Sdim Type *CPTy = CPVal->getType(); 3164353358Sdim unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy); 3165353358Sdim if (Align == 0) 3166353358Sdim Align = MF.getDataLayout().getTypeAllocSize(CPTy); 3167353358Sdim 3168353358Sdim MachineConstantPool *MCP = MF.getConstantPool(); 3169353358Sdim return MCP->getConstantPoolIndex(CPVal, Align); 3170353358Sdim} 3171353358Sdim 3172353358SdimMachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( 3173353358Sdim Constant *CPVal, MachineIRBuilder &MIRBuilder) const { 3174353358Sdim unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF()); 3175353358Sdim 3176353358Sdim auto Adrp = 3177353358Sdim MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) 3178353358Sdim .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 3179353358Sdim 3180353358Sdim MachineInstr *LoadMI = nullptr; 3181353358Sdim switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) { 3182353358Sdim case 16: 3183353358Sdim LoadMI = 3184353358Sdim &*MIRBuilder 3185353358Sdim .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp}) 3186353358Sdim .addConstantPoolIndex(CPIdx, 0, 3187353358Sdim AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3188353358Sdim break; 3189353358Sdim case 8: 3190353358Sdim LoadMI = &*MIRBuilder 3191353358Sdim .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) 3192353358Sdim .addConstantPoolIndex( 3193353358Sdim CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3194353358Sdim break; 3195353358Sdim default: 3196353358Sdim LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " 3197353358Sdim << *CPVal->getType()); 3198353358Sdim return nullptr; 3199353358Sdim } 3200353358Sdim constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); 3201353358Sdim constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); 3202353358Sdim return LoadMI; 3203353358Sdim} 3204353358Sdim 3205353358Sdim/// Return an <Opcode, SubregIndex> pair to do an 
vector elt insert of a given 3206353358Sdim/// size and RB. 3207353358Sdimstatic std::pair<unsigned, unsigned> 3208353358SdimgetInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 3209353358Sdim unsigned Opc, SubregIdx; 3210344779Sdim if (RB.getID() == AArch64::GPRRegBankID) { 3211344779Sdim if (EltSize == 32) { 3212344779Sdim Opc = AArch64::INSvi32gpr; 3213344779Sdim SubregIdx = AArch64::ssub; 3214353358Sdim } else if (EltSize == 64) { 3215344779Sdim Opc = AArch64::INSvi64gpr; 3216344779Sdim SubregIdx = AArch64::dsub; 3217353358Sdim } else { 3218353358Sdim llvm_unreachable("invalid elt size!"); 3219344779Sdim } 3220344779Sdim } else { 3221353358Sdim if (EltSize == 8) { 3222353358Sdim Opc = AArch64::INSvi8lane; 3223353358Sdim SubregIdx = AArch64::bsub; 3224353358Sdim } else if (EltSize == 16) { 3225353358Sdim Opc = AArch64::INSvi16lane; 3226353358Sdim SubregIdx = AArch64::hsub; 3227353358Sdim } else if (EltSize == 32) { 3228344779Sdim Opc = AArch64::INSvi32lane; 3229344779Sdim SubregIdx = AArch64::ssub; 3230353358Sdim } else if (EltSize == 64) { 3231344779Sdim Opc = AArch64::INSvi64lane; 3232344779Sdim SubregIdx = AArch64::dsub; 3233353358Sdim } else { 3234353358Sdim llvm_unreachable("invalid elt size!"); 3235344779Sdim } 3236344779Sdim } 3237353358Sdim return std::make_pair(Opc, SubregIdx); 3238353358Sdim} 3239344779Sdim 3240353358SdimMachineInstr * 3241360784SdimAArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 3242360784Sdim MachineOperand &RHS, 3243360784Sdim MachineIRBuilder &MIRBuilder) const { 3244360784Sdim assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3245360784Sdim MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3246360784Sdim static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri}, 3247360784Sdim {AArch64::ADDWrr, AArch64::ADDWri}}; 3248360784Sdim bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32; 3249360784Sdim auto ImmFns = 
selectArithImmed(RHS); 3250360784Sdim unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3251360784Sdim auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()}); 3252360784Sdim 3253360784Sdim // If we matched a valid constant immediate, add those operands. 3254360784Sdim if (ImmFns) { 3255360784Sdim for (auto &RenderFn : *ImmFns) 3256360784Sdim RenderFn(AddMI); 3257360784Sdim } else { 3258360784Sdim AddMI.addUse(RHS.getReg()); 3259360784Sdim } 3260360784Sdim 3261360784Sdim constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI); 3262360784Sdim return &*AddMI; 3263360784Sdim} 3264360784Sdim 3265360784SdimMachineInstr * 3266353358SdimAArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 3267353358Sdim MachineIRBuilder &MIRBuilder) const { 3268353358Sdim assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3269353358Sdim MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3270353358Sdim static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri}, 3271353358Sdim {AArch64::ADDSWrr, AArch64::ADDSWri}}; 3272353358Sdim bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 3273353358Sdim auto ImmFns = selectArithImmed(RHS); 3274353358Sdim unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3275353358Sdim Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3276344779Sdim 3277353358Sdim auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()}); 3278353358Sdim 3279353358Sdim // If we matched a valid constant immediate, add those operands. 
3280353358Sdim if (ImmFns) { 3281353358Sdim for (auto &RenderFn : *ImmFns) 3282353358Sdim RenderFn(CmpMI); 3283353358Sdim } else { 3284353358Sdim CmpMI.addUse(RHS.getReg()); 3285353358Sdim } 3286353358Sdim 3287353358Sdim constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 3288353358Sdim return &*CmpMI; 3289353358Sdim} 3290353358Sdim 3291353358SdimMachineInstr * 3292353358SdimAArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS, 3293353358Sdim MachineIRBuilder &MIRBuilder) const { 3294353358Sdim MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3295353358Sdim unsigned RegSize = MRI.getType(LHS).getSizeInBits(); 3296353358Sdim bool Is32Bit = (RegSize == 32); 3297353358Sdim static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri}, 3298353358Sdim {AArch64::ANDSWrr, AArch64::ANDSWri}}; 3299353358Sdim Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3300353358Sdim 3301353358Sdim // We might be able to fold in an immediate into the TST. We need to make sure 3302353358Sdim // it's a logical immediate though, since ANDS requires that. 
3303353358Sdim auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); 3304353358Sdim bool IsImmForm = ValAndVReg.hasValue() && 3305353358Sdim AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize); 3306353358Sdim unsigned Opc = OpcTable[Is32Bit][IsImmForm]; 3307353358Sdim auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS}); 3308353358Sdim 3309353358Sdim if (IsImmForm) 3310353358Sdim TstMI.addImm( 3311353358Sdim AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize)); 3312353358Sdim else 3313353358Sdim TstMI.addUse(RHS); 3314353358Sdim 3315353358Sdim constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 3316353358Sdim return &*TstMI; 3317353358Sdim} 3318353358Sdim 3319353358SdimMachineInstr *AArch64InstructionSelector::emitIntegerCompare( 3320353358Sdim MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 3321353358Sdim MachineIRBuilder &MIRBuilder) const { 3322353358Sdim assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3323353358Sdim MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3324353358Sdim 3325353358Sdim // Fold the compare if possible. 3326353358Sdim MachineInstr *FoldCmp = 3327353358Sdim tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder); 3328353358Sdim if (FoldCmp) 3329353358Sdim return FoldCmp; 3330353358Sdim 3331353358Sdim // Can't fold into a CMN. Just emit a normal compare. 
/// Concatenate two equal-typed 64-bit vectors into a 128-bit result,
/// optionally into \p Dst (a fresh virtual register is created otherwise).
/// \returns the final insert instruction, or nullptr when the operand types
/// differ or are not 64-bit vectors.
MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
    return nullptr;
  }

  // Treat each 64-bit source vector as one scalar lane of the 128-bit result.
  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  // Widen both operands to the destination width.
  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  // Insert lane 0 of the widened second operand into lane 1 of the result.
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}
3411353358Sdim unsigned InsertOpc, InsSubRegIdx; 3412353358Sdim std::tie(InsertOpc, InsSubRegIdx) = 3413353358Sdim getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); 3414353358Sdim 3415353358Sdim if (!Dst) 3416353358Sdim Dst = MRI.createVirtualRegister(DstRC); 3417353358Sdim auto InsElt = 3418353358Sdim MIRBuilder 3419353358Sdim .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()}) 3420353358Sdim .addImm(1) /* Lane index */ 3421353358Sdim .addUse(WidenedOp2->getOperand(0).getReg()) 3422353358Sdim .addImm(0); 3423353358Sdim constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 3424353358Sdim return &*InsElt; 3425353358Sdim} 3426353358Sdim 3427353358SdimMachineInstr *AArch64InstructionSelector::emitFMovForFConstant( 3428353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 3429353358Sdim assert(I.getOpcode() == TargetOpcode::G_FCONSTANT && 3430353358Sdim "Expected a G_FCONSTANT!"); 3431353358Sdim MachineOperand &ImmOp = I.getOperand(1); 3432353358Sdim unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); 3433353358Sdim 3434353358Sdim // Only handle 32 and 64 bit defs for now. 3435353358Sdim if (DefSize != 32 && DefSize != 64) 3436353358Sdim return nullptr; 3437353358Sdim 3438353358Sdim // Don't handle null values using FMOV. 3439353358Sdim if (ImmOp.getFPImm()->isNullValue()) 3440353358Sdim return nullptr; 3441353358Sdim 3442353358Sdim // Get the immediate representation for the FMOV. 3443353358Sdim const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF(); 3444353358Sdim int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF) 3445353358Sdim : AArch64_AM::getFP64Imm(ImmValAPF); 3446353358Sdim 3447353358Sdim // If this is -1, it means the immediate can't be represented as the requested 3448353358Sdim // floating point value. Bail. 3449353358Sdim if (Imm == -1) 3450353358Sdim return nullptr; 3451353358Sdim 3452353358Sdim // Update MI to represent the new FMOV instruction, constrain it, and return. 
/// Materialize the boolean result of an integer compare into \p DefReg.
/// Emits CSINC wzr, wzr with the *inverted* condition, since CSINC produces
/// 1 when its condition is false.
MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
  auto I = MIRBuilder
               .buildInstr(AArch64::CSINCWr, {DefReg},
                           {Register(AArch64::WZR), Register(AArch64::WZR)})
               .addImm(InvCC);
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
  return &*I;
}

/// Try to fold a G_ICMP/G_FCMP feeding a G_SELECT into a single
/// compare + CSEL/FCSEL pair instead of the generic two-step selection.
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  // Walk back through single-use COPY/G_TRUNC defs to find it.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
      return false;

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    // Integer compare: re-emit it as a flag-setting compare feeding the CSEL.
    CondCode = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
    if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                            CondDef->getOperand(1), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  } else {
    // Get the condition code for the select.
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
        CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    // Make sure we'll be able to select the compare.
    unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
    if (!CmpOpc)
      return false;

    // Emit a new compare.
    // The *ri forms compare against zero and take a single source operand.
    auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
      Cmp.addUse(CondDef->getOperand(3).getReg());
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  }

  // Emit the select.
  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
  auto CSel =
      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
          .addImm(CondCode);
  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Helper lambda to detect the subtract followed by the compare.
  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
      return false;

    // Need to make sure NZCV is the same at the end of the transformation.
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
      return false;

    // We want to match against SUBs.
3597353358Sdim if (DefMI->getOpcode() != TargetOpcode::G_SUB) 3598353358Sdim return false; 3599353358Sdim 3600353358Sdim // Make sure that we're getting 3601353358Sdim // x = G_SUB 0, y 3602353358Sdim auto ValAndVReg = 3603353358Sdim getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI); 3604353358Sdim if (!ValAndVReg || ValAndVReg->Value != 0) 3605353358Sdim return false; 3606353358Sdim 3607353358Sdim // This can safely be represented as a CMN. 3608353358Sdim return true; 3609353358Sdim }; 3610353358Sdim 3611353358Sdim // Check if the RHS or LHS of the G_ICMP is defined by a SUB 3612353358Sdim MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); 3613353358Sdim MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); 3614353358Sdim CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); 3615353358Sdim const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); 3616353358Sdim 3617353358Sdim // Given this: 3618353358Sdim // 3619353358Sdim // x = G_SUB 0, y 3620353358Sdim // G_ICMP x, z 3621353358Sdim // 3622353358Sdim // Produce this: 3623353358Sdim // 3624353358Sdim // cmn y, z 3625353358Sdim if (IsCMN(LHSDef, CC)) 3626353358Sdim return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); 3627353358Sdim 3628353358Sdim // Same idea here, but with the RHS of the compare instead: 3629353358Sdim // 3630353358Sdim // Given this: 3631353358Sdim // 3632353358Sdim // x = G_SUB 0, y 3633353358Sdim // G_ICMP z, x 3634353358Sdim // 3635353358Sdim // Produce this: 3636353358Sdim // 3637353358Sdim // cmn z, y 3638353358Sdim if (IsCMN(RHSDef, CC)) 3639353358Sdim return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); 3640353358Sdim 3641353358Sdim // Given this: 3642353358Sdim // 3643353358Sdim // z = G_AND x, y 3644353358Sdim // G_ICMP z, 0 3645353358Sdim // 3646353358Sdim // Produce this if the compare is signed: 3647353358Sdim // 3648353358Sdim // tst x, y 3649353358Sdim if (!isUnsignedICMPPred(P) && LHSDef && 3650353358Sdim 
LHSDef->getOpcode() == TargetOpcode::G_AND) { 3651353358Sdim // Make sure that the RHS is 0. 3652353358Sdim auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); 3653353358Sdim if (!ValAndVReg || ValAndVReg->Value != 0) 3654353358Sdim return nullptr; 3655353358Sdim 3656353358Sdim return emitTST(LHSDef->getOperand(1).getReg(), 3657353358Sdim LHSDef->getOperand(2).getReg(), MIRBuilder); 3658353358Sdim } 3659353358Sdim 3660353358Sdim return nullptr; 3661353358Sdim} 3662353358Sdim 3663353358Sdimbool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const { 3664353358Sdim // Try to match a vector splat operation into a dup instruction. 3665353358Sdim // We're looking for this pattern: 3666353358Sdim // %scalar:gpr(s64) = COPY $x0 3667353358Sdim // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF 3668353358Sdim // %cst0:gpr(s32) = G_CONSTANT i32 0 3669353358Sdim // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32) 3670353358Sdim // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32) 3671353358Sdim // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, 3672353358Sdim // %zerovec(<2 x s32>) 3673353358Sdim // 3674353358Sdim // ...into: 3675353358Sdim // %splat = DUP %scalar 3676353358Sdim // We use the regbank of the scalar to determine which kind of dup to use. 3677353358Sdim MachineIRBuilder MIB(I); 3678353358Sdim MachineRegisterInfo &MRI = *MIB.getMRI(); 3679353358Sdim const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); 3680353358Sdim using namespace TargetOpcode; 3681353358Sdim using namespace MIPatternMatch; 3682353358Sdim 3683353358Sdim // Begin matching the insert. 3684353358Sdim auto *InsMI = 3685353358Sdim getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI); 3686353358Sdim if (!InsMI) 3687353358Sdim return false; 3688353358Sdim // Match the undef vector operand. 
3689353358Sdim auto *UndefMI = 3690353358Sdim getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI); 3691353358Sdim if (!UndefMI) 3692353358Sdim return false; 3693353358Sdim // Match the scalar being splatted. 3694353358Sdim Register ScalarReg = InsMI->getOperand(2).getReg(); 3695353358Sdim const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI); 3696353358Sdim // Match the index constant 0. 3697353358Sdim int64_t Index = 0; 3698353358Sdim if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index) 3699353358Sdim return false; 3700353358Sdim 3701353358Sdim // The shuffle's second operand doesn't matter if the mask is all zero. 3702360784Sdim ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); 3703360784Sdim if (!all_of(Mask, [](int Elem) { return Elem == 0; })) 3704353358Sdim return false; 3705353358Sdim 3706353358Sdim // We're done, now find out what kind of splat we need. 3707353358Sdim LLT VecTy = MRI.getType(I.getOperand(0).getReg()); 3708353358Sdim LLT EltTy = VecTy.getElementType(); 3709360784Sdim if (EltTy.getSizeInBits() < 32) { 3710360784Sdim LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet"); 3711353358Sdim return false; 3712353358Sdim } 3713353358Sdim bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID; 3714360784Sdim unsigned Opc = 0; 3715360784Sdim if (IsFP) { 3716360784Sdim switch (EltTy.getSizeInBits()) { 3717360784Sdim case 32: 3718360784Sdim if (VecTy.getNumElements() == 2) { 3719360784Sdim Opc = AArch64::DUPv2i32lane; 3720360784Sdim } else { 3721360784Sdim Opc = AArch64::DUPv4i32lane; 3722360784Sdim assert(VecTy.getNumElements() == 4); 3723360784Sdim } 3724360784Sdim break; 3725360784Sdim case 64: 3726360784Sdim assert(VecTy.getNumElements() == 2 && "Unexpected num elts"); 3727360784Sdim Opc = AArch64::DUPv2i64lane; 3728360784Sdim break; 3729360784Sdim } 3730360784Sdim } else { 3731360784Sdim switch (EltTy.getSizeInBits()) { 3732360784Sdim case 32: 3733360784Sdim if 
(VecTy.getNumElements() == 2) { 3734360784Sdim Opc = AArch64::DUPv2i32gpr; 3735360784Sdim } else { 3736360784Sdim Opc = AArch64::DUPv4i32gpr; 3737360784Sdim assert(VecTy.getNumElements() == 4); 3738360784Sdim } 3739360784Sdim break; 3740360784Sdim case 64: 3741360784Sdim assert(VecTy.getNumElements() == 2 && "Unexpected num elts"); 3742360784Sdim Opc = AArch64::DUPv2i64gpr; 3743360784Sdim break; 3744360784Sdim } 3745360784Sdim } 3746360784Sdim assert(Opc && "Did not compute an opcode for a dup"); 3747353358Sdim 3748353358Sdim // For FP splats, we need to widen the scalar reg via undef too. 3749353358Sdim if (IsFP) { 3750353358Sdim MachineInstr *Widen = emitScalarToVector( 3751353358Sdim EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB); 3752353358Sdim if (!Widen) 3753353358Sdim return false; 3754353358Sdim ScalarReg = Widen->getOperand(0).getReg(); 3755353358Sdim } 3756353358Sdim auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg}); 3757353358Sdim if (IsFP) 3758353358Sdim Dup.addImm(0); 3759353358Sdim constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI); 3760353358Sdim I.eraseFromParent(); 3761353358Sdim return true; 3762353358Sdim} 3763353358Sdim 3764353358Sdimbool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const { 3765353358Sdim if (TM.getOptLevel() == CodeGenOpt::None) 3766353358Sdim return false; 3767353358Sdim if (tryOptVectorDup(I)) 3768353358Sdim return true; 3769353358Sdim return false; 3770353358Sdim} 3771353358Sdim 3772353358Sdimbool AArch64InstructionSelector::selectShuffleVector( 3773353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 3774353358Sdim if (tryOptVectorShuffle(I)) 3775353358Sdim return true; 3776353358Sdim const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3777353358Sdim Register Src1Reg = I.getOperand(1).getReg(); 3778353358Sdim const LLT Src1Ty = MRI.getType(Src1Reg); 3779353358Sdim Register Src2Reg = I.getOperand(2).getReg(); 3780353358Sdim const LLT Src2Ty = 
MRI.getType(Src2Reg); 3781360784Sdim ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); 3782353358Sdim 3783353358Sdim MachineBasicBlock &MBB = *I.getParent(); 3784321369Sdim MachineFunction &MF = *MBB.getParent(); 3785353358Sdim LLVMContext &Ctx = MF.getFunction().getContext(); 3786321369Sdim 3787353358Sdim // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if 3788353358Sdim // it's originated from a <1 x T> type. Those should have been lowered into 3789353358Sdim // G_BUILD_VECTOR earlier. 3790353358Sdim if (!Src1Ty.isVector() || !Src2Ty.isVector()) { 3791353358Sdim LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); 3792353358Sdim return false; 3793353358Sdim } 3794353358Sdim 3795353358Sdim unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; 3796353358Sdim 3797353358Sdim SmallVector<Constant *, 64> CstIdxs; 3798360784Sdim for (int Val : Mask) { 3799353358Sdim // For now, any undef indexes we'll just assume to be 0. This should be 3800353358Sdim // optimized in future, e.g. to select DUP etc. 3801360784Sdim Val = Val < 0 ? 0 : Val; 3802353358Sdim for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { 3803353358Sdim unsigned Offset = Byte + Val * BytesPerElt; 3804353358Sdim CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); 3805353358Sdim } 3806353358Sdim } 3807353358Sdim 3808353358Sdim MachineIRBuilder MIRBuilder(I); 3809353358Sdim 3810353358Sdim // Use a constant pool to load the index vector for TBL. 
3811353358Sdim Constant *CPVal = ConstantVector::get(CstIdxs); 3812353358Sdim MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder); 3813353358Sdim if (!IndexLoad) { 3814353358Sdim LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); 3815353358Sdim return false; 3816353358Sdim } 3817353358Sdim 3818353358Sdim if (DstTy.getSizeInBits() != 128) { 3819353358Sdim assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); 3820353358Sdim // This case can be done with TBL1. 3821353358Sdim MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder); 3822353358Sdim if (!Concat) { 3823353358Sdim LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); 3824353358Sdim return false; 3825353358Sdim } 3826353358Sdim 3827353358Sdim // The constant pool load will be 64 bits, so need to convert to FPR128 reg. 3828353358Sdim IndexLoad = 3829353358Sdim emitScalarToVector(64, &AArch64::FPR128RegClass, 3830353358Sdim IndexLoad->getOperand(0).getReg(), MIRBuilder); 3831353358Sdim 3832353358Sdim auto TBL1 = MIRBuilder.buildInstr( 3833353358Sdim AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, 3834353358Sdim {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); 3835353358Sdim constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); 3836353358Sdim 3837353358Sdim auto Copy = 3838353358Sdim MIRBuilder 3839353358Sdim .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 3840353358Sdim .addReg(TBL1.getReg(0), 0, AArch64::dsub); 3841353358Sdim RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); 3842353358Sdim I.eraseFromParent(); 3843353358Sdim return true; 3844353358Sdim } 3845353358Sdim 3846353358Sdim // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive 3847353358Sdim // Q registers for regalloc. 
3848353358Sdim auto RegSeq = MIRBuilder 3849353358Sdim .buildInstr(TargetOpcode::REG_SEQUENCE, 3850353358Sdim {&AArch64::QQRegClass}, {Src1Reg}) 3851353358Sdim .addImm(AArch64::qsub0) 3852353358Sdim .addUse(Src2Reg) 3853353358Sdim .addImm(AArch64::qsub1); 3854353358Sdim 3855353358Sdim auto TBL2 = 3856353358Sdim MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()}, 3857353358Sdim {RegSeq, IndexLoad->getOperand(0).getReg()}); 3858353358Sdim constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI); 3859353358Sdim constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); 3860353358Sdim I.eraseFromParent(); 3861353358Sdim return true; 3862353358Sdim} 3863353358Sdim 3864353358SdimMachineInstr *AArch64InstructionSelector::emitLaneInsert( 3865353358Sdim Optional<Register> DstReg, Register SrcReg, Register EltReg, 3866353358Sdim unsigned LaneIdx, const RegisterBank &RB, 3867353358Sdim MachineIRBuilder &MIRBuilder) const { 3868353358Sdim MachineInstr *InsElt = nullptr; 3869353358Sdim const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 3870353358Sdim MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 3871353358Sdim 3872353358Sdim // Create a register to define with the insert if one wasn't passed in. 
3873353358Sdim if (!DstReg) 3874353358Sdim DstReg = MRI.createVirtualRegister(DstRC); 3875353358Sdim 3876353358Sdim unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); 3877353358Sdim unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; 3878353358Sdim 3879353358Sdim if (RB.getID() == AArch64::FPRRegBankID) { 3880353358Sdim auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); 3881353358Sdim InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 3882353358Sdim .addImm(LaneIdx) 3883353358Sdim .addUse(InsSub->getOperand(0).getReg()) 3884353358Sdim .addImm(0); 3885353358Sdim } else { 3886353358Sdim InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 3887353358Sdim .addImm(LaneIdx) 3888353358Sdim .addUse(EltReg); 3889353358Sdim } 3890353358Sdim 3891353358Sdim constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 3892353358Sdim return InsElt; 3893353358Sdim} 3894353358Sdim 3895353358Sdimbool AArch64InstructionSelector::selectInsertElt( 3896353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 3897353358Sdim assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); 3898353358Sdim 3899353358Sdim // Get information on the destination. 3900353358Sdim Register DstReg = I.getOperand(0).getReg(); 3901353358Sdim const LLT DstTy = MRI.getType(DstReg); 3902353358Sdim unsigned VecSize = DstTy.getSizeInBits(); 3903353358Sdim 3904353358Sdim // Get information on the element we want to insert into the destination. 3905353358Sdim Register EltReg = I.getOperand(2).getReg(); 3906353358Sdim const LLT EltTy = MRI.getType(EltReg); 3907353358Sdim unsigned EltSize = EltTy.getSizeInBits(); 3908353358Sdim if (EltSize < 16 || EltSize > 64) 3909353358Sdim return false; // Don't support all element types yet. 3910353358Sdim 3911353358Sdim // Find the definition of the index. Bail out if it's not defined by a 3912353358Sdim // G_CONSTANT. 
3913353358Sdim Register IdxReg = I.getOperand(3).getReg(); 3914353358Sdim auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); 3915353358Sdim if (!VRegAndVal) 3916353358Sdim return false; 3917353358Sdim unsigned LaneIdx = VRegAndVal->Value; 3918353358Sdim 3919353358Sdim // Perform the lane insert. 3920353358Sdim Register SrcReg = I.getOperand(1).getReg(); 3921353358Sdim const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 3922353358Sdim MachineIRBuilder MIRBuilder(I); 3923353358Sdim 3924353358Sdim if (VecSize < 128) { 3925353358Sdim // If the vector we're inserting into is smaller than 128 bits, widen it 3926353358Sdim // to 128 to do the insert. 3927353358Sdim MachineInstr *ScalarToVec = emitScalarToVector( 3928353358Sdim VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder); 3929353358Sdim if (!ScalarToVec) 3930353358Sdim return false; 3931353358Sdim SrcReg = ScalarToVec->getOperand(0).getReg(); 3932353358Sdim } 3933353358Sdim 3934353358Sdim // Create an insert into a new FPR128 register. 3935353358Sdim // Note that if our vector is already 128 bits, we end up emitting an extra 3936353358Sdim // register. 3937353358Sdim MachineInstr *InsMI = 3938353358Sdim emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder); 3939353358Sdim 3940353358Sdim if (VecSize < 128) { 3941353358Sdim // If we had to widen to perform the insert, then we have to demote back to 3942353358Sdim // the original size to get the result we want. 
3943353358Sdim Register DemoteVec = InsMI->getOperand(0).getReg(); 3944353358Sdim const TargetRegisterClass *RC = 3945353358Sdim getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); 3946353358Sdim if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 3947353358Sdim LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 3948353358Sdim return false; 3949353358Sdim } 3950353358Sdim unsigned SubReg = 0; 3951353358Sdim if (!getSubRegForClass(RC, TRI, SubReg)) 3952353358Sdim return false; 3953353358Sdim if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 3954353358Sdim LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize 3955353358Sdim << "\n"); 3956353358Sdim return false; 3957353358Sdim } 3958353358Sdim MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 3959353358Sdim .addReg(DemoteVec, 0, SubReg); 3960353358Sdim RBI.constrainGenericRegister(DstReg, *RC, MRI); 3961353358Sdim } else { 3962353358Sdim // No widening needed. 3963353358Sdim InsMI->getOperand(0).setReg(DstReg); 3964353358Sdim constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 3965353358Sdim } 3966353358Sdim 3967353358Sdim I.eraseFromParent(); 3968353358Sdim return true; 3969353358Sdim} 3970353358Sdim 3971353358Sdimbool AArch64InstructionSelector::selectBuildVector( 3972353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 3973353358Sdim assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 3974353358Sdim // Until we port more of the optimized selections, for now just use a vector 3975353358Sdim // insert sequence. 3976353358Sdim const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3977353358Sdim const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); 3978353358Sdim unsigned EltSize = EltTy.getSizeInBits(); 3979353358Sdim if (EltSize < 16 || EltSize > 64) 3980353358Sdim return false; // Don't support all element types yet. 
3981353358Sdim const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 3982353358Sdim MachineIRBuilder MIRBuilder(I); 3983353358Sdim 3984353358Sdim const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 3985353358Sdim MachineInstr *ScalarToVec = 3986353358Sdim emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, 3987353358Sdim I.getOperand(1).getReg(), MIRBuilder); 3988353358Sdim if (!ScalarToVec) 3989353358Sdim return false; 3990353358Sdim 3991353358Sdim Register DstVec = ScalarToVec->getOperand(0).getReg(); 3992353358Sdim unsigned DstSize = DstTy.getSizeInBits(); 3993353358Sdim 3994353358Sdim // Keep track of the last MI we inserted. Later on, we might be able to save 3995353358Sdim // a copy using it. 3996353358Sdim MachineInstr *PrevMI = nullptr; 3997353358Sdim for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { 3998353358Sdim // Note that if we don't do a subregister copy, we can end up making an 3999353358Sdim // extra register. 4000353358Sdim PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, 4001353358Sdim MIRBuilder); 4002353358Sdim DstVec = PrevMI->getOperand(0).getReg(); 4003353358Sdim } 4004353358Sdim 4005353358Sdim // If DstTy's size in bits is less than 128, then emit a subregister copy 4006353358Sdim // from DstVec to the last register we've defined. 4007353358Sdim if (DstSize < 128) { 4008353358Sdim // Force this to be FPR using the destination vector. 
4009353358Sdim const TargetRegisterClass *RC = 4010353358Sdim getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize); 4011353358Sdim if (!RC) 4012353358Sdim return false; 4013353358Sdim if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4014353358Sdim LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4015353358Sdim return false; 4016353358Sdim } 4017353358Sdim 4018353358Sdim unsigned SubReg = 0; 4019353358Sdim if (!getSubRegForClass(RC, TRI, SubReg)) 4020353358Sdim return false; 4021353358Sdim if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 4022353358Sdim LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize 4023353358Sdim << "\n"); 4024353358Sdim return false; 4025353358Sdim } 4026353358Sdim 4027353358Sdim Register Reg = MRI.createVirtualRegister(RC); 4028353358Sdim Register DstReg = I.getOperand(0).getReg(); 4029353358Sdim 4030353358Sdim MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4031353358Sdim .addReg(DstVec, 0, SubReg); 4032353358Sdim MachineOperand &RegOp = I.getOperand(1); 4033353358Sdim RegOp.setReg(Reg); 4034353358Sdim RBI.constrainGenericRegister(DstReg, *RC, MRI); 4035353358Sdim } else { 4036353358Sdim // We don't need a subregister copy. Save a copy by re-using the 4037353358Sdim // destination register on the final insert. 4038353358Sdim assert(PrevMI && "PrevMI was null?"); 4039353358Sdim PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); 4040353358Sdim constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); 4041353358Sdim } 4042353358Sdim 4043353358Sdim I.eraseFromParent(); 4044353358Sdim return true; 4045353358Sdim} 4046353358Sdim 4047353358Sdim/// Helper function to find an intrinsic ID on an a MachineInstr. Returns the 4048353358Sdim/// ID if it exists, and 0 otherwise. 
4049353358Sdimstatic unsigned findIntrinsicID(MachineInstr &I) { 4050353358Sdim auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) { 4051353358Sdim return Op.isIntrinsicID(); 4052353358Sdim }); 4053353358Sdim if (IntrinOp == I.operands_end()) 4054353358Sdim return 0; 4055353358Sdim return IntrinOp->getIntrinsicID(); 4056353358Sdim} 4057353358Sdim 4058353358Sdimbool AArch64InstructionSelector::selectIntrinsicWithSideEffects( 4059353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 4060353358Sdim // Find the intrinsic ID. 4061353358Sdim unsigned IntrinID = findIntrinsicID(I); 4062353358Sdim if (!IntrinID) 4063353358Sdim return false; 4064353358Sdim MachineIRBuilder MIRBuilder(I); 4065353358Sdim 4066353358Sdim // Select the instruction. 4067353358Sdim switch (IntrinID) { 4068353358Sdim default: 4069353358Sdim return false; 4070353358Sdim case Intrinsic::trap: 4071353358Sdim MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1); 4072353358Sdim break; 4073353358Sdim case Intrinsic::debugtrap: 4074353358Sdim if (!STI.isTargetWindows()) 4075353358Sdim return false; 4076353358Sdim MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); 4077353358Sdim break; 4078353358Sdim } 4079353358Sdim 4080353358Sdim I.eraseFromParent(); 4081353358Sdim return true; 4082353358Sdim} 4083353358Sdim 4084353358Sdimbool AArch64InstructionSelector::selectIntrinsic( 4085353358Sdim MachineInstr &I, MachineRegisterInfo &MRI) const { 4086353358Sdim unsigned IntrinID = findIntrinsicID(I); 4087353358Sdim if (!IntrinID) 4088353358Sdim return false; 4089353358Sdim MachineIRBuilder MIRBuilder(I); 4090353358Sdim 4091353358Sdim switch (IntrinID) { 4092353358Sdim default: 4093353358Sdim break; 4094353358Sdim case Intrinsic::aarch64_crypto_sha1h: 4095353358Sdim Register DstReg = I.getOperand(0).getReg(); 4096353358Sdim Register SrcReg = I.getOperand(2).getReg(); 4097353358Sdim 4098353358Sdim // FIXME: Should this be an assert? 
4099353358Sdim if (MRI.getType(DstReg).getSizeInBits() != 32 || 4100353358Sdim MRI.getType(SrcReg).getSizeInBits() != 32) 4101353358Sdim return false; 4102353358Sdim 4103353358Sdim // The operation has to happen on FPRs. Set up some new FPR registers for 4104353358Sdim // the source and destination if they are on GPRs. 4105353358Sdim if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 4106353358Sdim SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4107353358Sdim MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)}); 4108353358Sdim 4109353358Sdim // Make sure the copy ends up getting constrained properly. 4110353358Sdim RBI.constrainGenericRegister(I.getOperand(2).getReg(), 4111353358Sdim AArch64::GPR32RegClass, MRI); 4112353358Sdim } 4113353358Sdim 4114353358Sdim if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) 4115353358Sdim DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4116353358Sdim 4117353358Sdim // Actually insert the instruction. 4118353358Sdim auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg}); 4119353358Sdim constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI); 4120353358Sdim 4121353358Sdim // Did we create a new register for the destination? 4122353358Sdim if (DstReg != I.getOperand(0).getReg()) { 4123353358Sdim // Yep. Copy the result of the instruction back into the original 4124353358Sdim // destination. 
4125353358Sdim MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg}); 4126353358Sdim RBI.constrainGenericRegister(I.getOperand(0).getReg(), 4127353358Sdim AArch64::GPR32RegClass, MRI); 4128353358Sdim } 4129353358Sdim 4130353358Sdim I.eraseFromParent(); 4131353358Sdim return true; 4132353358Sdim } 4133353358Sdim return false; 4134353358Sdim} 4135353358Sdim 4136353358Sdimstatic Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { 4137353358Sdim auto &MI = *Root.getParent(); 4138353358Sdim auto &MBB = *MI.getParent(); 4139353358Sdim auto &MF = *MBB.getParent(); 4140353358Sdim auto &MRI = MF.getRegInfo(); 4141321369Sdim uint64_t Immed; 4142321369Sdim if (Root.isImm()) 4143321369Sdim Immed = Root.getImm(); 4144321369Sdim else if (Root.isCImm()) 4145321369Sdim Immed = Root.getCImm()->getZExtValue(); 4146321369Sdim else if (Root.isReg()) { 4147353358Sdim auto ValAndVReg = 4148353358Sdim getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); 4149353358Sdim if (!ValAndVReg) 4150327952Sdim return None; 4151353358Sdim Immed = ValAndVReg->Value; 4152321369Sdim } else 4153327952Sdim return None; 4154353358Sdim return Immed; 4155353358Sdim} 4156321369Sdim 4157353358SdimInstructionSelector::ComplexRendererFns 4158353358SdimAArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { 4159353358Sdim auto MaybeImmed = getImmedFromMO(Root); 4160353358Sdim if (MaybeImmed == None || *MaybeImmed > 31) 4161353358Sdim return None; 4162353358Sdim uint64_t Enc = (32 - *MaybeImmed) & 0x1f; 4163353358Sdim return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4164353358Sdim} 4165353358Sdim 4166353358SdimInstructionSelector::ComplexRendererFns 4167353358SdimAArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { 4168353358Sdim auto MaybeImmed = getImmedFromMO(Root); 4169353358Sdim if (MaybeImmed == None || *MaybeImmed > 31) 4170353358Sdim return None; 4171353358Sdim uint64_t Enc = 31 - *MaybeImmed; 4172353358Sdim return 
{{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4173353358Sdim} 4174353358Sdim 4175353358SdimInstructionSelector::ComplexRendererFns 4176353358SdimAArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { 4177353358Sdim auto MaybeImmed = getImmedFromMO(Root); 4178353358Sdim if (MaybeImmed == None || *MaybeImmed > 63) 4179353358Sdim return None; 4180353358Sdim uint64_t Enc = (64 - *MaybeImmed) & 0x3f; 4181353358Sdim return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4182353358Sdim} 4183353358Sdim 4184353358SdimInstructionSelector::ComplexRendererFns 4185353358SdimAArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { 4186353358Sdim auto MaybeImmed = getImmedFromMO(Root); 4187353358Sdim if (MaybeImmed == None || *MaybeImmed > 63) 4188353358Sdim return None; 4189353358Sdim uint64_t Enc = 63 - *MaybeImmed; 4190353358Sdim return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4191353358Sdim} 4192353358Sdim 4193360784Sdim/// Helper to select an immediate value that can be represented as a 12-bit 4194360784Sdim/// value shifted left by either 0 or 12. If it is possible to do so, return 4195360784Sdim/// the immediate and shift value. If not, return None. 4196360784Sdim/// 4197360784Sdim/// Used by selectArithImmed and selectNegArithImmed. 
4198360784SdimInstructionSelector::ComplexRendererFns 4199360784SdimAArch64InstructionSelector::select12BitValueWithLeftShift( 4200360784Sdim uint64_t Immed) const { 4201360784Sdim unsigned ShiftAmt; 4202360784Sdim if (Immed >> 12 == 0) { 4203360784Sdim ShiftAmt = 0; 4204360784Sdim } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 4205360784Sdim ShiftAmt = 12; 4206360784Sdim Immed = Immed >> 12; 4207360784Sdim } else 4208360784Sdim return None; 4209360784Sdim 4210360784Sdim unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 4211360784Sdim return {{ 4212360784Sdim [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, 4213360784Sdim [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, 4214360784Sdim }}; 4215360784Sdim} 4216360784Sdim 4217353358Sdim/// SelectArithImmed - Select an immediate value that can be represented as 4218353358Sdim/// a 12-bit value shifted left by either 0 or 12. If so, return true with 4219353358Sdim/// Val set to the 12-bit value and Shift set to the shifter operand. 4220353358SdimInstructionSelector::ComplexRendererFns 4221353358SdimAArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { 4222353358Sdim // This function is called from the addsub_shifted_imm ComplexPattern, 4223353358Sdim // which lists [imm] as the list of opcode it's interested in, however 4224353358Sdim // we still need to check whether the operand is actually an immediate 4225353358Sdim // here because the ComplexPattern opcode list is only used in 4226353358Sdim // root-level opcode matching. 4227353358Sdim auto MaybeImmed = getImmedFromMO(Root); 4228353358Sdim if (MaybeImmed == None) 4229353358Sdim return None; 4230360784Sdim return select12BitValueWithLeftShift(*MaybeImmed); 4231360784Sdim} 4232360784Sdim 4233360784Sdim/// SelectNegArithImmed - As above, but negates the value before trying to 4234360784Sdim/// select it. 
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return None;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return None;

  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  // the root. Negate in the width of the value being compared so the wraparound
  // is correct.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  // The negated value must fit in the low 24 bits, i.e. be a 12-bit value
  // optionally shifted left by 12.
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return None;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}

/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasMinSize())
    return true;

  // It's better to avoid folding and recomputing shifts when we don't have a
  // fastpath.
  if (!STI.hasLSLFast())
    return false;

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

/// Try to fold a G_SHL or G_MUL defining \p Offset into an extended-register
/// addressing mode with base \p Base. \p SizeInBytes is the width of the
/// memory access, which fixes the only legal shift amount. When \p WantsExt
/// is true, additionally require (and fold) a zero/sign extend of the shifted
/// register. Returns None when the pattern does not apply.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
  if (!OffsetInst)
    return None;

  unsigned OffsetOpc = OffsetInst->getOpcode();
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
    return None;

  // Make sure that the memory op is a valid size. A shift of 0 (1-byte
  // access) gains nothing from this addressing mode.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value;

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!isPowerOf2_32(ImmVal))
      return None;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return None;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return None;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend.
    // NOTE(review): getDefIgnoringCopies is assumed to be non-null here for a
    // vreg that has a def — confirm against generic MIR invariants.
    MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
    auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;

    SignExtend = Ext == AArch64_AM::SXTW;

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = ExtInst->getOperand(1).getReg();
    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded.)
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  // NOTE(review): OffsetInst is dereferenced without a null check — assumes
  // every vreg reaching here has a def; verify.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           OffsetInst->getOperand(0), SizeInBytes,
                           /*WantsExt=*/false);
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}

/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // If we have a constant offset, then we probably don't want to match a
  // register offset.
  if (isBaseWithConstantOffset(Root, MRI))
    return None;

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}

/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // The address must come from a G_PTR_ADD that is worth folding.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  // NOTE(review): OffsetInst is dereferenced below without a null check —
  // assumes every vreg reaching here has a def; verify.
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return None;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg =
      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg. The final immediate selects sxtw/uxtw with
  // no shift.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
4557327952SdimInstructionSelector::ComplexRendererFns 4558327952SdimAArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, 4559327952Sdim unsigned Size) const { 4560327952Sdim MachineRegisterInfo &MRI = 4561327952Sdim Root.getParent()->getParent()->getParent()->getRegInfo(); 4562327952Sdim 4563327952Sdim if (!Root.isReg()) 4564327952Sdim return None; 4565327952Sdim 4566327952Sdim if (!isBaseWithConstantOffset(Root, MRI)) 4567327952Sdim return None; 4568327952Sdim 4569327952Sdim MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 4570327952Sdim if (!RootDef) 4571327952Sdim return None; 4572327952Sdim 4573327952Sdim MachineOperand &OffImm = RootDef->getOperand(2); 4574327952Sdim if (!OffImm.isReg()) 4575327952Sdim return None; 4576327952Sdim MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); 4577327952Sdim if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) 4578327952Sdim return None; 4579327952Sdim int64_t RHSC; 4580327952Sdim MachineOperand &RHSOp1 = RHS->getOperand(1); 4581327952Sdim if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) 4582327952Sdim return None; 4583327952Sdim RHSC = RHSOp1.getCImm()->getSExtValue(); 4584327952Sdim 4585327952Sdim // If the offset is valid as a scaled immediate, don't match here. 4586327952Sdim if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) 4587327952Sdim return None; 4588327952Sdim if (RHSC >= -256 && RHSC < 256) { 4589327952Sdim MachineOperand &Base = RootDef->getOperand(1); 4590327952Sdim return {{ 4591327952Sdim [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, 4592327952Sdim [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, 4593327952Sdim }}; 4594327952Sdim } 4595327952Sdim return None; 4596327952Sdim} 4597327952Sdim 4598327952Sdim/// Select a "register plus scaled unsigned 12-bit immediate" address. The 4599327952Sdim/// "Size" argument is the size in bytes of the memory reference, which 4600327952Sdim/// determines the scale. 
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  // A frame index can be used directly with offset 0.
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      // NOTE(review): relies on isBaseWithConstantOffset guaranteeing the RHS
      // def carries a CImm — getCImm() is called unchecked; verify.
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
      // The offset must be aligned to the access size and fit the unsigned
      // 12-bit scaled range.
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  // TODO: Handle AArch64_AM::ROR
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
///
/// TODO: Allow shifted register to be rotated in logical instructions.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  //
  // TODO: Handle AArch64_AM::ROR for logical instructions.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  if (!ShiftInst)
    return None;
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return None;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return None;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  // Mask the shift amount to the register width, as the hardware does.
  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}

/// Return the extend type (SXTB/SXTH/SXTW/UXTB/UXTH/UXTW) implied by \p MI,
/// or InvalidShiftExtend if MI is not an extend-like instruction. Handles
/// explicit G_SEXT/G_SEXT_INREG/G_ZEXT/G_ANYEXT and G_AND with an all-ones
/// mask. \p IsLoadStore restricts the result to extends valid in load/store
/// addressing modes (only 32-bit-source extends).
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::SXTB;
    case 16:
      return AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::UXTB;
    case 16:
      return AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

/// If \p ExtReg is wider than 32 bits, emit a subregister copy to GPR32 and
/// return the narrowed register; otherwise return \p ExtReg unchanged.
Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
    Register ExtReg, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  if (MRI.getType(ExtReg).getSizeInBits() == 32)
    return ExtReg;

  // Insert a copy to move ExtReg to GPR32.
  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});

  // Select the copy into a subregister copy.
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return None;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return None;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return None;
    ShiftVal = *MaybeShiftVal;
    // The arith-extended form only encodes shifts of 0-4.
    if (ShiftVal > 4)
      return None;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return None;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (ExtInst && isDef32(*ExtInst))
        return None;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

/// Render a G_CONSTANT's value as a plain (possibly truncated) immediate
/// operand on \p MIB.
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}

/// Render a G_CONSTANT as a 32-bit logical-immediate encoding.
void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

/// Render a G_CONSTANT as a 64-bit logical-immediate encoding.
void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

/// Return true if \p MI is a load or store whose single memory operand
/// accesses exactly \p NumBytes bytes.
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

/// Return true if \p MI defines a 32-bit value that is known to zero the
/// high half of the containing 64-bit register.
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

namespace llvm {
/// Factory entry point used by the AArch64 target to create the GlobalISel
/// instruction selector.
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
}