//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  void preISelLower(MachineInstr &I) const;

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  void contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;
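
  // Typically used to materialize FP constants that have no FMOV encoding;
  // the load is expected to lower to an ADRP of the constant-pool symbol
  // followed by an LDR from its low-12-bit page offset.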

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
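
  // For example, selectAddrModeIndexed<64> matches an 8-byte access, so a
  // G_PTR_ADD of a base pointer and a constant 32 feeding such an access can
  // be rendered as [base, #4]: the unsigned immediate is scaled by the
  // access size.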

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and
  /// whether or not a shift + extend should be folded into an addressing
  /// mode. Returns None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same; the only difference is that logical shifts
    // are also allowed to fold in rotates.
    return selectShiftedRegister(Root);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Instructions that accept extend modifiers like UXTW expect the register
  /// being extended to be a GPR32. Narrow \p ExtReg to a 32-bit register using
  /// a subregister copy if necessary. Return either ExtReg, or the result of
  /// the new copy.
  Register narrowExtendRegIfNeeded(Register ExtReg,
                                   MachineIRBuilder &MIB) const;
  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
  bool tryOptVectorShuffle(MachineInstr &I) const;
  bool tryOptVectorDup(MachineInstr &MI) const;
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}
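
// For example, 16 bits on the FPR bank gives FPR16, while any scalar of 32
// bits or fewer on the GPR bank gives GPR32 (GPR32all when GetAllRegSet is
// true).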

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are on the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}
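
// For example, a G_OR whose operands were assigned different register banks
// is reported as unsupported here rather than being selected incorrectly.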

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation
/// \p GenericOpc, appropriate for the (value) register bank \p RegBankID and
/// of memory access size \p OpSize. This returns the variant with the
/// base+unsigned-immediate addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}
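
// For example, selectLoadStoreUIOp(G_LOAD, GPRRegBankID, 32) is LDRWui: a
// 32-bit GPR load from [base + unsigned immediate], with the immediate
// scaled by the access size.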

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types; the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to move bits around: as long as we are
       // on the same register class, that's fine. Otherwise, we need
       // some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from
/// \p *From to \p *To, linking it up to \p I.
///
/// e.g., given I = "Dst = COPY SrcReg", we'll transform that into
///
/// CopyReg (From class) = COPY SrcReg
/// SubRegCopy (To class) = COPY CopyReg:SubReg
/// Dst = COPY SubRegCopy
static bool selectSubregisterCopy(MachineInstr &I, MachineRegisterInfo &MRI,
                                  const RegisterBankInfo &RBI, Register SrcReg,
                                  const TargetRegisterClass *From,
                                  const TargetRegisterClass *To,
                                  unsigned SubReg) {
  MachineIRBuilder MIB(I);
  auto Copy = MIB.buildCopy({From}, {SrcReg});
  auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {To}, {})
                        .addReg(Copy.getReg(0), 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't
  // want to verify that the src and dst are the same size, since that's
  // handled by the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    assert(KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI));
    (void)KnownValid;
    return true;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy, or
  // a SUBREG_TO_REG.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);

    // If we're doing a cross-bank copy on different-sized registers, we need
    // to do a bit more work.
    if (SrcSize > DstSize) {
      // We're doing a cross-bank copy into a smaller register. We need a
      // subregister copy. First, get a register class that's on the same bank
      // as the destination, but the same size as the source.
      const TargetRegisterClass *SubregRC =
          getMinClassForRegBank(DstRegBank, SrcSize, true);
      assert(SubregRC && "Didn't get a register class for subreg?");

      // Get the appropriate subregister for the destination.
      unsigned SubReg = 0;
      if (!getSubRegForClass(DstRC, TRI, SubReg)) {
        LLVM_DEBUG(dbgs() << "Couldn't determine subregister for copy.\n");
        return false;
      }

      // Now, insert a subregister copy using the new register class.
      selectSubregisterCopy(I, MRI, RBI, SrcReg, SubregRC, DstRC, SubReg);
      return CheckCopy();
    }

    // Is this a cross-bank copy?
    if (DstRegBank.getID() != SrcRegBank.getID()) {
      if (DstRegBank.getID() == AArch64::GPRRegBankID && DstSize == 32 &&
          SrcSize == 16) {
        // Special case for FPR16 to GPR32.
        // FIXME: This can probably be generalized like the above case.
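        // That is, wrap the f16 value in an FPR32 whose low 16 bits are the
        // source (SUBREG_TO_REG on hsub), then rewrite the copy to read the
        // widened register into the GPR32 destination.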
        Register PromoteReg =
            MRI.createVirtualRegister(&AArch64::FPR32RegClass);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
            .addImm(0)
            .addUse(SrcReg)
            .addImm(AArch64::hsub);
        MachineOperand &RegOp = I.getOperand(1);
        RegOp.setReg(PromoteReg);

        // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
        KnownValid = true;
      }
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  }
  return GenericOpc;
}

static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}
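
// For example, a G_SELECT producing an s32 on the GPR bank maps to CSELWr,
// and the same type on the FPR bank maps to FCSELSrrr; s64 and p0 results
// use CSELXr/FCSELDrrr.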

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm =
      getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    return true;
  }
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
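  // The CB(N)Z patterns below want the zero on the RHS, and eq/ne compares
  // are symmetric, so if the RHS is not a constant, try the swapped operands.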
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal)
    std::swap(RHS, LHS);

  VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!VRegAndVal || VRegAndVal->Value != 0) {
    MachineIRBuilder MIB(I);
    // If we can't select a CBZ then emit a cmp + Bcc.
    if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
                            CCMI->getOperand(1), MIB))
      return false;
    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
static Optional<int64_t> getVectorShiftImm(Register Reg,
                                           MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  assert(OpMI && "Expected to find a vreg def for vector shift operand");
  if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
    return None;

  // Check that all operands are identical immediates.
  int64_t ImmVal = 0;
  for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
    auto VRegAndVal =
        getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
    if (!VRegAndVal)
      return None;

    if (Idx == 1)
      ImmVal = VRegAndVal->Value;
    if (ImmVal != VRegAndVal->Value)
      return None;
  }

  return ImmVal;
}
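
// For example, a G_BUILD_VECTOR whose sources are four copies of
// G_CONSTANT i32 3 yields 3, while sources that are not all the same
// constant yield None.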

/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
                                         MachineRegisterInfo &MRI) {
  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  if (!ShiftImm)
    return None;
  // Check the immediate is in range for a SHL.
  int64_t Imm = *ShiftImm;
  if (Imm < 0)
    return None;
  switch (SrcTy.getElementType().getSizeInBits()) {
  default:
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return None;
  case 8:
    if (Imm > 7)
      return None;
    break;
  case 16:
    if (Imm > 15)
      return None;
    break;
  case 32:
    if (Imm > 31)
      return None;
    break;
  case 64:
    if (Imm > 63)
      return None;
    break;
  }
  return Imm;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Check if we have a vector of constants on RHS that we can select as the
  // immediate form.
  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
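
// For example, selectVectorSHL turns a <4 x s32> G_SHL whose shift operand is
// a splat of the constant 3 into SHLv4i32_shift %src, 3; a non-constant shift
// amount instead uses the register form, USHLv4i32.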

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no shift-right-register instruction, but the shift-left-register
  // instruction takes a signed amount, where a negative amount specifies a
  // right shift.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(2, 64)) {
    Opc = AArch64::SSHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
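
// The emitted sequence is roughly:
//   movz x0, #:abs_g0_nc:sym
//   movk x0, #:abs_g1_nc:sym, lsl #16
//   movk x0, #:abs_g2_nc:sym, lsl #32
//   movk x0, #:abs_g3:sym, lsl #48
// building up the 64-bit address 16 bits at a time.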

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg =
        ForceDstReg ? ForceDstReg
                    : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg =
      BuildMovK(MovZ.getReg(0), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
}

void AArch64InstructionSelector::preISelLower(MachineInstr &I) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // These shifts are legalized to have 64 bit shift amounts because we want
    // to take advantage of the existing imported selection patterns that
    // assume the immediates are s64s. However, if the shifted type is 32 bits
    // and for some reason we receive input GMIR that has an s64 shift amount
    // that's not a G_CONSTANT, insert a truncate so that we can still select
    // the s32 register-register variant.
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return;
    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
      return;
    auto *AmtMI = MRI.getVRegDef(ShiftReg);
    assert(AmtMI && "could not find a vreg definition for shift amount");
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      // Insert a subregister copy to implement a 64->32 trunc.
      MachineIRBuilder MIB(I);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    }
    return;
  }
  case TargetOpcode::G_STORE:
    contractCrossBankCopyIntoStore(I, MRI);
    return;
  default:
    return;
  }
}

bool AArch64InstructionSelector::earlySelectSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  MachineIRBuilder MIB(I);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});

  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
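
// For example, earlySelectSHL turns a 32-bit G_SHL by the constant 3 into
// UBFMWri %src, 29, 28: selectShiftA_32 renders immr = (32 - 3) & 31 = 29
// and selectShiftB_32 renders imms = 31 - 3 = 28, which is the UBFM encoding
// underlying the "lsl w0, w1, #3" alias.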

void AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  // If we're storing a scalar, it doesn't matter what register bank that
  // scalar is on. All that matters is the size.
  //
  // So, if we see something like this (with a 32-bit scalar as an example):
  //
  // %x:gpr(s32) = ... something ...
  // %y:fpr(s32) = COPY %x:gpr(s32)
  // G_STORE %y:fpr(s32)
  //
  // We can fix this up into something like this:
  //
  // G_STORE %x:gpr(s32)
  //
  // And then continue the selection process normally.
  MachineInstr *Def = getDefIgnoringCopies(I.getOperand(0).getReg(), MRI);
  if (!Def)
    return;
  Register DefDstReg = Def->getOperand(0).getReg();
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);

  // If we get something strange like a physical register, then we shouldn't
  // go any further.
  if (!DefDstTy.isValid())
    return;

  // Are the source and dst types the same size?
  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
    return;

  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
    return;

  // We have a cross-bank copy, which is entering a store. Let's fold it.
  I.getOperand(0).setReg(DefDstReg);
}

bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
    return earlySelectSHL(I, MRI);
  case TargetOpcode::G_CONSTANT: {
    bool IsZero = false;
    if (I.getOperand(1).isCImm())
      IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
    else if (I.getOperand(1).isImm())
      IsZero = I.getOperand(1).getImm() == 0;

    if (!IsZero)
      return false;

    Register DefReg = I.getOperand(0).getReg();
    LLT Ty = MRI.getType(DefReg);
    if (Ty != LLT::scalar(64) && Ty != LLT::scalar(32))
      return false;

    if (Ty == LLT::scalar(64)) {
      I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
    } else {
      I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
      RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
    }
    I.setDesc(TII.get(TargetOpcode::COPY));
    return true;
  }
  default:
    return false;
  }
}
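
// For example, the G_CONSTANT case above selects
//   %x:gpr(s32) = G_CONSTANT i32 0
// as a COPY of WZR, avoiding a MOVi32imm of zero.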

bool AArch64InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  // G_PHI requires the same handling as PHI.
  if (!isPreISelGenericOpcode(Opcode) || Opcode == TargetOpcode::G_PHI) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const RegClassOrRegBank &RegClassOrBank =
          MRI.getRegClassOrRegBank(DefReg);

      const TargetRegisterClass *DefRC =
          RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
      if (!DefRC) {
        if (!DefTy.isValid()) {
          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
          return false;
        }
        const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
        DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
        if (!DefRC) {
          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
          return false;
        }
      }

      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  // Try to do some lowering before we start instruction selecting. These
  // lowerings are purely transformations on the input G_MIR and so selection
  // must continue after any modification of the instruction.
  preISelLower(I);

  // The importer may select some patterns to a suboptimal sequence, so our
  // custom C++ selection code would never get a chance to work on them.
  // Therefore, we make an early selection attempt here to give priority to
  // certain selection routines over the imported ones.
  if (earlySelect(I))
    return true;

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};

  MachineIRBuilder MIB(I);

  switch (Opcode) {
  case TargetOpcode::G_BRCOND: {
    if (Ty.getSizeInBits() > 32) {
      // We shouldn't need this on AArch64, but it would be implemented as an
      // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if
      // the bit being tested is < 32.
      LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
                        << ", expected at most 32-bits");
      return false;
    }

    const Register CondReg = I.getOperand(0).getReg();
    MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

    // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
    // instructions will not be produced, as they are conditional branch
    // instructions that do not set flags.
    bool ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
      return true;

    if (ProduceNonFlagSettingCondBr) {
      auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
                     .addUse(CondReg)
                     .addImm(/*bit offset=*/0)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
    } else {
      auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
                     .addDef(AArch64::WZR)
                     .addUse(CondReg)
                     .addImm(1);
      constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
      auto Bcc = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
                     .addImm(AArch64CC::EQ)
                     .addMBB(DestMBB);

      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
    }
  }

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    Register DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
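    // REV32 on the v8i8 (64-bit) form handles v2s32, REV32 on v16i8 handles
    // v4s32, and REV64 on v16i8 handles v2s64; each reverses the bytes within
    // every element of the given width.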
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
                                          : AArch64::REV32v16i8;
    else if (EltSize == 64)
      Opc = AArch64::REV64v16i8;

    // We should always get something by the time we get here...
    assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");

    I.setDesc(TII.get(Opc));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_CONSTANT: {
    const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

    const LLT s8 = LLT::scalar(8);
    const LLT s16 = LLT::scalar(16);
    const LLT s32 = LLT::scalar(32);
    const LLT s64 = LLT::scalar(64);
    const LLT p0 = LLT::pointer(0, 64);

    const Register DefReg = I.getOperand(0).getReg();
    const LLT DefTy = MRI.getType(DefReg);
    const unsigned DefSize = DefTy.getSizeInBits();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    // FIXME: Redundant check, but even less readable when factored out.
    if (isFP) {
      if (Ty != s32 && Ty != s64) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant, expected: " << s32 << " or " << s64
                          << '\n');
        return false;
      }

      if (RB.getID() != AArch64::FPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
                          << " constant on bank: " << RB
                          << ", expected: FPR\n");
        return false;
      }

      // The case when we have 0.0 is covered by tablegen. Reject it here so we
      // can be sure tablegen works correctly and isn't rescued by this code.
      if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
        return false;
    } else {
      // s32 and s64 are covered by tablegen.
      if (Ty != p0 && Ty != s8 && Ty != s16) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant, expected: " << s32 << ", " << s64
                          << ", or " << p0 << '\n');
        return false;
      }

      if (RB.getID() != AArch64::GPRRegBankID) {
        LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
                          << " constant on bank: " << RB
                          << ", expected: GPR\n");
        return false;
      }
    }

    // We allow G_CONSTANT of types < 32b.
    const unsigned MovOpc =
        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;

    if (isFP) {
      // Either emit a FMOV, or emit a copy to emit a normal mov.
      const TargetRegisterClass &GPRRC =
          DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
      const TargetRegisterClass &FPRRC =
          DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;

      // Can we use a FMOV instruction to represent the immediate?
      if (emitFMovForFConstant(I, MRI))
        return true;

      // Nope. Emit a copy and use a normal mov instead.
      const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
      MachineOperand &RegOp = I.getOperand(0);
      RegOp.setReg(DefGPRReg);
      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildCopy({DefReg}, {DefGPRReg});

      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
        return false;
      }

      MachineOperand &ImmOp = I.getOperand(1);
      // FIXME: Is going through int64_t always correct?
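      // The immediate becomes the raw bit pattern, e.g. float 1.0 becomes
      // 0x3f800000, which the MOV materializes into a GPR before it is
      // copied over to the FPR.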
1661 ImmOp.ChangeToImmediate( 1662 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 1663 } else if (I.getOperand(1).isCImm()) { 1664 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 1665 I.getOperand(1).ChangeToImmediate(Val); 1666 } else if (I.getOperand(1).isImm()) { 1667 uint64_t Val = I.getOperand(1).getImm(); 1668 I.getOperand(1).ChangeToImmediate(Val); 1669 } 1670 1671 I.setDesc(TII.get(MovOpc)); 1672 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1673 return true; 1674 } 1675 case TargetOpcode::G_EXTRACT: { 1676 Register DstReg = I.getOperand(0).getReg(); 1677 Register SrcReg = I.getOperand(1).getReg(); 1678 LLT SrcTy = MRI.getType(SrcReg); 1679 LLT DstTy = MRI.getType(DstReg); 1680 (void)DstTy; 1681 unsigned SrcSize = SrcTy.getSizeInBits(); 1682 1683 if (SrcTy.getSizeInBits() > 64) { 1684 // This should be an extract of an s128, which is like a vector extract. 1685 if (SrcTy.getSizeInBits() != 128) 1686 return false; 1687 // Only support extracting 64 bits from an s128 at the moment. 1688 if (DstTy.getSizeInBits() != 64) 1689 return false; 1690 1691 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 1692 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 1693 // Check we have the right regbank always. 1694 assert(SrcRB.getID() == AArch64::FPRRegBankID && 1695 DstRB.getID() == AArch64::FPRRegBankID && 1696 "Wrong extract regbank!"); 1697 (void)SrcRB; 1698 1699 // Emit the same code as a vector extract. 1700 // Offset must be a multiple of 64. 1701 unsigned Offset = I.getOperand(2).getImm(); 1702 if (Offset % 64 != 0) 1703 return false; 1704 unsigned LaneIdx = Offset / 64; 1705 MachineIRBuilder MIB(I); 1706 MachineInstr *Extract = emitExtractVectorElt( 1707 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 1708 if (!Extract) 1709 return false; 1710 I.eraseFromParent(); 1711 return true; 1712 } 1713 1714 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); 1715 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 1716 Ty.getSizeInBits() - 1); 1717 1718 if (SrcSize < 64) { 1719 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 1720 "unexpected G_EXTRACT types"); 1721 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1722 } 1723 1724 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 1725 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 1726 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 1727 .addReg(DstReg, 0, AArch64::sub_32); 1728 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 1729 AArch64::GPR32RegClass, MRI); 1730 I.getOperand(0).setReg(DstReg); 1731 1732 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1733 } 1734 1735 case TargetOpcode::G_INSERT: { 1736 LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 1737 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1738 unsigned DstSize = DstTy.getSizeInBits(); 1739 // Larger inserts are vectors, same-size ones should be something else by 1740 // now (split up or turned into COPYs). 1741 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 1742 return false; 1743 1744 I.setDesc(TII.get(DstSize == 64 ? 
AArch64::BFMXri : AArch64::BFMWri)); 1745 unsigned LSB = I.getOperand(3).getImm(); 1746 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 1747 I.getOperand(3).setImm((DstSize - LSB) % DstSize); 1748 MachineInstrBuilder(MF, I).addImm(Width - 1); 1749 1750 if (DstSize < 64) { 1751 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 1752 "unexpected G_INSERT types"); 1753 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1754 } 1755 1756 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 1757 BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 1758 TII.get(AArch64::SUBREG_TO_REG)) 1759 .addDef(SrcReg) 1760 .addImm(0) 1761 .addUse(I.getOperand(2).getReg()) 1762 .addImm(AArch64::sub_32); 1763 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 1764 AArch64::GPR32RegClass, MRI); 1765 I.getOperand(2).setReg(SrcReg); 1766 1767 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1768 } 1769 case TargetOpcode::G_FRAME_INDEX: { 1770 // allocas and G_FRAME_INDEX are only supported in addrspace(0). 1771 if (Ty != LLT::pointer(0, 64)) { 1772 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 1773 << ", expected: " << LLT::pointer(0, 64) << '\n'); 1774 return false; 1775 } 1776 I.setDesc(TII.get(AArch64::ADDXri)); 1777 1778 // MOs for a #0 shifted immediate. 1779 I.addOperand(MachineOperand::CreateImm(0)); 1780 I.addOperand(MachineOperand::CreateImm(0)); 1781 1782 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1783 } 1784 1785 case TargetOpcode::G_GLOBAL_VALUE: { 1786 auto GV = I.getOperand(1).getGlobal(); 1787 if (GV->isThreadLocal()) 1788 return selectTLSGlobalValue(I, MRI); 1789 1790 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 1791 if (OpFlags & AArch64II::MO_GOT) { 1792 I.setDesc(TII.get(AArch64::LOADgot)); 1793 I.getOperand(1).setTargetFlags(OpFlags); 1794 } else if (TM.getCodeModel() == CodeModel::Large) { 1795 // Materialize the global using movz/movk instructions. 1796 materializeLargeCMVal(I, GV, OpFlags); 1797 I.eraseFromParent(); 1798 return true; 1799 } else if (TM.getCodeModel() == CodeModel::Tiny) { 1800 I.setDesc(TII.get(AArch64::ADR)); 1801 I.getOperand(1).setTargetFlags(OpFlags); 1802 } else { 1803 I.setDesc(TII.get(AArch64::MOVaddr)); 1804 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 1805 MachineInstrBuilder MIB(MF, I); 1806 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 1807 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 1808 } 1809 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1810 } 1811 1812 case TargetOpcode::G_ZEXTLOAD: 1813 case TargetOpcode::G_LOAD: 1814 case TargetOpcode::G_STORE: { 1815 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 1816 MachineIRBuilder MIB(I); 1817 1818 LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); 1819 1820 if (PtrTy != LLT::pointer(0, 64)) { 1821 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy 1822 << ", expected: " << LLT::pointer(0, 64) << '\n'); 1823 return false; 1824 } 1825 1826 auto &MemOp = **I.memoperands_begin(); 1827 if (MemOp.isAtomic()) { 1828 // For now we just support s8 acquire loads to be able to compile stack 1829 // protector code. 
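      // LDARB is the byte-sized load-acquire, which gives the required
      // ordering for these s8 acquire loads.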
      if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
          MemOp.getSize() == 1) {
        I.setDesc(TII.get(AArch64::LDARB));
        return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
      }
      LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
      return false;
    }
    unsigned MemSizeInBits = MemOp.getSize() * 8;

    const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
    const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
    // Sanity-check the pointer register.
    assert(PtrRB.getID() == AArch64::GPRRegBankID &&
           "Load/Store pointer operand isn't a GPR");
    assert(MRI.getType(PtrReg).isPointer() &&
           "Load/Store pointer operand isn't a pointer");
#endif

    const Register ValReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);

    const unsigned NewOpc =
        selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));

    uint64_t Offset = 0;
    auto *PtrMI = MRI.getVRegDef(PtrReg);

    // Try to fold a GEP into our unsigned immediate addressing mode.
    if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
      if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
        int64_t Imm = *COff;
        const unsigned Size = MemSizeInBits / 8;
        const unsigned Scale = Log2_32(Size);
        if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
          Register Ptr2Reg = PtrMI->getOperand(1).getReg();
          I.getOperand(1).setReg(Ptr2Reg);
          PtrMI = MRI.getVRegDef(Ptr2Reg);
          Offset = Imm / Size;
        }
      }
    }

    // If we haven't folded anything into our addressing mode yet, try to fold
    // a frame index into the base+offset.
    if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
      I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());

    I.addOperand(MachineOperand::CreateImm(Offset));

    // If we're storing a 0, use WZR/XZR.
    if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
      if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
        if (I.getOpcode() == AArch64::STRWui)
          I.getOperand(0).setReg(AArch64::WZR);
        else if (I.getOpcode() == AArch64::STRXui)
          I.getOperand(0).setReg(AArch64::XZR);
      }
    }

    if (IsZExtLoad) {
      // A zextload from a smaller type to i32 should be handled by the
      // importer; we only need to do the work here for zextloads to i64.
      if (MRI.getType(ValReg).getSizeInBits() != 64)
        return false;
      // If we have a ZEXTLOAD then change the load's type to be a narrower
      // reg and zero_extend with SUBREG_TO_REG.
      Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
      Register DstReg = I.getOperand(0).getReg();
      I.getOperand(0).setReg(LdReg);

      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
          .addImm(0)
          .addUse(LdReg)
          .addImm(AArch64::sub_32);
      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
      return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
                                          MRI);
    }
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH: {
    // Reject the various things we don't support yet.
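    // (G_[SU]MULH produces the high half of the full 64x64 -> 128-bit
    // product, which is exactly what SMULHrr/UMULHrr compute.)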
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    if (RB.getID() != AArch64::GPRRegBankID) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
      return false;
    }

    if (Ty != LLT::scalar(64)) {
      LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
                        << ", expected: " << LLT::scalar(64) << '\n');
      return false;
    }

    unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
                                                             : AArch64::UMULHrr;
    I.setDesc(TII.get(NewOpc));

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }
  // The FP binary ops deliberately fall through to the generic binop
  // selection shared with the shifts and G_OR below.
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:

  case TargetOpcode::G_ASHR:
    if (Opcode == TargetOpcode::G_ASHR &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorASHR(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_SHL:
    if (Opcode == TargetOpcode::G_SHL &&
        MRI.getType(I.getOperand(0).getReg()).isVector())
      return selectVectorSHL(I, MRI);
    LLVM_FALLTHROUGH;
  case TargetOpcode::G_OR:
  case TargetOpcode::G_LSHR: {
    // Reject the various things we don't support yet.
    if (unsupportedBinOp(I, RBI, MRI, TRI))
      return false;

    const unsigned OpSize = Ty.getSizeInBits();

    const Register DefReg = I.getOperand(0).getReg();
    const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

    const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
    if (NewOpc == I.getOpcode())
      return false;

    I.setDesc(TII.get(NewOpc));
    // FIXME: Should the type be always reset in setDesc?

    // Now that we selected an opcode, we need to constrain the register
    // operands to use appropriate classes.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_PTR_ADD: {
    MachineIRBuilder MIRBuilder(I);
    emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
            MIRBuilder);
    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_UADDO: {
    // TODO: Support other types.
    unsigned OpSize = Ty.getSizeInBits();
    if (OpSize != 32 && OpSize != 64) {
      LLVM_DEBUG(
          dbgs()
          << "G_UADDO currently only supported for 32 and 64-bit types.\n");
      return false;
    }

    // TODO: Support vectors.
    if (Ty.isVector()) {
      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
      return false;
    }

    // Add and set the condition flags.
    unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
    MachineIRBuilder MIRBuilder(I);
    auto AddsMI = MIRBuilder.buildInstr(
        AddsOpc, {I.getOperand(0).getReg()},
        {I.getOperand(2).getReg(), I.getOperand(3).getReg()});
    constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);

    // Now, put the overflow result in the register given by the first operand
    // to the G_UADDO. CSINC increments the result when the predicate is false,
    // so to get the increment when it's true, we need to use the inverse. In
    // this case, we want to increment when carry is set.
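    // The emitted sequence is 'adds' followed by 'csinc wN, wzr, wzr, lo',
    // i.e. 'cset wN, hs': the overflow register becomes 1 exactly when the
    // ADDS above set the carry flag.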
2017 auto CsetMI = MIRBuilder 2018 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, 2019 {Register(AArch64::WZR), Register(AArch64::WZR)}) 2020 .addImm(getInvertedCondCode(AArch64CC::HS)); 2021 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); 2022 I.eraseFromParent(); 2023 return true; 2024 } 2025 2026 case TargetOpcode::G_PTR_MASK: { 2027 uint64_t Align = I.getOperand(2).getImm(); 2028 if (Align >= 64 || Align == 0) 2029 return false; 2030 2031 uint64_t Mask = ~((1ULL << Align) - 1); 2032 I.setDesc(TII.get(AArch64::ANDXri)); 2033 I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64)); 2034 2035 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2036 } 2037 case TargetOpcode::G_PTRTOINT: 2038 case TargetOpcode::G_TRUNC: { 2039 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2040 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 2041 2042 const Register DstReg = I.getOperand(0).getReg(); 2043 const Register SrcReg = I.getOperand(1).getReg(); 2044 2045 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2046 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2047 2048 if (DstRB.getID() != SrcRB.getID()) { 2049 LLVM_DEBUG( 2050 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 2051 return false; 2052 } 2053 2054 if (DstRB.getID() == AArch64::GPRRegBankID) { 2055 const TargetRegisterClass *DstRC = 2056 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2057 if (!DstRC) 2058 return false; 2059 2060 const TargetRegisterClass *SrcRC = 2061 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); 2062 if (!SrcRC) 2063 return false; 2064 2065 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 2066 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 2067 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 2068 return false; 2069 } 2070 2071 if (DstRC == SrcRC) { 2072 // Nothing to be done 2073 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && 2074 SrcTy == LLT::scalar(64)) { 2075 llvm_unreachable("TableGen can import this case"); 2076 return false; 2077 } else if (DstRC == &AArch64::GPR32RegClass && 2078 SrcRC == &AArch64::GPR64RegClass) { 2079 I.getOperand(1).setSubReg(AArch64::sub_32); 2080 } else { 2081 LLVM_DEBUG( 2082 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 2083 return false; 2084 } 2085 2086 I.setDesc(TII.get(TargetOpcode::COPY)); 2087 return true; 2088 } else if (DstRB.getID() == AArch64::FPRRegBankID) { 2089 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) { 2090 I.setDesc(TII.get(AArch64::XTNv4i16)); 2091 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2092 return true; 2093 } 2094 2095 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 2096 MachineIRBuilder MIB(I); 2097 MachineInstr *Extract = emitExtractVectorElt( 2098 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 2099 if (!Extract) 2100 return false; 2101 I.eraseFromParent(); 2102 return true; 2103 } 2104 } 2105 2106 return false; 2107 } 2108 2109 case TargetOpcode::G_ANYEXT: { 2110 const Register DstReg = I.getOperand(0).getReg(); 2111 const Register SrcReg = I.getOperand(1).getReg(); 2112 2113 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 2114 if (RBDst.getID() != AArch64::GPRRegBankID) { 2115 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 2116 << ", expected: GPR\n"); 2117 return false; 2118 } 2119 2120 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 2121 if (RBSrc.getID() != AArch64::GPRRegBankID) { 
      LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
                        << ", expected: GPR\n");
      return false;
    }

    const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();

    if (DstSize == 0) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
      return false;
    }

    if (DstSize != 64 && DstSize > 32) {
      LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
                        << ", expected: 32 or 64\n");
      return false;
    }
    // At this point G_ANYEXT is just like a plain COPY, but we need
    // to explicitly form the 64-bit value if needed.
    if (DstSize > 32) {
      Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
      BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
          .addDef(ExtSrc)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(AArch64::sub_32);
      I.getOperand(1).setReg(ExtSrc);
    }
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT: {
    unsigned Opcode = I.getOpcode();
    const bool IsSigned = Opcode == TargetOpcode::G_SEXT;
    const Register DefReg = I.getOperand(0).getReg();
    const Register SrcReg = I.getOperand(1).getReg();
    const LLT DstTy = MRI.getType(DefReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    unsigned DstSize = DstTy.getSizeInBits();
    unsigned SrcSize = SrcTy.getSizeInBits();

    if (DstTy.isVector())
      return false; // Should be handled by imported patterns.

    assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
               AArch64::GPRRegBankID &&
           "Unexpected ext regbank");

    MachineIRBuilder MIB(I);
    MachineInstr *ExtI;

    // If we're zero-extending the result of a load whose destination type is
    // smaller than 32 bits, the zext is redundant: GPR32 is the smallest GPR
    // register on AArch64, and all loads which are smaller automatically
    // zero-extend the upper bits. E.g.
    // %v(s8) = G_LOAD %p, :: (load 1)
    // %v2(s32) = G_ZEXT %v(s8)
    if (!IsSigned) {
      auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
      if (LoadMI &&
          RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID) {
        const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
        unsigned BytesLoaded = MemOp->getSize();
        if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
          return selectCopy(I, TII, MRI, TRI, RBI);
      }
    }

    if (DstSize == 64) {
      // FIXME: Can we avoid manually doing this?
      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
        LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
                          << " operand\n");
        return false;
      }

      auto SubregToReg =
          MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
              .addImm(0)
              .addUse(SrcReg)
              .addImm(AArch64::sub_32);

      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
                            {DefReg}, {SubregToReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
    } else if (DstSize <= 32) {
      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
                            {DefReg}, {SrcReg})
                 .addImm(0)
                 .addImm(SrcSize - 1);
    } else {
      return false;
    }

    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
              SrcTy = MRI.getType(I.getOperand(1).getReg());
    const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
    if (NewOpc == Opcode)
      return false;

    I.setDesc(TII.get(NewOpc));
    constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    return true;
  }

  case TargetOpcode::G_INTTOPTR:
    // The importer is currently unable to import pointer types since they
    // didn't exist in SelectionDAG.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_BITCAST:
    // Imported SelectionDAG rules can handle every bitcast except those that
    // bitcast from a type to the same type. Ideally, these shouldn't occur
    // but we might not run an optimizer that deletes them. The other exception
    // is bitcasts involving pointer types, as SelectionDAG has no knowledge
    // of them.
    return selectCopy(I, TII, MRI, TRI, RBI);

  case TargetOpcode::G_SELECT: {
    if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
      LLVM_DEBUG(dbgs() << "G_SELECT cond has type: "
                        << MRI.getType(I.getOperand(1).getReg())
                        << ", expected: " << LLT::scalar(1) << '\n');
      return false;
    }

    const Register CondReg = I.getOperand(1).getReg();
    const Register TReg = I.getOperand(2).getReg();
    const Register FReg = I.getOperand(3).getReg();

    if (tryOptSelect(I))
      return true;

    unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
    MachineInstr &TstMI =
        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
             .addDef(AArch64::WZR)
             .addUse(CondReg)
             .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

    MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
                                .addDef(I.getOperand(0).getReg())
                                .addUse(TReg)
                                .addUse(FReg)
                                .addImm(AArch64CC::NE);

    constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_ICMP: {
    if (Ty.isVector())
      return selectVectorICmp(I, MRI);

    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    MachineIRBuilder MIRBuilder(I);
    if (!emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
                            MIRBuilder))
      return false;
    emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
                    MIRBuilder);
    I.eraseFromParent();
    return true;
  }

  case TargetOpcode::G_FCMP: {
    if (Ty != LLT::scalar(32)) {
      LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
                        << ", expected: " << LLT::scalar(32) << '\n');
      return false;
    }

    unsigned CmpOpc = selectFCMPOpc(I, MRI);
    if (!CmpOpc)
      return false;

    // FIXME: regbank

    AArch64CC::CondCode CC1, CC2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);

    // Partially build
the compare. Decide if we need to add a use for the 2324 // third operand based off whether or not we're comparing against 0.0. 2325 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) 2326 .addUse(I.getOperand(2).getReg()); 2327 2328 // If we don't have an immediate compare, then we need to add a use of the 2329 // register which wasn't used for the immediate. 2330 // Note that the immediate will always be the last operand. 2331 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) 2332 CmpMI = CmpMI.addUse(I.getOperand(3).getReg()); 2333 2334 const Register DefReg = I.getOperand(0).getReg(); 2335 Register Def1Reg = DefReg; 2336 if (CC2 != AArch64CC::AL) 2337 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2338 2339 MachineInstr &CSetMI = 2340 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2341 .addDef(Def1Reg) 2342 .addUse(AArch64::WZR) 2343 .addUse(AArch64::WZR) 2344 .addImm(getInvertedCondCode(CC1)); 2345 2346 if (CC2 != AArch64CC::AL) { 2347 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2348 MachineInstr &CSet2MI = 2349 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2350 .addDef(Def2Reg) 2351 .addUse(AArch64::WZR) 2352 .addUse(AArch64::WZR) 2353 .addImm(getInvertedCondCode(CC2)); 2354 MachineInstr &OrMI = 2355 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr)) 2356 .addDef(DefReg) 2357 .addUse(Def1Reg) 2358 .addUse(Def2Reg); 2359 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI); 2360 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI); 2361 } 2362 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 2363 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); 2364 2365 I.eraseFromParent(); 2366 return true; 2367 } 2368 case TargetOpcode::G_VASTART: 2369 return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI)
                                : selectVaStartAAPCS(I, MF, MRI);
  case TargetOpcode::G_INTRINSIC:
    return selectIntrinsic(I, MRI);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWithSideEffects(I, MRI);
  case TargetOpcode::G_IMPLICIT_DEF: {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
    const Register DstReg = I.getOperand(0).getReg();
    const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
    const TargetRegisterClass *DstRC =
        getRegClassForTypeOnBank(DstTy, DstRB, RBI);
    RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
    return true;
  }
  case TargetOpcode::G_BLOCK_ADDR: {
    if (TM.getCodeModel() == CodeModel::Large) {
      materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
      I.eraseFromParent();
      return true;
    } else {
      // Build a fresh MOVaddrBA and erase I; there is no need to also setDesc
      // on I, since it never survives this block.
      auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
                           I.getOperand(0).getReg())
                       .addBlockAddress(I.getOperand(1).getBlockAddress(),
                                        /* Offset */ 0, AArch64II::MO_PAGE)
                       .addBlockAddress(
                           I.getOperand(1).getBlockAddress(), /* Offset */ 0,
                           AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
    }
  }
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return selectIntrinsicTrunc(I, MRI);
  case TargetOpcode::G_INTRINSIC_ROUND:
    return selectIntrinsicRound(I, MRI);
  case TargetOpcode::G_BUILD_VECTOR:
    return selectBuildVector(I, MRI);
  case TargetOpcode::G_MERGE_VALUES:
    return selectMergeValues(I, MRI);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return selectShuffleVector(I, MRI);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectExtractElt(I, MRI);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectInsertElt(I, MRI);
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  }

  return false;
}

bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  Register Index = I.getOperand(2).getReg();
  MachineIRBuilder MIB(I);

  Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
  MIB.buildInstr(AArch64::JumpTableDest32, {TargetReg, ScratchReg},
                 {JTAddr, Index})
      .addJumpTableIndex(JTI);

  // Build the indirect branch.
  MIB.buildInstr(AArch64::BR, {}, {TargetReg});
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectJumpTable(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
  assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");

  Register DstReg = I.getOperand(0).getReg();
  unsigned JTI = I.getOperand(1).getIndex();
  // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
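  // The two jump table operands carry the ADRP page reference (MO_PAGE) and
  // the low 12 bits of the address (MO_PAGEOFF | MO_NC) respectively.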
  MachineIRBuilder MIB(I);
  auto MovMI =
      MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
          .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
          .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  if (!STI.isTargetMachO())
    return false;
  MachineFunction &MF = *I.getParent()->getParent();
  MF.getFrameInfo().setAdjustsStack(true);

  const GlobalValue &GV = *I.getOperand(1).getGlobal();
  MachineIRBuilder MIB(I);

  MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
      .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);

  auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
                             {Register(AArch64::X0)})
                  .addImm(0);

  // TLS calls preserve all registers except those that absolutely must be
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
  // silly).
  MIB.buildInstr(AArch64::BLR, {}, {Load})
      .addDef(AArch64::X0, RegState::Implicit)
      .addRegMask(TRI.getTLSCallPreservedMask());

  MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                               AArch64::GPR64RegClass, MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectIntrinsicTrunc(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
      // Unexpected scalar size; let the !Opc check below reject it.
      break;
    case 16:
      Opc = AArch64::FRINTZHr;
      break;
    case 32:
      Opc = AArch64::FRINTZSr;
      break;
    case 64:
      Opc = AArch64::FRINTZDr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTZv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTZv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTZv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTZv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectIntrinsicRound(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());

  // Select the correct opcode.
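  // G_INTRINSIC_ROUND rounds to the nearest integer, with ties rounding away
  // from zero; that is exactly FRINTA ('A' for 'away').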
  unsigned Opc = 0;
  if (!SrcTy.isVector()) {
    switch (SrcTy.getSizeInBits()) {
    default:
      // Unexpected scalar size; let the !Opc check below reject it.
      break;
    case 16:
      Opc = AArch64::FRINTAHr;
      break;
    case 32:
      Opc = AArch64::FRINTASr;
      break;
    case 64:
      Opc = AArch64::FRINTADr;
      break;
    }
  } else {
    unsigned NumElts = SrcTy.getNumElements();
    switch (SrcTy.getElementType().getSizeInBits()) {
    default:
      break;
    case 16:
      if (NumElts == 4)
        Opc = AArch64::FRINTAv4f16;
      else if (NumElts == 8)
        Opc = AArch64::FRINTAv8f16;
      break;
    case 32:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f32;
      else if (NumElts == 4)
        Opc = AArch64::FRINTAv4f32;
      break;
    case 64:
      if (NumElts == 2)
        Opc = AArch64::FRINTAv2f64;
      break;
    }
  }

  if (!Opc) {
    // Didn't get an opcode above, bail.
    LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
    return false;
  }

  // Legalization would have set us up perfectly for this; we just need to
  // set the opcode and move on.
  I.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectVectorICmp(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register SrcReg = I.getOperand(2).getReg();
  Register Src2Reg = I.getOperand(3).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
  unsigned NumElts = DstTy.getNumElements();

  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
  // Third index is cc opcode:
  // 0 == eq
  // 1 == ugt
  // 2 == uge
  // 3 == ult
  // 4 == ule
  // 5 == sgt
  // 6 == sge
  // 7 == slt
  // 8 == sle
  // ne is done by negating 'eq' result.

  // This table below assumes that for some comparisons the operands will be
  // commuted.
  // ult op == commute + ugt op
  // ule op == commute + uge op
  // slt op == commute + sgt op
  // sle op == commute + sge op
  unsigned PredIdx = 0;
  bool SwapOperands = false;
  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
  switch (Pred) {
  case CmpInst::ICMP_NE:
  case CmpInst::ICMP_EQ:
    PredIdx = 0;
    break;
  case CmpInst::ICMP_UGT:
    PredIdx = 1;
    break;
  case CmpInst::ICMP_UGE:
    PredIdx = 2;
    break;
  case CmpInst::ICMP_ULT:
    PredIdx = 3;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_ULE:
    PredIdx = 4;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SGT:
    PredIdx = 5;
    break;
  case CmpInst::ICMP_SGE:
    PredIdx = 6;
    break;
  case CmpInst::ICMP_SLT:
    PredIdx = 7;
    SwapOperands = true;
    break;
  case CmpInst::ICMP_SLE:
    PredIdx = 8;
    SwapOperands = true;
    break;
  default:
    llvm_unreachable("Unhandled icmp predicate");
    return false;
  }

  // This table obviously should be tablegen'd when we have our GISel native
  // tablegen selector.
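  // For example, OpcTable[2][1][0] is AArch64::CMEQv4i32: 32-bit elements
  // (EltIdx 2), four of them (NumEltsIdx 1), equality compare (PredIdx 0).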
2680 2681 static const unsigned OpcTable[4][4][9] = { 2682 { 2683 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2684 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2685 0 /* invalid */}, 2686 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2687 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2688 0 /* invalid */}, 2689 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, 2690 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, 2691 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, 2692 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, 2693 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, 2694 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} 2695 }, 2696 { 2697 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2698 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2699 0 /* invalid */}, 2700 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, 2701 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, 2702 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, 2703 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, 2704 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, 2705 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, 2706 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2707 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2708 0 /* invalid */} 2709 }, 2710 { 2711 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, 2712 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, 2713 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, 2714 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, 2715 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, 2716 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, 2717 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2718 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2719 0 /* invalid */}, 2720 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2721 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2722 0 /* invalid */} 2723 }, 2724 { 2725 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, 2726 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, 2727 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, 2728 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2729 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2730 0 /* invalid */}, 2731 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2732 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2733 0 /* invalid */}, 2734 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2735 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 2736 0 /* invalid */} 2737 }, 2738 }; 2739 unsigned EltIdx = Log2_32(SrcEltSize / 8); 2740 unsigned NumEltsIdx = Log2_32(NumElts / 2); 2741 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; 2742 if (!Opc) { 2743 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); 2744 return false; 2745 } 2746 2747 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2748 const TargetRegisterClass *SrcRC = 2749 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); 2750 if (!SrcRC) { 2751 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 2752 
return false; 2753 } 2754 2755 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; 2756 if (SrcTy.getSizeInBits() == 128) 2757 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; 2758 2759 if (SwapOperands) 2760 std::swap(SrcReg, Src2Reg); 2761 2762 MachineIRBuilder MIB(I); 2763 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); 2764 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 2765 2766 // Invert if we had a 'ne' cc. 2767 if (NotOpc) { 2768 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); 2769 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 2770 } else { 2771 MIB.buildCopy(DstReg, Cmp.getReg(0)); 2772 } 2773 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); 2774 I.eraseFromParent(); 2775 return true; 2776} 2777 2778MachineInstr *AArch64InstructionSelector::emitScalarToVector( 2779 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, 2780 MachineIRBuilder &MIRBuilder) const { 2781 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); 2782 2783 auto BuildFn = [&](unsigned SubregIndex) { 2784 auto Ins = 2785 MIRBuilder 2786 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) 2787 .addImm(SubregIndex); 2788 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); 2789 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); 2790 return &*Ins; 2791 }; 2792 2793 switch (EltSize) { 2794 case 16: 2795 return BuildFn(AArch64::hsub); 2796 case 32: 2797 return BuildFn(AArch64::ssub); 2798 case 64: 2799 return BuildFn(AArch64::dsub); 2800 default: 2801 return nullptr; 2802 } 2803} 2804 2805bool AArch64InstructionSelector::selectMergeValues( 2806 MachineInstr &I, MachineRegisterInfo &MRI) const { 2807 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); 2808 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2809 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 2810 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); 2811 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 2812 2813 if (I.getNumOperands() != 3) 2814 return false; 2815 2816 // Merging 2 s64s into an s128. 
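  // This is done with two lane inserts into an IMPLICIT_DEF'd 128-bit
  // register: lane 0 receives the low 64 bits and lane 1 the high 64 bits.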
2817 if (DstTy == LLT::scalar(128)) { 2818 if (SrcTy.getSizeInBits() != 64) 2819 return false; 2820 MachineIRBuilder MIB(I); 2821 Register DstReg = I.getOperand(0).getReg(); 2822 Register Src1Reg = I.getOperand(1).getReg(); 2823 Register Src2Reg = I.getOperand(2).getReg(); 2824 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); 2825 MachineInstr *InsMI = 2826 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); 2827 if (!InsMI) 2828 return false; 2829 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), 2830 Src2Reg, /* LaneIdx */ 1, RB, MIB); 2831 if (!Ins2MI) 2832 return false; 2833 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 2834 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); 2835 I.eraseFromParent(); 2836 return true; 2837 } 2838 2839 if (RB.getID() != AArch64::GPRRegBankID) 2840 return false; 2841 2842 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) 2843 return false; 2844 2845 auto *DstRC = &AArch64::GPR64RegClass; 2846 Register SubToRegDef = MRI.createVirtualRegister(DstRC); 2847 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 2848 TII.get(TargetOpcode::SUBREG_TO_REG)) 2849 .addDef(SubToRegDef) 2850 .addImm(0) 2851 .addUse(I.getOperand(1).getReg()) 2852 .addImm(AArch64::sub_32); 2853 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); 2854 // Need to anyext the second scalar before we can use bfm 2855 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 2856 TII.get(TargetOpcode::SUBREG_TO_REG)) 2857 .addDef(SubToRegDef2) 2858 .addImm(0) 2859 .addUse(I.getOperand(2).getReg()) 2860 .addImm(AArch64::sub_32); 2861 MachineInstr &BFM = 2862 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) 2863 .addDef(I.getOperand(0).getReg()) 2864 .addUse(SubToRegDef) 2865 .addUse(SubToRegDef2) 2866 .addImm(32) 2867 .addImm(31); 2868 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); 2869 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); 2870 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); 2871 I.eraseFromParent(); 2872 return true; 2873} 2874 2875static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, 2876 const unsigned EltSize) { 2877 // Choose a lane copy opcode and subregister based off of the size of the 2878 // vector's elements. 2879 switch (EltSize) { 2880 case 16: 2881 CopyOpc = AArch64::CPYi16; 2882 ExtractSubReg = AArch64::hsub; 2883 break; 2884 case 32: 2885 CopyOpc = AArch64::CPYi32; 2886 ExtractSubReg = AArch64::ssub; 2887 break; 2888 case 64: 2889 CopyOpc = AArch64::CPYi64; 2890 ExtractSubReg = AArch64::dsub; 2891 break; 2892 default: 2893 // Unknown size, bail out. 
2894 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); 2895 return false; 2896 } 2897 return true; 2898} 2899 2900MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( 2901 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, 2902 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { 2903 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 2904 unsigned CopyOpc = 0; 2905 unsigned ExtractSubReg = 0; 2906 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { 2907 LLVM_DEBUG( 2908 dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); 2909 return nullptr; 2910 } 2911 2912 const TargetRegisterClass *DstRC = 2913 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); 2914 if (!DstRC) { 2915 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); 2916 return nullptr; 2917 } 2918 2919 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); 2920 const LLT &VecTy = MRI.getType(VecReg); 2921 const TargetRegisterClass *VecRC = 2922 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); 2923 if (!VecRC) { 2924 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 2925 return nullptr; 2926 } 2927 2928 // The register that we're going to copy into. 2929 Register InsertReg = VecReg; 2930 if (!DstReg) 2931 DstReg = MRI.createVirtualRegister(DstRC); 2932 // If the lane index is 0, we just use a subregister COPY. 2933 if (LaneIdx == 0) { 2934 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 2935 .addReg(VecReg, 0, ExtractSubReg); 2936 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 2937 return &*Copy; 2938 } 2939 2940 // Lane copies require 128-bit wide registers. If we're dealing with an 2941 // unpacked vector, then we need to move up to that width. Insert an implicit 2942 // def and a subregister insert to get us there. 2943 if (VecTy.getSizeInBits() != 128) { 2944 MachineInstr *ScalarToVector = emitScalarToVector( 2945 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 2946 if (!ScalarToVector) 2947 return nullptr; 2948 InsertReg = ScalarToVector->getOperand(0).getReg(); 2949 } 2950 2951 MachineInstr *LaneCopyMI = 2952 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 2953 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 2954 2955 // Make sure that we actually constrain the initial copy. 2956 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 2957 return LaneCopyMI; 2958} 2959 2960bool AArch64InstructionSelector::selectExtractElt( 2961 MachineInstr &I, MachineRegisterInfo &MRI) const { 2962 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 2963 "unexpected opcode!"); 2964 Register DstReg = I.getOperand(0).getReg(); 2965 const LLT NarrowTy = MRI.getType(DstReg); 2966 const Register SrcReg = I.getOperand(1).getReg(); 2967 const LLT WideTy = MRI.getType(SrcReg); 2968 (void)WideTy; 2969 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 2970 "source register size too small!"); 2971 assert(NarrowTy.isScalar() && "cannot extract vector into vector!"); 2972 2973 // Need the lane index to determine the correct copy opcode. 2974 MachineOperand &LaneIdxOp = I.getOperand(2); 2975 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 2976 2977 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 2978 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 2979 return false; 2980 } 2981 2982 // Find the index to extract from. 
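  // The lane copy opcodes (CPYi*) encode the lane as an immediate, so this
  // only works if the index is a compile-time constant (looking through
  // copies and the like).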
2983 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 2984 if (!VRegAndVal) 2985 return false; 2986 unsigned LaneIdx = VRegAndVal->Value; 2987 2988 MachineIRBuilder MIRBuilder(I); 2989 2990 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2991 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 2992 LaneIdx, MIRBuilder); 2993 if (!Extract) 2994 return false; 2995 2996 I.eraseFromParent(); 2997 return true; 2998} 2999 3000bool AArch64InstructionSelector::selectSplitVectorUnmerge( 3001 MachineInstr &I, MachineRegisterInfo &MRI) const { 3002 unsigned NumElts = I.getNumOperands() - 1; 3003 Register SrcReg = I.getOperand(NumElts).getReg(); 3004 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3005 const LLT SrcTy = MRI.getType(SrcReg); 3006 3007 assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 3008 if (SrcTy.getSizeInBits() > 128) { 3009 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 3010 return false; 3011 } 3012 3013 MachineIRBuilder MIB(I); 3014 3015 // We implement a split vector operation by treating the sub-vectors as 3016 // scalars and extracting them. 3017 const RegisterBank &DstRB = 3018 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); 3019 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { 3020 Register Dst = I.getOperand(OpIdx).getReg(); 3021 MachineInstr *Extract = 3022 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); 3023 if (!Extract) 3024 return false; 3025 } 3026 I.eraseFromParent(); 3027 return true; 3028} 3029 3030bool AArch64InstructionSelector::selectUnmergeValues( 3031 MachineInstr &I, MachineRegisterInfo &MRI) const { 3032 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 3033 "unexpected opcode"); 3034 3035 // TODO: Handle unmerging into GPRs and from scalars to scalars. 3036 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 3037 AArch64::FPRRegBankID || 3038 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 3039 AArch64::FPRRegBankID) { 3040 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " 3041 "currently unsupported.\n"); 3042 return false; 3043 } 3044 3045 // The last operand is the vector source register, and every other operand is 3046 // a register to unpack into. 3047 unsigned NumElts = I.getNumOperands() - 1; 3048 Register SrcReg = I.getOperand(NumElts).getReg(); 3049 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3050 const LLT WideTy = MRI.getType(SrcReg); 3051 (void)WideTy; 3052 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && 3053 "can only unmerge from vector or s128 types!"); 3054 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && 3055 "source register size too small!"); 3056 3057 if (!NarrowTy.isScalar()) 3058 return selectSplitVectorUnmerge(I, MRI); 3059 3060 MachineIRBuilder MIB(I); 3061 3062 // Choose a lane copy opcode and subregister based off of the size of the 3063 // vector's elements. 3064 unsigned CopyOpc = 0; 3065 unsigned ExtractSubReg = 0; 3066 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) 3067 return false; 3068 3069 // Set up for the lane copies. 3070 MachineBasicBlock &MBB = *I.getParent(); 3071 3072 // Stores the registers we'll be copying from. 3073 SmallVector<Register, 4> InsertRegs; 3074 3075 // We'll use the first register twice, so we only need NumElts-1 registers. 
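  // (The first insert register is used both for the initial subregister copy
  // of lane 0 and for the lane 1 copy in the loop below.)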
3076 unsigned NumInsertRegs = NumElts - 1; 3077 3078 // If our elements fit into exactly 128 bits, then we can copy from the source 3079 // directly. Otherwise, we need to do a bit of setup with some subregister 3080 // inserts. 3081 if (NarrowTy.getSizeInBits() * NumElts == 128) { 3082 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); 3083 } else { 3084 // No. We have to perform subregister inserts. For each insert, create an 3085 // implicit def and a subregister insert, and save the register we create. 3086 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { 3087 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3088 MachineInstr &ImpDefMI = 3089 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), 3090 ImpDefReg); 3091 3092 // Now, create the subregister insert from SrcReg. 3093 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3094 MachineInstr &InsMI = 3095 *BuildMI(MBB, I, I.getDebugLoc(), 3096 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) 3097 .addUse(ImpDefReg) 3098 .addUse(SrcReg) 3099 .addImm(AArch64::dsub); 3100 3101 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); 3102 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); 3103 3104 // Save the register so that we can copy from it after. 3105 InsertRegs.push_back(InsertReg); 3106 } 3107 } 3108 3109 // Now that we've created any necessary subregister inserts, we can 3110 // create the copies. 3111 // 3112 // Perform the first copy separately as a subregister copy. 3113 Register CopyTo = I.getOperand(0).getReg(); 3114 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) 3115 .addReg(InsertRegs[0], 0, ExtractSubReg); 3116 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); 3117 3118 // Now, perform the remaining copies as vector lane copies. 3119 unsigned LaneIdx = 1; 3120 for (Register InsReg : InsertRegs) { 3121 Register CopyTo = I.getOperand(LaneIdx).getReg(); 3122 MachineInstr &CopyInst = 3123 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) 3124 .addUse(InsReg) 3125 .addImm(LaneIdx); 3126 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); 3127 ++LaneIdx; 3128 } 3129 3130 // Separately constrain the first copy's destination. Because of the 3131 // limitation in constrainOperandRegClass, we can't guarantee that this will 3132 // actually be constrained. So, do it ourselves using the second operand. 
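  // The lane copies constrained operand 1 to a concrete register class, so
  // that class is also a valid choice for the first copy's destination.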
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineIRBuilder MIRBuilder(I);
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

unsigned
AArch64InstructionSelector::emitConstantPoolEntry(Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  unsigned Align = MF.getDataLayout().getPrefTypeAlignment(CPTy);
  if (Align == 0)
    Align = MF.getDataLayout().getTypeAllocSize(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Align);
}

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());

  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI = nullptr;
  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType() << '\n');
    return nullptr;
  }
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}

/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
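/// GPR register banks use the INSvi*gpr forms (general register into vector
/// lane); FPR banks use the lane-to-lane INSvi*lane forms.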
3207static std::pair<unsigned, unsigned> 3208getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 3209 unsigned Opc, SubregIdx; 3210 if (RB.getID() == AArch64::GPRRegBankID) { 3211 if (EltSize == 32) { 3212 Opc = AArch64::INSvi32gpr; 3213 SubregIdx = AArch64::ssub; 3214 } else if (EltSize == 64) { 3215 Opc = AArch64::INSvi64gpr; 3216 SubregIdx = AArch64::dsub; 3217 } else { 3218 llvm_unreachable("invalid elt size!"); 3219 } 3220 } else { 3221 if (EltSize == 8) { 3222 Opc = AArch64::INSvi8lane; 3223 SubregIdx = AArch64::bsub; 3224 } else if (EltSize == 16) { 3225 Opc = AArch64::INSvi16lane; 3226 SubregIdx = AArch64::hsub; 3227 } else if (EltSize == 32) { 3228 Opc = AArch64::INSvi32lane; 3229 SubregIdx = AArch64::ssub; 3230 } else if (EltSize == 64) { 3231 Opc = AArch64::INSvi64lane; 3232 SubregIdx = AArch64::dsub; 3233 } else { 3234 llvm_unreachable("invalid elt size!"); 3235 } 3236 } 3237 return std::make_pair(Opc, SubregIdx); 3238} 3239 3240MachineInstr * 3241AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 3242 MachineOperand &RHS, 3243 MachineIRBuilder &MIRBuilder) const { 3244 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3245 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3246 static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri}, 3247 {AArch64::ADDWrr, AArch64::ADDWri}}; 3248 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32; 3249 auto ImmFns = selectArithImmed(RHS); 3250 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3251 auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS.getReg()}); 3252 3253 // If we matched a valid constant immediate, add those operands. 3254 if (ImmFns) { 3255 for (auto &RenderFn : *ImmFns) 3256 RenderFn(AddMI); 3257 } else { 3258 AddMI.addUse(RHS.getReg()); 3259 } 3260 3261 constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI); 3262 return &*AddMI; 3263} 3264 3265MachineInstr * 3266AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 3267 MachineIRBuilder &MIRBuilder) const { 3268 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3269 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3270 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri}, 3271 {AArch64::ADDSWrr, AArch64::ADDSWri}}; 3272 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 3273 auto ImmFns = selectArithImmed(RHS); 3274 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3275 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3276 3277 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS.getReg()}); 3278 3279 // If we matched a valid constant immediate, add those operands. 3280 if (ImmFns) { 3281 for (auto &RenderFn : *ImmFns) 3282 RenderFn(CmpMI); 3283 } else { 3284 CmpMI.addUse(RHS.getReg()); 3285 } 3286 3287 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 3288 return &*CmpMI; 3289} 3290 3291MachineInstr * 3292AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS, 3293 MachineIRBuilder &MIRBuilder) const { 3294 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3295 unsigned RegSize = MRI.getType(LHS).getSizeInBits(); 3296 bool Is32Bit = (RegSize == 32); 3297 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri}, 3298 {AArch64::ANDSWrr, AArch64::ANDSWri}}; 3299 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3300 3301 // We might be able to fold in an immediate into the TST. 
  // it's a logical immediate though, since ANDS requires that.
  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
  bool IsImmForm = ValAndVReg.hasValue() &&
                   AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});

  if (IsImmForm)
    TstMI.addImm(
        AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
  else
    TstMI.addUse(RHS);

  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  return &*TstMI;
}

MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  // Fold the compare if possible.
  MachineInstr *FoldCmp =
      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
  if (FoldCmp)
    return FoldCmp;

  // Can't fold into a CMN. Just emit a normal compare.
  unsigned CmpOpc = 0;
  Register ZReg;

  LLT CmpTy = MRI.getType(LHS.getReg());
  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
         "Expected scalar or pointer");
  if (CmpTy == LLT::scalar(32)) {
    CmpOpc = AArch64::SUBSWrr;
    ZReg = AArch64::WZR;
  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
    CmpOpc = AArch64::SUBSXrr;
    ZReg = AArch64::XZR;
  } else {
    return nullptr;
  }

  // Try to match immediate forms.
  auto ImmFns = selectArithImmed(RHS);
  if (ImmFns)
    CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;

  auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addDef(ZReg).addUse(LHS.getReg());
  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(CmpMI);
  } else {
    CmpMI.addUse(RHS.getReg());
  }

  // Make sure that we can constrain the compare that we emitted.
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
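  // i.e. we only produce a single 128-bit result from two 64-bit operands.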
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}

MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
         "Expected a G_FCONSTANT!");
  MachineOperand &ImmOp = I.getOperand(1);
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  // Only handle 32 and 64 bit defs for now.
  if (DefSize != 32 && DefSize != 64)
    return nullptr;

  // Don't handle null values using FMOV.
  if (ImmOp.getFPImm()->isNullValue())
    return nullptr;

  // Get the immediate representation for the FMOV.
  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
                          : AArch64_AM::getFP64Imm(ImmValAPF);

  // If this is -1, it means the immediate can't be encoded in the FMOV
  // immediate field. Bail.
  if (Imm == -1)
    return nullptr;

  // Update MI to represent the new FMOV instruction, constrain it, and return.
  ImmOp.ChangeToImmediate(Imm);
  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
  I.setDesc(TII.get(MovOpc));
  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  return &I;
}

MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
  auto I =
      MIRBuilder
          .buildInstr(AArch64::CSINCWr, {DefReg},
                      {Register(AArch64::WZR), Register(AArch64::WZR)})
          .addImm(InvCC);
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
  return &*I;
}

bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between).
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    if (!MRI.hasOneUse(CondDef->getOperand(0).getReg()))
      return false;

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending cannot change that bit's value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    CondCode = changeICMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate());
    if (!emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                            CondDef->getOperand(1), MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  } else {
    // Get the condition code for the select.
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
        CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    // Make sure we'll be able to select the compare.
    unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
    if (!CmpOpc)
      return false;

    // Emit a new compare.
    auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
      Cmp.addUse(CondDef->getOperand(3).getReg());
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  }

  // Emit the select.
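  // The condition code computed above is rendered as the final immediate
  // operand on the CSEL-style instruction built below.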
  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
  auto CSel =
      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
          .addImm(CondCode);
  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Helper lambda to detect the subtract followed by the compare.
  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
    // We want to match against SUBs.
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
      return false;

    // Need to make sure NZCV is the same at the end of the transformation.
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
      return false;

    // Make sure that we're getting
    // x = G_SUB 0, y
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return false;

    // This can safely be represented as a CMN.
    return true;
  };

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);

  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (IsCMN(LHSDef, CC))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (IsCMN(RHSDef, CC))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!isUnsignedICMPPred(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1).getReg(),
                   LHSDef->getOperand(2).getReg(), MIRBuilder);
  }

  return nullptr;
}

bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  // %scalar:gpr(s64) = COPY $x0
  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  // %cst0:gpr(s32) = G_CONSTANT i32 0
  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  //                                          %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = DUP %scalar
  // We use the regbank of the scalar to determine which kind of dup to use.
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  using namespace TargetOpcode;
  using namespace MIPatternMatch;

  // Begin matching the insert.
  auto *InsMI =
      getOpcodeDef(G_INSERT_VECTOR_ELT, I.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  auto *UndefMI =
      getOpcodeDef(G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(), MRI);
  if (!UndefMI)
    return false;
  // Match the scalar being splatted.
  Register ScalarReg = InsMI->getOperand(2).getReg();
  const RegisterBank *ScalarRB = RBI.getRegBank(ScalarReg, MRI, TRI);
  // Match the index constant 0.
  int64_t Index = 0;
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ICst(Index)) || Index)
    return false;

  // The shuffle's second operand doesn't matter if the mask is all zero.
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
  if (!all_of(Mask, [](int Elem) { return Elem == 0; }))
    return false;

  // We're done, now find out what kind of splat we need.
  LLT VecTy = MRI.getType(I.getOperand(0).getReg());
  LLT EltTy = VecTy.getElementType();
  if (EltTy.getSizeInBits() < 32) {
    LLVM_DEBUG(dbgs() << "Could not optimize splat pattern < 32b elts yet");
    return false;
  }
  bool IsFP = ScalarRB->getID() == AArch64::FPRRegBankID;
  unsigned Opc = 0;
  if (IsFP) {
    switch (EltTy.getSizeInBits()) {
    case 32:
      if (VecTy.getNumElements() == 2) {
        Opc = AArch64::DUPv2i32lane;
      } else {
        Opc = AArch64::DUPv4i32lane;
        assert(VecTy.getNumElements() == 4);
      }
      break;
    case 64:
      assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
      Opc = AArch64::DUPv2i64lane;
      break;
    }
  } else {
    switch (EltTy.getSizeInBits()) {
    case 32:
      if (VecTy.getNumElements() == 2) {
        Opc = AArch64::DUPv2i32gpr;
      } else {
        Opc = AArch64::DUPv4i32gpr;
        assert(VecTy.getNumElements() == 4);
      }
      break;
    case 64:
      assert(VecTy.getNumElements() == 2 && "Unexpected num elts");
      Opc = AArch64::DUPv2i64gpr;
      break;
    }
  }
  assert(Opc && "Did not compute an opcode for a dup");

  // For FP splats, we need to widen the scalar reg via undef too.
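  // The lane variants of DUP read a vector register, so an FPR scalar is
  // first placed into the low lane of an FPR128 (via emitScalarToVector),
  // and the DUP then broadcasts lane 0 of that register.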
  if (IsFP) {
    MachineInstr *Widen = emitScalarToVector(
        EltTy.getSizeInBits(), &AArch64::FPR128RegClass, ScalarReg, MIB);
    if (!Widen)
      return false;
    ScalarReg = Widen->getOperand(0).getReg();
  }
  auto Dup = MIB.buildInstr(Opc, {I.getOperand(0).getReg()}, {ScalarReg});
  if (IsFP)
    Dup.addImm(0);
  constrainSelectedInstRegOperands(*Dup, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::tryOptVectorShuffle(MachineInstr &I) const {
  if (TM.getOptLevel() == CodeGenOpt::None)
    return false;
  return tryOptVectorDup(I);
}

bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  if (tryOptVectorShuffle(I))
    return true;
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes are treated as 0. This should be optimized
    // in the future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }

  MachineIRBuilder MIRBuilder(I);

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
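    // TBLv16i8One takes a 128-bit table register and a 128-bit index vector,
    // so widen the 64-bit index load before building the TBL.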
    IndexLoad =
        emitScalarToVector(64, &AArch64::FPR128RegClass,
                           IndexLoad->getOperand(0).getReg(), MIRBuilder);
    // Bail out if we couldn't widen the index vector.
    if (!IndexLoad)
      return false;

    auto TBL1 = MIRBuilder.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    auto Copy =
        MIRBuilder
            .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  auto RegSeq = MIRBuilder
                    .buildInstr(TargetOpcode::REG_SEQUENCE,
                                {&AArch64::QQRegClass}, {Src1Reg})
                    .addImm(AArch64::qsub0)
                    .addUse(Src2Reg)
                    .addImm(AArch64::qsub1);

  auto TBL2 =
      MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0).getReg()},
                            {RegSeq, IndexLoad->getOperand(0).getReg()});
  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}

bool AArch64InstructionSelector::selectInsertElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  Register EltReg = I.getOperand(2).getReg();
  const LLT EltTy = MRI.getType(EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  Register IdxReg = I.getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value;

  // Perform the lane insert.
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec = emitScalarToVector(
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // register.
  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    Register DemoteVec = InsMI->getOperand(0).getReg();
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                        << ")\n");
      return false;
    }
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // No widening needed.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectBuildVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
                         I.getOperand(1).getReg(), MIRBuilder);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
                              MIRBuilder);
    DstVec = PrevMI->getOperand(0).getReg();
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
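  // e.g. a <4 x s16> G_BUILD_VECTOR is assembled in an FPR128 and the result
  // is then copied out through the dsub subregister as an FPR64 value.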
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << ")\n");
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RC);
    Register DstReg = I.getOperand(0).getReg();

    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
/// ID if it exists, and 0 otherwise.
static unsigned findIntrinsicID(MachineInstr &I) {
  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
    return Op.isIntrinsicID();
  });
  if (IntrinOp == I.operands_end())
    return 0;
  return IntrinOp->getIntrinsicID();
}

bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Find the intrinsic ID.
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  // Select the instruction.
  switch (IntrinID) {
  default:
    return false;
  case Intrinsic::trap:
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
    break;
  case Intrinsic::debugtrap:
    if (!STI.isTargetWindows())
      return false;
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
    break;
  }

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectIntrinsic(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h:
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  return false;
}

static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value;
  } else
    return None;
  return Immed;
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

/// Helper to select an immediate value that can be represented as a 12-bit
/// value shifted left by either 0 or 12. If it is possible to do so, return
/// the immediate and shift value. If not, return None.
///
/// Used by selectArithImmed and selectNegArithImmed.
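///
/// For example, 0x123 is returned unshifted (LSL 0), 0x123000 is returned as
/// 0x123 with LSL 12, and 0x123456 is rejected.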
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return the encoded
/// 12-bit value and shifter operand via renderer functions.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  return select12BitValueWithLeftShift(*MaybeImmed);
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return None;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return None;

  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  // the root.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return None;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}

/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasMinSize())
    return true;

  // It's better to avoid folding and recomputing shifts when we don't have a
  // fastpath.
  if (!STI.hasLSLFast())
    return false;

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
  if (!OffsetInst)
    return None;

  unsigned OffsetOpc = OffsetInst->getOpcode();
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
    return None;

  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value;

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!isPowerOf2_32(ImmVal))
      return None;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return None;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return None;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend.
    MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
    auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;

    SignExtend = Ext == AArch64_AM::SXTW;

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = ExtInst->getOperand(1).getReg();
    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           OffsetInst->getOperand(0), SizeInBytes,
                           /*WantsExt=*/false);
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}

/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
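/// e.g. ldr x0, [x1, x2] or ldr x0, [x1, x2, lsl #3].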
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // If we have a constant offset, then we probably don't want to match a
  // register offset.
  if (isBaseWithConstantOffset(Root, MRI))
    return None;

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}

/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try to fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return None;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg =
      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
The "Size" argument is the size in bytes of the 4555/// memory reference, which is needed here to know what is valid for a scaled 4556/// immediate. 4557InstructionSelector::ComplexRendererFns 4558AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, 4559 unsigned Size) const { 4560 MachineRegisterInfo &MRI = 4561 Root.getParent()->getParent()->getParent()->getRegInfo(); 4562 4563 if (!Root.isReg()) 4564 return None; 4565 4566 if (!isBaseWithConstantOffset(Root, MRI)) 4567 return None; 4568 4569 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 4570 if (!RootDef) 4571 return None; 4572 4573 MachineOperand &OffImm = RootDef->getOperand(2); 4574 if (!OffImm.isReg()) 4575 return None; 4576 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); 4577 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) 4578 return None; 4579 int64_t RHSC; 4580 MachineOperand &RHSOp1 = RHS->getOperand(1); 4581 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) 4582 return None; 4583 RHSC = RHSOp1.getCImm()->getSExtValue(); 4584 4585 // If the offset is valid as a scaled immediate, don't match here. 4586 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) 4587 return None; 4588 if (RHSC >= -256 && RHSC < 256) { 4589 MachineOperand &Base = RootDef->getOperand(1); 4590 return {{ 4591 [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, 4592 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, 4593 }}; 4594 } 4595 return None; 4596} 4597 4598/// Select a "register plus scaled unsigned 12-bit immediate" address. The 4599/// "Size" argument is the size in bytes of the memory reference, which 4600/// determines the scale. 4601InstructionSelector::ComplexRendererFns 4602AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, 4603 unsigned Size) const { 4604 MachineRegisterInfo &MRI = 4605 Root.getParent()->getParent()->getParent()->getRegInfo(); 4606 4607 if (!Root.isReg()) 4608 return None; 4609 4610 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 4611 if (!RootDef) 4612 return None; 4613 4614 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 4615 return {{ 4616 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, 4617 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 4618 }}; 4619 } 4620 4621 if (isBaseWithConstantOffset(Root, MRI)) { 4622 MachineOperand &LHS = RootDef->getOperand(1); 4623 MachineOperand &RHS = RootDef->getOperand(2); 4624 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 4625 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 4626 if (LHSDef && RHSDef) { 4627 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); 4628 unsigned Scale = Log2_32(Size); 4629 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 4630 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 4631 return {{ 4632 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, 4633 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 4634 }}; 4635 4636 return {{ 4637 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, 4638 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 4639 }}; 4640 } 4641 } 4642 } 4643 4644 // Before falling back to our general case, check if the unscaled 4645 // instructions can handle this. If so, that's preferable. 
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  // TODO: Handle AArch64_AM::ROR
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
///
/// TODO: Allow shifted register to be rotated in logical instructions.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  //
  // TODO: Handle AArch64_AM::ROR for logical instructions.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  if (!ShiftInst)
    return None;
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return None;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return None;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}

AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
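  // e.g. a G_SEXT from s8 maps to SXTB and a G_ZEXT from s16 maps to UXTH.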
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::SXTB;
    case 16:
      return AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::UXTB;
    case 16:
      return AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
    Register ExtReg, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  if (MRI.getType(ExtReg).getSizeInBits() == 32)
    return ExtReg;

  // Insert a copy to move ExtReg to GPR32.
  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});

  // Select the copy into a subregister copy.
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return None;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return None;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return None;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return None;
    // Look for a valid extend instruction on the LHS of the shift.
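    // e.g. "add x0, x1, w2, sxth #2" folds both a G_SEXT from s16 and a
    // left shift of 2 into one operand.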
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return None;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (ExtInst && isDef32(*ExtInst))
        return None;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}

void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits.
  // Copies and other copy-like instructions can be fed by truncates, or could
  // be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm