PeepholeOptimizer.cpp revision 288943
//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Perform peephole optimizations on the machine code:
//
// - Optimize Extensions
//
//     Optimization of sign / zero extension instructions. It may be extended to
//     handle other instructions with similar properties.
//
//     On some targets, some instructions, e.g. X86 sign / zero extension, may
//     leave the source value in the lower part of the result. This optimization
//     will replace some uses of the pre-extension value with uses of the
//     sub-register of the results.
//
// - Optimize Comparisons
//
//     Optimization of comparison instructions. For instance, in this code:
//
//       sub r1, 1
//       cmp r1, 0
//       bz  L1
//
//     If the "sub" instruction already sets (or could be modified to set) the
//     same flag that the "cmp" instruction sets and that "bz" uses, then we can
//     eliminate the "cmp" instruction.
//
//     Another instance, in this code:
//
//       sub r1, r3 | sub r1, imm
//       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
//       bge L1
//
//     If the branch instruction can use flag from "sub", then we can replace
//     "sub" with "subs" and eliminate the "cmp" instruction.
42239462Sdim// 43249423Sdim// - Optimize Loads: 44249423Sdim// 45249423Sdim// Loads that can be folded into a later instruction. A load is foldable 46249423Sdim// if it loads to virtual registers and the virtual register defined has 47249423Sdim// a single use. 48261991Sdim// 49280031Sdim// - Optimize Copies and Bitcast (more generally, target specific copies): 50261991Sdim// 51261991Sdim// Rewrite copies and bitcasts to avoid cross register bank copies 52261991Sdim// when possible. 53261991Sdim// E.g., Consider the following example, where capital and lower 54261991Sdim// letters denote different register file: 55261991Sdim// b = copy A <-- cross-bank copy 56261991Sdim// C = copy b <-- cross-bank copy 57261991Sdim// => 58261991Sdim// b = copy A <-- cross-bank copy 59261991Sdim// C = copy A <-- same-bank copy 60261991Sdim// 61261991Sdim// E.g., for bitcast: 62261991Sdim// b = bitcast A <-- cross-bank copy 63261991Sdim// C = bitcast b <-- cross-bank copy 64261991Sdim// => 65261991Sdim// b = bitcast A <-- cross-bank copy 66261991Sdim// C = copy A <-- same-bank copy 67212793Sdim//===----------------------------------------------------------------------===// 68212793Sdim 69212793Sdim#include "llvm/CodeGen/Passes.h" 70249423Sdim#include "llvm/ADT/DenseMap.h" 71249423Sdim#include "llvm/ADT/SmallPtrSet.h" 72249423Sdim#include "llvm/ADT/SmallSet.h" 73249423Sdim#include "llvm/ADT/Statistic.h" 74212793Sdim#include "llvm/CodeGen/MachineDominators.h" 75212793Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 76212793Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 77249423Sdim#include "llvm/Support/CommandLine.h" 78249423Sdim#include "llvm/Support/Debug.h" 79288943Sdim#include "llvm/Support/raw_ostream.h" 80212793Sdim#include "llvm/Target/TargetInstrInfo.h" 81212793Sdim#include "llvm/Target/TargetRegisterInfo.h" 82280031Sdim#include "llvm/Target/TargetSubtargetInfo.h" 83280031Sdim#include <utility> 84212793Sdimusing namespace llvm; 85212793Sdim 86276479Sdim#define DEBUG_TYPE 
"peephole-opt" 87276479Sdim 88212793Sdim// Optimize Extensions 89212793Sdimstatic cl::opt<bool> 90212793SdimAggressive("aggressive-ext-opt", cl::Hidden, 91212793Sdim cl::desc("Aggressive extension optimization")); 92212793Sdim 93218893Sdimstatic cl::opt<bool> 94218893SdimDisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 95218893Sdim cl::desc("Disable the peephole optimizer")); 96218893Sdim 97276479Sdimstatic cl::opt<bool> 98280031SdimDisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 99276479Sdim cl::desc("Disable advanced copy optimization")); 100276479Sdim 101212793SdimSTATISTIC(NumReuse, "Number of extension results reused"); 102221345SdimSTATISTIC(NumCmps, "Number of compares eliminated"); 103234353SdimSTATISTIC(NumImmFold, "Number of move immediate folded"); 104239462SdimSTATISTIC(NumLoadFold, "Number of loads folded"); 105239462SdimSTATISTIC(NumSelects, "Number of selects optimized"); 106280031SdimSTATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 107280031SdimSTATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 108212793Sdim 109212793Sdimnamespace { 110212793Sdim class PeepholeOptimizer : public MachineFunctionPass { 111212793Sdim const TargetInstrInfo *TII; 112280031Sdim const TargetRegisterInfo *TRI; 113212793Sdim MachineRegisterInfo *MRI; 114212793Sdim MachineDominatorTree *DT; // Machine dominator tree 115212793Sdim 116212793Sdim public: 117212793Sdim static char ID; // Pass identification 118218893Sdim PeepholeOptimizer() : MachineFunctionPass(ID) { 119218893Sdim initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 120218893Sdim } 121212793Sdim 122276479Sdim bool runOnMachineFunction(MachineFunction &MF) override; 123212793Sdim 124276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 125212793Sdim AU.setPreservesCFG(); 126212793Sdim MachineFunctionPass::getAnalysisUsage(AU); 127212793Sdim if (Aggressive) { 128212793Sdim 
AU.addRequired<MachineDominatorTree>(); 129212793Sdim AU.addPreserved<MachineDominatorTree>(); 130212793Sdim } 131212793Sdim } 132212793Sdim 133212793Sdim private: 134239462Sdim bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 135239462Sdim bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 136280031Sdim SmallPtrSetImpl<MachineInstr*> &LocalMIs); 137280031Sdim bool optimizeSelect(MachineInstr *MI, 138280031Sdim SmallPtrSetImpl<MachineInstr *> &LocalMIs); 139280031Sdim bool optimizeCondBranch(MachineInstr *MI); 140261991Sdim bool optimizeCopyOrBitcast(MachineInstr *MI); 141280031Sdim bool optimizeCoalescableCopy(MachineInstr *MI); 142280031Sdim bool optimizeUncoalescableCopy(MachineInstr *MI, 143280031Sdim SmallPtrSetImpl<MachineInstr *> &LocalMIs); 144280031Sdim bool findNextSource(unsigned &Reg, unsigned &SubReg); 145218893Sdim bool isMoveImmediate(MachineInstr *MI, 146218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 147218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 148239462Sdim bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 149218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 150218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 151276479Sdim bool isLoadFoldable(MachineInstr *MI, 152276479Sdim SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); 153280031Sdim 154280031Sdim /// \brief Check whether \p MI is understood by the register coalescer 155280031Sdim /// but may require some rewriting. 156280031Sdim bool isCoalescableCopy(const MachineInstr &MI) { 157280031Sdim // SubregToRegs are not interesting, because they are already register 158280031Sdim // coalescer friendly. 159280031Sdim return MI.isCopy() || (!DisableAdvCopyOpt && 160280031Sdim (MI.isRegSequence() || MI.isInsertSubreg() || 161280031Sdim MI.isExtractSubreg())); 162280031Sdim } 163280031Sdim 164280031Sdim /// \brief Check whether \p MI is a copy like instruction that is 165280031Sdim /// not recognized by the register coalescer. 
166280031Sdim bool isUncoalescableCopy(const MachineInstr &MI) { 167280031Sdim return MI.isBitcast() || 168280031Sdim (!DisableAdvCopyOpt && 169280031Sdim (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 170280031Sdim MI.isExtractSubregLike())); 171280031Sdim } 172212793Sdim }; 173276479Sdim 174276479Sdim /// \brief Helper class to track the possible sources of a value defined by 175276479Sdim /// a (chain of) copy related instructions. 176276479Sdim /// Given a definition (instruction and definition index), this class 177276479Sdim /// follows the use-def chain to find successive suitable sources. 178276479Sdim /// The given source can be used to rewrite the definition into 179276479Sdim /// def = COPY src. 180276479Sdim /// 181276479Sdim /// For instance, let us consider the following snippet: 182276479Sdim /// v0 = 183276479Sdim /// v2 = INSERT_SUBREG v1, v0, sub0 184276479Sdim /// def = COPY v2.sub0 185276479Sdim /// 186276479Sdim /// Using a ValueTracker for def = COPY v2.sub0 will give the following 187276479Sdim /// suitable sources: 188276479Sdim /// v2.sub0 and v0. 189276479Sdim /// Then, def can be rewritten into def = COPY v0. 190276479Sdim class ValueTracker { 191276479Sdim private: 192276479Sdim /// The current point into the use-def chain. 193276479Sdim const MachineInstr *Def; 194276479Sdim /// The index of the definition in Def. 195276479Sdim unsigned DefIdx; 196276479Sdim /// The sub register index of the definition. 197276479Sdim unsigned DefSubReg; 198276479Sdim /// The register where the value can be found. 199276479Sdim unsigned Reg; 200276479Sdim /// Specifiy whether or not the value tracking looks through 201276479Sdim /// complex instructions. When this is false, the value tracker 202276479Sdim /// bails on everything that is not a copy or a bitcast. 
203276479Sdim /// 204276479Sdim /// Note: This could have been implemented as a specialized version of 205276479Sdim /// the ValueTracker class but that would have complicated the code of 206276479Sdim /// the users of this class. 207276479Sdim bool UseAdvancedTracking; 208280031Sdim /// MachineRegisterInfo used to perform tracking. 209280031Sdim const MachineRegisterInfo &MRI; 210280031Sdim /// Optional TargetInstrInfo used to perform some complex 211276479Sdim /// tracking. 212280031Sdim const TargetInstrInfo *TII; 213276479Sdim 214276479Sdim /// \brief Dispatcher to the right underlying implementation of 215276479Sdim /// getNextSource. 216280031Sdim bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); 217276479Sdim /// \brief Specialized version of getNextSource for Copy instructions. 218280031Sdim bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); 219276479Sdim /// \brief Specialized version of getNextSource for Bitcast instructions. 220280031Sdim bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); 221276479Sdim /// \brief Specialized version of getNextSource for RegSequence 222276479Sdim /// instructions. 223280031Sdim bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); 224276479Sdim /// \brief Specialized version of getNextSource for InsertSubreg 225276479Sdim /// instructions. 226280031Sdim bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 227276479Sdim /// \brief Specialized version of getNextSource for ExtractSubreg 228276479Sdim /// instructions. 229280031Sdim bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 230276479Sdim /// \brief Specialized version of getNextSource for SubregToReg 231276479Sdim /// instructions. 232280031Sdim bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); 233276479Sdim 234276479Sdim public: 235280031Sdim /// \brief Create a ValueTracker instance for the value defined by \p Reg. 
236276479Sdim /// \p DefSubReg represents the sub register index the value tracker will 237280031Sdim /// track. It does not need to match the sub register index used in the 238280031Sdim /// definition of \p Reg. 239276479Sdim /// \p UseAdvancedTracking specifies whether or not the value tracker looks 240276479Sdim /// through complex instructions. By default (false), it handles only copy 241276479Sdim /// and bitcast instructions. 242280031Sdim /// If \p Reg is a physical register, a value tracker constructed with 243280031Sdim /// this constructor will not find any alternative source. 244280031Sdim /// Indeed, when \p Reg is a physical register that constructor does not 245280031Sdim /// know which definition of \p Reg it should track. 246280031Sdim /// Use the next constructor to track a physical register. 247280031Sdim ValueTracker(unsigned Reg, unsigned DefSubReg, 248280031Sdim const MachineRegisterInfo &MRI, 249280031Sdim bool UseAdvancedTracking = false, 250280031Sdim const TargetInstrInfo *TII = nullptr) 251280031Sdim : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), 252280031Sdim UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 253280031Sdim if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 254280031Sdim Def = MRI.getVRegDef(Reg); 255280031Sdim DefIdx = MRI.def_begin(Reg).getOperandNo(); 256280031Sdim } 257280031Sdim } 258280031Sdim 259280031Sdim /// \brief Create a ValueTracker instance for the value defined by 260280031Sdim /// the pair \p MI, \p DefIdx. 261280031Sdim /// Unlike the other constructor, the value tracker produced by this one 262280031Sdim /// may be able to find a new source when the definition is a physical 263280031Sdim /// register. 264280031Sdim /// This could be useful to rewrite target specific instructions into 265280031Sdim /// generic copy instructions. 
266276479Sdim ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, 267280031Sdim const MachineRegisterInfo &MRI, 268276479Sdim bool UseAdvancedTracking = false, 269280031Sdim const TargetInstrInfo *TII = nullptr) 270276479Sdim : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), 271280031Sdim UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 272280031Sdim assert(DefIdx < Def->getDesc().getNumDefs() && 273280031Sdim Def->getOperand(DefIdx).isReg() && "Invalid definition"); 274276479Sdim Reg = Def->getOperand(DefIdx).getReg(); 275276479Sdim } 276276479Sdim 277276479Sdim /// \brief Following the use-def chain, get the next available source 278276479Sdim /// for the tracked value. 279280031Sdim /// When the returned value is not nullptr, \p SrcReg gives the register 280276479Sdim /// that contain the tracked value. 281276479Sdim /// \note The sub register index returned in \p SrcSubReg must be used 282280031Sdim /// on \p SrcReg to access the actual value. 283276479Sdim /// \return Unless the returned value is nullptr (i.e., no source found), 284280031Sdim /// \p SrcReg gives the register of the next source used in the returned 285280031Sdim /// instruction and \p SrcSubReg the sub-register index to be used on that 286280031Sdim /// source to get the tracked value. When nullptr is returned, no 287280031Sdim /// alternative source has been found. 288280031Sdim const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); 289276479Sdim 290276479Sdim /// \brief Get the last register where the initial value can be found. 291276479Sdim /// Initially this is the register of the definition. 292276479Sdim /// Then, after each successful call to getNextSource, this is the 293276479Sdim /// register of the last source. 
294276479Sdim unsigned getReg() const { return Reg; } 295276479Sdim }; 296212793Sdim} 297212793Sdim 298212793Sdimchar PeepholeOptimizer::ID = 0; 299234353Sdimchar &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 300218893SdimINITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 301218893Sdim "Peephole Optimizations", false, false) 302218893SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 303218893SdimINITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 304218893Sdim "Peephole Optimizations", false, false) 305212793Sdim 306239462Sdim/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 307212793Sdim/// a single register and writes a single register and it does not modify the 308212793Sdim/// source, and if the source value is preserved as a sub-register of the 309212793Sdim/// result, then replace all reachable uses of the source with the subreg of the 310212793Sdim/// result. 311234353Sdim/// 312212793Sdim/// Do not generate an EXTRACT that is used only in a debug use, as this changes 313212793Sdim/// the code. Since this code does not currently share EXTRACTs, just ignore all 314212793Sdim/// debug uses. 315212793Sdimbool PeepholeOptimizer:: 316239462SdimoptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 317280031Sdim SmallPtrSetImpl<MachineInstr*> &LocalMIs) { 318212793Sdim unsigned SrcReg, DstReg, SubIdx; 319212793Sdim if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 320212793Sdim return false; 321234353Sdim 322212793Sdim if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 323212793Sdim TargetRegisterInfo::isPhysicalRegister(SrcReg)) 324212793Sdim return false; 325212793Sdim 326239462Sdim if (MRI->hasOneNonDBGUse(SrcReg)) 327212793Sdim // No other uses. 328212793Sdim return false; 329212793Sdim 330239462Sdim // Ensure DstReg can get a register class that actually supports 331239462Sdim // sub-registers. Don't change the class until we commit. 
332239462Sdim const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 333280031Sdim DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); 334239462Sdim if (!DstRC) 335239462Sdim return false; 336239462Sdim 337239462Sdim // The ext instr may be operating on a sub-register of SrcReg as well. 338239462Sdim // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit 339239462Sdim // register. 340239462Sdim // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of 341239462Sdim // SrcReg:SubIdx should be replaced. 342280031Sdim bool UseSrcSubIdx = 343280031Sdim TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; 344239462Sdim 345212793Sdim // The source has other uses. See if we can replace the other uses with use of 346212793Sdim // the result of the extension. 347212793Sdim SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 348276479Sdim for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 349276479Sdim ReachedBBs.insert(UI.getParent()); 350212793Sdim 351212793Sdim // Uses that are in the same BB of uses of the result of the instruction. 352212793Sdim SmallVector<MachineOperand*, 8> Uses; 353212793Sdim 354212793Sdim // Uses that the result of the instruction can reach. 355212793Sdim SmallVector<MachineOperand*, 8> ExtendedUses; 356212793Sdim 357212793Sdim bool ExtendLife = true; 358276479Sdim for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 359276479Sdim MachineInstr *UseMI = UseMO.getParent(); 360212793Sdim if (UseMI == MI) 361212793Sdim continue; 362212793Sdim 363212793Sdim if (UseMI->isPHI()) { 364212793Sdim ExtendLife = false; 365212793Sdim continue; 366212793Sdim } 367212793Sdim 368239462Sdim // Only accept uses of SrcReg:SubIdx. 369239462Sdim if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) 370239462Sdim continue; 371239462Sdim 372212793Sdim // It's an error to translate this: 373212793Sdim // 374212793Sdim // %reg1025 = <sext> %reg1024 375212793Sdim // ... 
376212793Sdim // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 377212793Sdim // 378212793Sdim // into this: 379212793Sdim // 380212793Sdim // %reg1025 = <sext> %reg1024 381212793Sdim // ... 382212793Sdim // %reg1027 = COPY %reg1025:4 383212793Sdim // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 384212793Sdim // 385212793Sdim // The problem here is that SUBREG_TO_REG is there to assert that an 386212793Sdim // implicit zext occurs. It doesn't insert a zext instruction. If we allow 387212793Sdim // the COPY here, it will give us the value after the <sext>, not the 388212793Sdim // original value of %reg1024 before <sext>. 389212793Sdim if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 390212793Sdim continue; 391212793Sdim 392212793Sdim MachineBasicBlock *UseMBB = UseMI->getParent(); 393212793Sdim if (UseMBB == MBB) { 394212793Sdim // Local uses that come after the extension. 395212793Sdim if (!LocalMIs.count(UseMI)) 396212793Sdim Uses.push_back(&UseMO); 397212793Sdim } else if (ReachedBBs.count(UseMBB)) { 398212793Sdim // Non-local uses where the result of the extension is used. Always 399212793Sdim // replace these unless it's a PHI. 400212793Sdim Uses.push_back(&UseMO); 401212793Sdim } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 402212793Sdim // We may want to extend the live range of the extension result in order 403212793Sdim // to replace these uses. 404212793Sdim ExtendedUses.push_back(&UseMO); 405212793Sdim } else { 406212793Sdim // Both will be live out of the def MBB anyway. Don't extend live range of 407212793Sdim // the extension result. 408212793Sdim ExtendLife = false; 409212793Sdim break; 410212793Sdim } 411212793Sdim } 412212793Sdim 413212793Sdim if (ExtendLife && !ExtendedUses.empty()) 414212793Sdim // Extend the liveness of the extension result. 415288943Sdim Uses.append(ExtendedUses.begin(), ExtendedUses.end()); 416212793Sdim 417212793Sdim // Now replace all uses. 
418212793Sdim bool Changed = false; 419212793Sdim if (!Uses.empty()) { 420212793Sdim SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 421212793Sdim 422212793Sdim // Look for PHI uses of the extended result, we don't want to extend the 423212793Sdim // liveness of a PHI input. It breaks all kinds of assumptions down 424212793Sdim // stream. A PHI use is expected to be the kill of its source values. 425276479Sdim for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 426276479Sdim if (UI.isPHI()) 427276479Sdim PHIBBs.insert(UI.getParent()); 428212793Sdim 429212793Sdim const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 430212793Sdim for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 431212793Sdim MachineOperand *UseMO = Uses[i]; 432212793Sdim MachineInstr *UseMI = UseMO->getParent(); 433212793Sdim MachineBasicBlock *UseMBB = UseMI->getParent(); 434212793Sdim if (PHIBBs.count(UseMBB)) 435212793Sdim continue; 436212793Sdim 437234353Sdim // About to add uses of DstReg, clear DstReg's kill flags. 438239462Sdim if (!Changed) { 439234353Sdim MRI->clearKillFlags(DstReg); 440239462Sdim MRI->constrainRegClass(DstReg, DstRC); 441239462Sdim } 442234353Sdim 443212793Sdim unsigned NewVR = MRI->createVirtualRegister(RC); 444239462Sdim MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 445239462Sdim TII->get(TargetOpcode::COPY), NewVR) 446212793Sdim .addReg(DstReg, 0, SubIdx); 447239462Sdim // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. 
448239462Sdim if (UseSrcSubIdx) { 449239462Sdim Copy->getOperand(0).setSubReg(SubIdx); 450239462Sdim Copy->getOperand(0).setIsUndef(); 451239462Sdim } 452212793Sdim UseMO->setReg(NewVR); 453212793Sdim ++NumReuse; 454212793Sdim Changed = true; 455212793Sdim } 456212793Sdim } 457212793Sdim 458212793Sdim return Changed; 459212793Sdim} 460212793Sdim 461239462Sdim/// optimizeCmpInstr - If the instruction is a compare and the previous 462212793Sdim/// instruction it's comparing against all ready sets (or could be modified to 463212793Sdim/// set) the same flag as the compare, then we can remove the comparison and use 464212793Sdim/// the flag from the previous instruction. 465239462Sdimbool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, 466221345Sdim MachineBasicBlock *MBB) { 467212793Sdim // If this instruction is a comparison against zero and isn't comparing a 468212793Sdim // physical register, we can try to optimize it. 469239462Sdim unsigned SrcReg, SrcReg2; 470218893Sdim int CmpMask, CmpValue; 471239462Sdim if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || 472239462Sdim TargetRegisterInfo::isPhysicalRegister(SrcReg) || 473239462Sdim (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) 474212793Sdim return false; 475212793Sdim 476218893Sdim // Attempt to optimize the comparison instruction. 477239462Sdim if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { 478221345Sdim ++NumCmps; 479212793Sdim return true; 480212793Sdim } 481212793Sdim 482212793Sdim return false; 483212793Sdim} 484212793Sdim 485239462Sdim/// Optimize a select instruction. 
486280031Sdimbool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, 487280031Sdim SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 488239462Sdim unsigned TrueOp = 0; 489239462Sdim unsigned FalseOp = 0; 490239462Sdim bool Optimizable = false; 491239462Sdim SmallVector<MachineOperand, 4> Cond; 492239462Sdim if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) 493239462Sdim return false; 494239462Sdim if (!Optimizable) 495239462Sdim return false; 496280031Sdim if (!TII->optimizeSelect(MI, LocalMIs)) 497239462Sdim return false; 498239462Sdim MI->eraseFromParent(); 499239462Sdim ++NumSelects; 500239462Sdim return true; 501239462Sdim} 502239462Sdim 503280031Sdim/// \brief Check if a simpler conditional branch can be 504280031Sdim// generated 505280031Sdimbool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { 506280031Sdim return TII->optimizeCondBranch(MI); 507280031Sdim} 508280031Sdim 509261991Sdim/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) 510261991Sdim/// share the same register file. 511261991Sdimstatic bool shareSameRegisterFile(const TargetRegisterInfo &TRI, 512261991Sdim const TargetRegisterClass *DefRC, 513261991Sdim unsigned DefSubReg, 514261991Sdim const TargetRegisterClass *SrcRC, 515261991Sdim unsigned SrcSubReg) { 516261991Sdim // Same register class. 517261991Sdim if (DefRC == SrcRC) 518261991Sdim return true; 519261991Sdim 520261991Sdim // Both operands are sub registers. Check if they share a register class. 521261991Sdim unsigned SrcIdx, DefIdx; 522261991Sdim if (SrcSubReg && DefSubReg) 523261991Sdim return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, 524276479Sdim SrcIdx, DefIdx) != nullptr; 525261991Sdim // At most one of the register is a sub register, make it Src to avoid 526261991Sdim // duplicating the test. 
527261991Sdim if (!SrcSubReg) { 528261991Sdim std::swap(DefSubReg, SrcSubReg); 529261991Sdim std::swap(DefRC, SrcRC); 530261991Sdim } 531261991Sdim 532261991Sdim // One of the register is a sub register, check if we can get a superclass. 533261991Sdim if (SrcSubReg) 534276479Sdim return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; 535261991Sdim // Plain copy. 536276479Sdim return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; 537261991Sdim} 538261991Sdim 539280031Sdim/// \brief Try to find the next source that share the same register file 540280031Sdim/// for the value defined by \p Reg and \p SubReg. 541280031Sdim/// When true is returned, \p Reg and \p SubReg are updated with the 542280031Sdim/// register number and sub-register index of the new source. 543280031Sdim/// \return False if no alternative sources are available. True otherwise. 544280031Sdimbool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { 545280031Sdim // Do not try to find a new source for a physical register. 546280031Sdim // So far we do not have any motivating example for doing that. 547280031Sdim // Thus, instead of maintaining untested code, we will revisit that if 548280031Sdim // that changes at some point. 549280031Sdim if (TargetRegisterInfo::isPhysicalRegister(Reg)) 550261991Sdim return false; 551261991Sdim 552280031Sdim const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 553280031Sdim unsigned DefSubReg = SubReg; 554261991Sdim 555261991Sdim unsigned Src; 556261991Sdim unsigned SrcSubReg; 557261991Sdim bool ShouldRewrite = false; 558261991Sdim 559276479Sdim // Follow the chain of copies until we reach the top of the use-def chain 560276479Sdim // or find a more suitable source. 
561280031Sdim ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); 562261991Sdim do { 563280031Sdim unsigned CopySrcReg, CopySrcSubReg; 564280031Sdim if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) 565261991Sdim break; 566280031Sdim Src = CopySrcReg; 567276479Sdim SrcSubReg = CopySrcSubReg; 568261991Sdim 569276479Sdim // Do not extend the live-ranges of physical registers as they add 570276479Sdim // constraints to the register allocator. 571276479Sdim // Moreover, if we want to extend the live-range of a physical register, 572276479Sdim // unlike SSA virtual register, we will have to check that they are not 573276479Sdim // redefine before the related use. 574261991Sdim if (TargetRegisterInfo::isPhysicalRegister(Src)) 575261991Sdim break; 576261991Sdim 577261991Sdim const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); 578261991Sdim 579261991Sdim // If this source does not incur a cross register bank copy, use it. 580280031Sdim ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, 581261991Sdim SrcSubReg); 582276479Sdim } while (!ShouldRewrite); 583261991Sdim 584261991Sdim // If we did not find a more suitable source, there is nothing to optimize. 585280031Sdim if (!ShouldRewrite || Src == Reg) 586261991Sdim return false; 587261991Sdim 588280031Sdim Reg = Src; 589280031Sdim SubReg = SrcSubReg; 590280031Sdim return true; 591280031Sdim} 592261991Sdim 593280031Sdimnamespace { 594280031Sdim/// \brief Helper class to rewrite the arguments of a copy-like instruction. 595280031Sdimclass CopyRewriter { 596280031Sdimprotected: 597280031Sdim /// The copy-like instruction. 598280031Sdim MachineInstr &CopyLike; 599280031Sdim /// The index of the source being rewritten. 
600280031Sdim unsigned CurrentSrcIdx; 601280031Sdim 602280031Sdimpublic: 603280031Sdim CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} 604280031Sdim 605280031Sdim virtual ~CopyRewriter() {} 606280031Sdim 607280031Sdim /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and 608280031Sdim /// the related value that it affects (TrackReg, TrackSubReg). 609280031Sdim /// A source is considered rewritable if its register class and the 610280031Sdim /// register class of the related TrackReg may not be register 611280031Sdim /// coalescer friendly. In other words, given a copy-like instruction 612280031Sdim /// not all the arguments may be returned at rewritable source, since 613280031Sdim /// some arguments are none to be register coalescer friendly. 614280031Sdim /// 615280031Sdim /// Each call of this method moves the current source to the next 616280031Sdim /// rewritable source. 617280031Sdim /// For instance, let CopyLike be the instruction to rewrite. 618280031Sdim /// CopyLike has one definition and one source: 619280031Sdim /// dst.dstSubIdx = CopyLike src.srcSubIdx. 620280031Sdim /// 621280031Sdim /// The first call will give the first rewritable source, i.e., 622280031Sdim /// the only source this instruction has: 623280031Sdim /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 624280031Sdim /// This source defines the whole definition, i.e., 625280031Sdim /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). 626280031Sdim /// 627280031Sdim /// The second and subsequent calls will return false, has there is only one 628280031Sdim /// rewritable source. 629280031Sdim /// 630280031Sdim /// \return True if a rewritable source has been found, false otherwise. 631280031Sdim /// The output arguments are valid if and only if true is returned. 
632280031Sdim virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 633280031Sdim unsigned &TrackReg, 634280031Sdim unsigned &TrackSubReg) { 635280031Sdim // If CurrentSrcIdx == 1, this means this function has already been 636280031Sdim // called once. CopyLike has one defintiion and one argument, thus, 637280031Sdim // there is nothing else to rewrite. 638280031Sdim if (!CopyLike.isCopy() || CurrentSrcIdx == 1) 639280031Sdim return false; 640280031Sdim // This is the first call to getNextRewritableSource. 641280031Sdim // Move the CurrentSrcIdx to remember that we made that call. 642280031Sdim CurrentSrcIdx = 1; 643280031Sdim // The rewritable source is the argument. 644280031Sdim const MachineOperand &MOSrc = CopyLike.getOperand(1); 645280031Sdim SrcReg = MOSrc.getReg(); 646280031Sdim SrcSubReg = MOSrc.getSubReg(); 647280031Sdim // What we track are the alternative sources of the definition. 648280031Sdim const MachineOperand &MODef = CopyLike.getOperand(0); 649280031Sdim TrackReg = MODef.getReg(); 650280031Sdim TrackSubReg = MODef.getSubReg(); 651280031Sdim return true; 652280031Sdim } 653280031Sdim 654280031Sdim /// \brief Rewrite the current source with \p NewReg and \p NewSubReg 655280031Sdim /// if possible. 656280031Sdim /// \return True if the rewritting was possible, false otherwise. 657280031Sdim virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { 658280031Sdim if (!CopyLike.isCopy() || CurrentSrcIdx != 1) 659280031Sdim return false; 660280031Sdim MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 661280031Sdim MOSrc.setReg(NewReg); 662280031Sdim MOSrc.setSubReg(NewSubReg); 663280031Sdim return true; 664280031Sdim } 665280031Sdim}; 666280031Sdim 667280031Sdim/// \brief Specialized rewriter for INSERT_SUBREG instruction. 
668280031Sdimclass InsertSubregRewriter : public CopyRewriter { 669280031Sdimpublic: 670280031Sdim InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { 671280031Sdim assert(MI.isInsertSubreg() && "Invalid instruction"); 672280031Sdim } 673280031Sdim 674280031Sdim /// \brief See CopyRewriter::getNextRewritableSource. 675280031Sdim /// Here CopyLike has the following form: 676280031Sdim /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 677280031Sdim /// Src1 has the same register class has dst, hence, there is 678280031Sdim /// nothing to rewrite. 679280031Sdim /// Src2.src2SubIdx, may not be register coalescer friendly. 680280031Sdim /// Therefore, the first call to this method returns: 681280031Sdim /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 682280031Sdim /// (TrackReg, TrackSubReg) = (dst, subIdx). 683280031Sdim /// 684280031Sdim /// Subsequence calls will return false. 685280031Sdim bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 686280031Sdim unsigned &TrackReg, 687280031Sdim unsigned &TrackSubReg) override { 688280031Sdim // If we already get the only source we can rewrite, return false. 689280031Sdim if (CurrentSrcIdx == 2) 690280031Sdim return false; 691280031Sdim // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. 692280031Sdim CurrentSrcIdx = 2; 693280031Sdim const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); 694280031Sdim SrcReg = MOInsertedReg.getReg(); 695280031Sdim SrcSubReg = MOInsertedReg.getSubReg(); 696280031Sdim const MachineOperand &MODef = CopyLike.getOperand(0); 697280031Sdim 698280031Sdim // We want to track something that is compatible with the 699280031Sdim // partial definition. 700280031Sdim TrackReg = MODef.getReg(); 701280031Sdim if (MODef.getSubReg()) 702280031Sdim // Bails if we have to compose sub-register indices. 
703280031Sdim return false; 704280031Sdim TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); 705280031Sdim return true; 706280031Sdim } 707280031Sdim bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 708280031Sdim if (CurrentSrcIdx != 2) 709280031Sdim return false; 710280031Sdim // We are rewriting the inserted reg. 711280031Sdim MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 712280031Sdim MO.setReg(NewReg); 713280031Sdim MO.setSubReg(NewSubReg); 714280031Sdim return true; 715280031Sdim } 716280031Sdim}; 717280031Sdim 718280031Sdim/// \brief Specialized rewriter for EXTRACT_SUBREG instruction. 719280031Sdimclass ExtractSubregRewriter : public CopyRewriter { 720280031Sdim const TargetInstrInfo &TII; 721280031Sdim 722280031Sdimpublic: 723280031Sdim ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) 724280031Sdim : CopyRewriter(MI), TII(TII) { 725280031Sdim assert(MI.isExtractSubreg() && "Invalid instruction"); 726280031Sdim } 727280031Sdim 728280031Sdim /// \brief See CopyRewriter::getNextRewritableSource. 729280031Sdim /// Here CopyLike has the following form: 730280031Sdim /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. 731280031Sdim /// There is only one rewritable source: Src.subIdx, 732280031Sdim /// which defines dst.dstSubIdx. 733280031Sdim bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 734280031Sdim unsigned &TrackReg, 735280031Sdim unsigned &TrackSubReg) override { 736280031Sdim // If we already get the only source we can rewrite, return false. 737280031Sdim if (CurrentSrcIdx == 1) 738280031Sdim return false; 739280031Sdim // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 740280031Sdim CurrentSrcIdx = 1; 741280031Sdim const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 742280031Sdim SrcReg = MOExtractedReg.getReg(); 743280031Sdim // If we have to compose sub-register indices, bails out. 
744280031Sdim if (MOExtractedReg.getSubReg()) 745280031Sdim return false; 746280031Sdim 747280031Sdim SrcSubReg = CopyLike.getOperand(2).getImm(); 748280031Sdim 749280031Sdim // We want to track something that is compatible with the definition. 750280031Sdim const MachineOperand &MODef = CopyLike.getOperand(0); 751280031Sdim TrackReg = MODef.getReg(); 752280031Sdim TrackSubReg = MODef.getSubReg(); 753280031Sdim return true; 754280031Sdim } 755280031Sdim 756280031Sdim bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 757280031Sdim // The only source we can rewrite is the input register. 758280031Sdim if (CurrentSrcIdx != 1) 759280031Sdim return false; 760280031Sdim 761280031Sdim CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); 762280031Sdim 763280031Sdim // If we find a source that does not require to extract something, 764280031Sdim // rewrite the operation with a copy. 765280031Sdim if (!NewSubReg) { 766280031Sdim // Move the current index to an invalid position. 767280031Sdim // We do not want another call to this method to be able 768280031Sdim // to do any change. 769280031Sdim CurrentSrcIdx = -1; 770280031Sdim // Rewrite the operation as a COPY. 771280031Sdim // Get rid of the sub-register index. 772280031Sdim CopyLike.RemoveOperand(2); 773280031Sdim // Morph the operation into a COPY. 774280031Sdim CopyLike.setDesc(TII.get(TargetOpcode::COPY)); 775280031Sdim return true; 776280031Sdim } 777280031Sdim CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); 778280031Sdim return true; 779280031Sdim } 780280031Sdim}; 781280031Sdim 782280031Sdim/// \brief Specialized rewriter for REG_SEQUENCE instruction. 783280031Sdimclass RegSequenceRewriter : public CopyRewriter { 784280031Sdimpublic: 785280031Sdim RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { 786280031Sdim assert(MI.isRegSequence() && "Invalid instruction"); 787280031Sdim } 788280031Sdim 789280031Sdim /// \brief See CopyRewriter::getNextRewritableSource. 
790280031Sdim /// Here CopyLike has the following form: 791280031Sdim /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. 792280031Sdim /// Each call will return a different source, walking all the available 793280031Sdim /// source. 794280031Sdim /// 795280031Sdim /// The first call returns: 796280031Sdim /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 797280031Sdim /// (TrackReg, TrackSubReg) = (dst, subIdx1). 798280031Sdim /// 799280031Sdim /// The second call returns: 800280031Sdim /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 801280031Sdim /// (TrackReg, TrackSubReg) = (dst, subIdx2). 802280031Sdim /// 803280031Sdim /// And so on, until all the sources have been traversed, then 804280031Sdim /// it returns false. 805280031Sdim bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 806280031Sdim unsigned &TrackReg, 807280031Sdim unsigned &TrackSubReg) override { 808280031Sdim // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. 809280031Sdim 810280031Sdim // If this is the first call, move to the first argument. 811280031Sdim if (CurrentSrcIdx == 0) { 812280031Sdim CurrentSrcIdx = 1; 813280031Sdim } else { 814280031Sdim // Otherwise, move to the next argument and check that it is valid. 815280031Sdim CurrentSrcIdx += 2; 816280031Sdim if (CurrentSrcIdx >= CopyLike.getNumOperands()) 817280031Sdim return false; 818280031Sdim } 819280031Sdim const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 820280031Sdim SrcReg = MOInsertedReg.getReg(); 821280031Sdim // If we have to compose sub-register indices, bails out. 822280031Sdim if ((SrcSubReg = MOInsertedReg.getSubReg())) 823280031Sdim return false; 824280031Sdim 825280031Sdim // We want to track something that is compatible with the related 826280031Sdim // partial definition. 
827280031Sdim TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 828280031Sdim 829280031Sdim const MachineOperand &MODef = CopyLike.getOperand(0); 830280031Sdim TrackReg = MODef.getReg(); 831280031Sdim // If we have to compose sub-registers, bails. 832280031Sdim return MODef.getSubReg() == 0; 833280031Sdim } 834280031Sdim 835280031Sdim bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 836280031Sdim // We cannot rewrite out of bound operands. 837280031Sdim // Moreover, rewritable sources are at odd positions. 838280031Sdim if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) 839280031Sdim return false; 840280031Sdim 841280031Sdim MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 842280031Sdim MO.setReg(NewReg); 843280031Sdim MO.setSubReg(NewSubReg); 844280031Sdim return true; 845280031Sdim } 846280031Sdim}; 847280031Sdim} // End namespace. 848280031Sdim 849280031Sdim/// \brief Get the appropriated CopyRewriter for \p MI. 850280031Sdim/// \return A pointer to a dynamically allocated CopyRewriter or nullptr 851280031Sdim/// if no rewriter works for \p MI. 852280031Sdimstatic CopyRewriter *getCopyRewriter(MachineInstr &MI, 853280031Sdim const TargetInstrInfo &TII) { 854280031Sdim switch (MI.getOpcode()) { 855280031Sdim default: 856280031Sdim return nullptr; 857280031Sdim case TargetOpcode::COPY: 858280031Sdim return new CopyRewriter(MI); 859280031Sdim case TargetOpcode::INSERT_SUBREG: 860280031Sdim return new InsertSubregRewriter(MI); 861280031Sdim case TargetOpcode::EXTRACT_SUBREG: 862280031Sdim return new ExtractSubregRewriter(MI, TII); 863280031Sdim case TargetOpcode::REG_SEQUENCE: 864280031Sdim return new RegSequenceRewriter(MI); 865280031Sdim } 866280031Sdim llvm_unreachable(nullptr); 867280031Sdim} 868280031Sdim 869280031Sdim/// \brief Optimize generic copy instructions to avoid cross 870280031Sdim/// register bank copy. 
The optimization looks through a chain of 871280031Sdim/// copies and tries to find a source that has a compatible register 872280031Sdim/// class. 873280031Sdim/// Two register classes are considered to be compatible if they share 874280031Sdim/// the same register bank. 875280031Sdim/// New copies issued by this optimization are register allocator 876280031Sdim/// friendly. This optimization does not remove any copy as it may 877280031Sdim/// overconstraint the register allocator, but replaces some operands 878280031Sdim/// when possible. 879280031Sdim/// \pre isCoalescableCopy(*MI) is true. 880280031Sdim/// \return True, when \p MI has been rewritten. False otherwise. 881280031Sdimbool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { 882280031Sdim assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); 883280031Sdim assert(MI->getDesc().getNumDefs() == 1 && 884280031Sdim "Coalescer can understand multiple defs?!"); 885280031Sdim const MachineOperand &MODef = MI->getOperand(0); 886280031Sdim // Do not rewrite physical definitions. 887280031Sdim if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 888280031Sdim return false; 889280031Sdim 890280031Sdim bool Changed = false; 891280031Sdim // Get the right rewriter for the current copy. 892280031Sdim std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); 893280031Sdim // If none exists, bails out. 894280031Sdim if (!CpyRewriter) 895280031Sdim return false; 896280031Sdim // Rewrite each rewritable source. 897280031Sdim unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 898280031Sdim while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 899280031Sdim TrackSubReg)) { 900280031Sdim unsigned NewSrc = TrackReg; 901280031Sdim unsigned NewSubReg = TrackSubReg; 902280031Sdim // Try to find a more suitable source. 903280031Sdim // If we failed to do so, or get the actual source, 904280031Sdim // move to the next source. 
905280031Sdim if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) 906280031Sdim continue; 907280031Sdim // Rewrite source. 908280031Sdim if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { 909280031Sdim // We may have extended the live-range of NewSrc, account for that. 910280031Sdim MRI->clearKillFlags(NewSrc); 911280031Sdim Changed = true; 912280031Sdim } 913280031Sdim } 914280031Sdim // TODO: We could have a clean-up method to tidy the instruction. 915280031Sdim // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 916280031Sdim // => v0 = COPY v1 917280031Sdim // Currently we haven't seen motivating example for that and we 918280031Sdim // want to avoid untested code. 919288943Sdim NumRewrittenCopies += Changed; 920280031Sdim return Changed; 921280031Sdim} 922280031Sdim 923280031Sdim/// \brief Optimize copy-like instructions to create 924280031Sdim/// register coalescer friendly instruction. 925280031Sdim/// The optimization tries to kill-off the \p MI by looking 926280031Sdim/// through a chain of copies to find a source that has a compatible 927280031Sdim/// register class. 928280031Sdim/// If such a source is found, it replace \p MI by a generic COPY 929280031Sdim/// operation. 930280031Sdim/// \pre isUncoalescableCopy(*MI) is true. 931280031Sdim/// \return True, when \p MI has been optimized. In that case, \p MI has 932280031Sdim/// been removed from its parent. 933280031Sdim/// All COPY instructions created, are inserted in \p LocalMIs. 934280031Sdimbool PeepholeOptimizer::optimizeUncoalescableCopy( 935280031Sdim MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 936280031Sdim assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); 937280031Sdim 938280031Sdim // Check if we can rewrite all the values defined by this instruction. 
939280031Sdim SmallVector< 940280031Sdim std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, 941280031Sdim 4> RewritePairs; 942280031Sdim for (const MachineOperand &MODef : MI->defs()) { 943280031Sdim if (MODef.isDead()) 944280031Sdim // We can ignore those. 945280031Sdim continue; 946280031Sdim 947280031Sdim // If a physical register is here, this is probably for a good reason. 948280031Sdim // Do not rewrite that. 949280031Sdim if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 950280031Sdim return false; 951280031Sdim 952280031Sdim // If we do not know how to rewrite this definition, there is no point 953280031Sdim // in trying to kill this instruction. 954280031Sdim TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); 955280031Sdim TargetInstrInfo::RegSubRegPair Src = Def; 956280031Sdim if (!findNextSource(Src.Reg, Src.SubReg)) 957280031Sdim return false; 958280031Sdim RewritePairs.push_back(std::make_pair(Def, Src)); 959280031Sdim } 960280031Sdim // The change is possible for all defs, do it. 961280031Sdim for (const auto &PairDefSrc : RewritePairs) { 962280031Sdim const auto &Def = PairDefSrc.first; 963280031Sdim const auto &Src = PairDefSrc.second; 964280031Sdim // Rewrite the "copy" in a way the register coalescer understands. 
965280031Sdim assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && 966280031Sdim "We do not rewrite physical registers"); 967280031Sdim const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); 968280031Sdim unsigned NewVR = MRI->createVirtualRegister(DefRC); 969280031Sdim MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 970280031Sdim TII->get(TargetOpcode::COPY), 971280031Sdim NewVR).addReg(Src.Reg, 0, Src.SubReg); 972280031Sdim NewCopy->getOperand(0).setSubReg(Def.SubReg); 973280031Sdim if (Def.SubReg) 974280031Sdim NewCopy->getOperand(0).setIsUndef(); 975280031Sdim LocalMIs.insert(NewCopy); 976280031Sdim MRI->replaceRegWith(Def.Reg, NewVR); 977280031Sdim MRI->clearKillFlags(NewVR); 978280031Sdim // We extended the lifetime of Src. 979280031Sdim // Clear the kill flags to account for that. 980280031Sdim MRI->clearKillFlags(Src.Reg); 981280031Sdim } 982280031Sdim // MI is now dead. 983261991Sdim MI->eraseFromParent(); 984280031Sdim ++NumUncoalescableCopies; 985261991Sdim return true; 986261991Sdim} 987261991Sdim 988239462Sdim/// isLoadFoldable - Check whether MI is a candidate for folding into a later 989239462Sdim/// instruction. We only fold loads to virtual registers and the virtual 990239462Sdim/// register defined has a single use. 991276479Sdimbool PeepholeOptimizer::isLoadFoldable( 992276479Sdim MachineInstr *MI, 993276479Sdim SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { 994239462Sdim if (!MI->canFoldAsLoad() || !MI->mayLoad()) 995239462Sdim return false; 996239462Sdim const MCInstrDesc &MCID = MI->getDesc(); 997239462Sdim if (MCID.getNumDefs() != 1) 998239462Sdim return false; 999239462Sdim 1000239462Sdim unsigned Reg = MI->getOperand(0).getReg(); 1001276479Sdim // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting 1002239462Sdim // loads. It should be checked when processing uses of the load, since 1003239462Sdim // uses can be removed during peephole. 
1004239462Sdim if (!MI->getOperand(0).getSubReg() && 1005239462Sdim TargetRegisterInfo::isVirtualRegister(Reg) && 1006276479Sdim MRI->hasOneNonDBGUse(Reg)) { 1007276479Sdim FoldAsLoadDefCandidates.insert(Reg); 1008239462Sdim return true; 1009239462Sdim } 1010239462Sdim return false; 1011239462Sdim} 1012239462Sdim 1013218893Sdimbool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 1014218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 1015218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1016224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 1017234353Sdim if (!MI->isMoveImmediate()) 1018218893Sdim return false; 1019224145Sdim if (MCID.getNumDefs() != 1) 1020218893Sdim return false; 1021218893Sdim unsigned Reg = MI->getOperand(0).getReg(); 1022218893Sdim if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1023218893Sdim ImmDefMIs.insert(std::make_pair(Reg, MI)); 1024218893Sdim ImmDefRegs.insert(Reg); 1025218893Sdim return true; 1026218893Sdim } 1027234353Sdim 1028218893Sdim return false; 1029218893Sdim} 1030218893Sdim 1031239462Sdim/// foldImmediate - Try folding register operands that are defined by move 1032218893Sdim/// immediate instructions, i.e. a trivial constant folding optimization, if 1033218893Sdim/// and only if the def and use are in the same BB. 
1034239462Sdimbool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 1035218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 1036218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1037218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1038218893Sdim MachineOperand &MO = MI->getOperand(i); 1039218893Sdim if (!MO.isReg() || MO.isDef()) 1040218893Sdim continue; 1041218893Sdim unsigned Reg = MO.getReg(); 1042218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1043218893Sdim continue; 1044218893Sdim if (ImmDefRegs.count(Reg) == 0) 1045218893Sdim continue; 1046218893Sdim DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); 1047218893Sdim assert(II != ImmDefMIs.end()); 1048218893Sdim if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { 1049218893Sdim ++NumImmFold; 1050218893Sdim return true; 1051218893Sdim } 1052218893Sdim } 1053218893Sdim return false; 1054218893Sdim} 1055218893Sdim 1056212793Sdimbool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 1057276479Sdim if (skipOptnoneFunction(*MF.getFunction())) 1058276479Sdim return false; 1059276479Sdim 1060249423Sdim DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 1061249423Sdim DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); 1062249423Sdim 1063218893Sdim if (DisablePeephole) 1064218893Sdim return false; 1065234353Sdim 1066280031Sdim TII = MF.getSubtarget().getInstrInfo(); 1067280031Sdim TRI = MF.getSubtarget().getRegisterInfo(); 1068212793Sdim MRI = &MF.getRegInfo(); 1069276479Sdim DT = Aggressive ? 
&getAnalysis<MachineDominatorTree>() : nullptr; 1070212793Sdim 1071212793Sdim bool Changed = false; 1072212793Sdim 1073212793Sdim for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 1074212793Sdim MachineBasicBlock *MBB = &*I; 1075234353Sdim 1076218893Sdim bool SeenMoveImm = false; 1077280031Sdim 1078280031Sdim // During this forward scan, at some point it needs to answer the question 1079280031Sdim // "given a pointer to an MI in the current BB, is it located before or 1080280031Sdim // after the current instruction". 1081280031Sdim // To perform this, the following set keeps track of the MIs already seen 1082280031Sdim // during the scan, if a MI is not in the set, it is assumed to be located 1083280031Sdim // after. Newly created MIs have to be inserted in the set as well. 1084280031Sdim SmallPtrSet<MachineInstr*, 16> LocalMIs; 1085276479Sdim SmallSet<unsigned, 4> ImmDefRegs; 1086276479Sdim DenseMap<unsigned, MachineInstr*> ImmDefMIs; 1087276479Sdim SmallSet<unsigned, 16> FoldAsLoadDefCandidates; 1088212793Sdim 1089212793Sdim for (MachineBasicBlock::iterator 1090218893Sdim MII = I->begin(), MIE = I->end(); MII != MIE; ) { 1091212793Sdim MachineInstr *MI = &*MII; 1092239462Sdim // We may be erasing MI below, increment MII now. 1093239462Sdim ++MII; 1094218893Sdim LocalMIs.insert(MI); 1095212793Sdim 1096276479Sdim // Skip debug values. They should not affect this peephole optimization. 1097276479Sdim if (MI->isDebugValue()) 1098276479Sdim continue; 1099276479Sdim 1100239462Sdim // If there exists an instruction which belongs to the following 1101276479Sdim // categories, we will discard the load candidates. 
1102276479Sdim if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || 1103276479Sdim MI->isKill() || MI->isInlineAsm() || 1104218893Sdim MI->hasUnmodeledSideEffects()) { 1105276479Sdim FoldAsLoadDefCandidates.clear(); 1106218893Sdim continue; 1107218893Sdim } 1108239462Sdim if (MI->mayStore() || MI->isCall()) 1109276479Sdim FoldAsLoadDefCandidates.clear(); 1110218893Sdim 1111280031Sdim if ((isUncoalescableCopy(*MI) && 1112280031Sdim optimizeUncoalescableCopy(MI, LocalMIs)) || 1113239462Sdim (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || 1114280031Sdim (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { 1115239462Sdim // MI is deleted. 1116239462Sdim LocalMIs.erase(MI); 1117239462Sdim Changed = true; 1118239462Sdim continue; 1119218893Sdim } 1120218893Sdim 1121280031Sdim if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { 1122280031Sdim Changed = true; 1123280031Sdim continue; 1124280031Sdim } 1125280031Sdim 1126280031Sdim if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { 1127280031Sdim // MI is just rewritten. 1128280031Sdim Changed = true; 1129280031Sdim continue; 1130280031Sdim } 1131280031Sdim 1132218893Sdim if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 1133218893Sdim SeenMoveImm = true; 1134212793Sdim } else { 1135239462Sdim Changed |= optimizeExtInstr(MI, MBB, LocalMIs); 1136243830Sdim // optimizeExtInstr might have created new instructions after MI 1137243830Sdim // and before the already incremented MII. Adjust MII so that the 1138243830Sdim // next iteration sees the new instructions. 1139243830Sdim MII = MI; 1140243830Sdim ++MII; 1141218893Sdim if (SeenMoveImm) 1142239462Sdim Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); 1143212793Sdim } 1144218893Sdim 1145239462Sdim // Check whether MI is a load candidate for folding into a later 1146239462Sdim // instruction. If MI is not a candidate, check whether we can fold an 1147239462Sdim // earlier load into MI. 
1148276479Sdim if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && 1149276479Sdim !FoldAsLoadDefCandidates.empty()) { 1150276479Sdim const MCInstrDesc &MIDesc = MI->getDesc(); 1151276479Sdim for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); 1152276479Sdim ++i) { 1153276479Sdim const MachineOperand &MOp = MI->getOperand(i); 1154276479Sdim if (!MOp.isReg()) 1155276479Sdim continue; 1156276479Sdim unsigned FoldAsLoadDefReg = MOp.getReg(); 1157276479Sdim if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { 1158276479Sdim // We need to fold load after optimizeCmpInstr, since 1159276479Sdim // optimizeCmpInstr can enable folding by converting SUB to CMP. 1160276479Sdim // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and 1161276479Sdim // we need it for markUsesInDebugValueAsUndef(). 1162276479Sdim unsigned FoldedReg = FoldAsLoadDefReg; 1163276479Sdim MachineInstr *DefMI = nullptr; 1164276479Sdim MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, 1165276479Sdim FoldAsLoadDefReg, 1166276479Sdim DefMI); 1167276479Sdim if (FoldMI) { 1168276479Sdim // Update LocalMIs since we replaced MI with FoldMI and deleted 1169276479Sdim // DefMI. 1170276479Sdim DEBUG(dbgs() << "Replacing: " << *MI); 1171276479Sdim DEBUG(dbgs() << " With: " << *FoldMI); 1172276479Sdim LocalMIs.erase(MI); 1173276479Sdim LocalMIs.erase(DefMI); 1174276479Sdim LocalMIs.insert(FoldMI); 1175276479Sdim MI->eraseFromParent(); 1176276479Sdim DefMI->eraseFromParent(); 1177276479Sdim MRI->markUsesInDebugValueAsUndef(FoldedReg); 1178276479Sdim FoldAsLoadDefCandidates.erase(FoldedReg); 1179276479Sdim ++NumLoadFold; 1180276479Sdim // MI is replaced with FoldMI. 
1181276479Sdim Changed = true; 1182276479Sdim break; 1183276479Sdim } 1184276479Sdim } 1185239462Sdim } 1186239462Sdim } 1187212793Sdim } 1188212793Sdim } 1189212793Sdim 1190212793Sdim return Changed; 1191212793Sdim} 1192276479Sdim 1193280031Sdimbool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, 1194276479Sdim unsigned &SrcSubReg) { 1195276479Sdim assert(Def->isCopy() && "Invalid definition"); 1196276479Sdim // Copy instruction are supposed to be: Def = Src. 1197276479Sdim // If someone breaks this assumption, bad things will happen everywhere. 1198280031Sdim assert(Def->getNumOperands() == 2 && "Invalid number of operands"); 1199276479Sdim 1200276479Sdim if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1201276479Sdim // If we look for a different subreg, it means we want a subreg of src. 1202276479Sdim // Bails as we do not support composing subreg yet. 1203276479Sdim return false; 1204276479Sdim // Otherwise, we want the whole source. 1205280031Sdim const MachineOperand &Src = Def->getOperand(1); 1206280031Sdim SrcReg = Src.getReg(); 1207280031Sdim SrcSubReg = Src.getSubReg(); 1208276479Sdim return true; 1209276479Sdim} 1210276479Sdim 1211280031Sdimbool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, 1212276479Sdim unsigned &SrcSubReg) { 1213276479Sdim assert(Def->isBitcast() && "Invalid definition"); 1214276479Sdim 1215276479Sdim // Bail if there are effects that a plain copy will not expose. 1216276479Sdim if (Def->hasUnmodeledSideEffects()) 1217276479Sdim return false; 1218276479Sdim 1219276479Sdim // Bitcasts with more than one def are not supported. 1220276479Sdim if (Def->getDesc().getNumDefs() != 1) 1221276479Sdim return false; 1222276479Sdim if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1223276479Sdim // If we look for a different subreg, it means we want a subreg of the src. 1224276479Sdim // Bails as we do not support composing subreg yet. 
1225276479Sdim return false; 1226276479Sdim 1227280031Sdim unsigned SrcIdx = Def->getNumOperands(); 1228276479Sdim for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 1229276479Sdim ++OpIdx) { 1230276479Sdim const MachineOperand &MO = Def->getOperand(OpIdx); 1231276479Sdim if (!MO.isReg() || !MO.getReg()) 1232276479Sdim continue; 1233276479Sdim assert(!MO.isDef() && "We should have skipped all the definitions by now"); 1234276479Sdim if (SrcIdx != EndOpIdx) 1235276479Sdim // Multiple sources? 1236276479Sdim return false; 1237276479Sdim SrcIdx = OpIdx; 1238276479Sdim } 1239280031Sdim const MachineOperand &Src = Def->getOperand(SrcIdx); 1240280031Sdim SrcReg = Src.getReg(); 1241280031Sdim SrcSubReg = Src.getSubReg(); 1242276479Sdim return true; 1243276479Sdim} 1244276479Sdim 1245280031Sdimbool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, 1246276479Sdim unsigned &SrcSubReg) { 1247280031Sdim assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 1248280031Sdim "Invalid definition"); 1249276479Sdim 1250276479Sdim if (Def->getOperand(DefIdx).getSubReg()) 1251276479Sdim // If we are composing subreg, bails out. 1252276479Sdim // The case we are checking is Def.<subreg> = REG_SEQUENCE. 1253276479Sdim // This should almost never happen as the SSA property is tracked at 1254276479Sdim // the register level (as opposed to the subreg level). 1255276479Sdim // I.e., 1256276479Sdim // Def.sub0 = 1257276479Sdim // Def.sub1 = 1258276479Sdim // is a valid SSA representation for Def.sub0 and Def.sub1, but not for 1259276479Sdim // Def. Thus, it must not be generated. 1260276479Sdim // However, some code could theoretically generates a single 1261276479Sdim // Def.sub0 (i.e, not defining the other subregs) and we would 1262276479Sdim // have this case. 1263276479Sdim // If we can ascertain (or force) that this never happens, we could 1264276479Sdim // turn that into an assertion. 
1265276479Sdim return false; 1266276479Sdim 1267280031Sdim if (!TII) 1268280031Sdim // We could handle the REG_SEQUENCE here, but we do not want to 1269280031Sdim // duplicate the code from the generic TII. 1270280031Sdim return false; 1271280031Sdim 1272280031Sdim SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; 1273280031Sdim if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 1274280031Sdim return false; 1275280031Sdim 1276276479Sdim // We are looking at: 1277276479Sdim // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 1278276479Sdim // Check if one of the operand defines the subreg we are interested in. 1279280031Sdim for (auto &RegSeqInput : RegSeqInputRegs) { 1280280031Sdim if (RegSeqInput.SubIdx == DefSubReg) { 1281280031Sdim if (RegSeqInput.SubReg) 1282280031Sdim // Bails if we have to compose sub registers. 1283280031Sdim return false; 1284280031Sdim 1285280031Sdim SrcReg = RegSeqInput.Reg; 1286280031Sdim SrcSubReg = RegSeqInput.SubReg; 1287276479Sdim return true; 1288276479Sdim } 1289276479Sdim } 1290276479Sdim 1291276479Sdim // If the subreg we are tracking is super-defined by another subreg, 1292276479Sdim // we could follow this value. However, this would require to compose 1293276479Sdim // the subreg and we do not do that for now. 1294276479Sdim return false; 1295276479Sdim} 1296276479Sdim 1297280031Sdimbool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, 1298276479Sdim unsigned &SrcSubReg) { 1299280031Sdim assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 1300280031Sdim "Invalid definition"); 1301280031Sdim 1302276479Sdim if (Def->getOperand(DefIdx).getSubReg()) 1303276479Sdim // If we are composing subreg, bails out. 1304276479Sdim // Same remark as getNextSourceFromRegSequence. 1305276479Sdim // I.e., this may be turned into an assert. 
1306276479Sdim return false; 1307276479Sdim 1308280031Sdim if (!TII) 1309280031Sdim // We could handle the REG_SEQUENCE here, but we do not want to 1310280031Sdim // duplicate the code from the generic TII. 1311280031Sdim return false; 1312280031Sdim 1313280031Sdim TargetInstrInfo::RegSubRegPair BaseReg; 1314280031Sdim TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; 1315280031Sdim if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 1316280031Sdim return false; 1317280031Sdim 1318276479Sdim // We are looking at: 1319276479Sdim // Def = INSERT_SUBREG v0, v1, sub1 1320276479Sdim // There are two cases: 1321276479Sdim // 1. DefSubReg == sub1, get v1. 1322276479Sdim // 2. DefSubReg != sub1, the value may be available through v0. 1323276479Sdim 1324280031Sdim // #1 Check if the inserted register matches the required sub index. 1325280031Sdim if (InsertedReg.SubIdx == DefSubReg) { 1326280031Sdim SrcReg = InsertedReg.Reg; 1327280031Sdim SrcSubReg = InsertedReg.SubReg; 1328276479Sdim return true; 1329276479Sdim } 1330276479Sdim // #2 Otherwise, if the sub register we are looking for is not partial 1331276479Sdim // defined by the inserted element, we can look through the main 1332276479Sdim // register (v0). 1333276479Sdim const MachineOperand &MODef = Def->getOperand(DefIdx); 1334276479Sdim // If the result register (Def) and the base register (v0) do not 1335276479Sdim // have the same register class or if we have to compose 1336276479Sdim // subregisters, bails out. 1337280031Sdim if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 1338280031Sdim BaseReg.SubReg) 1339276479Sdim return false; 1340276479Sdim 1341280031Sdim // Get the TRI and check if the inserted sub-register overlaps with the 1342280031Sdim // sub-register we are tracking. 
1343280031Sdim const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 1344276479Sdim if (!TRI || 1345276479Sdim (TRI->getSubRegIndexLaneMask(DefSubReg) & 1346280031Sdim TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) 1347276479Sdim return false; 1348276479Sdim // At this point, the value is available in v0 via the same subreg 1349276479Sdim // we used for Def. 1350280031Sdim SrcReg = BaseReg.Reg; 1351276479Sdim SrcSubReg = DefSubReg; 1352276479Sdim return true; 1353276479Sdim} 1354276479Sdim 1355280031Sdimbool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, 1356276479Sdim unsigned &SrcSubReg) { 1357280031Sdim assert((Def->isExtractSubreg() || 1358280031Sdim Def->isExtractSubregLike()) && "Invalid definition"); 1359276479Sdim // We are looking at: 1360276479Sdim // Def = EXTRACT_SUBREG v0, sub0 1361276479Sdim 1362276479Sdim // Bails if we have to compose sub registers. 1363276479Sdim // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 1364276479Sdim if (DefSubReg) 1365276479Sdim return false; 1366276479Sdim 1367280031Sdim if (!TII) 1368280031Sdim // We could handle the EXTRACT_SUBREG here, but we do not want to 1369280031Sdim // duplicate the code from the generic TII. 1370280031Sdim return false; 1371280031Sdim 1372280031Sdim TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; 1373280031Sdim if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 1374280031Sdim return false; 1375280031Sdim 1376276479Sdim // Bails if we have to compose sub registers. 1377276479Sdim // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 1378280031Sdim if (ExtractSubregInputReg.SubReg) 1379276479Sdim return false; 1380276479Sdim // Otherwise, the value is available in the v0.sub0. 
1381280031Sdim SrcReg = ExtractSubregInputReg.Reg; 1382280031Sdim SrcSubReg = ExtractSubregInputReg.SubIdx; 1383276479Sdim return true; 1384276479Sdim} 1385276479Sdim 1386280031Sdimbool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, 1387276479Sdim unsigned &SrcSubReg) { 1388276479Sdim assert(Def->isSubregToReg() && "Invalid definition"); 1389276479Sdim // We are looking at: 1390276479Sdim // Def = SUBREG_TO_REG Imm, v0, sub0 1391276479Sdim 1392276479Sdim // Bails if we have to compose sub registers. 1393276479Sdim // If DefSubReg != sub0, we would have to check that all the bits 1394276479Sdim // we track are included in sub0 and if yes, we would have to 1395276479Sdim // determine the right subreg in v0. 1396276479Sdim if (DefSubReg != Def->getOperand(3).getImm()) 1397276479Sdim return false; 1398276479Sdim // Bails if we have to compose sub registers. 1399276479Sdim // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 1400276479Sdim if (Def->getOperand(2).getSubReg()) 1401276479Sdim return false; 1402276479Sdim 1403280031Sdim SrcReg = Def->getOperand(2).getReg(); 1404276479Sdim SrcSubReg = Def->getOperand(3).getImm(); 1405276479Sdim return true; 1406276479Sdim} 1407276479Sdim 1408280031Sdimbool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { 1409276479Sdim assert(Def && "This method needs a valid definition"); 1410276479Sdim 1411276479Sdim assert( 1412276479Sdim (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && 1413276479Sdim Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); 1414276479Sdim if (Def->isCopy()) 1415280031Sdim return getNextSourceFromCopy(SrcReg, SrcSubReg); 1416276479Sdim if (Def->isBitcast()) 1417280031Sdim return getNextSourceFromBitcast(SrcReg, SrcSubReg); 1418276479Sdim // All the remaining cases involve "complex" instructions. 1419276479Sdim // Bails if we did not ask for the advanced tracking. 
1420276479Sdim if (!UseAdvancedTracking) 1421276479Sdim return false; 1422280031Sdim if (Def->isRegSequence() || Def->isRegSequenceLike()) 1423280031Sdim return getNextSourceFromRegSequence(SrcReg, SrcSubReg); 1424280031Sdim if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 1425280031Sdim return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); 1426280031Sdim if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 1427280031Sdim return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); 1428276479Sdim if (Def->isSubregToReg()) 1429280031Sdim return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); 1430276479Sdim return false; 1431276479Sdim} 1432276479Sdim 1433280031Sdimconst MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, 1434276479Sdim unsigned &SrcSubReg) { 1435276479Sdim // If we reach a point where we cannot move up in the use-def chain, 1436276479Sdim // there is nothing we can get. 1437276479Sdim if (!Def) 1438276479Sdim return nullptr; 1439276479Sdim 1440276479Sdim const MachineInstr *PrevDef = nullptr; 1441276479Sdim // Try to find the next source. 1442280031Sdim if (getNextSourceImpl(SrcReg, SrcSubReg)) { 1443276479Sdim // Update definition, definition index, and subregister for the 1444276479Sdim // next call of getNextSource. 1445276479Sdim // Update the current register. 1446280031Sdim Reg = SrcReg; 1447276479Sdim // Update the return value before moving up in the use-def chain. 1448276479Sdim PrevDef = Def; 1449276479Sdim // If we can still move up in the use-def chain, move to the next 1450276479Sdim // defintion. 1451276479Sdim if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 1452280031Sdim Def = MRI.getVRegDef(Reg); 1453280031Sdim DefIdx = MRI.def_begin(Reg).getOperandNo(); 1454276479Sdim DefSubReg = SrcSubReg; 1455276479Sdim return PrevDef; 1456276479Sdim } 1457276479Sdim } 1458276479Sdim // If we end up here, this means we will not be able to find another source 1459276479Sdim // for the next iteration. 
1460276479Sdim // Make sure any new call to getNextSource bails out early by cutting the 1461276479Sdim // use-def chain. 1462276479Sdim Def = nullptr; 1463276479Sdim return PrevDef; 1464276479Sdim} 1465