// PeepholeOptimizer.cpp (revision 234353)
//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Perform peephole optimizations on the machine code:
//
// - Optimize Extensions
//
//     Optimization of sign / zero extension instructions. It may be extended
//     to handle other instructions with similar properties.
//
//     On some targets, some instructions, e.g. X86 sign / zero extension, may
//     leave the source value in the lower part of the result. This
//     optimization will replace some uses of the pre-extension value with
//     uses of the sub-register of the results.
//
// - Optimize Comparisons
//
//     Optimization of comparison instructions. For instance, in this code:
//
//       sub r1, 1
//       cmp r1, 0
//       bz  L1
//
//     If the "sub" instruction already sets (or could be modified to set) the
//     same flag that the "cmp" instruction sets and that "bz" uses, then we
//     can eliminate the "cmp" instruction.
33221345Sdim// 34221345Sdim// - Optimize Bitcast pairs: 35221345Sdim// 36221345Sdim// v1 = bitcast v0 37221345Sdim// v2 = bitcast v1 38221345Sdim// = v2 39221345Sdim// => 40221345Sdim// v1 = bitcast v0 41221345Sdim// = v0 42234353Sdim// 43212793Sdim//===----------------------------------------------------------------------===// 44212793Sdim 45212793Sdim#define DEBUG_TYPE "peephole-opt" 46212793Sdim#include "llvm/CodeGen/Passes.h" 47212793Sdim#include "llvm/CodeGen/MachineDominators.h" 48212793Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 49212793Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 50212793Sdim#include "llvm/Target/TargetInstrInfo.h" 51212793Sdim#include "llvm/Target/TargetRegisterInfo.h" 52212793Sdim#include "llvm/Support/CommandLine.h" 53218893Sdim#include "llvm/ADT/DenseMap.h" 54212793Sdim#include "llvm/ADT/SmallPtrSet.h" 55218893Sdim#include "llvm/ADT/SmallSet.h" 56212793Sdim#include "llvm/ADT/Statistic.h" 57212793Sdimusing namespace llvm; 58212793Sdim 59212793Sdim// Optimize Extensions 60212793Sdimstatic cl::opt<bool> 61212793SdimAggressive("aggressive-ext-opt", cl::Hidden, 62212793Sdim cl::desc("Aggressive extension optimization")); 63212793Sdim 64218893Sdimstatic cl::opt<bool> 65218893SdimDisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 66218893Sdim cl::desc("Disable the peephole optimizer")); 67218893Sdim 68212793SdimSTATISTIC(NumReuse, "Number of extension results reused"); 69221345SdimSTATISTIC(NumBitcasts, "Number of bitcasts eliminated"); 70221345SdimSTATISTIC(NumCmps, "Number of compares eliminated"); 71234353SdimSTATISTIC(NumImmFold, "Number of move immediate folded"); 72212793Sdim 73212793Sdimnamespace { 74212793Sdim class PeepholeOptimizer : public MachineFunctionPass { 75212793Sdim const TargetMachine *TM; 76212793Sdim const TargetInstrInfo *TII; 77212793Sdim MachineRegisterInfo *MRI; 78212793Sdim MachineDominatorTree *DT; // Machine dominator tree 79212793Sdim 80212793Sdim public: 81212793Sdim static char ID; // 
Pass identification 82218893Sdim PeepholeOptimizer() : MachineFunctionPass(ID) { 83218893Sdim initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 84218893Sdim } 85212793Sdim 86212793Sdim virtual bool runOnMachineFunction(MachineFunction &MF); 87212793Sdim 88212793Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 89212793Sdim AU.setPreservesCFG(); 90212793Sdim MachineFunctionPass::getAnalysisUsage(AU); 91212793Sdim if (Aggressive) { 92212793Sdim AU.addRequired<MachineDominatorTree>(); 93212793Sdim AU.addPreserved<MachineDominatorTree>(); 94212793Sdim } 95212793Sdim } 96212793Sdim 97212793Sdim private: 98221345Sdim bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); 99212793Sdim bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 100212793Sdim bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 101212793Sdim SmallPtrSet<MachineInstr*, 8> &LocalMIs); 102218893Sdim bool isMoveImmediate(MachineInstr *MI, 103218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 104218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 105218893Sdim bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 106218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 107218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 108212793Sdim }; 109212793Sdim} 110212793Sdim 111212793Sdimchar PeepholeOptimizer::ID = 0; 112234353Sdimchar &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 113218893SdimINITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 114218893Sdim "Peephole Optimizations", false, false) 115218893SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 116218893SdimINITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 117218893Sdim "Peephole Optimizations", false, false) 118212793Sdim 119212793Sdim/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. 
it reads 120212793Sdim/// a single register and writes a single register and it does not modify the 121212793Sdim/// source, and if the source value is preserved as a sub-register of the 122212793Sdim/// result, then replace all reachable uses of the source with the subreg of the 123212793Sdim/// result. 124234353Sdim/// 125212793Sdim/// Do not generate an EXTRACT that is used only in a debug use, as this changes 126212793Sdim/// the code. Since this code does not currently share EXTRACTs, just ignore all 127212793Sdim/// debug uses. 128212793Sdimbool PeepholeOptimizer:: 129212793SdimOptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 130212793Sdim SmallPtrSet<MachineInstr*, 8> &LocalMIs) { 131212793Sdim unsigned SrcReg, DstReg, SubIdx; 132212793Sdim if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 133212793Sdim return false; 134234353Sdim 135212793Sdim if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 136212793Sdim TargetRegisterInfo::isPhysicalRegister(SrcReg)) 137212793Sdim return false; 138212793Sdim 139212793Sdim MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg); 140212793Sdim if (++UI == MRI->use_nodbg_end()) 141212793Sdim // No other uses. 142212793Sdim return false; 143212793Sdim 144212793Sdim // The source has other uses. See if we can replace the other uses with use of 145212793Sdim // the result of the extension. 146212793Sdim SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 147212793Sdim UI = MRI->use_nodbg_begin(DstReg); 148212793Sdim for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); 149212793Sdim UI != UE; ++UI) 150212793Sdim ReachedBBs.insert(UI->getParent()); 151212793Sdim 152212793Sdim // Uses that are in the same BB of uses of the result of the instruction. 153212793Sdim SmallVector<MachineOperand*, 8> Uses; 154212793Sdim 155212793Sdim // Uses that the result of the instruction can reach. 
156212793Sdim SmallVector<MachineOperand*, 8> ExtendedUses; 157212793Sdim 158212793Sdim bool ExtendLife = true; 159212793Sdim UI = MRI->use_nodbg_begin(SrcReg); 160212793Sdim for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end(); 161212793Sdim UI != UE; ++UI) { 162212793Sdim MachineOperand &UseMO = UI.getOperand(); 163212793Sdim MachineInstr *UseMI = &*UI; 164212793Sdim if (UseMI == MI) 165212793Sdim continue; 166212793Sdim 167212793Sdim if (UseMI->isPHI()) { 168212793Sdim ExtendLife = false; 169212793Sdim continue; 170212793Sdim } 171212793Sdim 172212793Sdim // It's an error to translate this: 173212793Sdim // 174212793Sdim // %reg1025 = <sext> %reg1024 175212793Sdim // ... 176212793Sdim // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 177212793Sdim // 178212793Sdim // into this: 179212793Sdim // 180212793Sdim // %reg1025 = <sext> %reg1024 181212793Sdim // ... 182212793Sdim // %reg1027 = COPY %reg1025:4 183212793Sdim // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 184212793Sdim // 185212793Sdim // The problem here is that SUBREG_TO_REG is there to assert that an 186212793Sdim // implicit zext occurs. It doesn't insert a zext instruction. If we allow 187212793Sdim // the COPY here, it will give us the value after the <sext>, not the 188212793Sdim // original value of %reg1024 before <sext>. 189212793Sdim if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 190212793Sdim continue; 191212793Sdim 192212793Sdim MachineBasicBlock *UseMBB = UseMI->getParent(); 193212793Sdim if (UseMBB == MBB) { 194212793Sdim // Local uses that come after the extension. 195212793Sdim if (!LocalMIs.count(UseMI)) 196212793Sdim Uses.push_back(&UseMO); 197212793Sdim } else if (ReachedBBs.count(UseMBB)) { 198212793Sdim // Non-local uses where the result of the extension is used. Always 199212793Sdim // replace these unless it's a PHI. 
200212793Sdim Uses.push_back(&UseMO); 201212793Sdim } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 202212793Sdim // We may want to extend the live range of the extension result in order 203212793Sdim // to replace these uses. 204212793Sdim ExtendedUses.push_back(&UseMO); 205212793Sdim } else { 206212793Sdim // Both will be live out of the def MBB anyway. Don't extend live range of 207212793Sdim // the extension result. 208212793Sdim ExtendLife = false; 209212793Sdim break; 210212793Sdim } 211212793Sdim } 212212793Sdim 213212793Sdim if (ExtendLife && !ExtendedUses.empty()) 214212793Sdim // Extend the liveness of the extension result. 215212793Sdim std::copy(ExtendedUses.begin(), ExtendedUses.end(), 216212793Sdim std::back_inserter(Uses)); 217212793Sdim 218212793Sdim // Now replace all uses. 219212793Sdim bool Changed = false; 220212793Sdim if (!Uses.empty()) { 221212793Sdim SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 222212793Sdim 223212793Sdim // Look for PHI uses of the extended result, we don't want to extend the 224212793Sdim // liveness of a PHI input. It breaks all kinds of assumptions down 225212793Sdim // stream. A PHI use is expected to be the kill of its source values. 226212793Sdim UI = MRI->use_nodbg_begin(DstReg); 227212793Sdim for (MachineRegisterInfo::use_nodbg_iterator 228212793Sdim UE = MRI->use_nodbg_end(); UI != UE; ++UI) 229212793Sdim if (UI->isPHI()) 230212793Sdim PHIBBs.insert(UI->getParent()); 231212793Sdim 232212793Sdim const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 233212793Sdim for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 234212793Sdim MachineOperand *UseMO = Uses[i]; 235212793Sdim MachineInstr *UseMI = UseMO->getParent(); 236212793Sdim MachineBasicBlock *UseMBB = UseMI->getParent(); 237212793Sdim if (PHIBBs.count(UseMBB)) 238212793Sdim continue; 239212793Sdim 240234353Sdim // About to add uses of DstReg, clear DstReg's kill flags. 
241234353Sdim if (!Changed) 242234353Sdim MRI->clearKillFlags(DstReg); 243234353Sdim 244212793Sdim unsigned NewVR = MRI->createVirtualRegister(RC); 245212793Sdim BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 246212793Sdim TII->get(TargetOpcode::COPY), NewVR) 247212793Sdim .addReg(DstReg, 0, SubIdx); 248212793Sdim 249212793Sdim UseMO->setReg(NewVR); 250212793Sdim ++NumReuse; 251212793Sdim Changed = true; 252212793Sdim } 253212793Sdim } 254212793Sdim 255212793Sdim return Changed; 256212793Sdim} 257212793Sdim 258221345Sdim/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that 259221345Sdim/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast 260221345Sdim/// a value cross register classes), and the source is defined by another 261221345Sdim/// bitcast instruction B. And if the register class of source of B matches 262221345Sdim/// the register class of instruction A, then it is legal to replace all uses 263221345Sdim/// of the def of A with source of B. e.g. 264221345Sdim/// %vreg0<def> = VMOVSR %vreg1 265221345Sdim/// %vreg3<def> = VMOVRS %vreg0 266221345Sdim/// Replace all uses of vreg3 with vreg1. 267221345Sdim 268221345Sdimbool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, 269221345Sdim MachineBasicBlock *MBB) { 270221345Sdim unsigned NumDefs = MI->getDesc().getNumDefs(); 271221345Sdim unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; 272221345Sdim if (NumDefs != 1) 273221345Sdim return false; 274221345Sdim 275221345Sdim unsigned Def = 0; 276221345Sdim unsigned Src = 0; 277221345Sdim for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { 278221345Sdim const MachineOperand &MO = MI->getOperand(i); 279221345Sdim if (!MO.isReg()) 280221345Sdim continue; 281221345Sdim unsigned Reg = MO.getReg(); 282221345Sdim if (!Reg) 283221345Sdim continue; 284221345Sdim if (MO.isDef()) 285221345Sdim Def = Reg; 286221345Sdim else if (Src) 287221345Sdim // Multiple sources? 
288221345Sdim return false; 289221345Sdim else 290221345Sdim Src = Reg; 291221345Sdim } 292221345Sdim 293221345Sdim assert(Def && Src && "Malformed bitcast instruction!"); 294221345Sdim 295221345Sdim MachineInstr *DefMI = MRI->getVRegDef(Src); 296234353Sdim if (!DefMI || !DefMI->isBitcast()) 297221345Sdim return false; 298221345Sdim 299221345Sdim unsigned SrcSrc = 0; 300221345Sdim NumDefs = DefMI->getDesc().getNumDefs(); 301221345Sdim NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; 302221345Sdim if (NumDefs != 1) 303221345Sdim return false; 304221345Sdim for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { 305221345Sdim const MachineOperand &MO = DefMI->getOperand(i); 306221345Sdim if (!MO.isReg() || MO.isDef()) 307221345Sdim continue; 308221345Sdim unsigned Reg = MO.getReg(); 309221345Sdim if (!Reg) 310221345Sdim continue; 311226633Sdim if (!MO.isDef()) { 312226633Sdim if (SrcSrc) 313226633Sdim // Multiple sources? 314226633Sdim return false; 315226633Sdim else 316226633Sdim SrcSrc = Reg; 317226633Sdim } 318221345Sdim } 319221345Sdim 320221345Sdim if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) 321221345Sdim return false; 322221345Sdim 323221345Sdim MRI->replaceRegWith(Def, SrcSrc); 324221345Sdim MRI->clearKillFlags(SrcSrc); 325221345Sdim MI->eraseFromParent(); 326221345Sdim ++NumBitcasts; 327221345Sdim return true; 328221345Sdim} 329221345Sdim 330212793Sdim/// OptimizeCmpInstr - If the instruction is a compare and the previous 331212793Sdim/// instruction it's comparing against all ready sets (or could be modified to 332212793Sdim/// set) the same flag as the compare, then we can remove the comparison and use 333212793Sdim/// the flag from the previous instruction. 334212793Sdimbool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, 335221345Sdim MachineBasicBlock *MBB) { 336212793Sdim // If this instruction is a comparison against zero and isn't comparing a 337212793Sdim // physical register, we can try to optimize it. 
338212793Sdim unsigned SrcReg; 339218893Sdim int CmpMask, CmpValue; 340218893Sdim if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) || 341218893Sdim TargetRegisterInfo::isPhysicalRegister(SrcReg)) 342212793Sdim return false; 343212793Sdim 344218893Sdim // Attempt to optimize the comparison instruction. 345218893Sdim if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { 346221345Sdim ++NumCmps; 347212793Sdim return true; 348212793Sdim } 349212793Sdim 350212793Sdim return false; 351212793Sdim} 352212793Sdim 353218893Sdimbool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 354218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 355218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 356224145Sdim const MCInstrDesc &MCID = MI->getDesc(); 357234353Sdim if (!MI->isMoveImmediate()) 358218893Sdim return false; 359224145Sdim if (MCID.getNumDefs() != 1) 360218893Sdim return false; 361218893Sdim unsigned Reg = MI->getOperand(0).getReg(); 362218893Sdim if (TargetRegisterInfo::isVirtualRegister(Reg)) { 363218893Sdim ImmDefMIs.insert(std::make_pair(Reg, MI)); 364218893Sdim ImmDefRegs.insert(Reg); 365218893Sdim return true; 366218893Sdim } 367234353Sdim 368218893Sdim return false; 369218893Sdim} 370218893Sdim 371218893Sdim/// FoldImmediate - Try folding register operands that are defined by move 372218893Sdim/// immediate instructions, i.e. a trivial constant folding optimization, if 373218893Sdim/// and only if the def and use are in the same BB. 
374218893Sdimbool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 375218893Sdim SmallSet<unsigned, 4> &ImmDefRegs, 376218893Sdim DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 377218893Sdim for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 378218893Sdim MachineOperand &MO = MI->getOperand(i); 379218893Sdim if (!MO.isReg() || MO.isDef()) 380218893Sdim continue; 381218893Sdim unsigned Reg = MO.getReg(); 382218893Sdim if (!TargetRegisterInfo::isVirtualRegister(Reg)) 383218893Sdim continue; 384218893Sdim if (ImmDefRegs.count(Reg) == 0) 385218893Sdim continue; 386218893Sdim DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); 387218893Sdim assert(II != ImmDefMIs.end()); 388218893Sdim if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { 389218893Sdim ++NumImmFold; 390218893Sdim return true; 391218893Sdim } 392218893Sdim } 393218893Sdim return false; 394218893Sdim} 395218893Sdim 396212793Sdimbool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 397218893Sdim if (DisablePeephole) 398218893Sdim return false; 399234353Sdim 400212793Sdim TM = &MF.getTarget(); 401212793Sdim TII = TM->getInstrInfo(); 402212793Sdim MRI = &MF.getRegInfo(); 403212793Sdim DT = Aggressive ? 
&getAnalysis<MachineDominatorTree>() : 0; 404212793Sdim 405212793Sdim bool Changed = false; 406212793Sdim 407212793Sdim SmallPtrSet<MachineInstr*, 8> LocalMIs; 408218893Sdim SmallSet<unsigned, 4> ImmDefRegs; 409218893Sdim DenseMap<unsigned, MachineInstr*> ImmDefMIs; 410212793Sdim for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 411212793Sdim MachineBasicBlock *MBB = &*I; 412234353Sdim 413218893Sdim bool SeenMoveImm = false; 414212793Sdim LocalMIs.clear(); 415218893Sdim ImmDefRegs.clear(); 416218893Sdim ImmDefMIs.clear(); 417212793Sdim 418218893Sdim bool First = true; 419218893Sdim MachineBasicBlock::iterator PMII; 420212793Sdim for (MachineBasicBlock::iterator 421218893Sdim MII = I->begin(), MIE = I->end(); MII != MIE; ) { 422212793Sdim MachineInstr *MI = &*MII; 423218893Sdim LocalMIs.insert(MI); 424212793Sdim 425218893Sdim if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 426218893Sdim MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || 427218893Sdim MI->hasUnmodeledSideEffects()) { 428218893Sdim ++MII; 429218893Sdim continue; 430218893Sdim } 431218893Sdim 432234353Sdim if (MI->isBitcast()) { 433221345Sdim if (OptimizeBitcastInstr(MI, MBB)) { 434221345Sdim // MI is deleted. 435226633Sdim LocalMIs.erase(MI); 436221345Sdim Changed = true; 437221345Sdim MII = First ? I->begin() : llvm::next(PMII); 438221345Sdim continue; 439234353Sdim } 440234353Sdim } else if (MI->isCompare()) { 441218893Sdim if (OptimizeCmpInstr(MI, MBB)) { 442218893Sdim // MI is deleted. 443226633Sdim LocalMIs.erase(MI); 444218893Sdim Changed = true; 445218893Sdim MII = First ? 
I->begin() : llvm::next(PMII); 446218893Sdim continue; 447218893Sdim } 448218893Sdim } 449218893Sdim 450218893Sdim if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 451218893Sdim SeenMoveImm = true; 452212793Sdim } else { 453212793Sdim Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); 454218893Sdim if (SeenMoveImm) 455218893Sdim Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); 456212793Sdim } 457218893Sdim 458218893Sdim First = false; 459218893Sdim PMII = MII; 460218893Sdim ++MII; 461212793Sdim } 462212793Sdim } 463212793Sdim 464212793Sdim return Changed; 465212793Sdim} 466