//===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // A pre-emit peephole for catching opportunities introduced by late passes such // as MachineBlockPlacement. // //===----------------------------------------------------------------------===// #include "PPC.h" #include "PPCInstrInfo.h" #include "PPCSubtarget.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "ppc-pre-emit-peephole" STATISTIC(NumRRConvertedInPreEmit, "Number of r+r instructions converted to r+i in pre-emit peephole"); STATISTIC(NumRemovedInPreEmit, "Number of instructions deleted in pre-emit peephole"); STATISTIC(NumberOfSelfCopies, "Number of self copy instructions eliminated"); STATISTIC(NumFrameOffFoldInPreEmit, "Number of folding frame offset by using r+r in pre-emit peephole"); static cl::opt RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true), cl::desc("Run pre-emit peephole optimizations.")); namespace { class PPCPreEmitPeephole : public MachineFunctionPass { public: static char ID; PPCPreEmitPeephole() : MachineFunctionPass(ID) { initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } // This function removes any redundant load immediates. It has two level // loops - The outer loop finds the load immediates BBI that could be used // to replace following redundancy. The inner loop scans instructions that // after BBI to find redundancy and update kill/dead flags accordingly. If // AfterBBI is the same as BBI, it is redundant, otherwise any instructions // that modify the def register of BBI would break the scanning. // DeadOrKillToUnset is a pointer to the previous operand that had the // kill/dead flag set. It keeps track of the def register of BBI, the use // registers of AfterBBIs and the def registers of AfterBBIs. bool removeRedundantLIs(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n"; MBB.dump(); dbgs() << "\n"); DenseSet InstrsToErase; for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { // Skip load immediate that is marked to be erased later because it // cannot be used to replace any other instructions. if (InstrsToErase.find(&*BBI) != InstrsToErase.end()) continue; // Skip non-load immediate. unsigned Opc = BBI->getOpcode(); if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && Opc != PPC::LIS8) continue; // Skip load immediate, where the operand is a relocation (e.g., $r3 = // LI target-flags(ppc-lo) %const.0). if (!BBI->getOperand(1).isImm()) continue; assert(BBI->getOperand(0).isReg() && "Expected a register for the first operand"); LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump();); Register Reg = BBI->getOperand(0).getReg(); int64_t Imm = BBI->getOperand(1).getImm(); MachineOperand *DeadOrKillToUnset = nullptr; if (BBI->getOperand(0).isDead()) { DeadOrKillToUnset = &BBI->getOperand(0); LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset << " from load immediate " << *BBI << " is a unsetting candidate\n"); } // This loop scans instructions after BBI to see if there is any // redundant load immediate. for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end(); ++AfterBBI) { // Track the operand that kill Reg. We would unset the kill flag of // the operand if there is a following redundant load immediate. int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI); // We can't just clear implicit kills, so if we encounter one, stop // looking further. if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) { LLVM_DEBUG(dbgs() << "Encountered an implicit kill, cannot proceed: "); LLVM_DEBUG(AfterBBI->dump()); break; } if (KillIdx != -1) { assert(!DeadOrKillToUnset && "Shouldn't kill same register twice"); DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx); LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset << " from " << *AfterBBI << " is a unsetting candidate\n"); } if (!AfterBBI->modifiesRegister(Reg, TRI)) continue; // Finish scanning because Reg is overwritten by a non-load // instruction. if (AfterBBI->getOpcode() != Opc) break; assert(AfterBBI->getOperand(0).isReg() && "Expected a register for the first operand"); // Finish scanning because Reg is overwritten by a relocation or a // different value. if (!AfterBBI->getOperand(1).isImm() || AfterBBI->getOperand(1).getImm() != Imm) break; // It loads same immediate value to the same Reg, which is redundant. // We would unset kill flag in previous Reg usage to extend live range // of Reg first, then remove the redundancy. if (DeadOrKillToUnset) { LLVM_DEBUG(dbgs() << " Unset dead/kill flag of " << *DeadOrKillToUnset << " from " << *DeadOrKillToUnset->getParent()); if (DeadOrKillToUnset->isDef()) DeadOrKillToUnset->setIsDead(false); else DeadOrKillToUnset->setIsKill(false); } DeadOrKillToUnset = AfterBBI->findRegisterDefOperand(Reg, true, true, TRI); if (DeadOrKillToUnset) LLVM_DEBUG(dbgs() << " Dead flag of " << *DeadOrKillToUnset << " from " << *AfterBBI << " is a unsetting candidate\n"); InstrsToErase.insert(&*AfterBBI); LLVM_DEBUG(dbgs() << " Remove redundant load immediate: "; AfterBBI->dump()); } } for (MachineInstr *MI : InstrsToErase) { MI->eraseFromParent(); } NumRemovedInPreEmit += InstrsToErase.size(); return !InstrsToErase.empty(); } bool runOnMachineFunction(MachineFunction &MF) override { if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) { // Remove UNENCODED_NOP even when this pass is disabled. // This needs to be done unconditionally so we don't emit zeros // in the instruction stream. SmallVector InstrsToErase; for (MachineBasicBlock &MBB : MF) for (MachineInstr &MI : MBB) if (MI.getOpcode() == PPC::UNENCODED_NOP) InstrsToErase.push_back(&MI); for (MachineInstr *MI : InstrsToErase) MI->eraseFromParent(); return false; } bool Changed = false; const PPCInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); SmallVector InstrsToErase; for (MachineBasicBlock &MBB : MF) { Changed |= removeRedundantLIs(MBB, TRI); for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); if (Opc == PPC::UNENCODED_NOP) { InstrsToErase.push_back(&MI); continue; } // Detect self copies - these can result from running AADB. if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) { const MCInstrDesc &MCID = TII->get(Opc); if (MCID.getNumOperands() == 3 && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) { NumberOfSelfCopies++; LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); LLVM_DEBUG(MI.dump()); InstrsToErase.push_back(&MI); continue; } else if (MCID.getNumOperands() == 2 && MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { NumberOfSelfCopies++; LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: "); LLVM_DEBUG(MI.dump()); InstrsToErase.push_back(&MI); continue; } } MachineInstr *DefMIToErase = nullptr; if (TII->convertToImmediateForm(MI, &DefMIToErase)) { Changed = true; NumRRConvertedInPreEmit++; LLVM_DEBUG(dbgs() << "Converted instruction to imm form: "); LLVM_DEBUG(MI.dump()); if (DefMIToErase) { InstrsToErase.push_back(DefMIToErase); } } if (TII->foldFrameOffset(MI)) { Changed = true; NumFrameOffFoldInPreEmit++; LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: "); LLVM_DEBUG(MI.dump()); } } // Eliminate conditional branch based on a constant CR bit by // CRSET or CRUNSET. We eliminate the conditional branch or // convert it into an unconditional branch. Also, if the CR bit // is not used by other instructions, we eliminate CRSET as well. auto I = MBB.getFirstInstrTerminator(); if (I == MBB.instr_end()) continue; MachineInstr *Br = &*I; if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) continue; MachineInstr *CRSetMI = nullptr; Register CRBit = Br->getOperand(0).getReg(); unsigned CRReg = getCRFromCRBit(CRBit); bool SeenUse = false; MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); for (It++; It != Er; It++) { if (It->modifiesRegister(CRBit, TRI)) { if ((It->getOpcode() == PPC::CRUNSET || It->getOpcode() == PPC::CRSET) && It->getOperand(0).getReg() == CRBit) CRSetMI = &*It; break; } if (It->readsRegister(CRBit, TRI)) SeenUse = true; } if (!CRSetMI) continue; unsigned CRSetOp = CRSetMI->getOpcode(); if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { // Remove this branch since it cannot be taken. InstrsToErase.push_back(Br); MBB.removeSuccessor(Br->getOperand(1).getMBB()); } else { // This conditional branch is always taken. So, remove all branches // and insert an unconditional branch to the destination of this. MachineBasicBlock::iterator It = Br, Er = MBB.end(); for (; It != Er; It++) { if (It->isDebugInstr()) continue; assert(It->isTerminator() && "Non-terminator after a terminator"); InstrsToErase.push_back(&*It); } if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) { ArrayRef NoCond; TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr, NoCond, Br->getDebugLoc()); } for (auto &Succ : MBB.successors()) if (Succ != Br->getOperand(1).getMBB()) { MBB.removeSuccessor(Succ); break; } } // If the CRBit is not used by another instruction, we can eliminate // CRSET/CRUNSET instruction. if (!SeenUse) { // We need to check use of the CRBit in successors. for (auto &SuccMBB : MBB.successors()) if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) { SeenUse = true; break; } if (!SeenUse) InstrsToErase.push_back(CRSetMI); } } for (MachineInstr *MI : InstrsToErase) { LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: "); LLVM_DEBUG(MI->dump()); MI->eraseFromParent(); NumRemovedInPreEmit++; } return Changed; } }; } INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole", false, false) char PPCPreEmitPeephole::ID = 0; FunctionPass *llvm::createPPCPreEmitPeepholePass() { return new PPCPreEmitPeephole(); }