1326938Sdim//===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===// 2326938Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6326938Sdim// 7326938Sdim//===---------------------------------------------------------------------===// 8326938Sdim// 9326938Sdim// This pass aims to reduce the number of logical operations on bits in the CR 10326938Sdim// register. These instructions have a fairly high latency and only a single 11326938Sdim// pipeline at their disposal in modern PPC cores. Furthermore, they have a 12326938Sdim// tendency to occur in fairly small blocks where there's little opportunity 13326938Sdim// to hide the latency between the CR logical operation and its user. 14326938Sdim// 15326938Sdim//===---------------------------------------------------------------------===// 16326938Sdim 17341825Sdim#include "PPC.h" 18326938Sdim#include "PPCInstrInfo.h" 19326938Sdim#include "PPCTargetMachine.h" 20341825Sdim#include "llvm/ADT/Statistic.h" 21341825Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 22341825Sdim#include "llvm/CodeGen/MachineDominators.h" 23326938Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 24341825Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 25341825Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 26341825Sdim#include "llvm/Config/llvm-config.h" 27360784Sdim#include "llvm/InitializePasses.h" 28326938Sdim#include "llvm/Support/Debug.h" 29326938Sdim 30326938Sdimusing namespace llvm; 31326938Sdim 32326938Sdim#define DEBUG_TYPE "ppc-reduce-cr-ops" 33326938Sdim 34326938SdimSTATISTIC(NumContainedSingleUseBinOps, 35326938Sdim "Number of single-use binary CR logical ops contained in a block"); 36326938SdimSTATISTIC(NumToSplitBlocks, 37326938Sdim "Number of binary CR logical ops that can be used to split blocks"); 38326938SdimSTATISTIC(TotalCRLogicals, "Number of CR logical ops."); 39326938SdimSTATISTIC(TotalNullaryCRLogicals, 40326938Sdim "Number of nullary CR logical ops (CRSET/CRUNSET)."); 41326938SdimSTATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops."); 42326938SdimSTATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops."); 43326938SdimSTATISTIC(NumBlocksSplitOnBinaryCROp, 44326938Sdim "Number of blocks split on CR binary logical ops."); 45326938SdimSTATISTIC(NumNotSplitIdenticalOperands, 46326938Sdim "Number of blocks not split due to operands being identical."); 47326938SdimSTATISTIC(NumNotSplitChainCopies, 48326938Sdim "Number of blocks not split due to operands being chained copies."); 49326938SdimSTATISTIC(NumNotSplitWrongOpcode, 50326938Sdim "Number of blocks not split due to the wrong opcode."); 51326938Sdim 52341825Sdim/// Given a basic block \p Successor that potentially contains PHIs, this 53341825Sdim/// function will look for any incoming values in the PHIs that are supposed to 54341825Sdim/// be coming from \p OrigMBB but whose definition is actually in \p NewMBB. 55341825Sdim/// Any such PHIs will be updated to reflect reality. 56341825Sdimstatic void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB, 57341825Sdim MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) { 58341825Sdim for (auto &MI : Successor->instrs()) { 59341825Sdim if (!MI.isPHI()) 60341825Sdim continue; 61341825Sdim // This is a really ugly-looking loop, but it was pillaged directly from 62341825Sdim // MachineBasicBlock::transferSuccessorsAndUpdatePHIs(). 63341825Sdim for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { 64341825Sdim MachineOperand &MO = MI.getOperand(i); 65341825Sdim if (MO.getMBB() == OrigMBB) { 66341825Sdim // Check if the instruction is actually defined in NewMBB. 67341825Sdim if (MI.getOperand(i - 1).isReg()) { 68341825Sdim MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg()); 69341825Sdim if (DefMI->getParent() == NewMBB || 70341825Sdim !OrigMBB->isSuccessor(Successor)) { 71341825Sdim MO.setMBB(NewMBB); 72341825Sdim break; 73341825Sdim } 74341825Sdim } 75341825Sdim } 76341825Sdim } 77341825Sdim } 78341825Sdim} 79326938Sdim 80341825Sdim/// Given a basic block \p Successor that potentially contains PHIs, this 81341825Sdim/// function will look for PHIs that have an incoming value from \p OrigMBB 82341825Sdim/// and will add the same incoming value from \p NewMBB. 83341825Sdim/// NOTE: This should only be used if \p NewMBB is an immediate dominator of 84341825Sdim/// \p OrigMBB. 85341825Sdimstatic void addIncomingValuesToPHIs(MachineBasicBlock *Successor, 86341825Sdim MachineBasicBlock *OrigMBB, 87341825Sdim MachineBasicBlock *NewMBB, 88341825Sdim MachineRegisterInfo *MRI) { 89341825Sdim assert(OrigMBB->isSuccessor(NewMBB) && 90341825Sdim "NewMBB must be a successor of OrigMBB"); 91341825Sdim for (auto &MI : Successor->instrs()) { 92341825Sdim if (!MI.isPHI()) 93341825Sdim continue; 94341825Sdim // This is a really ugly-looking loop, but it was pillaged directly from 95341825Sdim // MachineBasicBlock::transferSuccessorsAndUpdatePHIs(). 96341825Sdim for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { 97341825Sdim MachineOperand &MO = MI.getOperand(i); 98341825Sdim if (MO.getMBB() == OrigMBB) { 99341825Sdim MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); 100341825Sdim MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB); 101341825Sdim break; 102341825Sdim } 103341825Sdim } 104341825Sdim } 105341825Sdim} 106341825Sdim 107341825Sdimstruct BlockSplitInfo { 108341825Sdim MachineInstr *OrigBranch; 109341825Sdim MachineInstr *SplitBefore; 110341825Sdim MachineInstr *SplitCond; 111341825Sdim bool InvertNewBranch; 112341825Sdim bool InvertOrigBranch; 113341825Sdim bool BranchToFallThrough; 114341825Sdim const MachineBranchProbabilityInfo *MBPI; 115341825Sdim MachineInstr *MIToDelete; 116341825Sdim MachineInstr *NewCond; 117341825Sdim bool allInstrsInSameMBB() { 118341825Sdim if (!OrigBranch || !SplitBefore || !SplitCond) 119341825Sdim return false; 120341825Sdim MachineBasicBlock *MBB = OrigBranch->getParent(); 121341825Sdim if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB) 122341825Sdim return false; 123341825Sdim if (MIToDelete && MIToDelete->getParent() != MBB) 124341825Sdim return false; 125341825Sdim if (NewCond && NewCond->getParent() != MBB) 126341825Sdim return false; 127341825Sdim return true; 128341825Sdim } 129341825Sdim}; 130341825Sdim 131341825Sdim/// Splits a MachineBasicBlock to branch before \p SplitBefore. The original 132341825Sdim/// branch is \p OrigBranch. The target of the new branch can either be the same 133341825Sdim/// as the target of the original branch or the fallthrough successor of the 134341825Sdim/// original block as determined by \p BranchToFallThrough. The branch 135341825Sdim/// conditions will be inverted according to \p InvertNewBranch and 136341825Sdim/// \p InvertOrigBranch. If an instruction that previously fed the branch is to 137341825Sdim/// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as 138341825Sdim/// the branch condition. The branch probabilities will be set if the 139341825Sdim/// MachineBranchProbabilityInfo isn't null. 140341825Sdimstatic bool splitMBB(BlockSplitInfo &BSI) { 141341825Sdim assert(BSI.allInstrsInSameMBB() && 142341825Sdim "All instructions must be in the same block."); 143341825Sdim 144341825Sdim MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent(); 145341825Sdim MachineFunction *MF = ThisMBB->getParent(); 146341825Sdim MachineRegisterInfo *MRI = &MF->getRegInfo(); 147341825Sdim assert(MRI->isSSA() && "Can only do this while the function is in SSA form."); 148341825Sdim if (ThisMBB->succ_size() != 2) { 149341825Sdim LLVM_DEBUG( 150341825Sdim dbgs() << "Don't know how to handle blocks that don't have exactly" 151341825Sdim << " two successors.\n"); 152341825Sdim return false; 153341825Sdim } 154341825Sdim 155341825Sdim const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 156341825Sdim unsigned OrigBROpcode = BSI.OrigBranch->getOpcode(); 157341825Sdim unsigned InvertedOpcode = 158341825Sdim OrigBROpcode == PPC::BC 159341825Sdim ? PPC::BCn 160341825Sdim : OrigBROpcode == PPC::BCn 161341825Sdim ? PPC::BC 162341825Sdim : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR; 163341825Sdim unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode; 164341825Sdim MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB(); 165341825Sdim MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin() 166341825Sdim ? *ThisMBB->succ_rbegin() 167341825Sdim : *ThisMBB->succ_begin(); 168341825Sdim MachineBasicBlock *NewBRTarget = 169341825Sdim BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget; 170341825Sdim 171353358Sdim // It's impossible to know the precise branch probability after the split. 172353358Sdim // But it still needs to be reasonable, the whole probability to original 173353358Sdim // targets should not be changed. 174353358Sdim // After split NewBRTarget will get two incoming edges. Assume P0 is the 175353358Sdim // original branch probability to NewBRTarget, P1 and P2 are new branch 176353358Sdim // probabilies to NewBRTarget after split. If the two edge frequencies are 177353358Sdim // same, then 178353358Sdim // F * P1 = F * P0 / 2 ==> P1 = P0 / 2 179353358Sdim // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1) 180353358Sdim BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br. 181353358Sdim BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br. 182353358Sdim ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown(); 183353358Sdim ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown(); 184353358Sdim if (BSI.MBPI) { 185353358Sdim if (BSI.BranchToFallThrough) { 186353358Sdim ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2; 187353358Sdim ProbFallThrough = ProbToNewTarget.getCompl(); 188353358Sdim ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl(); 189353358Sdim ProbOrigTarget = ProbOrigFallThrough.getCompl(); 190353358Sdim } else { 191353358Sdim ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2; 192353358Sdim ProbFallThrough = ProbToNewTarget.getCompl(); 193353358Sdim ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl(); 194353358Sdim ProbOrigFallThrough = ProbOrigTarget.getCompl(); 195353358Sdim } 196353358Sdim } 197353358Sdim 198341825Sdim // Create a new basic block. 199341825Sdim MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore; 200341825Sdim const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); 201341825Sdim MachineFunction::iterator It = ThisMBB->getIterator(); 202341825Sdim MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB); 203341825Sdim MF->insert(++It, NewMBB); 204341825Sdim 205341825Sdim // Move everything after SplitBefore into the new block. 206341825Sdim NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); 207341825Sdim NewMBB->transferSuccessors(ThisMBB); 208353358Sdim if (!ProbOrigTarget.isUnknown()) { 209353358Sdim auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget); 210353358Sdim NewMBB->setSuccProbability(MBBI, ProbOrigTarget); 211353358Sdim MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough); 212353358Sdim NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough); 213353358Sdim } 214341825Sdim 215353358Sdim // Add the two successors to ThisMBB. 216341825Sdim ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget); 217353358Sdim ThisMBB->addSuccessor(NewMBB, ProbFallThrough); 218341825Sdim 219341825Sdim // Add the branches to ThisMBB. 220341825Sdim BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), 221341825Sdim TII->get(NewBROpcode)) 222341825Sdim .addReg(BSI.SplitCond->getOperand(0).getReg()) 223341825Sdim .addMBB(NewBRTarget); 224341825Sdim BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), 225341825Sdim TII->get(PPC::B)) 226341825Sdim .addMBB(NewMBB); 227341825Sdim if (BSI.MIToDelete) 228341825Sdim BSI.MIToDelete->eraseFromParent(); 229341825Sdim 230341825Sdim // Change the condition on the original branch and invert it if requested. 231341825Sdim auto FirstTerminator = NewMBB->getFirstTerminator(); 232341825Sdim if (BSI.NewCond) { 233341825Sdim assert(FirstTerminator->getOperand(0).isReg() && 234341825Sdim "Can't update condition of unconditional branch."); 235341825Sdim FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg()); 236341825Sdim } 237341825Sdim if (BSI.InvertOrigBranch) 238341825Sdim FirstTerminator->setDesc(TII->get(InvertedOpcode)); 239341825Sdim 240341825Sdim // If any of the PHIs in the successors of NewMBB reference values that 241341825Sdim // now come from NewMBB, they need to be updated. 242341825Sdim for (auto *Succ : NewMBB->successors()) { 243341825Sdim updatePHIs(Succ, ThisMBB, NewMBB, MRI); 244341825Sdim } 245341825Sdim addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI); 246341825Sdim 247341825Sdim LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump()); 248341825Sdim LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump()); 249341825Sdim LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump()); 250341825Sdim return true; 251341825Sdim} 252341825Sdim 253326938Sdimstatic bool isBinary(MachineInstr &MI) { 254326938Sdim return MI.getNumOperands() == 3; 255326938Sdim} 256326938Sdim 257326938Sdimstatic bool isNullary(MachineInstr &MI) { 258326938Sdim return MI.getNumOperands() == 1; 259326938Sdim} 260326938Sdim 261326938Sdim/// Given a CR logical operation \p CROp, branch opcode \p BROp as well as 262326938Sdim/// a flag to indicate if the first operand of \p CROp is used as the 263326938Sdim/// SplitBefore operand, determines whether either of the branches are to be 264326938Sdim/// inverted as well as whether the new target should be the original 265326938Sdim/// fall-through block. 266326938Sdimstatic void 267326938SdimcomputeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1, 268326938Sdim bool &InvertNewBranch, bool &InvertOrigBranch, 269326938Sdim bool &TargetIsFallThrough) { 270326938Sdim // The conditions under which each of the output operands should be [un]set 271326938Sdim // can certainly be written much more concisely with just 3 if statements or 272326938Sdim // ternary expressions. However, this provides a much clearer overview to the 273326938Sdim // reader as to what is set for each <CROp, BROp, OpUsed> combination. 274326938Sdim if (BROp == PPC::BC || BROp == PPC::BCLR) { 275326938Sdim // Regular branches. 276326938Sdim switch (CROp) { 277326938Sdim default: 278326938Sdim llvm_unreachable("Don't know how to handle this CR logical."); 279326938Sdim case PPC::CROR: 280326938Sdim InvertNewBranch = false; 281326938Sdim InvertOrigBranch = false; 282326938Sdim TargetIsFallThrough = false; 283326938Sdim return; 284326938Sdim case PPC::CRAND: 285326938Sdim InvertNewBranch = true; 286326938Sdim InvertOrigBranch = false; 287326938Sdim TargetIsFallThrough = true; 288326938Sdim return; 289326938Sdim case PPC::CRNAND: 290326938Sdim InvertNewBranch = true; 291326938Sdim InvertOrigBranch = true; 292326938Sdim TargetIsFallThrough = false; 293326938Sdim return; 294326938Sdim case PPC::CRNOR: 295326938Sdim InvertNewBranch = false; 296326938Sdim InvertOrigBranch = true; 297326938Sdim TargetIsFallThrough = true; 298326938Sdim return; 299326938Sdim case PPC::CRORC: 300326938Sdim InvertNewBranch = UsingDef1; 301326938Sdim InvertOrigBranch = !UsingDef1; 302326938Sdim TargetIsFallThrough = false; 303326938Sdim return; 304326938Sdim case PPC::CRANDC: 305326938Sdim InvertNewBranch = !UsingDef1; 306326938Sdim InvertOrigBranch = !UsingDef1; 307326938Sdim TargetIsFallThrough = true; 308326938Sdim return; 309326938Sdim } 310326938Sdim } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) { 311326938Sdim // Negated branches. 312326938Sdim switch (CROp) { 313326938Sdim default: 314326938Sdim llvm_unreachable("Don't know how to handle this CR logical."); 315326938Sdim case PPC::CROR: 316326938Sdim InvertNewBranch = true; 317326938Sdim InvertOrigBranch = false; 318326938Sdim TargetIsFallThrough = true; 319326938Sdim return; 320326938Sdim case PPC::CRAND: 321326938Sdim InvertNewBranch = false; 322326938Sdim InvertOrigBranch = false; 323326938Sdim TargetIsFallThrough = false; 324326938Sdim return; 325326938Sdim case PPC::CRNAND: 326326938Sdim InvertNewBranch = false; 327326938Sdim InvertOrigBranch = true; 328326938Sdim TargetIsFallThrough = true; 329326938Sdim return; 330326938Sdim case PPC::CRNOR: 331326938Sdim InvertNewBranch = true; 332326938Sdim InvertOrigBranch = true; 333326938Sdim TargetIsFallThrough = false; 334326938Sdim return; 335326938Sdim case PPC::CRORC: 336326938Sdim InvertNewBranch = !UsingDef1; 337326938Sdim InvertOrigBranch = !UsingDef1; 338326938Sdim TargetIsFallThrough = true; 339326938Sdim return; 340326938Sdim case PPC::CRANDC: 341326938Sdim InvertNewBranch = UsingDef1; 342326938Sdim InvertOrigBranch = !UsingDef1; 343326938Sdim TargetIsFallThrough = false; 344326938Sdim return; 345326938Sdim } 346326938Sdim } else 347326938Sdim llvm_unreachable("Don't know how to handle this branch."); 348326938Sdim} 349326938Sdim 350341825Sdimnamespace { 351341825Sdim 352326938Sdimclass PPCReduceCRLogicals : public MachineFunctionPass { 353326938Sdim 354326938Sdimpublic: 355326938Sdim static char ID; 356326938Sdim struct CRLogicalOpInfo { 357326938Sdim MachineInstr *MI; 358326938Sdim // FIXME: If chains of copies are to be handled, this should be a vector. 359326938Sdim std::pair<MachineInstr*, MachineInstr*> CopyDefs; 360326938Sdim std::pair<MachineInstr*, MachineInstr*> TrueDefs; 361326938Sdim unsigned IsBinary : 1; 362326938Sdim unsigned IsNullary : 1; 363326938Sdim unsigned ContainedInBlock : 1; 364326938Sdim unsigned FeedsISEL : 1; 365326938Sdim unsigned FeedsBR : 1; 366326938Sdim unsigned FeedsLogical : 1; 367326938Sdim unsigned SingleUse : 1; 368326938Sdim unsigned DefsSingleUse : 1; 369326938Sdim unsigned SubregDef1; 370326938Sdim unsigned SubregDef2; 371326938Sdim CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0), 372326938Sdim ContainedInBlock(0), FeedsISEL(0), FeedsBR(0), 373326938Sdim FeedsLogical(0), SingleUse(0), DefsSingleUse(1), 374326938Sdim SubregDef1(0), SubregDef2(0) { } 375326938Sdim void dump(); 376326938Sdim }; 377326938Sdim 378326938Sdimprivate: 379360784Sdim const PPCInstrInfo *TII = nullptr; 380360784Sdim MachineFunction *MF = nullptr; 381360784Sdim MachineRegisterInfo *MRI = nullptr; 382360784Sdim const MachineBranchProbabilityInfo *MBPI = nullptr; 383326938Sdim 384326938Sdim // A vector to contain all the CR logical operations 385360784Sdim SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps; 386326938Sdim void initialize(MachineFunction &MFParm); 387326938Sdim void collectCRLogicals(); 388360784Sdim bool handleCROp(unsigned Idx); 389326938Sdim bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); 390326938Sdim static bool isCRLogical(MachineInstr &MI) { 391326938Sdim unsigned Opc = MI.getOpcode(); 392326938Sdim return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR || 393326938Sdim Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CREQV || 394326938Sdim Opc == PPC::CRANDC || Opc == PPC::CRORC || Opc == PPC::CRSET || 395326938Sdim Opc == PPC::CRUNSET || Opc == PPC::CR6SET || Opc == PPC::CR6UNSET; 396326938Sdim } 397326938Sdim bool simplifyCode() { 398326938Sdim bool Changed = false; 399326938Sdim // Not using a range-based for loop here as the vector may grow while being 400326938Sdim // operated on. 401326938Sdim for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) 402360784Sdim Changed |= handleCROp(i); 403326938Sdim return Changed; 404326938Sdim } 405326938Sdim 406326938Sdimpublic: 407326938Sdim PPCReduceCRLogicals() : MachineFunctionPass(ID) { 408326938Sdim initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry()); 409326938Sdim } 410326938Sdim 411326938Sdim MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg, 412326938Sdim MachineInstr *&CpDef); 413326938Sdim bool runOnMachineFunction(MachineFunction &MF) override { 414326938Sdim if (skipFunction(MF.getFunction())) 415326938Sdim return false; 416326938Sdim 417326938Sdim // If the subtarget doesn't use CR bits, there's nothing to do. 418326938Sdim const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); 419326938Sdim if (!STI.useCRBits()) 420326938Sdim return false; 421326938Sdim 422326938Sdim initialize(MF); 423326938Sdim collectCRLogicals(); 424326938Sdim return simplifyCode(); 425326938Sdim } 426326938Sdim CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI); 427326938Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 428326938Sdim AU.addRequired<MachineBranchProbabilityInfo>(); 429326938Sdim AU.addRequired<MachineDominatorTree>(); 430326938Sdim MachineFunctionPass::getAnalysisUsage(AU); 431326938Sdim } 432326938Sdim}; 433326938Sdim 434326938Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 435326938SdimLLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() { 436326938Sdim dbgs() << "CRLogicalOpMI: "; 437326938Sdim MI->dump(); 438326938Sdim dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL; 439326938Sdim dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: "; 440326938Sdim dbgs() << FeedsLogical << ", SingleUse: " << SingleUse; 441326938Sdim dbgs() << ", DefsSingleUse: " << DefsSingleUse; 442326938Sdim dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: "; 443326938Sdim dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock; 444326938Sdim if (!IsNullary) { 445326938Sdim dbgs() << "\nDefs:\n"; 446326938Sdim TrueDefs.first->dump(); 447326938Sdim } 448326938Sdim if (IsBinary) 449326938Sdim TrueDefs.second->dump(); 450326938Sdim dbgs() << "\n"; 451326938Sdim if (CopyDefs.first) { 452326938Sdim dbgs() << "CopyDef1: "; 453326938Sdim CopyDefs.first->dump(); 454326938Sdim } 455326938Sdim if (CopyDefs.second) { 456326938Sdim dbgs() << "CopyDef2: "; 457326938Sdim CopyDefs.second->dump(); 458326938Sdim } 459326938Sdim} 460326938Sdim#endif 461326938Sdim 462326938SdimPPCReduceCRLogicals::CRLogicalOpInfo 463326938SdimPPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) { 464326938Sdim CRLogicalOpInfo Ret; 465326938Sdim Ret.MI = &MIParam; 466326938Sdim // Get the defs 467326938Sdim if (isNullary(MIParam)) { 468326938Sdim Ret.IsNullary = 1; 469326938Sdim Ret.TrueDefs = std::make_pair(nullptr, nullptr); 470326938Sdim Ret.CopyDefs = std::make_pair(nullptr, nullptr); 471326938Sdim } else { 472326938Sdim MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(), 473326938Sdim Ret.SubregDef1, Ret.CopyDefs.first); 474360784Sdim assert(Def1 && "Must be able to find a definition of operand 1."); 475326938Sdim Ret.DefsSingleUse &= 476326938Sdim MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg()); 477326938Sdim Ret.DefsSingleUse &= 478326938Sdim MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg()); 479326938Sdim if (isBinary(MIParam)) { 480326938Sdim Ret.IsBinary = 1; 481326938Sdim MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(), 482326938Sdim Ret.SubregDef2, 483326938Sdim Ret.CopyDefs.second); 484360784Sdim assert(Def2 && "Must be able to find a definition of operand 2."); 485326938Sdim Ret.DefsSingleUse &= 486326938Sdim MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg()); 487326938Sdim Ret.DefsSingleUse &= 488326938Sdim MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg()); 489326938Sdim Ret.TrueDefs = std::make_pair(Def1, Def2); 490326938Sdim } else { 491326938Sdim Ret.TrueDefs = std::make_pair(Def1, nullptr); 492326938Sdim Ret.CopyDefs.second = nullptr; 493326938Sdim } 494326938Sdim } 495326938Sdim 496326938Sdim Ret.ContainedInBlock = 1; 497326938Sdim // Get the uses 498326938Sdim for (MachineInstr &UseMI : 499326938Sdim MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) { 500326938Sdim unsigned Opc = UseMI.getOpcode(); 501326938Sdim if (Opc == PPC::ISEL || Opc == PPC::ISEL8) 502326938Sdim Ret.FeedsISEL = 1; 503326938Sdim if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR || 504326938Sdim Opc == PPC::BCLRn) 505326938Sdim Ret.FeedsBR = 1; 506326938Sdim Ret.FeedsLogical = isCRLogical(UseMI); 507326938Sdim if (UseMI.getParent() != MIParam.getParent()) 508326938Sdim Ret.ContainedInBlock = 0; 509326938Sdim } 510326938Sdim Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0; 511326938Sdim 512326938Sdim // We now know whether all the uses of the CR logical are in the same block. 513326938Sdim if (!Ret.IsNullary) { 514326938Sdim Ret.ContainedInBlock &= 515326938Sdim (MIParam.getParent() == Ret.TrueDefs.first->getParent()); 516326938Sdim if (Ret.IsBinary) 517326938Sdim Ret.ContainedInBlock &= 518326938Sdim (MIParam.getParent() == Ret.TrueDefs.second->getParent()); 519326938Sdim } 520341825Sdim LLVM_DEBUG(Ret.dump()); 521326938Sdim if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) { 522326938Sdim NumContainedSingleUseBinOps++; 523326938Sdim if (Ret.FeedsBR && Ret.DefsSingleUse) 524326938Sdim NumToSplitBlocks++; 525326938Sdim } 526326938Sdim return Ret; 527326938Sdim} 528326938Sdim 529341825Sdim/// Looks through a COPY instruction to the actual definition of the CR-bit 530326938Sdim/// register and returns the instruction that defines it. 531326938Sdim/// FIXME: This currently handles what is by-far the most common case: 532326938Sdim/// an instruction that defines a CR field followed by a single copy of a bit 533326938Sdim/// from that field into a virtual register. If chains of copies need to be 534326938Sdim/// handled, this should have a loop until a non-copy instruction is found. 535326938SdimMachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, 536326938Sdim unsigned &Subreg, 537326938Sdim MachineInstr *&CpDef) { 538326938Sdim Subreg = -1; 539360784Sdim if (!Register::isVirtualRegister(Reg)) 540326938Sdim return nullptr; 541326938Sdim MachineInstr *Copy = MRI->getVRegDef(Reg); 542326938Sdim CpDef = Copy; 543326938Sdim if (!Copy->isCopy()) 544326938Sdim return Copy; 545360784Sdim Register CopySrc = Copy->getOperand(1).getReg(); 546326938Sdim Subreg = Copy->getOperand(1).getSubReg(); 547360784Sdim if (!Register::isVirtualRegister(CopySrc)) { 548326938Sdim const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 549326938Sdim // Set the Subreg 550326938Sdim if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ) 551326938Sdim Subreg = PPC::sub_eq; 552326938Sdim if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT) 553326938Sdim Subreg = PPC::sub_lt; 554326938Sdim if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT) 555326938Sdim Subreg = PPC::sub_gt; 556326938Sdim if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN) 557326938Sdim Subreg = PPC::sub_un; 558326938Sdim // Loop backwards and return the first MI that modifies the physical CR Reg. 559326938Sdim MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin(); 560326938Sdim while (Me != B) 561326938Sdim if ((--Me)->modifiesRegister(CopySrc, TRI)) 562326938Sdim return &*Me; 563326938Sdim return nullptr; 564326938Sdim } 565326938Sdim return MRI->getVRegDef(CopySrc); 566326938Sdim} 567326938Sdim 568326938Sdimvoid PPCReduceCRLogicals::initialize(MachineFunction &MFParam) { 569326938Sdim MF = &MFParam; 570326938Sdim MRI = &MF->getRegInfo(); 571326938Sdim TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 572326938Sdim MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); 573326938Sdim 574326938Sdim AllCRLogicalOps.clear(); 575326938Sdim} 576326938Sdim 577326938Sdim/// Contains all the implemented transformations on CR logical operations. 578326938Sdim/// For example, a binary CR logical can be used to split a block on its inputs, 579326938Sdim/// a unary CR logical might be used to change the condition code on a 580326938Sdim/// comparison feeding it. A nullary CR logical might simply be removable 581326938Sdim/// if the user of the bit it [un]sets can be transformed. 582360784Sdimbool PPCReduceCRLogicals::handleCROp(unsigned Idx) { 583326938Sdim // We can definitely split a block on the inputs to a binary CR operation 584326938Sdim // whose defs and (single) use are within the same block. 585326938Sdim bool Changed = false; 586360784Sdim CRLogicalOpInfo CRI = AllCRLogicalOps[Idx]; 587326938Sdim if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && 588326938Sdim CRI.DefsSingleUse) { 589326938Sdim Changed = splitBlockOnBinaryCROp(CRI); 590326938Sdim if (Changed) 591326938Sdim NumBlocksSplitOnBinaryCROp++; 592326938Sdim } 593326938Sdim return Changed; 594326938Sdim} 595326938Sdim 596326938Sdim/// Splits a block that contains a CR-logical operation that feeds a branch 597326938Sdim/// and whose operands are produced within the block. 598326938Sdim/// Example: 599326938Sdim/// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 600326938Sdim/// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 601326938Sdim/// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 602326938Sdim/// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 603326938Sdim/// %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8 604326938Sdim/// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9 605326938Sdim/// Becomes: 606326938Sdim/// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 607326938Sdim/// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 608326938Sdim/// BC %vr6<kill>, <BB#2>; CRBITRC:%vr6 609326938Sdim/// 610326938Sdim/// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 611326938Sdim/// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 612326938Sdim/// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9 613326938Sdimbool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) { 614326938Sdim if (CRI.CopyDefs.first == CRI.CopyDefs.second) { 615341825Sdim LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n"); 616326938Sdim NumNotSplitIdenticalOperands++; 617326938Sdim return false; 618326938Sdim } 619326938Sdim if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() || 620326938Sdim CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) { 621341825Sdim LLVM_DEBUG( 622341825Sdim dbgs() << "Unable to split because one of the operands is a PHI or " 623341825Sdim "chain of copies.\n"); 624326938Sdim NumNotSplitChainCopies++; 625326938Sdim return false; 626326938Sdim } 627326938Sdim // Note: keep in sync with computeBranchTargetAndInversion(). 628326938Sdim if (CRI.MI->getOpcode() != PPC::CROR && 629326938Sdim CRI.MI->getOpcode() != PPC::CRAND && 630326938Sdim CRI.MI->getOpcode() != PPC::CRNOR && 631326938Sdim CRI.MI->getOpcode() != PPC::CRNAND && 632326938Sdim CRI.MI->getOpcode() != PPC::CRORC && 633326938Sdim CRI.MI->getOpcode() != PPC::CRANDC) { 634341825Sdim LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n"); 635326938Sdim NumNotSplitWrongOpcode++; 636326938Sdim return false; 637326938Sdim } 638341825Sdim LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump()); 639326938Sdim MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first; 640326938Sdim MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second; 641326938Sdim 642326938Sdim bool UsingDef1 = false; 643326938Sdim MachineInstr *SplitBefore = &*Def2It; 644326938Sdim for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) { 645326938Sdim if (Def1It == Def2It) { // Def2 comes before Def1. 646326938Sdim SplitBefore = &*Def1It; 647326938Sdim UsingDef1 = true; 648326938Sdim break; 649326938Sdim } 650326938Sdim } 651326938Sdim 652341825Sdim LLVM_DEBUG(dbgs() << "We will split the following block:\n";); 653341825Sdim LLVM_DEBUG(CRI.MI->getParent()->dump()); 654341825Sdim LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump()); 655326938Sdim 656326938Sdim // Get the branch instruction. 657326938Sdim MachineInstr *Branch = 658326938Sdim MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent(); 659326938Sdim 660326938Sdim // We want the new block to have no code in it other than the definition 661326938Sdim // of the input to the CR logical and the CR logical itself. So we move 662326938Sdim // those to the bottom of the block (just before the branch). Then we 663326938Sdim // will split before the CR logical. 664326938Sdim MachineBasicBlock *MBB = SplitBefore->getParent(); 665326938Sdim auto FirstTerminator = MBB->getFirstTerminator(); 666326938Sdim MachineBasicBlock::iterator FirstInstrToMove = 667326938Sdim UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second; 668326938Sdim MachineBasicBlock::iterator SecondInstrToMove = 669326938Sdim UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second; 670326938Sdim 671326938Sdim // The instructions that need to be moved are not guaranteed to be 672326938Sdim // contiguous. Move them individually. 673326938Sdim // FIXME: If one of the operands is a chain of (single use) copies, they 674326938Sdim // can all be moved and we can still split. 675326938Sdim MBB->splice(FirstTerminator, MBB, FirstInstrToMove); 676326938Sdim if (FirstInstrToMove != SecondInstrToMove) 677326938Sdim MBB->splice(FirstTerminator, MBB, SecondInstrToMove); 678326938Sdim MBB->splice(FirstTerminator, MBB, CRI.MI); 679326938Sdim 680326938Sdim unsigned Opc = CRI.MI->getOpcode(); 681326938Sdim bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough; 682326938Sdim computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1, 683326938Sdim InvertNewBranch, InvertOrigBranch, 684326938Sdim TargetIsFallThrough); 685326938Sdim MachineInstr *SplitCond = 686326938Sdim UsingDef1 ? CRI.CopyDefs.second : CRI.CopyDefs.first; 687341825Sdim LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy")); 688341825Sdim LLVM_DEBUG(dbgs() << " the original branch and the target is the " 689341825Sdim << (TargetIsFallThrough ? "fallthrough block\n" 690341825Sdim : "orig. target block\n")); 691341825Sdim LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump()); 692326938Sdim BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch, 693326938Sdim InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI, 694326938Sdim UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second }; 695326938Sdim bool Changed = splitMBB(BSI); 696326938Sdim // If we've split on a CR logical that is fed by a CR logical, 697326938Sdim // recompute the source CR logical as it may be usable for splitting. 698326938Sdim if (Changed) { 699326938Sdim bool Input1CRlogical = 700326938Sdim CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first); 701326938Sdim bool Input2CRlogical = 702326938Sdim CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second); 703326938Sdim if (Input1CRlogical) 704326938Sdim AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first)); 705326938Sdim if (Input2CRlogical) 706326938Sdim AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second)); 707326938Sdim } 708326938Sdim return Changed; 709326938Sdim} 710326938Sdim 711326938Sdimvoid PPCReduceCRLogicals::collectCRLogicals() { 712326938Sdim for (MachineBasicBlock &MBB : *MF) { 713326938Sdim for (MachineInstr &MI : MBB) { 714326938Sdim if (isCRLogical(MI)) { 715326938Sdim AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI)); 716326938Sdim TotalCRLogicals++; 717326938Sdim if (AllCRLogicalOps.back().IsNullary) 718326938Sdim TotalNullaryCRLogicals++; 719326938Sdim else if (AllCRLogicalOps.back().IsBinary) 720326938Sdim TotalBinaryCRLogicals++; 721326938Sdim else 722326938Sdim TotalUnaryCRLogicals++; 723326938Sdim } 724326938Sdim } 725326938Sdim } 726326938Sdim} 727326938Sdim 728341825Sdim} // end anonymous namespace 729326938Sdim 730326938SdimINITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE, 731326938Sdim "PowerPC Reduce CR logical Operation", false, false) 732326938SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 733326938SdimINITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE, 734326938Sdim "PowerPC Reduce CR logical Operation", false, false) 735326938Sdim 736326938Sdimchar PPCReduceCRLogicals::ID = 0; 737326938SdimFunctionPass* 738326938Sdimllvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); } 739