1326938Sdim//===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
2326938Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6326938Sdim//
7326938Sdim//===---------------------------------------------------------------------===//
8326938Sdim//
9326938Sdim// This pass aims to reduce the number of logical operations on bits in the CR
10326938Sdim// register. These instructions have a fairly high latency and only a single
11326938Sdim// pipeline at their disposal in modern PPC cores. Furthermore, they have a
12326938Sdim// tendency to occur in fairly small blocks where there's little opportunity
13326938Sdim// to hide the latency between the CR logical operation and its user.
14326938Sdim//
15326938Sdim//===---------------------------------------------------------------------===//
16326938Sdim
17341825Sdim#include "PPC.h"
18326938Sdim#include "PPCInstrInfo.h"
19326938Sdim#include "PPCTargetMachine.h"
20341825Sdim#include "llvm/ADT/Statistic.h"
21341825Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
22341825Sdim#include "llvm/CodeGen/MachineDominators.h"
23326938Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
24341825Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
25341825Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
26341825Sdim#include "llvm/Config/llvm-config.h"
27360784Sdim#include "llvm/InitializePasses.h"
28326938Sdim#include "llvm/Support/Debug.h"
29326938Sdim
30326938Sdimusing namespace llvm;
31326938Sdim
32326938Sdim#define DEBUG_TYPE "ppc-reduce-cr-ops"
33326938Sdim
34326938SdimSTATISTIC(NumContainedSingleUseBinOps,
35326938Sdim          "Number of single-use binary CR logical ops contained in a block");
36326938SdimSTATISTIC(NumToSplitBlocks,
37326938Sdim          "Number of binary CR logical ops that can be used to split blocks");
38326938SdimSTATISTIC(TotalCRLogicals, "Number of CR logical ops.");
39326938SdimSTATISTIC(TotalNullaryCRLogicals,
40326938Sdim          "Number of nullary CR logical ops (CRSET/CRUNSET).");
41326938SdimSTATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops.");
42326938SdimSTATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops.");
43326938SdimSTATISTIC(NumBlocksSplitOnBinaryCROp,
44326938Sdim          "Number of blocks split on CR binary logical ops.");
45326938SdimSTATISTIC(NumNotSplitIdenticalOperands,
46326938Sdim          "Number of blocks not split due to operands being identical.");
47326938SdimSTATISTIC(NumNotSplitChainCopies,
48326938Sdim          "Number of blocks not split due to operands being chained copies.");
49326938SdimSTATISTIC(NumNotSplitWrongOpcode,
50326938Sdim          "Number of blocks not split due to the wrong opcode.");
51326938Sdim
52341825Sdim/// Given a basic block \p Successor that potentially contains PHIs, this
53341825Sdim/// function will look for any incoming values in the PHIs that are supposed to
54341825Sdim/// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
55341825Sdim/// Any such PHIs will be updated to reflect reality.
56341825Sdimstatic void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB,
57341825Sdim                       MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) {
58341825Sdim  for (auto &MI : Successor->instrs()) {
59341825Sdim    if (!MI.isPHI())
60341825Sdim      continue;
61341825Sdim    // This is a really ugly-looking loop, but it was pillaged directly from
62341825Sdim    // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
63341825Sdim    for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
64341825Sdim      MachineOperand &MO = MI.getOperand(i);
65341825Sdim      if (MO.getMBB() == OrigMBB) {
66341825Sdim        // Check if the instruction is actually defined in NewMBB.
67341825Sdim        if (MI.getOperand(i - 1).isReg()) {
68341825Sdim          MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg());
69341825Sdim          if (DefMI->getParent() == NewMBB ||
70341825Sdim              !OrigMBB->isSuccessor(Successor)) {
71341825Sdim            MO.setMBB(NewMBB);
72341825Sdim            break;
73341825Sdim          }
74341825Sdim        }
75341825Sdim      }
76341825Sdim    }
77341825Sdim  }
78341825Sdim}
79326938Sdim
80341825Sdim/// Given a basic block \p Successor that potentially contains PHIs, this
81341825Sdim/// function will look for PHIs that have an incoming value from \p OrigMBB
82341825Sdim/// and will add the same incoming value from \p NewMBB.
83341825Sdim/// NOTE: This should only be used if \p NewMBB is an immediate dominator of
84341825Sdim/// \p OrigMBB.
85341825Sdimstatic void addIncomingValuesToPHIs(MachineBasicBlock *Successor,
86341825Sdim                                    MachineBasicBlock *OrigMBB,
87341825Sdim                                    MachineBasicBlock *NewMBB,
88341825Sdim                                    MachineRegisterInfo *MRI) {
89341825Sdim  assert(OrigMBB->isSuccessor(NewMBB) &&
90341825Sdim         "NewMBB must be a successor of OrigMBB");
91341825Sdim  for (auto &MI : Successor->instrs()) {
92341825Sdim    if (!MI.isPHI())
93341825Sdim      continue;
94341825Sdim    // This is a really ugly-looking loop, but it was pillaged directly from
95341825Sdim    // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
96341825Sdim    for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
97341825Sdim      MachineOperand &MO = MI.getOperand(i);
98341825Sdim      if (MO.getMBB() == OrigMBB) {
99341825Sdim        MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
100341825Sdim        MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB);
101341825Sdim        break;
102341825Sdim      }
103341825Sdim    }
104341825Sdim  }
105341825Sdim}
106341825Sdim
107341825Sdimstruct BlockSplitInfo {
108341825Sdim  MachineInstr *OrigBranch;
109341825Sdim  MachineInstr *SplitBefore;
110341825Sdim  MachineInstr *SplitCond;
111341825Sdim  bool InvertNewBranch;
112341825Sdim  bool InvertOrigBranch;
113341825Sdim  bool BranchToFallThrough;
114341825Sdim  const MachineBranchProbabilityInfo *MBPI;
115341825Sdim  MachineInstr *MIToDelete;
116341825Sdim  MachineInstr *NewCond;
117341825Sdim  bool allInstrsInSameMBB() {
118341825Sdim    if (!OrigBranch || !SplitBefore || !SplitCond)
119341825Sdim      return false;
120341825Sdim    MachineBasicBlock *MBB = OrigBranch->getParent();
121341825Sdim    if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB)
122341825Sdim      return false;
123341825Sdim    if (MIToDelete && MIToDelete->getParent() != MBB)
124341825Sdim      return false;
125341825Sdim    if (NewCond && NewCond->getParent() != MBB)
126341825Sdim      return false;
127341825Sdim    return true;
128341825Sdim  }
129341825Sdim};
130341825Sdim
131341825Sdim/// Splits a MachineBasicBlock to branch before \p SplitBefore. The original
132341825Sdim/// branch is \p OrigBranch. The target of the new branch can either be the same
133341825Sdim/// as the target of the original branch or the fallthrough successor of the
134341825Sdim/// original block as determined by \p BranchToFallThrough. The branch
135341825Sdim/// conditions will be inverted according to \p InvertNewBranch and
136341825Sdim/// \p InvertOrigBranch. If an instruction that previously fed the branch is to
137341825Sdim/// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as
138341825Sdim/// the branch condition. The branch probabilities will be set if the
139341825Sdim/// MachineBranchProbabilityInfo isn't null.
140341825Sdimstatic bool splitMBB(BlockSplitInfo &BSI) {
141341825Sdim  assert(BSI.allInstrsInSameMBB() &&
142341825Sdim         "All instructions must be in the same block.");
143341825Sdim
144341825Sdim  MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent();
145341825Sdim  MachineFunction *MF = ThisMBB->getParent();
146341825Sdim  MachineRegisterInfo *MRI = &MF->getRegInfo();
147341825Sdim  assert(MRI->isSSA() && "Can only do this while the function is in SSA form.");
148341825Sdim  if (ThisMBB->succ_size() != 2) {
149341825Sdim    LLVM_DEBUG(
150341825Sdim        dbgs() << "Don't know how to handle blocks that don't have exactly"
151341825Sdim               << " two successors.\n");
152341825Sdim    return false;
153341825Sdim  }
154341825Sdim
155341825Sdim  const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
156341825Sdim  unsigned OrigBROpcode = BSI.OrigBranch->getOpcode();
157341825Sdim  unsigned InvertedOpcode =
158341825Sdim      OrigBROpcode == PPC::BC
159341825Sdim          ? PPC::BCn
160341825Sdim          : OrigBROpcode == PPC::BCn
161341825Sdim                ? PPC::BC
162341825Sdim                : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR;
163341825Sdim  unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode;
164341825Sdim  MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB();
165341825Sdim  MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin()
166341825Sdim                                           ? *ThisMBB->succ_rbegin()
167341825Sdim                                           : *ThisMBB->succ_begin();
168341825Sdim  MachineBasicBlock *NewBRTarget =
169341825Sdim      BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
170341825Sdim
171353358Sdim  // It's impossible to know the precise branch probability after the split.
172353358Sdim  // But it still needs to be reasonable, the whole probability to original
173353358Sdim  // targets should not be changed.
174353358Sdim  // After split NewBRTarget will get two incoming edges. Assume P0 is the
175353358Sdim  // original branch probability to NewBRTarget, P1 and P2 are new branch
176353358Sdim  // probabilies to NewBRTarget after split. If the two edge frequencies are
177353358Sdim  // same, then
178353358Sdim  //      F * P1 = F * P0 / 2            ==>  P1 = P0 / 2
179353358Sdim  //      F * (1 - P1) * P2 = F * P1     ==>  P2 = P1 / (1 - P1)
180353358Sdim  BranchProbability ProbToNewTarget, ProbFallThrough;     // Prob for new Br.
181353358Sdim  BranchProbability ProbOrigTarget, ProbOrigFallThrough;  // Prob for orig Br.
182353358Sdim  ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
183353358Sdim  ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
184353358Sdim  if (BSI.MBPI) {
185353358Sdim    if (BSI.BranchToFallThrough) {
186353358Sdim      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
187353358Sdim      ProbFallThrough = ProbToNewTarget.getCompl();
188353358Sdim      ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
189353358Sdim      ProbOrigTarget = ProbOrigFallThrough.getCompl();
190353358Sdim    } else {
191353358Sdim      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
192353358Sdim      ProbFallThrough = ProbToNewTarget.getCompl();
193353358Sdim      ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
194353358Sdim      ProbOrigFallThrough = ProbOrigTarget.getCompl();
195353358Sdim    }
196353358Sdim  }
197353358Sdim
198341825Sdim  // Create a new basic block.
199341825Sdim  MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
200341825Sdim  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
201341825Sdim  MachineFunction::iterator It = ThisMBB->getIterator();
202341825Sdim  MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB);
203341825Sdim  MF->insert(++It, NewMBB);
204341825Sdim
205341825Sdim  // Move everything after SplitBefore into the new block.
206341825Sdim  NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
207341825Sdim  NewMBB->transferSuccessors(ThisMBB);
208353358Sdim  if (!ProbOrigTarget.isUnknown()) {
209353358Sdim    auto MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigTarget);
210353358Sdim    NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
211353358Sdim    MBBI = std::find(NewMBB->succ_begin(), NewMBB->succ_end(), OrigFallThrough);
212353358Sdim    NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
213353358Sdim  }
214341825Sdim
215353358Sdim  // Add the two successors to ThisMBB.
216341825Sdim  ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
217353358Sdim  ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
218341825Sdim
219341825Sdim  // Add the branches to ThisMBB.
220341825Sdim  BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
221341825Sdim          TII->get(NewBROpcode))
222341825Sdim      .addReg(BSI.SplitCond->getOperand(0).getReg())
223341825Sdim      .addMBB(NewBRTarget);
224341825Sdim  BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
225341825Sdim          TII->get(PPC::B))
226341825Sdim      .addMBB(NewMBB);
227341825Sdim  if (BSI.MIToDelete)
228341825Sdim    BSI.MIToDelete->eraseFromParent();
229341825Sdim
230341825Sdim  // Change the condition on the original branch and invert it if requested.
231341825Sdim  auto FirstTerminator = NewMBB->getFirstTerminator();
232341825Sdim  if (BSI.NewCond) {
233341825Sdim    assert(FirstTerminator->getOperand(0).isReg() &&
234341825Sdim           "Can't update condition of unconditional branch.");
235341825Sdim    FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
236341825Sdim  }
237341825Sdim  if (BSI.InvertOrigBranch)
238341825Sdim    FirstTerminator->setDesc(TII->get(InvertedOpcode));
239341825Sdim
240341825Sdim  // If any of the PHIs in the successors of NewMBB reference values that
241341825Sdim  // now come from NewMBB, they need to be updated.
242341825Sdim  for (auto *Succ : NewMBB->successors()) {
243341825Sdim    updatePHIs(Succ, ThisMBB, NewMBB, MRI);
244341825Sdim  }
245341825Sdim  addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI);
246341825Sdim
247341825Sdim  LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump());
248341825Sdim  LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump());
249341825Sdim  LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump());
250341825Sdim  return true;
251341825Sdim}
252341825Sdim
253326938Sdimstatic bool isBinary(MachineInstr &MI) {
254326938Sdim  return MI.getNumOperands() == 3;
255326938Sdim}
256326938Sdim
257326938Sdimstatic bool isNullary(MachineInstr &MI) {
258326938Sdim  return MI.getNumOperands() == 1;
259326938Sdim}
260326938Sdim
261326938Sdim/// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
262326938Sdim/// a flag to indicate if the first operand of \p CROp is used as the
263326938Sdim/// SplitBefore operand, determines whether either of the branches are to be
264326938Sdim/// inverted as well as whether the new target should be the original
265326938Sdim/// fall-through block.
266326938Sdimstatic void
267326938SdimcomputeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1,
268326938Sdim                                bool &InvertNewBranch, bool &InvertOrigBranch,
269326938Sdim                                bool &TargetIsFallThrough) {
270326938Sdim  // The conditions under which each of the output operands should be [un]set
271326938Sdim  // can certainly be written much more concisely with just 3 if statements or
272326938Sdim  // ternary expressions. However, this provides a much clearer overview to the
273326938Sdim  // reader as to what is set for each <CROp, BROp, OpUsed> combination.
274326938Sdim  if (BROp == PPC::BC || BROp == PPC::BCLR) {
275326938Sdim    // Regular branches.
276326938Sdim    switch (CROp) {
277326938Sdim    default:
278326938Sdim      llvm_unreachable("Don't know how to handle this CR logical.");
279326938Sdim    case PPC::CROR:
280326938Sdim      InvertNewBranch = false;
281326938Sdim      InvertOrigBranch = false;
282326938Sdim      TargetIsFallThrough = false;
283326938Sdim      return;
284326938Sdim    case PPC::CRAND:
285326938Sdim      InvertNewBranch = true;
286326938Sdim      InvertOrigBranch = false;
287326938Sdim      TargetIsFallThrough = true;
288326938Sdim      return;
289326938Sdim    case PPC::CRNAND:
290326938Sdim      InvertNewBranch = true;
291326938Sdim      InvertOrigBranch = true;
292326938Sdim      TargetIsFallThrough = false;
293326938Sdim      return;
294326938Sdim    case PPC::CRNOR:
295326938Sdim      InvertNewBranch = false;
296326938Sdim      InvertOrigBranch = true;
297326938Sdim      TargetIsFallThrough = true;
298326938Sdim      return;
299326938Sdim    case PPC::CRORC:
300326938Sdim      InvertNewBranch = UsingDef1;
301326938Sdim      InvertOrigBranch = !UsingDef1;
302326938Sdim      TargetIsFallThrough = false;
303326938Sdim      return;
304326938Sdim    case PPC::CRANDC:
305326938Sdim      InvertNewBranch = !UsingDef1;
306326938Sdim      InvertOrigBranch = !UsingDef1;
307326938Sdim      TargetIsFallThrough = true;
308326938Sdim      return;
309326938Sdim    }
310326938Sdim  } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) {
311326938Sdim    // Negated branches.
312326938Sdim    switch (CROp) {
313326938Sdim    default:
314326938Sdim      llvm_unreachable("Don't know how to handle this CR logical.");
315326938Sdim    case PPC::CROR:
316326938Sdim      InvertNewBranch = true;
317326938Sdim      InvertOrigBranch = false;
318326938Sdim      TargetIsFallThrough = true;
319326938Sdim      return;
320326938Sdim    case PPC::CRAND:
321326938Sdim      InvertNewBranch = false;
322326938Sdim      InvertOrigBranch = false;
323326938Sdim      TargetIsFallThrough = false;
324326938Sdim      return;
325326938Sdim    case PPC::CRNAND:
326326938Sdim      InvertNewBranch = false;
327326938Sdim      InvertOrigBranch = true;
328326938Sdim      TargetIsFallThrough = true;
329326938Sdim      return;
330326938Sdim    case PPC::CRNOR:
331326938Sdim      InvertNewBranch = true;
332326938Sdim      InvertOrigBranch = true;
333326938Sdim      TargetIsFallThrough = false;
334326938Sdim      return;
335326938Sdim    case PPC::CRORC:
336326938Sdim      InvertNewBranch = !UsingDef1;
337326938Sdim      InvertOrigBranch = !UsingDef1;
338326938Sdim      TargetIsFallThrough = true;
339326938Sdim      return;
340326938Sdim    case PPC::CRANDC:
341326938Sdim      InvertNewBranch = UsingDef1;
342326938Sdim      InvertOrigBranch = !UsingDef1;
343326938Sdim      TargetIsFallThrough = false;
344326938Sdim      return;
345326938Sdim    }
346326938Sdim  } else
347326938Sdim    llvm_unreachable("Don't know how to handle this branch.");
348326938Sdim}
349326938Sdim
350341825Sdimnamespace {
351341825Sdim
352326938Sdimclass PPCReduceCRLogicals : public MachineFunctionPass {
353326938Sdim
354326938Sdimpublic:
355326938Sdim  static char ID;
356326938Sdim  struct CRLogicalOpInfo {
357326938Sdim    MachineInstr *MI;
358326938Sdim    // FIXME: If chains of copies are to be handled, this should be a vector.
359326938Sdim    std::pair<MachineInstr*, MachineInstr*> CopyDefs;
360326938Sdim    std::pair<MachineInstr*, MachineInstr*> TrueDefs;
361326938Sdim    unsigned IsBinary : 1;
362326938Sdim    unsigned IsNullary : 1;
363326938Sdim    unsigned ContainedInBlock : 1;
364326938Sdim    unsigned FeedsISEL : 1;
365326938Sdim    unsigned FeedsBR : 1;
366326938Sdim    unsigned FeedsLogical : 1;
367326938Sdim    unsigned SingleUse : 1;
368326938Sdim    unsigned DefsSingleUse : 1;
369326938Sdim    unsigned SubregDef1;
370326938Sdim    unsigned SubregDef2;
371326938Sdim    CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0),
372326938Sdim                        ContainedInBlock(0), FeedsISEL(0), FeedsBR(0),
373326938Sdim                        FeedsLogical(0), SingleUse(0), DefsSingleUse(1),
374326938Sdim                        SubregDef1(0), SubregDef2(0) { }
375326938Sdim    void dump();
376326938Sdim  };
377326938Sdim
378326938Sdimprivate:
379360784Sdim  const PPCInstrInfo *TII = nullptr;
380360784Sdim  MachineFunction *MF = nullptr;
381360784Sdim  MachineRegisterInfo *MRI = nullptr;
382360784Sdim  const MachineBranchProbabilityInfo *MBPI = nullptr;
383326938Sdim
384326938Sdim  // A vector to contain all the CR logical operations
385360784Sdim  SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
386326938Sdim  void initialize(MachineFunction &MFParm);
387326938Sdim  void collectCRLogicals();
388360784Sdim  bool handleCROp(unsigned Idx);
389326938Sdim  bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
390326938Sdim  static bool isCRLogical(MachineInstr &MI) {
391326938Sdim    unsigned Opc = MI.getOpcode();
392326938Sdim    return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR ||
393326938Sdim      Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CREQV ||
394326938Sdim      Opc == PPC::CRANDC || Opc == PPC::CRORC || Opc == PPC::CRSET ||
395326938Sdim      Opc == PPC::CRUNSET || Opc == PPC::CR6SET || Opc == PPC::CR6UNSET;
396326938Sdim  }
397326938Sdim  bool simplifyCode() {
398326938Sdim    bool Changed = false;
399326938Sdim    // Not using a range-based for loop here as the vector may grow while being
400326938Sdim    // operated on.
401326938Sdim    for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
402360784Sdim      Changed |= handleCROp(i);
403326938Sdim    return Changed;
404326938Sdim  }
405326938Sdim
406326938Sdimpublic:
407326938Sdim  PPCReduceCRLogicals() : MachineFunctionPass(ID) {
408326938Sdim    initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry());
409326938Sdim  }
410326938Sdim
411326938Sdim  MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg,
412326938Sdim                                  MachineInstr *&CpDef);
413326938Sdim  bool runOnMachineFunction(MachineFunction &MF) override {
414326938Sdim    if (skipFunction(MF.getFunction()))
415326938Sdim      return false;
416326938Sdim
417326938Sdim    // If the subtarget doesn't use CR bits, there's nothing to do.
418326938Sdim    const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
419326938Sdim    if (!STI.useCRBits())
420326938Sdim      return false;
421326938Sdim
422326938Sdim    initialize(MF);
423326938Sdim    collectCRLogicals();
424326938Sdim    return simplifyCode();
425326938Sdim  }
426326938Sdim  CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI);
427326938Sdim  void getAnalysisUsage(AnalysisUsage &AU) const override {
428326938Sdim    AU.addRequired<MachineBranchProbabilityInfo>();
429326938Sdim    AU.addRequired<MachineDominatorTree>();
430326938Sdim    MachineFunctionPass::getAnalysisUsage(AU);
431326938Sdim  }
432326938Sdim};
433326938Sdim
434326938Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
435326938SdimLLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() {
436326938Sdim  dbgs() << "CRLogicalOpMI: ";
437326938Sdim  MI->dump();
438326938Sdim  dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL;
439326938Sdim  dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: ";
440326938Sdim  dbgs() << FeedsLogical << ", SingleUse: " << SingleUse;
441326938Sdim  dbgs() << ", DefsSingleUse: " << DefsSingleUse;
442326938Sdim  dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: ";
443326938Sdim  dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock;
444326938Sdim  if (!IsNullary) {
445326938Sdim    dbgs() << "\nDefs:\n";
446326938Sdim    TrueDefs.first->dump();
447326938Sdim  }
448326938Sdim  if (IsBinary)
449326938Sdim    TrueDefs.second->dump();
450326938Sdim  dbgs() << "\n";
451326938Sdim  if (CopyDefs.first) {
452326938Sdim    dbgs() << "CopyDef1: ";
453326938Sdim    CopyDefs.first->dump();
454326938Sdim  }
455326938Sdim  if (CopyDefs.second) {
456326938Sdim    dbgs() << "CopyDef2: ";
457326938Sdim    CopyDefs.second->dump();
458326938Sdim  }
459326938Sdim}
460326938Sdim#endif
461326938Sdim
462326938SdimPPCReduceCRLogicals::CRLogicalOpInfo
463326938SdimPPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
464326938Sdim  CRLogicalOpInfo Ret;
465326938Sdim  Ret.MI = &MIParam;
466326938Sdim  // Get the defs
467326938Sdim  if (isNullary(MIParam)) {
468326938Sdim    Ret.IsNullary = 1;
469326938Sdim    Ret.TrueDefs = std::make_pair(nullptr, nullptr);
470326938Sdim    Ret.CopyDefs = std::make_pair(nullptr, nullptr);
471326938Sdim  } else {
472326938Sdim    MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
473326938Sdim                                           Ret.SubregDef1, Ret.CopyDefs.first);
474360784Sdim    assert(Def1 && "Must be able to find a definition of operand 1.");
475326938Sdim    Ret.DefsSingleUse &=
476326938Sdim      MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
477326938Sdim    Ret.DefsSingleUse &=
478326938Sdim      MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
479326938Sdim    if (isBinary(MIParam)) {
480326938Sdim      Ret.IsBinary = 1;
481326938Sdim      MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
482326938Sdim                                             Ret.SubregDef2,
483326938Sdim                                             Ret.CopyDefs.second);
484360784Sdim      assert(Def2 && "Must be able to find a definition of operand 2.");
485326938Sdim      Ret.DefsSingleUse &=
486326938Sdim        MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
487326938Sdim      Ret.DefsSingleUse &=
488326938Sdim        MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
489326938Sdim      Ret.TrueDefs = std::make_pair(Def1, Def2);
490326938Sdim    } else {
491326938Sdim      Ret.TrueDefs = std::make_pair(Def1, nullptr);
492326938Sdim      Ret.CopyDefs.second = nullptr;
493326938Sdim    }
494326938Sdim  }
495326938Sdim
496326938Sdim  Ret.ContainedInBlock = 1;
497326938Sdim  // Get the uses
498326938Sdim  for (MachineInstr &UseMI :
499326938Sdim       MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) {
500326938Sdim    unsigned Opc = UseMI.getOpcode();
501326938Sdim    if (Opc == PPC::ISEL || Opc == PPC::ISEL8)
502326938Sdim      Ret.FeedsISEL = 1;
503326938Sdim    if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR ||
504326938Sdim        Opc == PPC::BCLRn)
505326938Sdim      Ret.FeedsBR = 1;
506326938Sdim    Ret.FeedsLogical = isCRLogical(UseMI);
507326938Sdim    if (UseMI.getParent() != MIParam.getParent())
508326938Sdim      Ret.ContainedInBlock = 0;
509326938Sdim  }
510326938Sdim  Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0;
511326938Sdim
512326938Sdim  // We now know whether all the uses of the CR logical are in the same block.
513326938Sdim  if (!Ret.IsNullary) {
514326938Sdim    Ret.ContainedInBlock &=
515326938Sdim      (MIParam.getParent() == Ret.TrueDefs.first->getParent());
516326938Sdim    if (Ret.IsBinary)
517326938Sdim      Ret.ContainedInBlock &=
518326938Sdim        (MIParam.getParent() == Ret.TrueDefs.second->getParent());
519326938Sdim  }
520341825Sdim  LLVM_DEBUG(Ret.dump());
521326938Sdim  if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) {
522326938Sdim    NumContainedSingleUseBinOps++;
523326938Sdim    if (Ret.FeedsBR && Ret.DefsSingleUse)
524326938Sdim      NumToSplitBlocks++;
525326938Sdim  }
526326938Sdim  return Ret;
527326938Sdim}
528326938Sdim
529341825Sdim/// Looks through a COPY instruction to the actual definition of the CR-bit
530326938Sdim/// register and returns the instruction that defines it.
531326938Sdim/// FIXME: This currently handles what is by-far the most common case:
532326938Sdim/// an instruction that defines a CR field followed by a single copy of a bit
533326938Sdim/// from that field into a virtual register. If chains of copies need to be
534326938Sdim/// handled, this should have a loop until a non-copy instruction is found.
535326938SdimMachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
536326938Sdim                                                     unsigned &Subreg,
537326938Sdim                                                     MachineInstr *&CpDef) {
538326938Sdim  Subreg = -1;
539360784Sdim  if (!Register::isVirtualRegister(Reg))
540326938Sdim    return nullptr;
541326938Sdim  MachineInstr *Copy = MRI->getVRegDef(Reg);
542326938Sdim  CpDef = Copy;
543326938Sdim  if (!Copy->isCopy())
544326938Sdim    return Copy;
545360784Sdim  Register CopySrc = Copy->getOperand(1).getReg();
546326938Sdim  Subreg = Copy->getOperand(1).getSubReg();
547360784Sdim  if (!Register::isVirtualRegister(CopySrc)) {
548326938Sdim    const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
549326938Sdim    // Set the Subreg
550326938Sdim    if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ)
551326938Sdim      Subreg = PPC::sub_eq;
552326938Sdim    if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT)
553326938Sdim      Subreg = PPC::sub_lt;
554326938Sdim    if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT)
555326938Sdim      Subreg = PPC::sub_gt;
556326938Sdim    if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN)
557326938Sdim      Subreg = PPC::sub_un;
558326938Sdim    // Loop backwards and return the first MI that modifies the physical CR Reg.
559326938Sdim    MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin();
560326938Sdim    while (Me != B)
561326938Sdim      if ((--Me)->modifiesRegister(CopySrc, TRI))
562326938Sdim        return &*Me;
563326938Sdim    return nullptr;
564326938Sdim  }
565326938Sdim  return MRI->getVRegDef(CopySrc);
566326938Sdim}
567326938Sdim
568326938Sdimvoid PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
569326938Sdim  MF = &MFParam;
570326938Sdim  MRI = &MF->getRegInfo();
571326938Sdim  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
572326938Sdim  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
573326938Sdim
574326938Sdim  AllCRLogicalOps.clear();
575326938Sdim}
576326938Sdim
577326938Sdim/// Contains all the implemented transformations on CR logical operations.
578326938Sdim/// For example, a binary CR logical can be used to split a block on its inputs,
579326938Sdim/// a unary CR logical might be used to change the condition code on a
580326938Sdim/// comparison feeding it. A nullary CR logical might simply be removable
581326938Sdim/// if the user of the bit it [un]sets can be transformed.
582360784Sdimbool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
583326938Sdim  // We can definitely split a block on the inputs to a binary CR operation
584326938Sdim  // whose defs and (single) use are within the same block.
585326938Sdim  bool Changed = false;
586360784Sdim  CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
587326938Sdim  if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
588326938Sdim      CRI.DefsSingleUse) {
589326938Sdim    Changed = splitBlockOnBinaryCROp(CRI);
590326938Sdim    if (Changed)
591326938Sdim      NumBlocksSplitOnBinaryCROp++;
592326938Sdim  }
593326938Sdim  return Changed;
594326938Sdim}
595326938Sdim
596326938Sdim/// Splits a block that contains a CR-logical operation that feeds a branch
597326938Sdim/// and whose operands are produced within the block.
598326938Sdim/// Example:
599326938Sdim///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
600326938Sdim///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
601326938Sdim///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
602326938Sdim///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
603326938Sdim///    %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8
604326938Sdim///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
605326938Sdim/// Becomes:
606326938Sdim///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
607326938Sdim///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
608326938Sdim///    BC %vr6<kill>, <BB#2>; CRBITRC:%vr6
609326938Sdim///
610326938Sdim///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
611326938Sdim///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
612326938Sdim///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
613326938Sdimbool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) {
614326938Sdim  if (CRI.CopyDefs.first == CRI.CopyDefs.second) {
615341825Sdim    LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n");
616326938Sdim    NumNotSplitIdenticalOperands++;
617326938Sdim    return false;
618326938Sdim  }
619326938Sdim  if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() ||
620326938Sdim      CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) {
621341825Sdim    LLVM_DEBUG(
622341825Sdim        dbgs() << "Unable to split because one of the operands is a PHI or "
623341825Sdim                  "chain of copies.\n");
624326938Sdim    NumNotSplitChainCopies++;
625326938Sdim    return false;
626326938Sdim  }
627326938Sdim  // Note: keep in sync with computeBranchTargetAndInversion().
628326938Sdim  if (CRI.MI->getOpcode() != PPC::CROR &&
629326938Sdim      CRI.MI->getOpcode() != PPC::CRAND &&
630326938Sdim      CRI.MI->getOpcode() != PPC::CRNOR &&
631326938Sdim      CRI.MI->getOpcode() != PPC::CRNAND &&
632326938Sdim      CRI.MI->getOpcode() != PPC::CRORC &&
633326938Sdim      CRI.MI->getOpcode() != PPC::CRANDC) {
634341825Sdim    LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n");
635326938Sdim    NumNotSplitWrongOpcode++;
636326938Sdim    return false;
637326938Sdim  }
638341825Sdim  LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump());
639326938Sdim  MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first;
640326938Sdim  MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second;
641326938Sdim
642326938Sdim  bool UsingDef1 = false;
643326938Sdim  MachineInstr *SplitBefore = &*Def2It;
644326938Sdim  for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) {
645326938Sdim    if (Def1It == Def2It) { // Def2 comes before Def1.
646326938Sdim      SplitBefore = &*Def1It;
647326938Sdim      UsingDef1 = true;
648326938Sdim      break;
649326938Sdim    }
650326938Sdim  }
651326938Sdim
652341825Sdim  LLVM_DEBUG(dbgs() << "We will split the following block:\n";);
653341825Sdim  LLVM_DEBUG(CRI.MI->getParent()->dump());
654341825Sdim  LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump());
655326938Sdim
656326938Sdim  // Get the branch instruction.
657326938Sdim  MachineInstr *Branch =
658326938Sdim    MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent();
659326938Sdim
660326938Sdim  // We want the new block to have no code in it other than the definition
661326938Sdim  // of the input to the CR logical and the CR logical itself. So we move
662326938Sdim  // those to the bottom of the block (just before the branch). Then we
663326938Sdim  // will split before the CR logical.
664326938Sdim  MachineBasicBlock *MBB = SplitBefore->getParent();
665326938Sdim  auto FirstTerminator = MBB->getFirstTerminator();
666326938Sdim  MachineBasicBlock::iterator FirstInstrToMove =
667326938Sdim    UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second;
668326938Sdim  MachineBasicBlock::iterator SecondInstrToMove =
669326938Sdim    UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second;
670326938Sdim
671326938Sdim  // The instructions that need to be moved are not guaranteed to be
672326938Sdim  // contiguous. Move them individually.
673326938Sdim  // FIXME: If one of the operands is a chain of (single use) copies, they
674326938Sdim  // can all be moved and we can still split.
675326938Sdim  MBB->splice(FirstTerminator, MBB, FirstInstrToMove);
676326938Sdim  if (FirstInstrToMove != SecondInstrToMove)
677326938Sdim    MBB->splice(FirstTerminator, MBB, SecondInstrToMove);
678326938Sdim  MBB->splice(FirstTerminator, MBB, CRI.MI);
679326938Sdim
680326938Sdim  unsigned Opc = CRI.MI->getOpcode();
681326938Sdim  bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough;
682326938Sdim  computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1,
683326938Sdim                                  InvertNewBranch, InvertOrigBranch,
684326938Sdim                                  TargetIsFallThrough);
685326938Sdim  MachineInstr *SplitCond =
686326938Sdim    UsingDef1 ? CRI.CopyDefs.second : CRI.CopyDefs.first;
687341825Sdim  LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy"));
688341825Sdim  LLVM_DEBUG(dbgs() << " the original branch and the target is the "
689341825Sdim                    << (TargetIsFallThrough ? "fallthrough block\n"
690341825Sdim                                            : "orig. target block\n"));
691341825Sdim  LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump());
692326938Sdim  BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch,
693326938Sdim    InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI,
694326938Sdim    UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second };
695326938Sdim  bool Changed = splitMBB(BSI);
696326938Sdim  // If we've split on a CR logical that is fed by a CR logical,
697326938Sdim  // recompute the source CR logical as it may be usable for splitting.
698326938Sdim  if (Changed) {
699326938Sdim    bool Input1CRlogical =
700326938Sdim      CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first);
701326938Sdim    bool Input2CRlogical =
702326938Sdim      CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second);
703326938Sdim    if (Input1CRlogical)
704326938Sdim      AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first));
705326938Sdim    if (Input2CRlogical)
706326938Sdim      AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second));
707326938Sdim  }
708326938Sdim  return Changed;
709326938Sdim}
710326938Sdim
711326938Sdimvoid PPCReduceCRLogicals::collectCRLogicals() {
712326938Sdim  for (MachineBasicBlock &MBB : *MF) {
713326938Sdim    for (MachineInstr &MI : MBB) {
714326938Sdim      if (isCRLogical(MI)) {
715326938Sdim        AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI));
716326938Sdim        TotalCRLogicals++;
717326938Sdim        if (AllCRLogicalOps.back().IsNullary)
718326938Sdim          TotalNullaryCRLogicals++;
719326938Sdim        else if (AllCRLogicalOps.back().IsBinary)
720326938Sdim          TotalBinaryCRLogicals++;
721326938Sdim        else
722326938Sdim          TotalUnaryCRLogicals++;
723326938Sdim      }
724326938Sdim    }
725326938Sdim  }
726326938Sdim}
727326938Sdim
728341825Sdim} // end anonymous namespace
729326938Sdim
730326938SdimINITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE,
731326938Sdim                      "PowerPC Reduce CR logical Operation", false, false)
732326938SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
733326938SdimINITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE,
734326938Sdim                    "PowerPC Reduce CR logical Operation", false, false)
735326938Sdim
736326938Sdimchar PPCReduceCRLogicals::ID = 0;
737326938SdimFunctionPass*
738326938Sdimllvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); }
739