1//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// This pass optimizes the s_cbranch_execz instructions.
/// The pass removes this skip instruction for short branches,
/// if there is no unwanted side effect in the fallthrough code sequence.
13///
14//===----------------------------------------------------------------------===//
15
16#include "AMDGPU.h"
17#include "AMDGPUSubtarget.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "SIInstrInfo.h"
20#include "llvm/CodeGen/MachineFunctionPass.h"
21#include "llvm/Support/CommandLine.h"
22
23using namespace llvm;
24
25#define DEBUG_TYPE "si-remove-short-exec-branches"
26
27static unsigned SkipThreshold;
28
29static cl::opt<unsigned, true> SkipThresholdFlag(
30    "amdgpu-skip-threshold", cl::Hidden,
31    cl::desc(
32        "Number of instructions before jumping over divergent control flow"),
33    cl::location(SkipThreshold), cl::init(12));
34
35namespace {
36
37class SIRemoveShortExecBranches : public MachineFunctionPass {
38private:
39  const SIInstrInfo *TII = nullptr;
40  bool getBlockDestinations(MachineBasicBlock &SrcMBB,
41                            MachineBasicBlock *&TrueMBB,
42                            MachineBasicBlock *&FalseMBB,
43                            SmallVectorImpl<MachineOperand> &Cond);
44  bool mustRetainExeczBranch(const MachineBasicBlock &From,
45                             const MachineBasicBlock &To) const;
46  bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
47
48public:
49  static char ID;
50
51  SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
52    initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
53  }
54
55  bool runOnMachineFunction(MachineFunction &MF) override;
56};
57
58} // End anonymous namespace.
59
60INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
61                "SI remove short exec branches", false, false)
62
63char SIRemoveShortExecBranches::ID = 0;
64
65char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
66
67bool SIRemoveShortExecBranches::getBlockDestinations(
68    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
69    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
70  if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
71    return false;
72
73  if (!FalseMBB)
74    FalseMBB = SrcMBB.getNextNode();
75
76  return true;
77}
78
79bool SIRemoveShortExecBranches::mustRetainExeczBranch(
80    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
81  unsigned NumInstr = 0;
82  const MachineFunction *MF = From.getParent();
83
84  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
85       MBBI != End && MBBI != ToI; ++MBBI) {
86    const MachineBasicBlock &MBB = *MBBI;
87
88    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
89         I != E; ++I) {
90      // When a uniform loop is inside non-uniform control flow, the branch
91      // leaving the loop might never be taken when EXEC = 0.
92      // Hence we should retain cbranch out of the loop lest it become infinite.
93      if (I->isConditionalBranch())
94        return true;
95
96      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
97        return true;
98
99      if (TII->isKillTerminator(I->getOpcode()))
100        return true;
101
102      // These instructions are potentially expensive even if EXEC = 0.
103      if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
104          I->getOpcode() == AMDGPU::S_WAITCNT)
105        return true;
106
107      ++NumInstr;
108      if (NumInstr >= SkipThreshold)
109        return true;
110    }
111  }
112
113  return false;
114}
115
116// Returns true if the skip branch instruction is removed.
117bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
118                                                  MachineBasicBlock &SrcMBB) {
119  MachineBasicBlock *TrueMBB = nullptr;
120  MachineBasicBlock *FalseMBB = nullptr;
121  SmallVector<MachineOperand, 1> Cond;
122
123  if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
124    return false;
125
126  // Consider only the forward branches.
127  if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
128      mustRetainExeczBranch(*FalseMBB, *TrueMBB))
129    return false;
130
131  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
132  MI.eraseFromParent();
133  SrcMBB.removeSuccessor(TrueMBB);
134
135  return true;
136}
137
138bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
139  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
140  TII = ST.getInstrInfo();
141  MF.RenumberBlocks();
142  bool Changed = false;
143
144  for (MachineBasicBlock &MBB : MF) {
145    MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
146    if (MBBI == MBB.end())
147      continue;
148
149    MachineInstr &MI = *MBBI;
150    switch (MI.getOpcode()) {
151    case AMDGPU::S_CBRANCH_EXECZ:
152      Changed = removeExeczBranch(MI, MBB);
153      break;
154    default:
155      break;
156    }
157  }
158
159  return Changed;
160}
161