1//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Uses profile information to split out cold blocks.
11//
12// This pass splits out cold machine basic blocks from the parent function. This
13// implementation leverages the basic block section framework. Blocks marked
14// cold by this pass are grouped together in a separate section prefixed with
15// ".text.unlikely.*". The linker can then group these together as a cold
16// section. The split part of the function is a contiguous region identified by
17// the symbol "foo.cold". Grouping all cold blocks across functions together
18// decreases fragmentation and improves icache and itlb utilization. Note that
19// the overall changes to the binary size are negligible; only a small number of
20// additional jump instructions may be introduced.
21//
22// For the original RFC of this pass please see
23// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24//===----------------------------------------------------------------------===//
25
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/Analysis/ProfileSummaryInfo.h"
28#include "llvm/CodeGen/BasicBlockSectionUtils.h"
29#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineFunctionPass.h"
33#include "llvm/CodeGen/MachineModuleInfo.h"
34#include "llvm/CodeGen/Passes.h"
35#include "llvm/IR/Function.h"
36#include "llvm/InitializePasses.h"
37#include "llvm/Support/CommandLine.h"
38#include <optional>
39
40using namespace llvm;
41
42// FIXME: This cutoff value is CPU dependent and should be moved to
43// TargetTransformInfo once we consider enabling this on other platforms.
44// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
45// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
46// The default was empirically determined to be optimal when considering cutoff
47// values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
48// Intel CPUs.
49static cl::opt<unsigned>
50    PercentileCutoff("mfs-psi-cutoff",
51                     cl::desc("Percentile profile summary cutoff used to "
52                              "determine cold blocks. Unused if set to zero."),
53                     cl::init(999950), cl::Hidden);
54
55static cl::opt<unsigned> ColdCountThreshold(
56    "mfs-count-threshold",
57    cl::desc(
58        "Minimum number of times a block must be executed to be retained."),
59    cl::init(1), cl::Hidden);
60
61static cl::opt<bool> SplitAllEHCode(
62    "mfs-split-ehcode",
63    cl::desc("Splits all EH code and it's descendants by default."),
64    cl::init(false), cl::Hidden);
65
66namespace {
67
68class MachineFunctionSplitter : public MachineFunctionPass {
69public:
70  static char ID;
71  MachineFunctionSplitter() : MachineFunctionPass(ID) {
72    initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
73  }
74
75  StringRef getPassName() const override {
76    return "Machine Function Splitter Transformation";
77  }
78
79  void getAnalysisUsage(AnalysisUsage &AU) const override;
80
81  bool runOnMachineFunction(MachineFunction &F) override;
82};
83} // end anonymous namespace
84
85/// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
86/// only by EH pad as cold. This will help mark EH pads statically cold instead
87/// of relying on profile data.
88static void
89setDescendantEHBlocksCold(SmallVectorImpl<MachineBasicBlock *> &EHBlocks,
90                          MachineFunction &MF) {
91  MachineBasicBlock *StartBlock = &MF.front();
92  // A block can be unknown if its not reachable from anywhere
93  // EH if its only reachable from start blocks via some path through EH pads
94  // NonEH if it's reachable from Non EH blocks as well.
95  enum Status { Unknown = 0, EH = 1, NonEH = 2 };
96  DenseSet<MachineBasicBlock *> WorkList;
97  DenseMap<MachineBasicBlock *, Status> Statuses;
98
99  auto getStatus = [&](MachineBasicBlock *MBB) {
100    if (Statuses.find(MBB) != Statuses.end())
101      return Statuses[MBB];
102    else
103      return Unknown;
104  };
105
106  auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) {
107    for (auto *PredMBB : MBB->predecessors()) {
108      Status PredStatus = getStatus(PredMBB);
109      // If status of predecessor block has gone above current block
110      // we update current blocks status.
111      if (PredStatus > Stat)
112        Stat = PredStatus;
113    }
114    return Stat;
115  };
116
117  auto addSuccesors = [&](MachineBasicBlock *MBB) {
118    for (auto *SuccMBB : MBB->successors()) {
119      if (!SuccMBB->isEHPad())
120        WorkList.insert(SuccMBB);
121    }
122  };
123
124  // Insert the successors of start block
125  // and landing pads successor.
126  Statuses[StartBlock] = NonEH;
127  addSuccesors(StartBlock);
128  for (auto *LP : EHBlocks) {
129    addSuccesors(LP);
130    Statuses[LP] = EH;
131  }
132
133  // Worklist iterative algorithm.
134  while (!WorkList.empty()) {
135    auto *MBB = *WorkList.begin();
136    WorkList.erase(MBB);
137
138    Status OldStatus = getStatus(MBB);
139
140    // Check on predecessors and check for
141    // Status update.
142    Status NewStatus = checkPredecessors(MBB, OldStatus);
143
144    // Did the block status change?
145    bool changed = OldStatus != NewStatus;
146    if (changed) {
147      addSuccesors(MBB);
148      Statuses[MBB] = NewStatus;
149    }
150  }
151
152  for (auto Entry : Statuses) {
153    if (Entry.second == EH)
154      Entry.first->setSectionID(MBBSectionID::ColdSectionID);
155  }
156}
157
158static bool isColdBlock(const MachineBasicBlock &MBB,
159                        const MachineBlockFrequencyInfo *MBFI,
160                        ProfileSummaryInfo *PSI) {
161  std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
162  if (!Count)
163    return true;
164
165  if (PercentileCutoff > 0) {
166    return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
167  }
168  return (*Count < ColdCountThreshold);
169}
170
171bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
172  // We target functions with profile data. Static information in the form
173  // of exception handling code may be split to cold if user passes the
174  // mfs-split-ehcode flag.
175  bool UseProfileData = MF.getFunction().hasProfileData();
176  if (!UseProfileData && !SplitAllEHCode)
177    return false;
178
179  // TODO: We don't split functions where a section attribute has been set
180  // since the split part may not be placed in a contiguous region. It may also
181  // be more beneficial to augment the linker to ensure contiguous layout of
182  // split functions within the same section as specified by the attribute.
183  if (MF.getFunction().hasSection() ||
184      MF.getFunction().hasFnAttribute("implicit-section-name"))
185    return false;
186
187  // We don't want to proceed further for cold functions
188  // or functions of unknown hotness. Lukewarm functions have no prefix.
189  std::optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
190  if (SectionPrefix &&
191      (*SectionPrefix == "unlikely" || *SectionPrefix == "unknown")) {
192    return false;
193  }
194
195  // Renumbering blocks here preserves the order of the blocks as
196  // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
197  // blocks. Preserving the order of blocks is essential to retaining decisions
198  // made by prior passes such as MachineBlockPlacement.
199  MF.RenumberBlocks();
200  MF.setBBSectionsType(BasicBlockSection::Preset);
201
202  MachineBlockFrequencyInfo *MBFI = nullptr;
203  ProfileSummaryInfo *PSI = nullptr;
204  if (UseProfileData) {
205    MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
206    PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
207  }
208
209  SmallVector<MachineBasicBlock *, 2> LandingPads;
210  for (auto &MBB : MF) {
211    if (MBB.isEntryBlock())
212      continue;
213
214    if (MBB.isEHPad())
215      LandingPads.push_back(&MBB);
216    else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
217      MBB.setSectionID(MBBSectionID::ColdSectionID);
218  }
219
220  // Split all EH code and it's descendant statically by default.
221  if (SplitAllEHCode)
222    setDescendantEHBlocksCold(LandingPads, MF);
223  // We only split out eh pads if all of them are cold.
224  else {
225    bool HasHotLandingPads = false;
226    for (const MachineBasicBlock *LP : LandingPads) {
227      if (!isColdBlock(*LP, MBFI, PSI))
228        HasHotLandingPads = true;
229    }
230    if (!HasHotLandingPads) {
231      for (MachineBasicBlock *LP : LandingPads)
232        LP->setSectionID(MBBSectionID::ColdSectionID);
233    }
234  }
235  auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
236    return X.getSectionID().Type < Y.getSectionID().Type;
237  };
238  llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
239  llvm::avoidZeroOffsetLandingPad(MF);
240  return true;
241}
242
243void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
244  AU.addRequired<MachineModuleInfoWrapperPass>();
245  AU.addRequired<MachineBlockFrequencyInfo>();
246  AU.addRequired<ProfileSummaryInfoWrapperPass>();
247}
248
249char MachineFunctionSplitter::ID = 0;
250INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
251                "Split machine functions using profile information", false,
252                false)
253
254MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
255  return new MachineFunctionSplitter();
256}
257