1//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements feature and label extraction for offline supervised learning
10// of a IR to native size model.
11//
12//===----------------------------------------------------------------------===//
13#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
14
15#ifdef LLVM_HAVE_TFLITE
16#include "llvm/Analysis/Utils/TFUtils.h"
17#endif
18#include "llvm/IR/Function.h"
19#include "llvm/IR/PassManager.h"
20#include "llvm/Support/raw_ostream.h"
21
22using namespace llvm;
23
24AnalysisKey InlineSizeEstimatorAnalysis::Key;
25
26#ifdef LLVM_HAVE_TFLITE
27#include "llvm/Analysis/LoopInfo.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/TargetTransformInfo.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Instructions.h"
33#include "llvm/MC/MCAsmLayout.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/CommandLine.h"
36#include <algorithm>
37#include <deque>
38#include <optional>
39
40cl::opt<std::string> TFIR2NativeModelPath(
41    "ml-inliner-ir2native-model", cl::Hidden,
42    cl::desc("Path to saved model evaluating native size from IR."));
43
44#define DEBUG_TYPE "inline-size-estimator"
45namespace {
46unsigned getMaxInstructionID() {
47#define LAST_OTHER_INST(NR) return NR;
48#include "llvm/IR/Instruction.def"
49}
50
51class IRToNativeSizeLearning {
52public:
53  enum class NamedFeatureIndex : size_t {
54    InitialSize,
55    Blocks,
56    Calls,
57    IsLocal,
58    IsLinkOnceODR,
59    IsLinkOnce,
60    Loops,
61    MaxLoopDepth,
62    MaxDomTreeLevel,
63
64    NumNamedFeatures
65  };
66  static const size_t NumNamedFeatures =
67      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
68  struct FunctionFeatures {
69    static const size_t FeatureCount;
70
71    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
72    std::vector<int32_t> InstructionHistogram;
73    std::vector<int32_t> InstructionPairHistogram;
74
75    void fillTensor(int32_t *Ptr) const;
76    int32_t &operator[](NamedFeatureIndex Pos) {
77      return NamedFeatures[static_cast<size_t>(Pos)];
78    }
79  };
80  IRToNativeSizeLearning() = default;
81
82  static FunctionFeatures getFunctionFeatures(Function &F,
83                                              FunctionAnalysisManager &FAM);
84};
85
86// This is a point in time - we determined including these pairs of
87// consecutive instructions (in the IR layout available at inline time) as
88// features improves the model performance. We want to move away from manual
89// feature selection.
90// The array is given in opcode pairs rather than labels because 1) labels
91// weren't readily available, and 2) the successions were hand - extracted.
92//
93// This array must be sorted.
94static const std::array<std::pair<size_t, size_t>, 137>
95    ImportantInstructionSuccessions{
96        {{1, 1},   {1, 4},   {1, 5},   {1, 7},   {1, 8},   {1, 9},   {1, 11},
97         {1, 12},  {1, 13},  {1, 14},  {1, 18},  {1, 20},  {1, 22},  {1, 24},
98         {1, 25},  {1, 26},  {1, 27},  {1, 28},  {1, 29},  {1, 30},  {1, 31},
99         {1, 32},  {1, 33},  {1, 34},  {1, 39},  {1, 40},  {1, 42},  {1, 45},
100         {2, 1},   {2, 2},   {2, 13},  {2, 28},  {2, 29},  {2, 32},  {2, 33},
101         {2, 34},  {2, 38},  {2, 48},  {2, 49},  {2, 53},  {2, 55},  {2, 56},
102         {13, 2},  {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27},
103         {28, 2},  {28, 48}, {28, 53}, {29, 2},  {29, 33}, {29, 56}, {31, 31},
104         {31, 33}, {31, 34}, {31, 49}, {32, 1},  {32, 2},  {32, 13}, {32, 15},
105         {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40},
106         {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1},  {33, 2},  {33, 32},
107         {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1},  {34, 2},
108         {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34},
109         {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2},  {48, 34}, {48, 56},
110         {49, 1},  {49, 2},  {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39},
111         {49, 49}, {49, 56}, {53, 1},  {53, 2},  {53, 28}, {53, 34}, {53, 53},
112         {53, 57}, {55, 1},  {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56},
113         {56, 1},  {56, 2},  {56, 7},  {56, 13}, {56, 32}, {56, 33}, {56, 34},
114         {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57},
115         {64, 1},  {64, 64}, {65, 1},  {65, 65}}};
116
117// We have: 9 calculated features (the features here); 1 feature for each
118// instruction opcode; and 1 feature for each manually-identified sequence.
119// For the latter 2, we build a histogram: we count the number of
120// occurrences of each instruction opcode or succession of instructions,
121// respectively.
122// Note that instruction opcodes start from 1. For convenience, we also have an
123// always 0 feature for the '0' opcode, hence the extra 1.
124const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
125    ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 +
126    IRToNativeSizeLearning::NumNamedFeatures;
127
128size_t getSize(Function &F, TargetTransformInfo &TTI) {
129  size_t Ret = 0;
130  for (const auto &BB : F)
131    for (const auto &I : BB)
132      Ret += *(TTI.getInstructionCost(
133          &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue());
134  return Ret;
135}
136
137size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
138  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
139  return getSize(F, TTI);
140}
141
142unsigned getMaxDominatorTreeDepth(const Function &F,
143                                  const DominatorTree &Tree) {
144  unsigned Ret = 0;
145  for (const auto &BB : F)
146    if (const auto *TN = Tree.getNode(&BB))
147      Ret = std::max(Ret, TN->getLevel());
148  return Ret;
149}
150} // namespace
151
152IRToNativeSizeLearning::FunctionFeatures
153IRToNativeSizeLearning::getFunctionFeatures(Function &F,
154                                            FunctionAnalysisManager &FAM) {
155  assert(llvm::is_sorted(ImportantInstructionSuccessions) &&
156         "expected function features are sorted");
157
158  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
159  FunctionFeatures FF;
160  size_t InstrCount = getMaxInstructionID() + 1;
161  FF.InstructionHistogram.resize(InstrCount);
162
163  FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size());
164
165  int StartID = 0;
166  int LastID = StartID;
167  auto getPairIndex = [](size_t a, size_t b) {
168    auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b));
169    if (I == ImportantInstructionSuccessions.end())
170      return -1;
171    return static_cast<int>(
172        std::distance(ImportantInstructionSuccessions.begin(), I));
173  };
174
175  // We don't want debug calls, because they'd just add noise.
176  for (const auto &BB : F) {
177    for (const auto &I : BB.instructionsWithoutDebug()) {
178      auto ID = I.getOpcode();
179
180      ++FF.InstructionHistogram[ID];
181      int PairIndex = getPairIndex(LastID, ID);
182      if (PairIndex >= 0)
183        ++FF.InstructionPairHistogram[PairIndex];
184      LastID = ID;
185      if (isa<CallBase>(I))
186        ++FF[NamedFeatureIndex::Calls];
187    }
188  }
189
190  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
191  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
192  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
193  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
194  FF[NamedFeatureIndex::Blocks] = F.size();
195  auto &LI = FAM.getResult<LoopAnalysis>(F);
196  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
197  for (auto &L : LI)
198    FF[NamedFeatureIndex::MaxLoopDepth] =
199        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
200                 static_cast<int32_t>(L->getLoopDepth()));
201  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
202  return FF;
203}
204
205void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
206  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
207  Ptr += NamedFeatures.size();
208  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
209  Ptr += InstructionHistogram.size();
210  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
211            Ptr);
212}
213
214bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
215  return !TFIR2NativeModelPath.empty();
216}
217
218InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
219  if (!isEvaluatorRequested()) {
220    return;
221  }
222  std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>(
223      "serving_default_input_1",
224      {1, static_cast<int64_t>(
225              IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})};
226  std::vector<TensorSpec> OutputSpecs{
227      TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})};
228  Evaluator = std::make_unique<TFModelEvaluator>(
229      TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs);
230  if (!Evaluator || !Evaluator->isValid()) {
231    Evaluator.reset();
232    return;
233  }
234}
235
236InlineSizeEstimatorAnalysis::Result
237InlineSizeEstimatorAnalysis::run(const Function &F,
238                                 FunctionAnalysisManager &FAM) {
239  if (!Evaluator)
240    return std::nullopt;
241  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
242      const_cast<Function &>(F), FAM);
243  int32_t *V = Evaluator->getInput<int32_t>(0);
244  Features.fillTensor(V);
245  auto ER = Evaluator->evaluate();
246  if (!ER)
247    return std::nullopt;
248  float Ret = *ER->getTensorValue<float>(0);
249  if (Ret < 0.0)
250    Ret = 0.0;
251  return static_cast<size_t>(Ret);
252}
253
254InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
255InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
256    InlineSizeEstimatorAnalysis &&Other)
257    : Evaluator(std::move(Other.Evaluator)) {}
258
259#else
260namespace llvm {
261class TFModelEvaluator {};
262} // namespace llvm
263InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default;
264InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
265    InlineSizeEstimatorAnalysis &&) {}
266InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default;
267InlineSizeEstimatorAnalysis::Result
268InlineSizeEstimatorAnalysis::run(const Function &F,
269                                 FunctionAnalysisManager &FAM) {
270  return std::nullopt;
271}
272bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
273#endif
274
275PreservedAnalyses
276InlineSizeEstimatorAnalysisPrinterPass::run(Function &F,
277                                            FunctionAnalysisManager &AM) {
278  OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName()
279     << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n";
280  return PreservedAnalyses::all();
281}
282