1//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
14///
15/// The pass analyzes call graph and propagates ABI target features through the
16/// call graph.
17///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned, all call sites will
/// be updated to use the correct clone.
22///
23/// A function pass is limited in functionality but can run early in the
24/// pipeline. A module pass is more powerful but has to run late, so misses
25/// library folding opportunities.
26//
27//===----------------------------------------------------------------------===//
28
29#include "AMDGPU.h"
30#include "AMDGPUSubtarget.h"
31#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32#include "Utils/AMDGPUBaseInfo.h"
33#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/Module.h"
37#include "llvm/Target/TargetMachine.h"
38#include "llvm/Transforms/Utils/Cloning.h"
39#include <string>
40
41#define DEBUG_TYPE "amdgpu-propagate-attributes"
42
43using namespace llvm;
44
45namespace llvm {
46extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
47}
48
49namespace {
50
51class AMDGPUPropagateAttributes {
52  const FeatureBitset TargetFeatures = {
53    AMDGPU::FeatureWavefrontSize16,
54    AMDGPU::FeatureWavefrontSize32,
55    AMDGPU::FeatureWavefrontSize64
56  };
57
58  class Clone{
59  public:
60    Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
61      FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
62
63    FeatureBitset FeatureMask;
64    Function *OrigF;
65    Function *NewF;
66  };
67
68  const TargetMachine *TM;
69
70  // Clone functions as needed or just set attributes.
71  bool AllowClone;
72
73  // Option propagation roots.
74  SmallSet<Function *, 32> Roots;
75
76  // Clones of functions with their attributes.
77  SmallVector<Clone, 32> Clones;
78
79  // Find a clone with required features.
80  Function *findFunction(const FeatureBitset &FeaturesNeeded,
81                         Function *OrigF);
82
83  // Clone function F and set NewFeatures on the clone.
84  // Cole takes the name of original function.
85  Function *cloneWithFeatures(Function &F,
86                              const FeatureBitset &NewFeatures);
87
88  // Set new function's features in place.
89  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
90
91  std::string getFeatureString(const FeatureBitset &Features) const;
92
93  // Propagate attributes from Roots.
94  bool process();
95
96public:
97  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
98    TM(TM), AllowClone(AllowClone) {}
99
100  // Use F as a root and propagate its attributes.
101  bool process(Function &F);
102
103  // Propagate attributes starting from kernel functions.
104  bool process(Module &M);
105};
106
107// Allows to propagate attributes early, but no clonning is allowed as it must
108// be a function pass to run before any optimizations.
109// TODO: We shall only need a one instance of module pass, but that needs to be
110// in the linker pipeline which is currently not possible.
111class AMDGPUPropagateAttributesEarly : public FunctionPass {
112  const TargetMachine *TM;
113
114public:
115  static char ID; // Pass identification
116
117  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
118    FunctionPass(ID), TM(TM) {
119    initializeAMDGPUPropagateAttributesEarlyPass(
120      *PassRegistry::getPassRegistry());
121  }
122
123  bool runOnFunction(Function &F) override;
124};
125
126// Allows to propagate attributes with clonning but does that late in the
127// pipeline.
128class AMDGPUPropagateAttributesLate : public ModulePass {
129  const TargetMachine *TM;
130
131public:
132  static char ID; // Pass identification
133
134  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
135    ModulePass(ID), TM(TM) {
136    initializeAMDGPUPropagateAttributesLatePass(
137      *PassRegistry::getPassRegistry());
138  }
139
140  bool runOnModule(Module &M) override;
141};
142
143}  // end anonymous namespace.
144
145char AMDGPUPropagateAttributesEarly::ID = 0;
146char AMDGPUPropagateAttributesLate::ID = 0;
147
148INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
149                "amdgpu-propagate-attributes-early",
150                "Early propagate attributes from kernels to functions",
151                false, false)
152INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
153                "amdgpu-propagate-attributes-late",
154                "Late propagate attributes from kernels to functions",
155                false, false)
156
157Function *
158AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
159                                        Function *OrigF) {
160  // TODO: search for clone's clones.
161  for (Clone &C : Clones)
162    if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
163      return C.NewF;
164
165  return nullptr;
166}
167
168bool AMDGPUPropagateAttributes::process(Module &M) {
169  for (auto &F : M.functions())
170    if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
171      Roots.insert(&F);
172
173  return process();
174}
175
176bool AMDGPUPropagateAttributes::process(Function &F) {
177  Roots.insert(&F);
178  return process();
179}
180
181bool AMDGPUPropagateAttributes::process() {
182  bool Changed = false;
183  SmallSet<Function *, 32> NewRoots;
184  SmallSet<Function *, 32> Replaced;
185
186  if (Roots.empty())
187    return false;
188  Module &M = *(*Roots.begin())->getParent();
189
190  do {
191    Roots.insert(NewRoots.begin(), NewRoots.end());
192    NewRoots.clear();
193
194    for (auto &F : M.functions()) {
195      if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
196        continue;
197
198      const FeatureBitset &CalleeBits =
199        TM->getSubtargetImpl(F)->getFeatureBits();
200      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
201
202      for (User *U : F.users()) {
203        Instruction *I = dyn_cast<Instruction>(U);
204        if (!I)
205          continue;
206        CallBase *CI = dyn_cast<CallBase>(I);
207        if (!CI)
208          continue;
209        Function *Caller = CI->getCaller();
210        if (!Caller)
211          continue;
212        if (!Roots.count(Caller))
213          continue;
214
215        const FeatureBitset &CallerBits =
216          TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
217
218        if (CallerBits == (CalleeBits  & TargetFeatures)) {
219          NewRoots.insert(&F);
220          continue;
221        }
222
223        Function *NewF = findFunction(CallerBits, &F);
224        if (!NewF) {
225          FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
226                                    CallerBits);
227          if (!AllowClone) {
228            // This may set different features on different iteartions if
229            // there is a contradiction in callers' attributes. In this case
230            // we rely on a second pass running on Module, which is allowed
231            // to clone.
232            setFeatures(F, NewFeatures);
233            NewRoots.insert(&F);
234            Changed = true;
235            break;
236          }
237
238          NewF = cloneWithFeatures(F, NewFeatures);
239          Clones.push_back(Clone(CallerBits, &F, NewF));
240          NewRoots.insert(NewF);
241        }
242
243        ToReplace.push_back(std::make_pair(CI, NewF));
244        Replaced.insert(&F);
245
246        Changed = true;
247      }
248
249      while (!ToReplace.empty()) {
250        auto R = ToReplace.pop_back_val();
251        R.first->setCalledFunction(R.second);
252      }
253    }
254  } while (!NewRoots.empty());
255
256  for (Function *F : Replaced) {
257    if (F->use_empty())
258      F->eraseFromParent();
259  }
260
261  return Changed;
262}
263
264Function *
265AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
266                                             const FeatureBitset &NewFeatures) {
267  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
268
269  ValueToValueMapTy dummy;
270  Function *NewF = CloneFunction(&F, dummy);
271  setFeatures(*NewF, NewFeatures);
272
273  // Swap names. If that is the only clone it will retain the name of now
274  // dead value.
275  if (F.hasName()) {
276    std::string NewName = NewF->getName();
277    NewF->takeName(&F);
278    F.setName(NewName);
279
280    // Name has changed, it does not need an external symbol.
281    F.setVisibility(GlobalValue::DefaultVisibility);
282    F.setLinkage(GlobalValue::InternalLinkage);
283  }
284
285  return NewF;
286}
287
288void AMDGPUPropagateAttributes::setFeatures(Function &F,
289                                            const FeatureBitset &NewFeatures) {
290  std::string NewFeatureStr = getFeatureString(NewFeatures);
291
292  LLVM_DEBUG(dbgs() << "Set features "
293                    << getFeatureString(NewFeatures & TargetFeatures)
294                    << " on " << F.getName() << '\n');
295
296  F.removeFnAttr("target-features");
297  F.addFnAttr("target-features", NewFeatureStr);
298}
299
300std::string
301AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
302{
303  std::string Ret;
304  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
305    if (Features[KV.Value])
306      Ret += (StringRef("+") + KV.Key + ",").str();
307    else if (TargetFeatures[KV.Value])
308      Ret += (StringRef("-") + KV.Key + ",").str();
309  }
310  Ret.pop_back(); // Remove last comma.
311  return Ret;
312}
313
314bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
315  if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
316    return false;
317
318  return AMDGPUPropagateAttributes(TM, false).process(F);
319}
320
321bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
322  if (!TM)
323    return false;
324
325  return AMDGPUPropagateAttributes(TM, true).process(M);
326}
327
328FunctionPass
329*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
330  return new AMDGPUPropagateAttributesEarly(TM);
331}
332
333ModulePass
334*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
335  return new AMDGPUPropagateAttributesLate(TM);
336}
337