1//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// \brief This pass propagates attributes from kernels to the non-entry
11/// functions. Most of the library functions were not compiled for specific ABI,
/// yet will be correctly compiled if proper attributes are propagated from the
13/// caller.
14///
15/// The pass analyzes call graph and propagates ABI target features through the
16/// call graph.
17///
18/// It can run in two modes: as a function or module pass. A function pass
19/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
21/// be updated to use a correct clone.
22///
23/// A function pass is limited in functionality but can run early in the
24/// pipeline. A module pass is more powerful but has to run late, so misses
25/// library folding opportunities.
26//
27//===----------------------------------------------------------------------===//
28
29#include "AMDGPU.h"
30#include "AMDGPUSubtarget.h"
31#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32#include "Utils/AMDGPUBaseInfo.h"
33#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/Module.h"
37#include "llvm/Target/TargetMachine.h"
38#include "llvm/Transforms/Utils/Cloning.h"
39#include <string>
40
41#define DEBUG_TYPE "amdgpu-propagate-attributes"
42
43using namespace llvm;
44
namespace llvm {
// Table of AMDGPU subtarget features, defined outside this file. Each entry
// exposes a Key (feature name) and a Value (feature bit index);
// getFeatureString() walks it to turn a FeatureBitset back into a
// "target-features" attribute string.
// NOTE(review): presumably emitted by TableGen for the AMDGPU target -- confirm.
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
48
49namespace {
50
51// Target features to propagate.
52static constexpr const FeatureBitset TargetFeatures = {
53  AMDGPU::FeatureWavefrontSize16,
54  AMDGPU::FeatureWavefrontSize32,
55  AMDGPU::FeatureWavefrontSize64
56};
57
58// Attributes to propagate.
59static constexpr const char* AttributeNames[] = {
60  "amdgpu-waves-per-eu"
61};
62
63static constexpr unsigned NumAttr =
64  sizeof(AttributeNames) / sizeof(AttributeNames[0]);
65
66class AMDGPUPropagateAttributes {
67
68  class FnProperties {
69  private:
70    explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
71
72  public:
73    explicit FnProperties(const TargetMachine &TM, const Function &F) {
74      Features = TM.getSubtargetImpl(F)->getFeatureBits();
75
76      for (unsigned I = 0; I < NumAttr; ++I)
77        if (F.hasFnAttribute(AttributeNames[I]))
78          Attributes[I] = F.getFnAttribute(AttributeNames[I]);
79    }
80
81    bool operator == (const FnProperties &Other) const {
82      if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
83        return false;
84      for (unsigned I = 0; I < NumAttr; ++I)
85        if (Attributes[I] != Other.Attributes[I])
86          return false;
87      return true;
88    }
89
90    FnProperties adjustToCaller(const FnProperties &CallerProps) const {
91      FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
92      for (unsigned I = 0; I < NumAttr; ++I)
93        New.Attributes[I] = CallerProps.Attributes[I];
94      return New;
95    }
96
97    FeatureBitset Features;
98    Optional<Attribute> Attributes[NumAttr];
99  };
100
101  class Clone {
102  public:
103    Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
104      Properties(Props), OrigF(OrigF), NewF(NewF) {}
105
106    FnProperties Properties;
107    Function *OrigF;
108    Function *NewF;
109  };
110
111  const TargetMachine *TM;
112
113  // Clone functions as needed or just set attributes.
114  bool AllowClone;
115
116  // Option propagation roots.
117  SmallSet<Function *, 32> Roots;
118
119  // Clones of functions with their attributes.
120  SmallVector<Clone, 32> Clones;
121
122  // Find a clone with required features.
123  Function *findFunction(const FnProperties &PropsNeeded,
124                         Function *OrigF);
125
126  // Clone function \p F and set \p NewProps on the clone.
127  // Cole takes the name of original function.
128  Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
129
130  // Set new function's features in place.
131  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
132
133  // Set new function's attributes in place.
134  void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
135
136  std::string getFeatureString(const FeatureBitset &Features) const;
137
138  // Propagate attributes from Roots.
139  bool process();
140
141public:
142  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
143    TM(TM), AllowClone(AllowClone) {}
144
145  // Use F as a root and propagate its attributes.
146  bool process(Function &F);
147
148  // Propagate attributes starting from kernel functions.
149  bool process(Module &M);
150};
151
152// Allows to propagate attributes early, but no clonning is allowed as it must
153// be a function pass to run before any optimizations.
154// TODO: We shall only need a one instance of module pass, but that needs to be
155// in the linker pipeline which is currently not possible.
156class AMDGPUPropagateAttributesEarly : public FunctionPass {
157  const TargetMachine *TM;
158
159public:
160  static char ID; // Pass identification
161
162  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
163    FunctionPass(ID), TM(TM) {
164    initializeAMDGPUPropagateAttributesEarlyPass(
165      *PassRegistry::getPassRegistry());
166  }
167
168  bool runOnFunction(Function &F) override;
169};
170
171// Allows to propagate attributes with clonning but does that late in the
172// pipeline.
173class AMDGPUPropagateAttributesLate : public ModulePass {
174  const TargetMachine *TM;
175
176public:
177  static char ID; // Pass identification
178
179  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
180    ModulePass(ID), TM(TM) {
181    initializeAMDGPUPropagateAttributesLatePass(
182      *PassRegistry::getPassRegistry());
183  }
184
185  bool runOnModule(Module &M) override;
186};
187
188}  // end anonymous namespace.
189
// Definitions of the static pass ID members declared in the two pass classes.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;

// Register both passes together with their argument string and description.
// NOTE(review): the trailing `false, false` are presumably the macro's
// "CFG only" and "is analysis" flags -- confirm against PassSupport.h.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
                "amdgpu-propagate-attributes-early",
                "Early propagate attributes from kernels to functions",
                false, false)
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                "amdgpu-propagate-attributes-late",
                "Late propagate attributes from kernels to functions",
                false, false)
201
202Function *
203AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
204                                        Function *OrigF) {
205  // TODO: search for clone's clones.
206  for (Clone &C : Clones)
207    if (C.OrigF == OrigF && PropsNeeded == C.Properties)
208      return C.NewF;
209
210  return nullptr;
211}
212
213bool AMDGPUPropagateAttributes::process(Module &M) {
214  for (auto &F : M.functions())
215    if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
216      Roots.insert(&F);
217
218  return process();
219}
220
221bool AMDGPUPropagateAttributes::process(Function &F) {
222  Roots.insert(&F);
223  return process();
224}
225
226bool AMDGPUPropagateAttributes::process() {
227  bool Changed = false;
228  SmallSet<Function *, 32> NewRoots;
229  SmallSet<Function *, 32> Replaced;
230
231  if (Roots.empty())
232    return false;
233  Module &M = *(*Roots.begin())->getParent();
234
235  do {
236    Roots.insert(NewRoots.begin(), NewRoots.end());
237    NewRoots.clear();
238
239    for (auto &F : M.functions()) {
240      if (F.isDeclaration())
241        continue;
242
243      const FnProperties CalleeProps(*TM, F);
244      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
245      SmallSet<CallBase *, 32> Visited;
246
247      for (User *U : F.users()) {
248        Instruction *I = dyn_cast<Instruction>(U);
249        if (!I)
250          continue;
251        CallBase *CI = dyn_cast<CallBase>(I);
252        if (!CI)
253          continue;
254        Function *Caller = CI->getCaller();
255        if (!Caller || !Visited.insert(CI).second)
256          continue;
257        if (!Roots.count(Caller) && !NewRoots.count(Caller))
258          continue;
259
260        const FnProperties CallerProps(*TM, *Caller);
261
262        if (CalleeProps == CallerProps) {
263          if (!Roots.count(&F))
264            NewRoots.insert(&F);
265          continue;
266        }
267
268        Function *NewF = findFunction(CallerProps, &F);
269        if (!NewF) {
270          const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
271          if (!AllowClone) {
272            // This may set different features on different iteartions if
273            // there is a contradiction in callers' attributes. In this case
274            // we rely on a second pass running on Module, which is allowed
275            // to clone.
276            setFeatures(F, NewProps.Features);
277            setAttributes(F, NewProps.Attributes);
278            NewRoots.insert(&F);
279            Changed = true;
280            break;
281          }
282
283          NewF = cloneWithProperties(F, NewProps);
284          Clones.push_back(Clone(CallerProps, &F, NewF));
285          NewRoots.insert(NewF);
286        }
287
288        ToReplace.push_back(std::make_pair(CI, NewF));
289        Replaced.insert(&F);
290
291        Changed = true;
292      }
293
294      while (!ToReplace.empty()) {
295        auto R = ToReplace.pop_back_val();
296        R.first->setCalledFunction(R.second);
297      }
298    }
299  } while (!NewRoots.empty());
300
301  for (Function *F : Replaced) {
302    if (F->use_empty())
303      F->eraseFromParent();
304  }
305
306  Roots.clear();
307  Clones.clear();
308
309  return Changed;
310}
311
312Function *
313AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314                                               const FnProperties &NewProps) {
315  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
316
317  ValueToValueMapTy dummy;
318  Function *NewF = CloneFunction(&F, dummy);
319  setFeatures(*NewF, NewProps.Features);
320  setAttributes(*NewF, NewProps.Attributes);
321  NewF->setVisibility(GlobalValue::DefaultVisibility);
322  NewF->setLinkage(GlobalValue::InternalLinkage);
323
324  // Swap names. If that is the only clone it will retain the name of now
325  // dead value. Preserve original name for externally visible functions.
326  if (F.hasName() && F.hasLocalLinkage()) {
327    std::string NewName = std::string(NewF->getName());
328    NewF->takeName(&F);
329    F.setName(NewName);
330  }
331
332  return NewF;
333}
334
335void AMDGPUPropagateAttributes::setFeatures(Function &F,
336                                            const FeatureBitset &NewFeatures) {
337  std::string NewFeatureStr = getFeatureString(NewFeatures);
338
339  LLVM_DEBUG(dbgs() << "Set features "
340                    << getFeatureString(NewFeatures & TargetFeatures)
341                    << " on " << F.getName() << '\n');
342
343  F.removeFnAttr("target-features");
344  F.addFnAttr("target-features", NewFeatureStr);
345}
346
347void AMDGPUPropagateAttributes::setAttributes(Function &F,
348    const ArrayRef<Optional<Attribute>> NewAttrs) {
349  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350  for (unsigned I = 0; I < NumAttr; ++I) {
351    F.removeFnAttr(AttributeNames[I]);
352    if (NewAttrs[I]) {
353      LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354      F.addFnAttr(*NewAttrs[I]);
355    }
356  }
357}
358
359std::string
360AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
361{
362  std::string Ret;
363  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
364    if (Features[KV.Value])
365      Ret += (StringRef("+") + KV.Key + ",").str();
366    else if (TargetFeatures[KV.Value])
367      Ret += (StringRef("-") + KV.Key + ",").str();
368  }
369  Ret.pop_back(); // Remove last comma.
370  return Ret;
371}
372
373bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
374  if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
375    return false;
376
377  return AMDGPUPropagateAttributes(TM, false).process(F);
378}
379
380bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
381  if (!TM)
382    return false;
383
384  return AMDGPUPropagateAttributes(TM, true).process(M);
385}
386
387FunctionPass
388*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
389  return new AMDGPUPropagateAttributesEarly(TM);
390}
391
392ModulePass
393*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
394  return new AMDGPUPropagateAttributesLate(TM);
395}
396