//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
/// call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
26// 27//===----------------------------------------------------------------------===// 28 29#include "AMDGPU.h" 30#include "AMDGPUSubtarget.h" 31#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 32#include "Utils/AMDGPUBaseInfo.h" 33#include "llvm/ADT/SmallSet.h" 34#include "llvm/ADT/SmallVector.h" 35#include "llvm/IR/Function.h" 36#include "llvm/IR/Module.h" 37#include "llvm/Target/TargetMachine.h" 38#include "llvm/Transforms/Utils/Cloning.h" 39#include <string> 40 41#define DEBUG_TYPE "amdgpu-propagate-attributes" 42 43using namespace llvm; 44 45namespace llvm { 46extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; 47} 48 49namespace { 50 51class AMDGPUPropagateAttributes { 52 const FeatureBitset TargetFeatures = { 53 AMDGPU::FeatureWavefrontSize16, 54 AMDGPU::FeatureWavefrontSize32, 55 AMDGPU::FeatureWavefrontSize64 56 }; 57 58 class Clone{ 59 public: 60 Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : 61 FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} 62 63 FeatureBitset FeatureMask; 64 Function *OrigF; 65 Function *NewF; 66 }; 67 68 const TargetMachine *TM; 69 70 // Clone functions as needed or just set attributes. 71 bool AllowClone; 72 73 // Option propagation roots. 74 SmallSet<Function *, 32> Roots; 75 76 // Clones of functions with their attributes. 77 SmallVector<Clone, 32> Clones; 78 79 // Find a clone with required features. 80 Function *findFunction(const FeatureBitset &FeaturesNeeded, 81 Function *OrigF); 82 83 // Clone function F and set NewFeatures on the clone. 84 // Cole takes the name of original function. 85 Function *cloneWithFeatures(Function &F, 86 const FeatureBitset &NewFeatures); 87 88 // Set new function's features in place. 89 void setFeatures(Function &F, const FeatureBitset &NewFeatures); 90 91 std::string getFeatureString(const FeatureBitset &Features) const; 92 93 // Propagate attributes from Roots. 
94 bool process(); 95 96public: 97 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 98 TM(TM), AllowClone(AllowClone) {} 99 100 // Use F as a root and propagate its attributes. 101 bool process(Function &F); 102 103 // Propagate attributes starting from kernel functions. 104 bool process(Module &M); 105}; 106 107// Allows to propagate attributes early, but no clonning is allowed as it must 108// be a function pass to run before any optimizations. 109// TODO: We shall only need a one instance of module pass, but that needs to be 110// in the linker pipeline which is currently not possible. 111class AMDGPUPropagateAttributesEarly : public FunctionPass { 112 const TargetMachine *TM; 113 114public: 115 static char ID; // Pass identification 116 117 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 118 FunctionPass(ID), TM(TM) { 119 initializeAMDGPUPropagateAttributesEarlyPass( 120 *PassRegistry::getPassRegistry()); 121 } 122 123 bool runOnFunction(Function &F) override; 124}; 125 126// Allows to propagate attributes with clonning but does that late in the 127// pipeline. 128class AMDGPUPropagateAttributesLate : public ModulePass { 129 const TargetMachine *TM; 130 131public: 132 static char ID; // Pass identification 133 134 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 135 ModulePass(ID), TM(TM) { 136 initializeAMDGPUPropagateAttributesLatePass( 137 *PassRegistry::getPassRegistry()); 138 } 139 140 bool runOnModule(Module &M) override; 141}; 142 143} // end anonymous namespace. 
144 145char AMDGPUPropagateAttributesEarly::ID = 0; 146char AMDGPUPropagateAttributesLate::ID = 0; 147 148INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 149 "amdgpu-propagate-attributes-early", 150 "Early propagate attributes from kernels to functions", 151 false, false) 152INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 153 "amdgpu-propagate-attributes-late", 154 "Late propagate attributes from kernels to functions", 155 false, false) 156 157Function * 158AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, 159 Function *OrigF) { 160 // TODO: search for clone's clones. 161 for (Clone &C : Clones) 162 if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) 163 return C.NewF; 164 165 return nullptr; 166} 167 168bool AMDGPUPropagateAttributes::process(Module &M) { 169 for (auto &F : M.functions()) 170 if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) 171 Roots.insert(&F); 172 173 return process(); 174} 175 176bool AMDGPUPropagateAttributes::process(Function &F) { 177 Roots.insert(&F); 178 return process(); 179} 180 181bool AMDGPUPropagateAttributes::process() { 182 bool Changed = false; 183 SmallSet<Function *, 32> NewRoots; 184 SmallSet<Function *, 32> Replaced; 185 186 if (Roots.empty()) 187 return false; 188 Module &M = *(*Roots.begin())->getParent(); 189 190 do { 191 Roots.insert(NewRoots.begin(), NewRoots.end()); 192 NewRoots.clear(); 193 194 for (auto &F : M.functions()) { 195 if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F)) 196 continue; 197 198 const FeatureBitset &CalleeBits = 199 TM->getSubtargetImpl(F)->getFeatureBits(); 200 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 201 202 for (User *U : F.users()) { 203 Instruction *I = dyn_cast<Instruction>(U); 204 if (!I) 205 continue; 206 CallBase *CI = dyn_cast<CallBase>(I); 207 if (!CI) 208 continue; 209 Function *Caller = CI->getCaller(); 210 if (!Caller) 211 continue; 212 if (!Roots.count(Caller)) 213 continue; 214 215 const FeatureBitset &CallerBits = 216 
TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; 217 218 if (CallerBits == (CalleeBits & TargetFeatures)) { 219 NewRoots.insert(&F); 220 continue; 221 } 222 223 Function *NewF = findFunction(CallerBits, &F); 224 if (!NewF) { 225 FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | 226 CallerBits); 227 if (!AllowClone) { 228 // This may set different features on different iteartions if 229 // there is a contradiction in callers' attributes. In this case 230 // we rely on a second pass running on Module, which is allowed 231 // to clone. 232 setFeatures(F, NewFeatures); 233 NewRoots.insert(&F); 234 Changed = true; 235 break; 236 } 237 238 NewF = cloneWithFeatures(F, NewFeatures); 239 Clones.push_back(Clone(CallerBits, &F, NewF)); 240 NewRoots.insert(NewF); 241 } 242 243 ToReplace.push_back(std::make_pair(CI, NewF)); 244 Replaced.insert(&F); 245 246 Changed = true; 247 } 248 249 while (!ToReplace.empty()) { 250 auto R = ToReplace.pop_back_val(); 251 R.first->setCalledFunction(R.second); 252 } 253 } 254 } while (!NewRoots.empty()); 255 256 for (Function *F : Replaced) { 257 if (F->use_empty()) 258 F->eraseFromParent(); 259 } 260 261 return Changed; 262} 263 264Function * 265AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, 266 const FeatureBitset &NewFeatures) { 267 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 268 269 ValueToValueMapTy dummy; 270 Function *NewF = CloneFunction(&F, dummy); 271 setFeatures(*NewF, NewFeatures); 272 273 // Swap names. If that is the only clone it will retain the name of now 274 // dead value. 275 if (F.hasName()) { 276 std::string NewName = NewF->getName(); 277 NewF->takeName(&F); 278 F.setName(NewName); 279 280 // Name has changed, it does not need an external symbol. 
281 F.setVisibility(GlobalValue::DefaultVisibility); 282 F.setLinkage(GlobalValue::InternalLinkage); 283 } 284 285 return NewF; 286} 287 288void AMDGPUPropagateAttributes::setFeatures(Function &F, 289 const FeatureBitset &NewFeatures) { 290 std::string NewFeatureStr = getFeatureString(NewFeatures); 291 292 LLVM_DEBUG(dbgs() << "Set features " 293 << getFeatureString(NewFeatures & TargetFeatures) 294 << " on " << F.getName() << '\n'); 295 296 F.removeFnAttr("target-features"); 297 F.addFnAttr("target-features", NewFeatureStr); 298} 299 300std::string 301AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 302{ 303 std::string Ret; 304 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 305 if (Features[KV.Value]) 306 Ret += (StringRef("+") + KV.Key + ",").str(); 307 else if (TargetFeatures[KV.Value]) 308 Ret += (StringRef("-") + KV.Key + ",").str(); 309 } 310 Ret.pop_back(); // Remove last comma. 311 return Ret; 312} 313 314bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 315 if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) 316 return false; 317 318 return AMDGPUPropagateAttributes(TM, false).process(F); 319} 320 321bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 322 if (!TM) 323 return false; 324 325 return AMDGPUPropagateAttributes(TM, true).process(M); 326} 327 328FunctionPass 329*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 330 return new AMDGPUPropagateAttributesEarly(TM); 331} 332 333ModulePass 334*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 335 return new AMDGPUPropagateAttributesLate(TM); 336} 337