1292915Sdim//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// 2292915Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6292915Sdim// 7292915Sdim//===----------------------------------------------------------------------===// 8292915Sdim// 9292915Sdim/// \file 10292915Sdim/// This pass adds amdgpu.uniform metadata to IR values so this information 11292915Sdim/// can be used during instruction selection. 12292915Sdim// 13292915Sdim//===----------------------------------------------------------------------===// 14292915Sdim 15292915Sdim#include "AMDGPU.h" 16314564Sdim#include "llvm/ADT/SetVector.h" 17344779Sdim#include "llvm/Analysis/LegacyDivergenceAnalysis.h" 18314564Sdim#include "llvm/Analysis/LoopInfo.h" 19314564Sdim#include "llvm/Analysis/MemoryDependenceAnalysis.h" 20321369Sdim#include "llvm/IR/IRBuilder.h" 21292915Sdim#include "llvm/IR/InstVisitor.h" 22360784Sdim#include "llvm/InitializePasses.h" 23292915Sdim#include "llvm/Support/Debug.h" 24292915Sdim#include "llvm/Support/raw_ostream.h" 25292915Sdim 26292915Sdim#define DEBUG_TYPE "amdgpu-annotate-uniform" 27292915Sdim 28292915Sdimusing namespace llvm; 29292915Sdim 30292915Sdimnamespace { 31292915Sdim 32292915Sdimclass AMDGPUAnnotateUniformValues : public FunctionPass, 33292915Sdim public InstVisitor<AMDGPUAnnotateUniformValues> { 34344779Sdim LegacyDivergenceAnalysis *DA; 35314564Sdim MemoryDependenceResults *MDR; 36314564Sdim LoopInfo *LI; 37314564Sdim DenseMap<Value*, GetElementPtrInst*> noClobberClones; 38314564Sdim bool isKernelFunc; 39292915Sdim 40292915Sdimpublic: 41292915Sdim static char ID; 42292915Sdim AMDGPUAnnotateUniformValues() : 43292915Sdim FunctionPass(ID) { } 44292915Sdim bool doInitialization(Module &M) override; 45292915Sdim bool runOnFunction(Function &F) override; 46314564Sdim StringRef getPassName() const override { 47314564Sdim return "AMDGPU Annotate Uniform Values"; 48314564Sdim } 49292915Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 50344779Sdim AU.addRequired<LegacyDivergenceAnalysis>(); 51314564Sdim AU.addRequired<MemoryDependenceWrapperPass>(); 52314564Sdim AU.addRequired<LoopInfoWrapperPass>(); 53292915Sdim AU.setPreservesAll(); 54292915Sdim } 55292915Sdim 56309124Sdim void visitBranchInst(BranchInst &I); 57292915Sdim void visitLoadInst(LoadInst &I); 58314564Sdim bool isClobberedInFunction(LoadInst * Load); 59292915Sdim}; 60292915Sdim 61292915Sdim} // End anonymous namespace 62292915Sdim 63292915SdimINITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, 64292915Sdim "Add AMDGPU uniform metadata", false, false) 65344779SdimINITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 66314564SdimINITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) 67314564SdimINITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 68292915SdimINITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, 69292915Sdim "Add AMDGPU uniform metadata", false, false) 70292915Sdim 71292915Sdimchar AMDGPUAnnotateUniformValues::ID = 0; 72292915Sdim 73309124Sdimstatic void setUniformMetadata(Instruction *I) { 74309124Sdim I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); 75309124Sdim} 76314564Sdimstatic void setNoClobberMetadata(Instruction *I) { 77314564Sdim I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); 78314564Sdim} 79309124Sdim 80314564Sdimstatic void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) { 81314564Sdim for (auto I : predecessors(Root)) 82314564Sdim if (Set.insert(I)) 83314564Sdim DFS(I, Set); 84314564Sdim} 85314564Sdim 86314564Sdimbool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { 87314564Sdim // 1. get Loop for the Load->getparent(); 88314564Sdim // 2. if it exists, collect all the BBs from the most outer 89314564Sdim // loop and check for the writes. If NOT - start DFS over all preds. 90314564Sdim // 3. Start DFS over all preds from the most outer loop header. 91314564Sdim SetVector<BasicBlock *> Checklist; 92314564Sdim BasicBlock *Start = Load->getParent(); 93314564Sdim Checklist.insert(Start); 94314564Sdim const Value *Ptr = Load->getPointerOperand(); 95314564Sdim const Loop *L = LI->getLoopFor(Start); 96314564Sdim if (L) { 97314564Sdim const Loop *P = L; 98314564Sdim do { 99314564Sdim L = P; 100314564Sdim P = P->getParentLoop(); 101314564Sdim } while (P); 102314564Sdim Checklist.insert(L->block_begin(), L->block_end()); 103314564Sdim Start = L->getHeader(); 104314564Sdim } 105314564Sdim 106314564Sdim DFS(Start, Checklist); 107314564Sdim for (auto &BB : Checklist) { 108321369Sdim BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? 109321369Sdim BasicBlock::iterator(Load) : BB->end(); 110321369Sdim auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, 111321369Sdim StartIt, BB, Load); 112321369Sdim if (Q.isClobber() || Q.isUnknown()) 113321369Sdim return true; 114314564Sdim } 115314564Sdim return false; 116314564Sdim} 117314564Sdim 118309124Sdimvoid AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { 119344779Sdim if (DA->isUniform(&I)) 120344779Sdim setUniformMetadata(I.getParent()->getTerminator()); 121309124Sdim} 122309124Sdim 123292915Sdimvoid AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { 124292915Sdim Value *Ptr = I.getPointerOperand(); 125292915Sdim if (!DA->isUniform(Ptr)) 126292915Sdim return; 127321369Sdim auto isGlobalLoad = [&](LoadInst &Load)->bool { 128344779Sdim return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; 129314564Sdim }; 130314564Sdim // We're tracking up to the Function boundaries 131314564Sdim // We cannot go beyond because of FunctionPass restrictions 132314564Sdim // Thus we can ensure that memory not clobbered for memory 133314564Sdim // operations that live in kernel only. 134314564Sdim bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I); 135314564Sdim Instruction *PtrI = dyn_cast<Instruction>(Ptr); 136314564Sdim if (!PtrI && NotClobbered && isGlobalLoad(I)) { 137314564Sdim if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) { 138314564Sdim // Lookup for the existing GEP 139314564Sdim if (noClobberClones.count(Ptr)) { 140314564Sdim PtrI = noClobberClones[Ptr]; 141314564Sdim } else { 142314564Sdim // Create GEP of the Value 143314564Sdim Function *F = I.getParent()->getParent(); 144314564Sdim Value *Idx = Constant::getIntegerValue( 145314564Sdim Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); 146314564Sdim // Insert GEP at the entry to make it dominate all uses 147314564Sdim PtrI = GetElementPtrInst::Create( 148314564Sdim Ptr->getType()->getPointerElementType(), Ptr, 149314564Sdim ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI()); 150314564Sdim } 151314564Sdim I.replaceUsesOfWith(Ptr, PtrI); 152314564Sdim } 153314564Sdim } 154292915Sdim 155314564Sdim if (PtrI) { 156309124Sdim setUniformMetadata(PtrI); 157314564Sdim if (NotClobbered) 158314564Sdim setNoClobberMetadata(PtrI); 159314564Sdim } 160292915Sdim} 161292915Sdim 162292915Sdimbool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { 163292915Sdim return false; 164292915Sdim} 165292915Sdim 166292915Sdimbool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { 167309124Sdim if (skipFunction(F)) 168309124Sdim return false; 169309124Sdim 170344779Sdim DA = &getAnalysis<LegacyDivergenceAnalysis>(); 171314564Sdim MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); 172314564Sdim LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 173314564Sdim isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL; 174314564Sdim 175292915Sdim visit(F); 176314564Sdim noClobberClones.clear(); 177292915Sdim return true; 178292915Sdim} 179292915Sdim 180292915SdimFunctionPass * 181292915Sdimllvm::createAMDGPUAnnotateUniformValues() { 182292915Sdim return new AMDGPUAnnotateUniformValues(); 183292915Sdim} 184