//===-- AMDGPUMachineFunction.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

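// Find the module-level global named "llvm.amdgcn.<kernel-name>.dynlds" that
// represents kernel F's dynamically sized LDS allocation, or nullptr if no
// such global exists.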
static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function &F) {
  const Module *M = F.getParent();
  SmallString<64> KernelDynLDSName("llvm.amdgcn.");
  KernelDynLDSName += F.getName();
  KernelDynLDSName += ".dynlds";
  return M->getNamedGlobal(KernelDynLDSName);
}

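// Return true if any formal argument of F is a pointer into the LDS (local)
// address space.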
static bool hasLDSKernelArgument(const Function &F) {
  for (const Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    if (auto *PtrTy = dyn_cast<PointerType>(ArgTy)) {
      if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
        return true;
    }
  }
  return false;
}

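// Initialize per-function state from the calling convention and from the
// function attributes attached by earlier passes (performance hints, GDS/LDS
// sizes, kernel argument layout).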
AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
                                             const AMDGPUSubtarget &ST)
    : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
      IsModuleEntryFunction(
          AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
      IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())),
      NoSignedZerosFPMath(false) {

  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
  // except reserved size is not correctly aligned.

  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
  MemoryBound = MemBoundAttr.getValueAsBool();

  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
  WaveLimiter = WaveLimitAttr.getValueAsBool();

  // FIXME: How is this attribute supposed to interact with statically known
  // global sizes?
  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);

  // Assume the attribute allocates before any known GDS globals.
  StaticGDSSize = GDSSize;

  // Second value, if present, is the maximum value that can be assigned.
  // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
  // during codegen.
  std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
      F, "amdgpu-lds-size", {0, UINT32_MAX}, true);

  // The two separate variables are only profitable when the LDS module lowering
  // pass is disabled. If graphics does not use dynamic LDS, this is never
  // profitable. Leaving cleanup for a later change.
  LDSSize = LDSSizeRange.first;
  StaticLDSSize = LDSSize;

  CallingConv::ID CC = F.getCallingConv();
  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
    ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);

  // FIXME: Shouldn't be target specific
  Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
  NoSignedZerosFPMath =
      NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";

  const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
  if (DynLdsGlobal || hasLDSKernelArgument(F))
    UsesDynamicLDS = true;
}

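// Assign GV a byte offset within this kernel's LDS frame (or GDS frame for
// region address space globals). The result is cached in LocalMemoryObjects so
// repeated queries for the same global return the same offset. Trailing is the
// alignment applied to the end of the static LDS frame, e.g. so that dynamic
// shared memory starts at a suitably aligned offset.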
unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
                                                  const GlobalVariable &GV,
                                                  Align Trailing) {
  auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
  if (!Entry.second)
    return Entry.first->second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());

  unsigned Offset;
  if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {

    std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
    if (MaybeAbs) {
      // Absolute address LDS variables that exist prior to the LDS lowering
      // pass raise a fatal error in that pass. These failure modes are only
      // reachable if that lowering pass is disabled or broken. If/when adding
      // support for absolute addresses on user specified variables, the
      // alignment check moves to the lowering pass and the frame calculation
      // needs to take the user variables into consideration.

      uint32_t ObjectStart = *MaybeAbs;

      if (ObjectStart != alignTo(ObjectStart, Alignment)) {
        report_fatal_error("Absolute address LDS variable inconsistent with "
                           "variable alignment");
      }

      if (isModuleEntryFunction()) {
        // If this is a module entry function, we can also sanity check against
        // the static frame. Strictly it would be better to check against the
        // attribute, i.e. that the variable is within the always-allocated
        // section, and not within some other non-absolute-address object
        // allocated here, but the extra error detection is minimal and we would
        // have to pass the Function around or cache the attribute value.
        uint32_t ObjectEnd =
            ObjectStart + DL.getTypeAllocSize(GV.getValueType());
        if (ObjectEnd > StaticLDSSize) {
          report_fatal_error(
              "Absolute address LDS variable outside of static frame");
        }
      }

      Entry.first->second = ObjectStart;
      return ObjectStart;
    }

    // TODO: We should sort these to minimize wasted space due to alignment
    // padding. Currently the padding is decided by the first encountered use
    // during lowering.
    Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);

    StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());

    // Align LDS size to trailing, e.g. for aligning dynamic shared memory.
    LDSSize = alignTo(StaticLDSSize, Trailing);
  } else {
    assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
           "expected region address space");

    Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
    StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());

    // FIXME: Apply alignment of dynamic GDS
    GDSSize = StaticGDSSize;
  }

  Entry.first->second = Offset;
  return Offset;
}

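// Return the value of the "llvm.amdgcn.lds.kernel.id" metadata attached to F,
// if it is a single integer operand that fits in 32 bits.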
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
  // TODO: Would be more consistent with the abs symbols to use a range
  MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
  if (MD && MD->getNumOperands() == 1) {
    if (ConstantInt *KnownSize =
            mdconst::extract<ConstantInt>(MD->getOperand(0))) {
      uint64_t ZExt = KnownSize->getZExtValue();
      if (ZExt <= UINT32_MAX) {
        return ZExt;
      }
    }
  }
  return {};
}

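// If GV is an LDS global whose absolute symbol range pins it to a single
// address representable in 32 bits, return that address.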
std::optional<uint32_t>
AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
  if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return {};

  std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
  if (!AbsSymRange)
    return {};

  if (const APInt *V = AbsSymRange->getSingleElement()) {
    std::optional<uint64_t> ZExt = V->tryZExtValue();
    if (ZExt && (*ZExt <= UINT32_MAX)) {
      return *ZExt;
    }
  }

  return {};
}

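// Record the alignment required by the zero-sized dynamic LDS variable GV and
// grow the reported LDS size so that the dynamic region starts at that
// alignment. Reports a fatal error if the resulting offset disagrees with the
// absolute address already recorded on the kernel's .dynlds global.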
void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
                                           const GlobalVariable &GV) {
  const Module *M = F.getParent();
  const DataLayout &DL = M->getDataLayout();
  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());

  Align Alignment =
      DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
  if (Alignment <= DynLDSAlign)
    return;

  LDSSize = alignTo(StaticLDSSize, Alignment);
  DynLDSAlign = Alignment;

  // If there is a dynamic LDS variable associated with this function F, every
  // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
  // map to the same address. This holds because no LDS is allocated after the
  // lowering pass if there are dynamic LDS variables present.
  const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
  if (Dyn) {
    unsigned Offset = LDSSize; // return this?
    std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
    if (!Expect || (Offset != *Expect)) {
      report_fatal_error("Inconsistent metadata on dynamic LDS variable");
    }
  }
}

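// Track whether this function allocates dynamically sized LDS.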
void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
  UsesDynamicLDS = DynLDS;
}

bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }