//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64
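// A wavefront has at most 64 lanes, so a single VGPR can provide at most 64
// 4-byte spill slices.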

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

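  // Start from the best occupancy the LDS usage allows; limitOccupancy() may
  // lower this later.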
  Occupancy = ST.computeOccupancy(MF, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
    ScratchWaveOffsetReg = AMDGPU::SGPR33;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR34;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);
    ArgInfo.PrivateSegmentWaveByteOffset =
      ArgDescriptor::createRegister(ScratchWaveOffsetReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (F.hasFnAttribute("amdgpu-work-group-id-x"))
    WorkGroupIDX = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F.hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-x"))
    WorkItemIDX = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F.hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  bool HasStackObjects = FrameInfo.hasStackObjects();

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;

    if (F.hasFnAttribute("amdgpu-queue-ptr"))
      QueuePtr = true;

    if (F.hasFnAttribute("amdgpu-dispatch-id"))
      DispatchID = true;
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    auto hasNonSpillStackObjects = [&]() {
      // Avoid expensive checking if there are no stack objects.
      if (!HasStackObjects)
        return false;
      for (auto OI = FrameInfo.getObjectIndexBegin(),
                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
        if (!FrameInfo.isSpillSlotObjectIndex(OI))
          return true;
      // All stack objects are spill slots.
      return false;
    };
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls that may require it before argument lowering.
    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
      FlatScratchInit = true;
  }

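  // StringRef::consumeInteger with radix 0 autodetects the base, so the
  // attribute values below may be written in decimal or as 0x-prefixed hex.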
  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

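// Each add* helper below reserves the next user SGPRs in sequence, starting at
// SGPR0: four registers for the private segment buffer resource descriptor and
// two for each 64-bit pointer argument.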
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) {
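  // The callee-saved register array is null-terminated, so scan until the
  // zero sentinel.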
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed slots are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
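  // Each spill VGPR provides one 4-byte lane per wave lane, so the total
  // capacity is WaveSize * SpillVGPRs.size().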
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

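  // One VGPR lane holds one 32-bit slice of the spilled register; a 64-bit
  // SGPR pair, for example, needs two lanes.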
  int NumLanes = Size / 4;

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4);
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

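  // Unlike SGPR-to-VGPR spills, each 32-bit lane of the spilled register gets
  // a whole 32-bit register of its own in the opposite register file.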
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP spill hasn't been inserted yet, so keep it around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

static yaml::StringValue regToString(unsigned Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
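    // Print into a scoped stream so the result is flushed into Dest.Value
    // when OS is destroyed at the closing brace.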
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

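// Invoked by the MIR printer and parser through the yaml::MappingTraits
// machinery to (de)serialize these fields.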
void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}