//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    KernargSegmentPtr(false),
    DispatchID(false),
    FlatScratchInit(false),
    WorkGroupIDX(false),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(false),
    WorkItemIDY(false),
    WorkItemIDZ(false),
    ImplicitBufferPtr(false),
    ImplicitArgPtr(false),
    GITPtrHigh(0xffffffff),
    HighBitsOf32BitAddress(0),
    GDSSize(0) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  // FIXME: Should have analysis or something rather than attribute to detect
  // calls.
  const bool HasCalls = F.hasFnAttribute("amdgpu-calls");

  // Enable all kernel inputs if we have the fixed ABI. Don't bother if we don't
  // have any calls.
  const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
                           (!isEntryFunction() || HasCalls);

  if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
    if (!F.arg_empty())
      KernargSegmentPtr = true;
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  if (!isEntryFunction()) {
    // Non-entry functions have no special inputs for now, other than the
    // registers required for scratch access.
    ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(ScratchRSrcReg);

    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
      KernargSegmentPtr = true;
      MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
                                 MaxKernArgAlign);
    }
  }

  if (UseFixedABI) {
    WorkGroupIDX = true;
    WorkGroupIDY = true;
    WorkGroupIDZ = true;
    WorkItemIDX = true;
    WorkItemIDY = true;
    WorkItemIDZ = true;
    ImplicitArgPtr = true;
  } else {
    if (F.hasFnAttribute("amdgpu-work-group-id-x"))
      WorkGroupIDX = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-y"))
      WorkGroupIDY = true;

    if (F.hasFnAttribute("amdgpu-work-group-id-z"))
      WorkGroupIDZ = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-x"))
      WorkItemIDX = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-y"))
      WorkItemIDY = true;

    if (F.hasFnAttribute("amdgpu-work-item-id-z"))
      WorkItemIDZ = true;
  }

  bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    PrivateSegmentWaveByteOffset = true;

    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
      ArgInfo.PrivateSegmentWaveByteOffset =
          ArgDescriptor::createRegister(AMDGPU::SGPR5);
  }

  bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
  if (isAmdHsaOrMesa) {
    PrivateSegmentBuffer = true;

    if (UseFixedABI) {
      DispatchPtr = true;
      QueuePtr = true;

      // FIXME: We don't need this?
      DispatchID = true;
    } else {
      if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
        DispatchPtr = true;

      if (F.hasFnAttribute("amdgpu-queue-ptr"))
        QueuePtr = true;

      if (F.hasFnAttribute("amdgpu-dispatch-id"))
        DispatchID = true;
    }
  } else if (ST.isMesaGfxShader(F)) {
    ImplicitBufferPtr = true;
  }

  if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
    KernargSegmentPtr = true;

  if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
    // TODO: This could be refined a lot. The attribute is a poor way of
    // detecting calls or stack objects that may require it before argument
    // lowering.
    if (HasCalls || HasStackObjects)
      FlatScratchInit = true;
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GDSSize);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                 MF.getFunction()));
}

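// Add the 128-bit private segment buffer (scratch resource) descriptor,
// occupying four consecutive user SGPRs.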
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
    ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

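// The following helpers each claim the next two user SGPRs for a 64-bit
// preloaded kernel argument and record the pair in ArgInfo.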
Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register
SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr
    = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(
    TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0,
                            &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register
SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(
    TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0,
                            &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

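// Scan the null-terminated list of callee-saved registers for Reg.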
bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

/// \returns true if \p NumNeed lanes are available in VGPRs already used for
/// SGPR spilling.
//
// FIXME: This only works after processFunctionBeforeFrameFinalized
bool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                                      unsigned NumNeed) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  unsigned WaveSize = ST.getWavefrontSize();
  return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size();
}

/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
                                                    int FI) {
  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned WaveSize = ST.getWavefrontSize();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");

  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();

  // Make sure to handle the case where a wide SGPR spill may span between two
  // VGPRs.
  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    Register LaneVGPR;
    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);

    // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ...)
    // when one of the following two conditions is true:
    // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
    // reserved.
    // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
    // required.
    if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
      assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
      LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
    } else if (VGPRIndex == 0) {
      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
      if (LaneVGPR == AMDGPU::NoRegister) {
        // We have no VGPRs left for spilling SGPRs. Reset because we will not
        // partially spill the SGPR to VGPRs.
        SGPRToVGPRSpills.erase(FI);
        NumVGPRSpillLanes -= I;
        return false;
      }

      Optional<int> CSRSpillFI;
      if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs &&
          isCalleeSavedReg(CSRegs, LaneVGPR)) {
        CSRSpillFI = FrameInfo.CreateSpillStackObject(4, Align(4));
      }

      SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI));

      // Add this register as live-in to all blocks to avoid machine verifier
      // complaining about use of an undefined physical register.
      for (MachineBasicBlock &BB : MF)
        BB.addLiveIn(LaneVGPR);
    } else {
      LaneVGPR = SpillVGPRs.back().VGPR;
    }

    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
  }

  return true;
}

/// Reserve a VGPR for spilling of SGPRs
bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  Register LaneVGPR = TRI->findUnusedRegister(
      MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
  SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, None));
  FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

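  // Greedily assign each 4-byte lane to the next register in the class that is
  // allocatable, unused in the function, and not already claimed by another
  // spill.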
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

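// Release the spill stack objects whose contents will instead live in VGPR or
// AGPR lanes.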
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
  // The FP & BP spills haven't been inserted yet, so keep them around.
  for (auto &R : SGPRToVGPRSpills) {
    if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
      MFI.RemoveStackObject(R.first);
  }

  // All other SGPRs must be allocated on the default stack, so reset the stack
  // ID.
  for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
       ++i)
    if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
      MFI.setStackID(i, TargetStackID::Default);

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.FullyAllocated)
      MFI.RemoveStackObject(R.first);
  }
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

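// System SGPRs are allocated immediately after the user SGPRs.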
MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

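// On amdpal the low half of the GIT pointer arrives in s0, or in s8 for merged
// HS/GS shaders on subtargets with merged shaders; other OSes get no register.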
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

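// Render Reg with its target register name into a YAML string value.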
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

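// Convert each argument descriptor that is set into its YAML form; returns
// None when no argument is present.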
static Optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](Optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return None;
}

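// Snapshot the serializable state of the in-memory function info for MIR
// printing.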
yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
  const llvm::SIMachineFunctionInfo& MFI,
  const TargetRegisterInfo &TRI)
  : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
    MaxKernArgAlign(MFI.getMaxKernArgAlign()),
    LDSSize(MFI.getLDSSize()),
    IsEntryFunction(MFI.isEntryFunction()),
    NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
    MemoryBound(MFI.isMemoryBound()),
    WaveLimiter(MFI.needsWaveLimiter()),
    HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
    ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
    FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
    StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
    ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
    Mode(MFI.getMode()) {}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

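// Copy the parsed YAML fields back into the in-memory function info.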
bool SIMachineFunctionInfo::initializeBaseYamlFields(
  const yaml::SIMachineFunctionInfo &YamlMFI) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign);
  LDSSize = YamlMFI.LDSSize;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  return false;
}

// Remove the VGPR reserved for SGPR spills if there are no spilled SGPRs.
bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
                                                   MachineFunction &MF) {
  for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
    if (i->VGPR == ReservedVGPR) {
      SpillVGPRs.erase(i);

      for (MachineBasicBlock &MBB : MF) {
        MBB.removeLiveIn(ReservedVGPR);
        MBB.sortUniqueLiveIns();
      }
      this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
      return true;
    }
  }
  return false;
}