190618Stmm//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
290618Stmm//
390618Stmm// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
490618Stmm// See https://llvm.org/LICENSE.txt for license information.
590618Stmm// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
690618Stmm//
790618Stmm//===----------------------------------------------------------------------===//
890618Stmm//
990618Stmm/// \file
1090618Stmm//
1190618Stmm//===----------------------------------------------------------------------===//
1290618Stmm
1390618Stmm#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
1490618Stmm#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
1590618Stmm
1690618Stmm#include "AMDGPUArgumentUsageInfo.h"
1790618Stmm#include "AMDGPUMachineFunction.h"
1890618Stmm#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1990618Stmm#include "SIInstrInfo.h"
2090618Stmm#include "SIRegisterInfo.h"
2190618Stmm#include "llvm/ADT/ArrayRef.h"
2290618Stmm#include "llvm/ADT/DenseMap.h"
2390618Stmm#include "llvm/ADT/Optional.h"
2490618Stmm#include "llvm/ADT/STLExtras.h"
2590618Stmm#include "llvm/ADT/SmallVector.h"
2690618Stmm#include "llvm/ADT/SparseBitVector.h"
2790618Stmm#include "llvm/CodeGen/MIRYamlMapping.h"
2890618Stmm#include "llvm/CodeGen/PseudoSourceValue.h"
2990618Stmm#include "llvm/CodeGen/TargetInstrInfo.h"
3090618Stmm#include "llvm/MC/MCRegisterInfo.h"
3190618Stmm#include "llvm/Support/ErrorHandling.h"
3290618Stmm#include <array>
3390618Stmm#include <cassert>
3490618Stmm#include <utility>
3590618Stmm#include <vector>
3690618Stmm
3790618Stmmnamespace llvm {
3890618Stmm
3990618Stmmclass MachineFrameInfo;
4090618Stmmclass MachineFunction;
4190618Stmmclass TargetRegisterClass;
4290618Stmm
4390618Stmmclass AMDGPUPseudoSourceValue : public PseudoSourceValue {
4490618Stmmpublic:
4590618Stmm  enum AMDGPUPSVKind : unsigned {
4690618Stmm    PSVBuffer = PseudoSourceValue::TargetCustom,
4790618Stmm    PSVImage,
4890618Stmm    GWSResource
4990618Stmm  };
5090618Stmm
5190618Stmmprotected:
5290618Stmm  AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
5390618Stmm      : PseudoSourceValue(Kind, TII) {}
5490618Stmm
5590618Stmmpublic:
5690618Stmm  bool isConstant(const MachineFrameInfo *) const override {
5790618Stmm    // This should probably be true for most images, but we will start by being
5890618Stmm    // conservative.
5990618Stmm    return false;
6090618Stmm  }
6190618Stmm
6290618Stmm  bool isAliased(const MachineFrameInfo *) const override {
6390618Stmm    return true;
6490618Stmm  }
6590618Stmm
6690618Stmm  bool mayAlias(const MachineFrameInfo *) const override {
6790618Stmm    return true;
6890618Stmm  }
6990618Stmm};
7090618Stmm
7190618Stmmclass AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
7290618Stmmpublic:
7390618Stmm  explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
7490618Stmm      : AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
7590618Stmm
7690618Stmm  static bool classof(const PseudoSourceValue *V) {
7790618Stmm    return V->kind() == PSVBuffer;
7890618Stmm  }
7990618Stmm};
8090618Stmm
8190618Stmmclass AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
8290618Stmmpublic:
8390618Stmm  // TODO: Is the img rsrc useful?
8490618Stmm  explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
8590618Stmm      : AMDGPUPseudoSourceValue(PSVImage, TII) {}
8690618Stmm
8790618Stmm  static bool classof(const PseudoSourceValue *V) {
8890618Stmm    return V->kind() == PSVImage;
8990618Stmm  }
9090618Stmm};
9190618Stmm
9290618Stmmclass AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
9390618Stmmpublic:
9490618Stmm  explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
9590618Stmm      : AMDGPUPseudoSourceValue(GWSResource, TII) {}
9690618Stmm
9790618Stmm  static bool classof(const PseudoSourceValue *V) {
9890618Stmm    return V->kind() == GWSResource;
9990618Stmm  }
10090618Stmm
10190618Stmm  // These are inaccessible memory from IR.
10290618Stmm  bool isAliased(const MachineFrameInfo *) const override {
10390618Stmm    return false;
10490618Stmm  }
10590618Stmm
10690618Stmm  // These are inaccessible memory from IR.
10790618Stmm  bool mayAlias(const MachineFrameInfo *) const override {
10890618Stmm    return false;
10990618Stmm  }
11090618Stmm
11190618Stmm  void printCustom(raw_ostream &OS) const override {
11290618Stmm    OS << "GWSResource";
11390618Stmm  }
11490618Stmm};
11590618Stmm
11690618Stmmnamespace yaml {
11790618Stmm
11890618Stmmstruct SIArgument {
11990618Stmm  bool IsRegister;
12090618Stmm  union {
12190618Stmm    StringValue RegisterName;
12290618Stmm    unsigned StackOffset;
12390618Stmm  };
12490618Stmm  Optional<unsigned> Mask;
12590618Stmm
12690618Stmm  // Default constructor, which creates a stack argument.
12790618Stmm  SIArgument() : IsRegister(false), StackOffset(0) {}
12890618Stmm  SIArgument(const SIArgument &Other) {
12990618Stmm    IsRegister = Other.IsRegister;
13090618Stmm    if (IsRegister) {
13190618Stmm      ::new ((void *)std::addressof(RegisterName))
13290618Stmm          StringValue(Other.RegisterName);
13390618Stmm    } else
13490618Stmm      StackOffset = Other.StackOffset;
13590618Stmm    Mask = Other.Mask;
13690618Stmm  }
13790618Stmm  SIArgument &operator=(const SIArgument &Other) {
13890618Stmm    IsRegister = Other.IsRegister;
13990618Stmm    if (IsRegister) {
14090618Stmm      ::new ((void *)std::addressof(RegisterName))
141          StringValue(Other.RegisterName);
142    } else
143      StackOffset = Other.StackOffset;
144    Mask = Other.Mask;
145    return *this;
146  }
147  ~SIArgument() {
148    if (IsRegister)
149      RegisterName.~StringValue();
150  }
151
152  // Helper to create a register or stack argument.
153  static inline SIArgument createArgument(bool IsReg) {
154    if (IsReg)
155      return SIArgument(IsReg);
156    return SIArgument();
157  }
158
159private:
160  // Construct a register argument.
161  SIArgument(bool) : IsRegister(true), RegisterName() {}
162};
163
164template <> struct MappingTraits<SIArgument> {
165  static void mapping(IO &YamlIO, SIArgument &A) {
166    if (YamlIO.outputting()) {
167      if (A.IsRegister)
168        YamlIO.mapRequired("reg", A.RegisterName);
169      else
170        YamlIO.mapRequired("offset", A.StackOffset);
171    } else {
172      auto Keys = YamlIO.keys();
173      if (is_contained(Keys, "reg")) {
174        A = SIArgument::createArgument(true);
175        YamlIO.mapRequired("reg", A.RegisterName);
176      } else if (is_contained(Keys, "offset"))
177        YamlIO.mapRequired("offset", A.StackOffset);
178      else
179        YamlIO.setError("missing required key 'reg' or 'offset'");
180    }
181    YamlIO.mapOptional("mask", A.Mask);
182  }
183  static const bool flow = true;
184};
185
186struct SIArgumentInfo {
187  Optional<SIArgument> PrivateSegmentBuffer;
188  Optional<SIArgument> DispatchPtr;
189  Optional<SIArgument> QueuePtr;
190  Optional<SIArgument> KernargSegmentPtr;
191  Optional<SIArgument> DispatchID;
192  Optional<SIArgument> FlatScratchInit;
193  Optional<SIArgument> PrivateSegmentSize;
194
195  Optional<SIArgument> WorkGroupIDX;
196  Optional<SIArgument> WorkGroupIDY;
197  Optional<SIArgument> WorkGroupIDZ;
198  Optional<SIArgument> WorkGroupInfo;
199  Optional<SIArgument> PrivateSegmentWaveByteOffset;
200
201  Optional<SIArgument> ImplicitArgPtr;
202  Optional<SIArgument> ImplicitBufferPtr;
203
204  Optional<SIArgument> WorkItemIDX;
205  Optional<SIArgument> WorkItemIDY;
206  Optional<SIArgument> WorkItemIDZ;
207};
208
209template <> struct MappingTraits<SIArgumentInfo> {
210  static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
211    YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
212    YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
213    YamlIO.mapOptional("queuePtr", AI.QueuePtr);
214    YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
215    YamlIO.mapOptional("dispatchID", AI.DispatchID);
216    YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
217    YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
218
219    YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
220    YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
221    YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
222    YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
223    YamlIO.mapOptional("privateSegmentWaveByteOffset",
224                       AI.PrivateSegmentWaveByteOffset);
225
226    YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
227    YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
228
229    YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
230    YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
231    YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
232  }
233};
234
235// Default to default mode for default calling convention.
236struct SIMode {
237  bool IEEE = true;
238  bool DX10Clamp = true;
239  bool FP32Denormals = true;
240  bool FP64FP16Denormals = true;
241
242  SIMode() = default;
243
244  SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
245    IEEE = Mode.IEEE;
246    DX10Clamp = Mode.DX10Clamp;
247    FP32Denormals = Mode.FP32Denormals;
248    FP64FP16Denormals = Mode.FP64FP16Denormals;
249  }
250
251  bool operator ==(const SIMode Other) const {
252    return IEEE == Other.IEEE &&
253           DX10Clamp == Other.DX10Clamp &&
254           FP32Denormals == Other.FP32Denormals &&
255           FP64FP16Denormals == Other.FP64FP16Denormals;
256  }
257};
258
259template <> struct MappingTraits<SIMode> {
260  static void mapping(IO &YamlIO, SIMode &Mode) {
261    YamlIO.mapOptional("ieee", Mode.IEEE, true);
262    YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
263    YamlIO.mapOptional("fp32-denormals", Mode.FP32Denormals, true);
264    YamlIO.mapOptional("fp64-fp16-denormals", Mode.FP64FP16Denormals, true);
265  }
266};
267
268struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
269  uint64_t ExplicitKernArgSize = 0;
270  unsigned MaxKernArgAlign = 0;
271  unsigned LDSSize = 0;
272  bool IsEntryFunction = false;
273  bool NoSignedZerosFPMath = false;
274  bool MemoryBound = false;
275  bool WaveLimiter = false;
276  uint32_t HighBitsOf32BitAddress = 0;
277
278  StringValue ScratchRSrcReg = "$private_rsrc_reg";
279  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
280  StringValue FrameOffsetReg = "$fp_reg";
281  StringValue StackPtrOffsetReg = "$sp_reg";
282
283  Optional<SIArgumentInfo> ArgInfo;
284  SIMode Mode;
285
286  SIMachineFunctionInfo() = default;
287  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
288                        const TargetRegisterInfo &TRI);
289
290  void mappingImpl(yaml::IO &YamlIO) override;
291  ~SIMachineFunctionInfo() = default;
292};
293
294template <> struct MappingTraits<SIMachineFunctionInfo> {
295  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
296    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
297                       UINT64_C(0));
298    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
299    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
300    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
301    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
302    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
303    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
304    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
305                       StringValue("$private_rsrc_reg"));
306    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
307                       StringValue("$scratch_wave_offset_reg"));
308    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
309                       StringValue("$fp_reg"));
310    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
311                       StringValue("$sp_reg"));
312    YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
313    YamlIO.mapOptional("mode", MFI.Mode, SIMode());
314    YamlIO.mapOptional("highBitsOf32BitAddress",
315                       MFI.HighBitsOf32BitAddress, 0u);
316  }
317};
318
319} // end namespace yaml
320
321/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
322/// tells the hardware which interpolation parameters to load.
323class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
324  friend class GCNTargetMachine;
325
326  unsigned TIDReg = AMDGPU::NoRegister;
327
328  // Registers that may be reserved for spilling purposes. These may be the same
329  // as the input registers.
330  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
331  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;
332
333  // This is the current function's incremented size from the kernel's scratch
334  // wave offset register. For an entry function, this is exactly the same as
335  // the ScratchWaveOffsetReg.
336  unsigned FrameOffsetReg = AMDGPU::FP_REG;
337
338  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
339  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;
340
341  AMDGPUFunctionArgInfo ArgInfo;
342
343  // Graphics info.
344  unsigned PSInputAddr = 0;
345  unsigned PSInputEnable = 0;
346
347  /// Number of bytes of arguments this function has on the stack. If the callee
348  /// is expected to restore the argument stack this should be a multiple of 16,
349  /// all usable during a tail call.
350  ///
351  /// The alternative would forbid tail call optimisation in some cases: if we
352  /// want to transfer control from a function with 8-bytes of stack-argument
353  /// space to a function with 16-bytes then misalignment of this value would
354  /// make a stack adjustment necessary, which could not be undone by the
355  /// callee.
356  unsigned BytesInStackArgArea = 0;
357
358  bool ReturnsVoid = true;
359
360  // A pair of default/requested minimum/maximum flat work group sizes.
361  // Minimum - first, maximum - second.
362  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
363
364  // A pair of default/requested minimum/maximum number of waves per execution
365  // unit. Minimum - first, maximum - second.
366  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
367
368  DenseMap<const Value *,
369           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
370  DenseMap<const Value *,
371           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
372  std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
373
374private:
375  unsigned LDSWaveSpillSize = 0;
376  unsigned NumUserSGPRs = 0;
377  unsigned NumSystemSGPRs = 0;
378
379  bool HasSpilledSGPRs = false;
380  bool HasSpilledVGPRs = false;
381  bool HasNonSpillStackObjects = false;
382  bool IsStackRealigned = false;
383
384  unsigned NumSpilledSGPRs = 0;
385  unsigned NumSpilledVGPRs = 0;
386
387  // Feature bits required for inputs passed in user SGPRs.
388  bool PrivateSegmentBuffer : 1;
389  bool DispatchPtr : 1;
390  bool QueuePtr : 1;
391  bool KernargSegmentPtr : 1;
392  bool DispatchID : 1;
393  bool FlatScratchInit : 1;
394
395  // Feature bits required for inputs passed in system SGPRs.
396  bool WorkGroupIDX : 1; // Always initialized.
397  bool WorkGroupIDY : 1;
398  bool WorkGroupIDZ : 1;
399  bool WorkGroupInfo : 1;
400  bool PrivateSegmentWaveByteOffset : 1;
401
402  bool WorkItemIDX : 1; // Always initialized.
403  bool WorkItemIDY : 1;
404  bool WorkItemIDZ : 1;
405
406  // Private memory buffer
407  // Compute directly in sgpr[0:1]
408  // Other shaders indirect 64-bits at sgpr[0:1]
409  bool ImplicitBufferPtr : 1;
410
411  // Pointer to where the ABI inserts special kernel arguments separate from the
412  // user arguments. This is an offset from the KernargSegmentPtr.
413  bool ImplicitArgPtr : 1;
414
415  // The hard-wired high half of the address of the global information table
416  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
417  // current hardware only allows a 16 bit value.
418  unsigned GITPtrHigh;
419
420  unsigned HighBitsOf32BitAddress;
421  unsigned GDSSize;
422
423  // Current recorded maximum possible occupancy.
424  unsigned Occupancy;
425
426  MCPhysReg getNextUserSGPR() const;
427
428  MCPhysReg getNextSystemSGPR() const;
429
430public:
431  struct SpilledReg {
432    unsigned VGPR = 0;
433    int Lane = -1;
434
435    SpilledReg() = default;
436    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}
437
438    bool hasLane() { return Lane != -1;}
439    bool hasReg() { return VGPR != 0;}
440  };
441
442  struct SGPRSpillVGPRCSR {
443    // VGPR used for SGPR spills
444    unsigned VGPR;
445
446    // If the VGPR is a CSR, the stack slot used to save/restore it in the
447    // prolog/epilog.
448    Optional<int> FI;
449
450    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
451  };
452
453  struct VGPRSpillToAGPR {
454    SmallVector<MCPhysReg, 32> Lanes;
455    bool FullyAllocated = false;
456  };
457
458  SparseBitVector<> WWMReservedRegs;
459
460  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
461
462private:
463  // SGPR->VGPR spilling support.
464  using SpillRegMask = std::pair<unsigned, unsigned>;
465
466  // Track VGPR + wave index for each subregister of the SGPR spilled to
467  // frameindex key.
468  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
469  unsigned NumVGPRSpillLanes = 0;
470  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
471
472  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
473
474  // AGPRs used for VGPR spills.
475  SmallVector<MCPhysReg, 32> SpillAGPR;
476
477  // VGPRs used for AGPR spills.
478  SmallVector<MCPhysReg, 32> SpillVGPR;
479
480public: // FIXME
481  /// If this is set, an SGPR used for save/restore of the register used for the
482  /// frame pointer.
483  unsigned SGPRForFPSaveRestoreCopy = 0;
484  Optional<int> FramePointerSaveIndex;
485
486public:
487  SIMachineFunctionInfo(const MachineFunction &MF);
488
489  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
490
491  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
492    auto I = SGPRToVGPRSpills.find(FrameIndex);
493    return (I == SGPRToVGPRSpills.end()) ?
494      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
495  }
496
497  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
498    return SpillVGPRs;
499  }
500
501  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
502    return SpillAGPR;
503  }
504
505  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
506    return SpillVGPR;
507  }
508
509  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
510    auto I = VGPRToAGPRSpills.find(FrameIndex);
511    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
512                                         : I->second.Lanes[Lane];
513  }
514
515  bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
516                                 unsigned NumLane) const;
517  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
518  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
519  void removeDeadFrameIndices(MachineFrameInfo &MFI);
520
521  bool hasCalculatedTID() const { return TIDReg != 0; };
522  unsigned getTIDReg() const { return TIDReg; };
523  void setTIDReg(unsigned Reg) { TIDReg = Reg; }
524
525  unsigned getBytesInStackArgArea() const {
526    return BytesInStackArgArea;
527  }
528
529  void setBytesInStackArgArea(unsigned Bytes) {
530    BytesInStackArgArea = Bytes;
531  }
532
533  // Add user SGPRs.
534  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
535  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
536  unsigned addQueuePtr(const SIRegisterInfo &TRI);
537  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
538  unsigned addDispatchID(const SIRegisterInfo &TRI);
539  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
540  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);
541
542  // Add system SGPRs.
543  unsigned addWorkGroupIDX() {
544    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
545    NumSystemSGPRs += 1;
546    return ArgInfo.WorkGroupIDX.getRegister();
547  }
548
549  unsigned addWorkGroupIDY() {
550    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
551    NumSystemSGPRs += 1;
552    return ArgInfo.WorkGroupIDY.getRegister();
553  }
554
555  unsigned addWorkGroupIDZ() {
556    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
557    NumSystemSGPRs += 1;
558    return ArgInfo.WorkGroupIDZ.getRegister();
559  }
560
561  unsigned addWorkGroupInfo() {
562    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
563    NumSystemSGPRs += 1;
564    return ArgInfo.WorkGroupInfo.getRegister();
565  }
566
567  // Add special VGPR inputs
568  void setWorkItemIDX(ArgDescriptor Arg) {
569    ArgInfo.WorkItemIDX = Arg;
570  }
571
572  void setWorkItemIDY(ArgDescriptor Arg) {
573    ArgInfo.WorkItemIDY = Arg;
574  }
575
576  void setWorkItemIDZ(ArgDescriptor Arg) {
577    ArgInfo.WorkItemIDZ = Arg;
578  }
579
580  unsigned addPrivateSegmentWaveByteOffset() {
581    ArgInfo.PrivateSegmentWaveByteOffset
582      = ArgDescriptor::createRegister(getNextSystemSGPR());
583    NumSystemSGPRs += 1;
584    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
585  }
586
587  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
588    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
589  }
590
591  bool hasPrivateSegmentBuffer() const {
592    return PrivateSegmentBuffer;
593  }
594
595  bool hasDispatchPtr() const {
596    return DispatchPtr;
597  }
598
599  bool hasQueuePtr() const {
600    return QueuePtr;
601  }
602
603  bool hasKernargSegmentPtr() const {
604    return KernargSegmentPtr;
605  }
606
607  bool hasDispatchID() const {
608    return DispatchID;
609  }
610
611  bool hasFlatScratchInit() const {
612    return FlatScratchInit;
613  }
614
615  bool hasWorkGroupIDX() const {
616    return WorkGroupIDX;
617  }
618
619  bool hasWorkGroupIDY() const {
620    return WorkGroupIDY;
621  }
622
623  bool hasWorkGroupIDZ() const {
624    return WorkGroupIDZ;
625  }
626
627  bool hasWorkGroupInfo() const {
628    return WorkGroupInfo;
629  }
630
631  bool hasPrivateSegmentWaveByteOffset() const {
632    return PrivateSegmentWaveByteOffset;
633  }
634
635  bool hasWorkItemIDX() const {
636    return WorkItemIDX;
637  }
638
639  bool hasWorkItemIDY() const {
640    return WorkItemIDY;
641  }
642
643  bool hasWorkItemIDZ() const {
644    return WorkItemIDZ;
645  }
646
647  bool hasImplicitArgPtr() const {
648    return ImplicitArgPtr;
649  }
650
651  bool hasImplicitBufferPtr() const {
652    return ImplicitBufferPtr;
653  }
654
655  AMDGPUFunctionArgInfo &getArgInfo() {
656    return ArgInfo;
657  }
658
659  const AMDGPUFunctionArgInfo &getArgInfo() const {
660    return ArgInfo;
661  }
662
663  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
664  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
665    return ArgInfo.getPreloadedValue(Value);
666  }
667
668  Register getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
669    auto Arg = ArgInfo.getPreloadedValue(Value).first;
670    return Arg ? Arg->getRegister() : Register();
671  }
672
673  unsigned getGITPtrHigh() const {
674    return GITPtrHigh;
675  }
676
677  uint32_t get32BitAddressHighBits() const {
678    return HighBitsOf32BitAddress;
679  }
680
681  unsigned getGDSSize() const {
682    return GDSSize;
683  }
684
685  unsigned getNumUserSGPRs() const {
686    return NumUserSGPRs;
687  }
688
689  unsigned getNumPreloadedSGPRs() const {
690    return NumUserSGPRs + NumSystemSGPRs;
691  }
692
693  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
694    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
695  }
696
697  /// Returns the physical register reserved for use as the resource
698  /// descriptor for scratch accesses.
699  unsigned getScratchRSrcReg() const {
700    return ScratchRSrcReg;
701  }
702
703  void setScratchRSrcReg(unsigned Reg) {
704    assert(Reg != 0 && "Should never be unset");
705    ScratchRSrcReg = Reg;
706  }
707
708  unsigned getScratchWaveOffsetReg() const {
709    return ScratchWaveOffsetReg;
710  }
711
712  unsigned getFrameOffsetReg() const {
713    return FrameOffsetReg;
714  }
715
716  void setFrameOffsetReg(unsigned Reg) {
717    assert(Reg != 0 && "Should never be unset");
718    FrameOffsetReg = Reg;
719  }
720
721  void setStackPtrOffsetReg(unsigned Reg) {
722    assert(Reg != 0 && "Should never be unset");
723    StackPtrOffsetReg = Reg;
724  }
725
726  // Note the unset value for this is AMDGPU::SP_REG rather than
727  // NoRegister. This is mostly a workaround for MIR tests where state that
728  // can't be directly computed from the function is not preserved in serialized
729  // MIR.
730  unsigned getStackPtrOffsetReg() const {
731    return StackPtrOffsetReg;
732  }
733
734  void setScratchWaveOffsetReg(unsigned Reg) {
735    assert(Reg != 0 && "Should never be unset");
736    ScratchWaveOffsetReg = Reg;
737  }
738
739  unsigned getQueuePtrUserSGPR() const {
740    return ArgInfo.QueuePtr.getRegister();
741  }
742
743  unsigned getImplicitBufferPtrUserSGPR() const {
744    return ArgInfo.ImplicitBufferPtr.getRegister();
745  }
746
747  bool hasSpilledSGPRs() const {
748    return HasSpilledSGPRs;
749  }
750
751  void setHasSpilledSGPRs(bool Spill = true) {
752    HasSpilledSGPRs = Spill;
753  }
754
755  bool hasSpilledVGPRs() const {
756    return HasSpilledVGPRs;
757  }
758
759  void setHasSpilledVGPRs(bool Spill = true) {
760    HasSpilledVGPRs = Spill;
761  }
762
763  bool hasNonSpillStackObjects() const {
764    return HasNonSpillStackObjects;
765  }
766
767  void setHasNonSpillStackObjects(bool StackObject = true) {
768    HasNonSpillStackObjects = StackObject;
769  }
770
771  bool isStackRealigned() const {
772    return IsStackRealigned;
773  }
774
775  void setIsStackRealigned(bool Realigned = true) {
776    IsStackRealigned = Realigned;
777  }
778
779  unsigned getNumSpilledSGPRs() const {
780    return NumSpilledSGPRs;
781  }
782
783  unsigned getNumSpilledVGPRs() const {
784    return NumSpilledVGPRs;
785  }
786
787  void addToSpilledSGPRs(unsigned num) {
788    NumSpilledSGPRs += num;
789  }
790
791  void addToSpilledVGPRs(unsigned num) {
792    NumSpilledVGPRs += num;
793  }
794
795  unsigned getPSInputAddr() const {
796    return PSInputAddr;
797  }
798
799  unsigned getPSInputEnable() const {
800    return PSInputEnable;
801  }
802
803  bool isPSInputAllocated(unsigned Index) const {
804    return PSInputAddr & (1 << Index);
805  }
806
807  void markPSInputAllocated(unsigned Index) {
808    PSInputAddr |= 1 << Index;
809  }
810
811  void markPSInputEnabled(unsigned Index) {
812    PSInputEnable |= 1 << Index;
813  }
814
815  bool returnsVoid() const {
816    return ReturnsVoid;
817  }
818
819  void setIfReturnsVoid(bool Value) {
820    ReturnsVoid = Value;
821  }
822
823  /// \returns A pair of default/requested minimum/maximum flat work group sizes
824  /// for this function.
825  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
826    return FlatWorkGroupSizes;
827  }
828
829  /// \returns Default/requested minimum flat work group size for this function.
830  unsigned getMinFlatWorkGroupSize() const {
831    return FlatWorkGroupSizes.first;
832  }
833
834  /// \returns Default/requested maximum flat work group size for this function.
835  unsigned getMaxFlatWorkGroupSize() const {
836    return FlatWorkGroupSizes.second;
837  }
838
839  /// \returns A pair of default/requested minimum/maximum number of waves per
840  /// execution unit.
841  std::pair<unsigned, unsigned> getWavesPerEU() const {
842    return WavesPerEU;
843  }
844
845  /// \returns Default/requested minimum number of waves per execution unit.
846  unsigned getMinWavesPerEU() const {
847    return WavesPerEU.first;
848  }
849
850  /// \returns Default/requested maximum number of waves per execution unit.
851  unsigned getMaxWavesPerEU() const {
852    return WavesPerEU.second;
853  }
854
855  /// \returns SGPR used for \p Dim's work group ID.
856  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
857    switch (Dim) {
858    case 0:
859      assert(hasWorkGroupIDX());
860      return ArgInfo.WorkGroupIDX.getRegister();
861    case 1:
862      assert(hasWorkGroupIDY());
863      return ArgInfo.WorkGroupIDY.getRegister();
864    case 2:
865      assert(hasWorkGroupIDZ());
866      return ArgInfo.WorkGroupIDZ.getRegister();
867    }
868    llvm_unreachable("unexpected dimension");
869  }
870
871  unsigned getLDSWaveSpillSize() const {
872    return LDSWaveSpillSize;
873  }
874
875  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
876                                                    const Value *BufferRsrc) {
877    assert(BufferRsrc);
878    auto PSV = BufferPSVs.try_emplace(
879      BufferRsrc,
880      std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
881    return PSV.first->second.get();
882  }
883
884  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
885                                                  const Value *ImgRsrc) {
886    assert(ImgRsrc);
887    auto PSV = ImagePSVs.try_emplace(
888      ImgRsrc,
889      std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
890    return PSV.first->second.get();
891  }
892
893  const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
894    if (!GWSResourcePSV) {
895      GWSResourcePSV =
896          std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
897    }
898
899    return GWSResourcePSV.get();
900  }
901
902  unsigned getOccupancy() const {
903    return Occupancy;
904  }
905
906  unsigned getMinAllowedOccupancy() const {
907    if (!isMemoryBound() && !needsWaveLimiter())
908      return Occupancy;
909    return (Occupancy < 4) ? Occupancy : 4;
910  }
911
912  void limitOccupancy(const MachineFunction &MF);
913
914  void limitOccupancy(unsigned Limit) {
915    if (Occupancy > Limit)
916      Occupancy = Limit;
917  }
918
919  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
920    if (Occupancy < Limit)
921      Occupancy = Limit;
922    limitOccupancy(MF);
923  }
924};
925
926} // end namespace llvm
927
928#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
929