1284677Sdim//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
2284677Sdim//
3284677Sdim//                     The LLVM Compiler Infrastructure
4284677Sdim//
5284677Sdim// This file is distributed under the University of Illinois Open Source
6284677Sdim// License. See LICENSE.TXT for details.
7284677Sdim//
8284677Sdim/// \file
9284677Sdim//===----------------------------------------------------------------------===//
10284677Sdim
11284677Sdim
12284677Sdim#include "SIMachineFunctionInfo.h"
13284677Sdim#include "AMDGPUSubtarget.h"
14284677Sdim#include "SIInstrInfo.h"
15284677Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
16284677Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
17284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
18284677Sdim#include "llvm/IR/Function.h"
19284677Sdim#include "llvm/IR/LLVMContext.h"
20284677Sdim
21284677Sdim#define MAX_LANES 64
22284677Sdim
23284677Sdimusing namespace llvm;
24284677Sdim
25284677Sdim
26284677Sdim// Pin the vtable to this file.
27284677Sdimvoid SIMachineFunctionInfo::anchor() {}
28284677Sdim
29284677SdimSIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
30284677Sdim  : AMDGPUMachineFunction(MF),
31284677Sdim    TIDReg(AMDGPU::NoRegister),
32296417Sdim    ScratchRSrcReg(AMDGPU::NoRegister),
33296417Sdim    ScratchWaveOffsetReg(AMDGPU::NoRegister),
34296417Sdim    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
35296417Sdim    DispatchPtrUserSGPR(AMDGPU::NoRegister),
36296417Sdim    QueuePtrUserSGPR(AMDGPU::NoRegister),
37296417Sdim    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
38296417Sdim    DispatchIDUserSGPR(AMDGPU::NoRegister),
39296417Sdim    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
40296417Sdim    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
41296417Sdim    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
42296417Sdim    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
43296417Sdim    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
44296417Sdim    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
45296417Sdim    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
46296417Sdim    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
47296417Sdim    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
48296417Sdim    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
49284677Sdim    PSInputAddr(0),
50296417Sdim    ReturnsVoid(true),
51296417Sdim    LDSWaveSpillSize(0),
52296417Sdim    PSInputEna(0),
53284677Sdim    NumUserSGPRs(0),
54296417Sdim    NumSystemSGPRs(0),
55296417Sdim    HasSpilledSGPRs(false),
56296417Sdim    HasSpilledVGPRs(false),
57296417Sdim    PrivateSegmentBuffer(false),
58296417Sdim    DispatchPtr(false),
59296417Sdim    QueuePtr(false),
60296417Sdim    DispatchID(false),
61296417Sdim    KernargSegmentPtr(false),
62296417Sdim    FlatScratchInit(false),
63296417Sdim    GridWorkgroupCountX(false),
64296417Sdim    GridWorkgroupCountY(false),
65296417Sdim    GridWorkgroupCountZ(false),
66296417Sdim    WorkGroupIDX(true),
67296417Sdim    WorkGroupIDY(false),
68296417Sdim    WorkGroupIDZ(false),
69296417Sdim    WorkGroupInfo(false),
70296417Sdim    PrivateSegmentWaveByteOffset(false),
71296417Sdim    WorkItemIDX(true),
72296417Sdim    WorkItemIDY(false),
73296417Sdim    WorkItemIDZ(false) {
74296417Sdim  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
75296417Sdim  const Function *F = MF.getFunction();
76284677Sdim
77296417Sdim  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
78296417Sdim
79296417Sdim  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
80296417Sdim
81296417Sdim  if (getShaderType() == ShaderType::COMPUTE)
82296417Sdim    KernargSegmentPtr = true;
83296417Sdim
84296417Sdim  if (F->hasFnAttribute("amdgpu-work-group-id-y"))
85296417Sdim    WorkGroupIDY = true;
86296417Sdim
87296417Sdim  if (F->hasFnAttribute("amdgpu-work-group-id-z"))
88296417Sdim    WorkGroupIDZ = true;
89296417Sdim
90296417Sdim  if (F->hasFnAttribute("amdgpu-work-item-id-y"))
91296417Sdim    WorkItemIDY = true;
92296417Sdim
93296417Sdim  if (F->hasFnAttribute("amdgpu-work-item-id-z"))
94296417Sdim    WorkItemIDZ = true;
95296417Sdim
96296417Sdim  bool MaySpill = ST.isVGPRSpillingEnabled(this);
97296417Sdim  bool HasStackObjects = FrameInfo->hasStackObjects();
98296417Sdim
99296417Sdim  if (HasStackObjects || MaySpill)
100296417Sdim    PrivateSegmentWaveByteOffset = true;
101296417Sdim
102296417Sdim  if (ST.isAmdHsaOS()) {
103296417Sdim    if (HasStackObjects || MaySpill)
104296417Sdim      PrivateSegmentBuffer = true;
105296417Sdim
106296417Sdim    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
107296417Sdim      DispatchPtr = true;
108296417Sdim  }
109296417Sdim
110296417Sdim  // X, XY, and XYZ are the only supported combinations, so make sure Y is
111296417Sdim  // enabled if Z is.
112296417Sdim  if (WorkItemIDZ)
113296417Sdim    WorkItemIDY = true;
114296417Sdim}
115296417Sdim
116296417Sdimunsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
117296417Sdim  const SIRegisterInfo &TRI) {
118296417Sdim  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
119296417Sdim    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
120296417Sdim  NumUserSGPRs += 4;
121296417Sdim  return PrivateSegmentBufferUserSGPR;
122296417Sdim}
123296417Sdim
124296417Sdimunsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
125296417Sdim  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
126296417Sdim    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
127296417Sdim  NumUserSGPRs += 2;
128296417Sdim  return DispatchPtrUserSGPR;
129296417Sdim}
130296417Sdim
131296417Sdimunsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
132296417Sdim  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
133296417Sdim    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
134296417Sdim  NumUserSGPRs += 2;
135296417Sdim  return QueuePtrUserSGPR;
136296417Sdim}
137296417Sdim
138296417Sdimunsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
139296417Sdim  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
140296417Sdim    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
141296417Sdim  NumUserSGPRs += 2;
142296417Sdim  return KernargSegmentPtrUserSGPR;
143296417Sdim}
144296417Sdim
145284677SdimSIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
146284677Sdim                                                       MachineFunction *MF,
147284677Sdim                                                       unsigned FrameIndex,
148284677Sdim                                                       unsigned SubIdx) {
149284677Sdim  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
150284677Sdim  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
151284677Sdim      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
152284677Sdim  MachineRegisterInfo &MRI = MF->getRegInfo();
153284677Sdim  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
154284677Sdim  Offset += SubIdx * 4;
155284677Sdim
156284677Sdim  unsigned LaneVGPRIdx = Offset / (64 * 4);
157284677Sdim  unsigned Lane = (Offset / 4) % 64;
158284677Sdim
159284677Sdim  struct SpilledReg Spill;
160284677Sdim
161284677Sdim  if (!LaneVGPRs.count(LaneVGPRIdx)) {
162284677Sdim    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
163296417Sdim
164296417Sdim    if (LaneVGPR == AMDGPU::NoRegister) {
165296417Sdim      LLVMContext &Ctx = MF->getFunction()->getContext();
166296417Sdim      Ctx.emitError("Ran out of VGPRs for spilling SGPR");
167296417Sdim
168296417Sdim      // When compiling from inside Mesa, the compilation continues.
169296417Sdim      // Select an arbitrary register to avoid triggering assertions
170296417Sdim      // during subsequent passes.
171296417Sdim      LaneVGPR = AMDGPU::VGPR0;
172296417Sdim    }
173296417Sdim
174284677Sdim    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
175284677Sdim
176284677Sdim    // Add this register as live-in to all blocks to avoid machine verifer
177284677Sdim    // complaining about use of an undefined physical register.
178284677Sdim    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
179284677Sdim         BI != BE; ++BI) {
180284677Sdim      BI->addLiveIn(LaneVGPR);
181284677Sdim    }
182284677Sdim  }
183284677Sdim
184284677Sdim  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
185284677Sdim  Spill.Lane = Lane;
186284677Sdim  return Spill;
187284677Sdim}
188284677Sdim
189284677Sdimunsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
190284677Sdim                                              const MachineFunction &MF) const {
191284677Sdim  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
192284677Sdim  // FIXME: We should get this information from kernel attributes if it
193284677Sdim  // is available.
194284677Sdim  return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
195284677Sdim}
196