//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

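// Number of lanes in a wavefront; each lane of a spill VGPR can hold one
// 32-bit SGPR value.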
#define MAX_LANES 64

using namespace llvm;

// Pin the vtable to this file.
void SIMachineFunctionInfo::anchor() {}

SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : AMDGPUMachineFunction(MF),
    TIDReg(AMDGPU::NoRegister),
    ScratchRSrcReg(AMDGPU::NoRegister),
    ScratchWaveOffsetReg(AMDGPU::NoRegister),
    PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
    DispatchPtrUserSGPR(AMDGPU::NoRegister),
    QueuePtrUserSGPR(AMDGPU::NoRegister),
    KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
    DispatchIDUserSGPR(AMDGPU::NoRegister),
    FlatScratchInitUserSGPR(AMDGPU::NoRegister),
    PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
    GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
    WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
    WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
    WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
    PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
    PSInputAddr(0),
    ReturnsVoid(true),
    LDSWaveSpillSize(0),
    PSInputEna(0),
    NumUserSGPRs(0),
    NumSystemSGPRs(0),
    HasSpilledSGPRs(false),
    HasSpilledVGPRs(false),
    PrivateSegmentBuffer(false),
    DispatchPtr(false),
    QueuePtr(false),
    DispatchID(false),
    KernargSegmentPtr(false),
    FlatScratchInit(false),
    GridWorkgroupCountX(false),
    GridWorkgroupCountY(false),
    GridWorkgroupCountZ(false),
    WorkGroupIDX(true),
    WorkGroupIDY(false),
    WorkGroupIDZ(false),
    WorkGroupInfo(false),
    PrivateSegmentWaveByteOffset(false),
    WorkItemIDX(true),
    WorkItemIDY(false),
    WorkItemIDZ(false) {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  const Function *F = MF.getFunction();

  PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);

  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();

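  // Compute kernels read their arguments from the kernarg segment, so they
  // always need the kernarg segment pointer.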
  if (getShaderType() == ShaderType::COMPUTE)
    KernargSegmentPtr = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-y"))
    WorkGroupIDY = true;

  if (F->hasFnAttribute("amdgpu-work-group-id-z"))
    WorkGroupIDZ = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-y"))
    WorkItemIDY = true;

  if (F->hasFnAttribute("amdgpu-work-item-id-z"))
    WorkItemIDZ = true;

  bool MaySpill = ST.isVGPRSpillingEnabled(this);
  bool HasStackObjects = FrameInfo->hasStackObjects();

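  // Scratch is needed if the function has stack objects or may spill, so
  // reserve the per-wave scratch offset, and on HSA the scratch resource
  // descriptor as well.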
  if (HasStackObjects || MaySpill)
    PrivateSegmentWaveByteOffset = true;

  if (ST.isAmdHsaOS()) {
    if (HasStackObjects || MaySpill)
      PrivateSegmentBuffer = true;

    if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
      DispatchPtr = true;
  }

  // X, XY, and XYZ are the only supported combinations, so make sure Y is
  // enabled if Z is.
  if (WorkItemIDZ)
    WorkItemIDY = true;
}

unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
  const SIRegisterInfo &TRI) {
  PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
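  // The scratch resource descriptor is a 128-bit value, so it occupies four
  // consecutive user SGPRs.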
  NumUserSGPRs += 4;
  return PrivateSegmentBufferUserSGPR;
}

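// The dispatch, queue, and kernarg segment pointers below are 64-bit values,
// so each of them occupies a pair of user SGPRs.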
unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return DispatchPtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  QueuePtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return QueuePtrUserSGPR;
}

unsigned SIMachineFunctionInfo::addKernargSegmentPtr(
  const SIRegisterInfo &TRI) {
  KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
    getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
  NumUserSGPRs += 2;
  return KernargSegmentPtrUserSGPR;
}

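// Map the stack slot and sub-register index of an SGPR spill to the VGPR lane
// that holds its value, allocating a new lane VGPR if needed.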
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
                                                       MachineFunction *MF,
                                                       unsigned FrameIndex,
                                                       unsigned SubIdx) {
  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
  const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
      MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
  MachineRegisterInfo &MRI = MF->getRegInfo();
  int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
  Offset += SubIdx * 4;

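  // Each lane VGPR covers 64 lanes of 4 bytes each (one 32-bit SGPR value per
  // lane), so map the byte offset to a (lane VGPR, lane) pair.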
  unsigned LaneVGPRIdx = Offset / (64 * 4);
  unsigned Lane = (Offset / 4) % 64;

  SpilledReg Spill;

  if (!LaneVGPRs.count(LaneVGPRIdx)) {
    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);

    if (LaneVGPR == AMDGPU::NoRegister) {
      LLVMContext &Ctx = MF->getFunction()->getContext();
      Ctx.emitError("Ran out of VGPRs for spilling SGPR");

      // When compiling from inside Mesa, the compilation continues.
      // Select an arbitrary register to avoid triggering assertions
      // during subsequent passes.
      LaneVGPR = AMDGPU::VGPR0;
    }

    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;

    // Add this register as live-in to all blocks to avoid machine verifier
    // complaints about the use of an undefined physical register.
    for (MachineBasicBlock &BB : *MF)
      BB.addLiveIn(LaneVGPR);
  }

  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
  Spill.Lane = Lane;
  return Spill;
}

unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
                                              const MachineFunction &MF) const {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
  // FIXME: We should get this information from kernel attributes if it
  // is available.
  return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize();
}