1284677Sdim//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// 2284677Sdim// 3284677Sdim// The LLVM Compiler Infrastructure 4284677Sdim// 5284677Sdim// This file is distributed under the University of Illinois Open Source 6284677Sdim// License. See LICENSE.TXT for details. 7284677Sdim// 8284677Sdim/// \file 9284677Sdim//===----------------------------------------------------------------------===// 10284677Sdim 11284677Sdim 12284677Sdim#include "SIMachineFunctionInfo.h" 13284677Sdim#include "AMDGPUSubtarget.h" 14284677Sdim#include "SIInstrInfo.h" 15284677Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 16284677Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 17284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 18284677Sdim#include "llvm/IR/Function.h" 19284677Sdim#include "llvm/IR/LLVMContext.h" 20284677Sdim 21284677Sdim#define MAX_LANES 64 22284677Sdim 23284677Sdimusing namespace llvm; 24284677Sdim 25284677Sdim 26284677Sdim// Pin the vtable to this file. 27284677Sdimvoid SIMachineFunctionInfo::anchor() {} 28284677Sdim 29284677SdimSIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) 30284677Sdim : AMDGPUMachineFunction(MF), 31284677Sdim TIDReg(AMDGPU::NoRegister), 32296417Sdim ScratchRSrcReg(AMDGPU::NoRegister), 33296417Sdim ScratchWaveOffsetReg(AMDGPU::NoRegister), 34296417Sdim PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), 35296417Sdim DispatchPtrUserSGPR(AMDGPU::NoRegister), 36296417Sdim QueuePtrUserSGPR(AMDGPU::NoRegister), 37296417Sdim KernargSegmentPtrUserSGPR(AMDGPU::NoRegister), 38296417Sdim DispatchIDUserSGPR(AMDGPU::NoRegister), 39296417Sdim FlatScratchInitUserSGPR(AMDGPU::NoRegister), 40296417Sdim PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister), 41296417Sdim GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister), 42296417Sdim GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister), 43296417Sdim GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister), 44296417Sdim WorkGroupIDXSystemSGPR(AMDGPU::NoRegister), 45296417Sdim WorkGroupIDYSystemSGPR(AMDGPU::NoRegister), 46296417Sdim WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), 47296417Sdim WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), 48296417Sdim PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), 49284677Sdim PSInputAddr(0), 50296417Sdim ReturnsVoid(true), 51296417Sdim LDSWaveSpillSize(0), 52296417Sdim PSInputEna(0), 53284677Sdim NumUserSGPRs(0), 54296417Sdim NumSystemSGPRs(0), 55296417Sdim HasSpilledSGPRs(false), 56296417Sdim HasSpilledVGPRs(false), 57296417Sdim PrivateSegmentBuffer(false), 58296417Sdim DispatchPtr(false), 59296417Sdim QueuePtr(false), 60296417Sdim DispatchID(false), 61296417Sdim KernargSegmentPtr(false), 62296417Sdim FlatScratchInit(false), 63296417Sdim GridWorkgroupCountX(false), 64296417Sdim GridWorkgroupCountY(false), 65296417Sdim GridWorkgroupCountZ(false), 66296417Sdim WorkGroupIDX(true), 67296417Sdim WorkGroupIDY(false), 68296417Sdim WorkGroupIDZ(false), 69296417Sdim WorkGroupInfo(false), 70296417Sdim PrivateSegmentWaveByteOffset(false), 71296417Sdim WorkItemIDX(true), 72296417Sdim WorkItemIDY(false), 73296417Sdim WorkItemIDZ(false) { 74296417Sdim const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 75296417Sdim const Function *F = MF.getFunction(); 76284677Sdim 77296417Sdim PSInputAddr = AMDGPU::getInitialPSInputAddr(*F); 78296417Sdim 79296417Sdim const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 80296417Sdim 81296417Sdim if (getShaderType() == ShaderType::COMPUTE) 82296417Sdim KernargSegmentPtr = true; 83296417Sdim 84296417Sdim if (F->hasFnAttribute("amdgpu-work-group-id-y")) 85296417Sdim WorkGroupIDY = true; 86296417Sdim 87296417Sdim if (F->hasFnAttribute("amdgpu-work-group-id-z")) 88296417Sdim WorkGroupIDZ = true; 89296417Sdim 90296417Sdim if (F->hasFnAttribute("amdgpu-work-item-id-y")) 91296417Sdim WorkItemIDY = true; 92296417Sdim 93296417Sdim if (F->hasFnAttribute("amdgpu-work-item-id-z")) 94296417Sdim WorkItemIDZ = true; 95296417Sdim 96296417Sdim bool MaySpill = ST.isVGPRSpillingEnabled(this); 97296417Sdim bool HasStackObjects = FrameInfo->hasStackObjects(); 98296417Sdim 99296417Sdim if (HasStackObjects || MaySpill) 100296417Sdim PrivateSegmentWaveByteOffset = true; 101296417Sdim 102296417Sdim if (ST.isAmdHsaOS()) { 103296417Sdim if (HasStackObjects || MaySpill) 104296417Sdim PrivateSegmentBuffer = true; 105296417Sdim 106296417Sdim if (F->hasFnAttribute("amdgpu-dispatch-ptr")) 107296417Sdim DispatchPtr = true; 108296417Sdim } 109296417Sdim 110296417Sdim // X, XY, and XYZ are the only supported combinations, so make sure Y is 111296417Sdim // enabled if Z is. 112296417Sdim if (WorkItemIDZ) 113296417Sdim WorkItemIDY = true; 114296417Sdim} 115296417Sdim 116296417Sdimunsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( 117296417Sdim const SIRegisterInfo &TRI) { 118296417Sdim PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg( 119296417Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass); 120296417Sdim NumUserSGPRs += 4; 121296417Sdim return PrivateSegmentBufferUserSGPR; 122296417Sdim} 123296417Sdim 124296417Sdimunsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 125296417Sdim DispatchPtrUserSGPR = TRI.getMatchingSuperReg( 126296417Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 127296417Sdim NumUserSGPRs += 2; 128296417Sdim return DispatchPtrUserSGPR; 129296417Sdim} 130296417Sdim 131296417Sdimunsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 132296417Sdim QueuePtrUserSGPR = TRI.getMatchingSuperReg( 133296417Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 134296417Sdim NumUserSGPRs += 2; 135296417Sdim return QueuePtrUserSGPR; 136296417Sdim} 137296417Sdim 138296417Sdimunsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 139296417Sdim KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( 140296417Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); 141296417Sdim NumUserSGPRs += 2; 142296417Sdim return KernargSegmentPtrUserSGPR; 143296417Sdim} 144296417Sdim 145284677SdimSIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( 146284677Sdim MachineFunction *MF, 147284677Sdim unsigned FrameIndex, 148284677Sdim unsigned SubIdx) { 149284677Sdim const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 150284677Sdim const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( 151284677Sdim MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo()); 152284677Sdim MachineRegisterInfo &MRI = MF->getRegInfo(); 153284677Sdim int64_t Offset = FrameInfo->getObjectOffset(FrameIndex); 154284677Sdim Offset += SubIdx * 4; 155284677Sdim 156284677Sdim unsigned LaneVGPRIdx = Offset / (64 * 4); 157284677Sdim unsigned Lane = (Offset / 4) % 64; 158284677Sdim 159284677Sdim struct SpilledReg Spill; 160284677Sdim 161284677Sdim if (!LaneVGPRs.count(LaneVGPRIdx)) { 162284677Sdim unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); 163296417Sdim 164296417Sdim if (LaneVGPR == AMDGPU::NoRegister) { 165296417Sdim LLVMContext &Ctx = MF->getFunction()->getContext(); 166296417Sdim Ctx.emitError("Ran out of VGPRs for spilling SGPR"); 167296417Sdim 168296417Sdim // When compiling from inside Mesa, the compilation continues. 169296417Sdim // Select an arbitrary register to avoid triggering assertions 170296417Sdim // during subsequent passes. 171296417Sdim LaneVGPR = AMDGPU::VGPR0; 172296417Sdim } 173296417Sdim 174284677Sdim LaneVGPRs[LaneVGPRIdx] = LaneVGPR; 175284677Sdim 176284677Sdim // Add this register as live-in to all blocks to avoid machine verifer 177284677Sdim // complaining about use of an undefined physical register. 178284677Sdim for (MachineFunction::iterator BI = MF->begin(), BE = MF->end(); 179284677Sdim BI != BE; ++BI) { 180284677Sdim BI->addLiveIn(LaneVGPR); 181284677Sdim } 182284677Sdim } 183284677Sdim 184284677Sdim Spill.VGPR = LaneVGPRs[LaneVGPRIdx]; 185284677Sdim Spill.Lane = Lane; 186284677Sdim return Spill; 187284677Sdim} 188284677Sdim 189284677Sdimunsigned SIMachineFunctionInfo::getMaximumWorkGroupSize( 190284677Sdim const MachineFunction &MF) const { 191284677Sdim const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 192284677Sdim // FIXME: We should get this information from kernel attributes if it 193284677Sdim // is available. 194284677Sdim return getShaderType() == ShaderType::COMPUTE ? 256 : ST.getWavefrontSize(); 195284677Sdim} 196