1327952Sdim//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===// 2284677Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6284677Sdim// 7284677Sdim//===----------------------------------------------------------------------===// 8284677Sdim 9284677Sdim#include "SIMachineFunctionInfo.h" 10327952Sdim#include "AMDGPUArgumentUsageInfo.h" 11284677Sdim#include "AMDGPUSubtarget.h" 12327952Sdim#include "SIRegisterInfo.h" 13341825Sdim#include "MCTargetDesc/AMDGPUMCTargetDesc.h" 14327952Sdim#include "Utils/AMDGPUBaseInfo.h" 15327952Sdim#include "llvm/ADT/Optional.h" 16327952Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 17309124Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 18327952Sdim#include "llvm/CodeGen/MachineFunction.h" 19284677Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 20327952Sdim#include "llvm/IR/CallingConv.h" 21284677Sdim#include "llvm/IR/Function.h" 22327952Sdim#include <cassert> 23327952Sdim#include <vector> 24284677Sdim 25284677Sdim#define MAX_LANES 64 26284677Sdim 27284677Sdimusing namespace llvm; 28284677Sdim 29284677SdimSIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) 30284677Sdim : AMDGPUMachineFunction(MF), 31296417Sdim PrivateSegmentBuffer(false), 32296417Sdim DispatchPtr(false), 33296417Sdim QueuePtr(false), 34314564Sdim KernargSegmentPtr(false), 35296417Sdim DispatchID(false), 36296417Sdim FlatScratchInit(false), 37309124Sdim WorkGroupIDX(false), 38296417Sdim WorkGroupIDY(false), 39296417Sdim WorkGroupIDZ(false), 40296417Sdim WorkGroupInfo(false), 41296417Sdim PrivateSegmentWaveByteOffset(false), 42309124Sdim WorkItemIDX(false), 43296417Sdim WorkItemIDY(false), 44314564Sdim WorkItemIDZ(false), 45327952Sdim ImplicitBufferPtr(false), 46327952Sdim ImplicitArgPtr(false), 47341825Sdim GITPtrHigh(0xffffffff), 48353358Sdim HighBitsOf32BitAddress(0), 49353358Sdim GDSSize(0) { 50341825Sdim const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 51327952Sdim const Function &F = MF.getFunction(); 52327952Sdim FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F); 53327952Sdim WavesPerEU = ST.getWavesPerEU(F); 54284677Sdim 55360784Sdim Occupancy = ST.computeOccupancy(MF, getLDSSize()); 56341825Sdim CallingConv::ID CC = F.getCallingConv(); 57341825Sdim 58341825Sdim if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) { 59341825Sdim if (!F.arg_empty()) 60341825Sdim KernargSegmentPtr = true; 61341825Sdim WorkGroupIDX = true; 62341825Sdim WorkItemIDX = true; 63341825Sdim } else if (CC == CallingConv::AMDGPU_PS) { 64341825Sdim PSInputAddr = AMDGPU::getInitialPSInputAddr(F); 65341825Sdim } 66341825Sdim 67321369Sdim if (!isEntryFunction()) { 68321369Sdim // Non-entry functions have no special inputs for now, other registers 69321369Sdim // required for scratch access. 70321369Sdim ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3; 71353358Sdim ScratchWaveOffsetReg = AMDGPU::SGPR33; 72353358Sdim 73353358Sdim // TODO: Pick a high register, and shift down, similar to a kernel. 74353358Sdim FrameOffsetReg = AMDGPU::SGPR34; 75321369Sdim StackPtrOffsetReg = AMDGPU::SGPR32; 76296417Sdim 77327952Sdim ArgInfo.PrivateSegmentBuffer = 78327952Sdim ArgDescriptor::createRegister(ScratchRSrcReg); 79327952Sdim ArgInfo.PrivateSegmentWaveByteOffset = 80327952Sdim ArgDescriptor::createRegister(ScratchWaveOffsetReg); 81327952Sdim 82327952Sdim if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) 83327952Sdim ImplicitArgPtr = true; 84327952Sdim } else { 85341825Sdim if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) { 86327952Sdim KernargSegmentPtr = true; 87341825Sdim MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(), 88341825Sdim MaxKernArgAlign); 89341825Sdim } 90321369Sdim } 91296417Sdim 92353358Sdim if (F.hasFnAttribute("amdgpu-work-group-id-x")) 93321369Sdim WorkGroupIDX = true; 94353358Sdim 95353358Sdim if (F.hasFnAttribute("amdgpu-work-group-id-y")) 96296417Sdim WorkGroupIDY = true; 97353358Sdim 98353358Sdim if (F.hasFnAttribute("amdgpu-work-group-id-z")) 99296417Sdim WorkGroupIDZ = true; 100353358Sdim 101353358Sdim if (F.hasFnAttribute("amdgpu-work-item-id-x")) 102321369Sdim WorkItemIDX = true; 103353358Sdim 104353358Sdim if (F.hasFnAttribute("amdgpu-work-item-id-y")) 105296417Sdim WorkItemIDY = true; 106353358Sdim 107353358Sdim if (F.hasFnAttribute("amdgpu-work-item-id-z")) 108296417Sdim WorkItemIDZ = true; 109296417Sdim 110321369Sdim const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 111314564Sdim bool HasStackObjects = FrameInfo.hasStackObjects(); 112296417Sdim 113321369Sdim if (isEntryFunction()) { 114321369Sdim // X, XY, and XYZ are the only supported combinations, so make sure Y is 115321369Sdim // enabled if Z is. 116321369Sdim if (WorkItemIDZ) 117321369Sdim WorkItemIDY = true; 118296417Sdim 119344779Sdim PrivateSegmentWaveByteOffset = true; 120321369Sdim 121327952Sdim // HS and GS always have the scratch wave offset in SGPR5 on GFX9. 122327952Sdim if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && 123327952Sdim (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) 124344779Sdim ArgInfo.PrivateSegmentWaveByteOffset = 125344779Sdim ArgDescriptor::createRegister(AMDGPU::SGPR5); 126321369Sdim } 127321369Sdim 128344779Sdim bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F); 129344779Sdim if (isAmdHsaOrMesa) { 130344779Sdim PrivateSegmentBuffer = true; 131296417Sdim 132327952Sdim if (F.hasFnAttribute("amdgpu-dispatch-ptr")) 133296417Sdim DispatchPtr = true; 134309124Sdim 135327952Sdim if (F.hasFnAttribute("amdgpu-queue-ptr")) 136309124Sdim QueuePtr = true; 137314564Sdim 138327952Sdim if (F.hasFnAttribute("amdgpu-dispatch-id")) 139314564Sdim DispatchID = true; 140341825Sdim } else if (ST.isMesaGfxShader(F)) { 141344779Sdim ImplicitBufferPtr = true; 142296417Sdim } 143296417Sdim 144327952Sdim if (F.hasFnAttribute("amdgpu-kernarg-segment-ptr")) 145321369Sdim KernargSegmentPtr = true; 146309124Sdim 147344779Sdim if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) { 148353358Sdim auto hasNonSpillStackObjects = [&]() { 149353358Sdim // Avoid expensive checking if there's no stack objects. 150353358Sdim if (!HasStackObjects) 151353358Sdim return false; 152353358Sdim for (auto OI = FrameInfo.getObjectIndexBegin(), 153353358Sdim OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI) 154353358Sdim if (!FrameInfo.isSpillSlotObjectIndex(OI)) 155353358Sdim return true; 156353358Sdim // All stack objects are spill slots. 157353358Sdim return false; 158353358Sdim }; 159321369Sdim // TODO: This could be refined a lot. The attribute is a poor way of 160321369Sdim // detecting calls that may require it before argument lowering. 161353358Sdim if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch")) 162321369Sdim FlatScratchInit = true; 163321369Sdim } 164327952Sdim 165327952Sdim Attribute A = F.getFnAttribute("amdgpu-git-ptr-high"); 166327952Sdim StringRef S = A.getValueAsString(); 167327952Sdim if (!S.empty()) 168327952Sdim S.consumeInteger(0, GITPtrHigh); 169341825Sdim 170341825Sdim A = F.getFnAttribute("amdgpu-32bit-address-high-bits"); 171341825Sdim S = A.getValueAsString(); 172341825Sdim if (!S.empty()) 173341825Sdim S.consumeInteger(0, HighBitsOf32BitAddress); 174353358Sdim 175353358Sdim S = F.getFnAttribute("amdgpu-gds-size").getValueAsString(); 176353358Sdim if (!S.empty()) 177353358Sdim S.consumeInteger(0, GDSSize); 178296417Sdim} 179296417Sdim 180341825Sdimvoid SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) { 181341825Sdim limitOccupancy(getMaxWavesPerEU()); 182341825Sdim const GCNSubtarget& ST = MF.getSubtarget<GCNSubtarget>(); 183341825Sdim limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(), 184341825Sdim MF.getFunction())); 185341825Sdim} 186341825Sdim 187296417Sdimunsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( 188296417Sdim const SIRegisterInfo &TRI) { 189327952Sdim ArgInfo.PrivateSegmentBuffer = 190327952Sdim ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 191360784Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass)); 192296417Sdim NumUserSGPRs += 4; 193327952Sdim return ArgInfo.PrivateSegmentBuffer.getRegister(); 194296417Sdim} 195296417Sdim 196296417Sdimunsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { 197327952Sdim ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 198327952Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 199296417Sdim NumUserSGPRs += 2; 200327952Sdim return ArgInfo.DispatchPtr.getRegister(); 201296417Sdim} 202296417Sdim 203296417Sdimunsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { 204327952Sdim ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 205327952Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 206296417Sdim NumUserSGPRs += 2; 207327952Sdim return ArgInfo.QueuePtr.getRegister(); 208296417Sdim} 209296417Sdim 210296417Sdimunsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { 211327952Sdim ArgInfo.KernargSegmentPtr 212327952Sdim = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 213327952Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 214296417Sdim NumUserSGPRs += 2; 215327952Sdim return ArgInfo.KernargSegmentPtr.getRegister(); 216296417Sdim} 217296417Sdim 218314564Sdimunsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) { 219327952Sdim ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 220327952Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 221314564Sdim NumUserSGPRs += 2; 222327952Sdim return ArgInfo.DispatchID.getRegister(); 223314564Sdim} 224314564Sdim 225309124Sdimunsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) { 226327952Sdim ArgInfo.FlatScratchInit = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 227327952Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 228309124Sdim NumUserSGPRs += 2; 229327952Sdim return ArgInfo.FlatScratchInit.getRegister(); 230309124Sdim} 231309124Sdim 232321369Sdimunsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) { 233327952Sdim ArgInfo.ImplicitBufferPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg( 234327952Sdim getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass)); 235314564Sdim NumUserSGPRs += 2; 236327952Sdim return ArgInfo.ImplicitBufferPtr.getRegister(); 237314564Sdim} 238314564Sdim 239327952Sdimstatic bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { 240327952Sdim for (unsigned I = 0; CSRegs[I]; ++I) { 241327952Sdim if (CSRegs[I] == Reg) 242327952Sdim return true; 243327952Sdim } 244327952Sdim 245327952Sdim return false; 246327952Sdim} 247327952Sdim 248353358Sdim/// \p returns true if \p NumLanes slots are available in VGPRs already used for 249353358Sdim/// SGPR spilling. 250353358Sdim// 251353358Sdim// FIXME: This only works after processFunctionBeforeFrameFinalized 252353358Sdimbool SIMachineFunctionInfo::haveFreeLanesForSGPRSpill(const MachineFunction &MF, 253353358Sdim unsigned NumNeed) const { 254353358Sdim const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 255353358Sdim unsigned WaveSize = ST.getWavefrontSize(); 256353358Sdim return NumVGPRSpillLanes + NumNeed <= WaveSize * SpillVGPRs.size(); 257353358Sdim} 258353358Sdim 259321369Sdim/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. 260321369Sdimbool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, 261321369Sdim int FI) { 262321369Sdim std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI]; 263309124Sdim 264321369Sdim // This has already been allocated. 265321369Sdim if (!SpillLanes.empty()) 266321369Sdim return true; 267321369Sdim 268341825Sdim const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 269309124Sdim const SIRegisterInfo *TRI = ST.getRegisterInfo(); 270321369Sdim MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 271321369Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 272321369Sdim unsigned WaveSize = ST.getWavefrontSize(); 273309124Sdim 274321369Sdim unsigned Size = FrameInfo.getObjectSize(FI); 275321369Sdim assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size"); 276321369Sdim assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs"); 277284677Sdim 278321369Sdim int NumLanes = Size / 4; 279284677Sdim 280353358Sdim const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs(); 281327952Sdim 282321369Sdim // Make sure to handle the case where a wide SGPR spill may span between two 283321369Sdim // VGPRs. 284321369Sdim for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) { 285321369Sdim unsigned LaneVGPR; 286321369Sdim unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize); 287284677Sdim 288321369Sdim if (VGPRIndex == 0) { 289321369Sdim LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); 290321369Sdim if (LaneVGPR == AMDGPU::NoRegister) { 291327952Sdim // We have no VGPRs left for spilling SGPRs. Reset because we will not 292321369Sdim // partially spill the SGPR to VGPRs. 293321369Sdim SGPRToVGPRSpills.erase(FI); 294321369Sdim NumVGPRSpillLanes -= I; 295321369Sdim return false; 296321369Sdim } 297296417Sdim 298327952Sdim Optional<int> CSRSpillFI; 299341825Sdim if ((FrameInfo.hasCalls() || !isEntryFunction()) && CSRegs && 300341825Sdim isCalleeSavedReg(CSRegs, LaneVGPR)) { 301341825Sdim CSRSpillFI = FrameInfo.CreateSpillStackObject(4, 4); 302327952Sdim } 303296417Sdim 304327952Sdim SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI)); 305327952Sdim 306321369Sdim // Add this register as live-in to all blocks to avoid machine verifer 307321369Sdim // complaining about use of an undefined physical register. 308321369Sdim for (MachineBasicBlock &BB : MF) 309321369Sdim BB.addLiveIn(LaneVGPR); 310321369Sdim } else { 311327952Sdim LaneVGPR = SpillVGPRs.back().VGPR; 312321369Sdim } 313284677Sdim 314321369Sdim SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex)); 315284677Sdim } 316284677Sdim 317321369Sdim return true; 318284677Sdim} 319321369Sdim 320353358Sdim/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI. 321353358Sdim/// Either AGPR is spilled to VGPR to vice versa. 322353358Sdim/// Returns true if a \p FI can be eliminated completely. 323353358Sdimbool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, 324353358Sdim int FI, 325353358Sdim bool isAGPRtoVGPR) { 326353358Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 327353358Sdim MachineFrameInfo &FrameInfo = MF.getFrameInfo(); 328353358Sdim const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 329353358Sdim 330353358Sdim assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI)); 331353358Sdim 332353358Sdim auto &Spill = VGPRToAGPRSpills[FI]; 333353358Sdim 334353358Sdim // This has already been allocated. 335353358Sdim if (!Spill.Lanes.empty()) 336353358Sdim return Spill.FullyAllocated; 337353358Sdim 338353358Sdim unsigned Size = FrameInfo.getObjectSize(FI); 339353358Sdim unsigned NumLanes = Size / 4; 340353358Sdim Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister); 341353358Sdim 342353358Sdim const TargetRegisterClass &RC = 343353358Sdim isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass; 344353358Sdim auto Regs = RC.getRegisters(); 345353358Sdim 346353358Sdim auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR; 347353358Sdim const SIRegisterInfo *TRI = ST.getRegisterInfo(); 348353358Sdim Spill.FullyAllocated = true; 349353358Sdim 350353358Sdim // FIXME: Move allocation logic out of MachineFunctionInfo and initialize 351353358Sdim // once. 352353358Sdim BitVector OtherUsedRegs; 353353358Sdim OtherUsedRegs.resize(TRI->getNumRegs()); 354353358Sdim 355353358Sdim const uint32_t *CSRMask = 356353358Sdim TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv()); 357353358Sdim if (CSRMask) 358353358Sdim OtherUsedRegs.setBitsInMask(CSRMask); 359353358Sdim 360353358Sdim // TODO: Should include register tuples, but doesn't matter with current 361353358Sdim // usage. 362353358Sdim for (MCPhysReg Reg : SpillAGPR) 363353358Sdim OtherUsedRegs.set(Reg); 364353358Sdim for (MCPhysReg Reg : SpillVGPR) 365353358Sdim OtherUsedRegs.set(Reg); 366353358Sdim 367353358Sdim SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin(); 368353358Sdim for (unsigned I = 0; I < NumLanes; ++I) { 369353358Sdim NextSpillReg = std::find_if( 370353358Sdim NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) { 371353358Sdim return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) && 372353358Sdim !OtherUsedRegs[Reg]; 373353358Sdim }); 374353358Sdim 375353358Sdim if (NextSpillReg == Regs.end()) { // Registers exhausted 376353358Sdim Spill.FullyAllocated = false; 377353358Sdim break; 378353358Sdim } 379353358Sdim 380353358Sdim OtherUsedRegs.set(*NextSpillReg); 381353358Sdim SpillRegs.push_back(*NextSpillReg); 382353358Sdim Spill.Lanes[I] = *NextSpillReg++; 383353358Sdim } 384353358Sdim 385353358Sdim return Spill.FullyAllocated; 386321369Sdim} 387341825Sdim 388353358Sdimvoid SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { 389353358Sdim // The FP spill hasn't been inserted yet, so keep it around. 390353358Sdim for (auto &R : SGPRToVGPRSpills) { 391353358Sdim if (R.first != FramePointerSaveIndex) 392353358Sdim MFI.RemoveStackObject(R.first); 393353358Sdim } 394341825Sdim 395353358Sdim // All other SPGRs must be allocated on the default stack, so reset the stack 396353358Sdim // ID. 397353358Sdim for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e; 398353358Sdim ++i) 399353358Sdim if (i != FramePointerSaveIndex) 400353358Sdim MFI.setStackID(i, TargetStackID::Default); 401353358Sdim 402353358Sdim for (auto &R : VGPRToAGPRSpills) { 403353358Sdim if (R.second.FullyAllocated) 404353358Sdim MFI.RemoveStackObject(R.first); 405341825Sdim } 406341825Sdim} 407341825Sdim 408341825SdimMCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const { 409341825Sdim assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); 410341825Sdim return AMDGPU::SGPR0 + NumUserSGPRs; 411341825Sdim} 412341825Sdim 413341825SdimMCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const { 414341825Sdim return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; 415341825Sdim} 416353358Sdim 417353358Sdimstatic yaml::StringValue regToString(unsigned Reg, 418353358Sdim const TargetRegisterInfo &TRI) { 419353358Sdim yaml::StringValue Dest; 420353358Sdim { 421353358Sdim raw_string_ostream OS(Dest.Value); 422353358Sdim OS << printReg(Reg, &TRI); 423353358Sdim } 424353358Sdim return Dest; 425353358Sdim} 426353358Sdim 427353358Sdimstatic Optional<yaml::SIArgumentInfo> 428353358SdimconvertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo, 429353358Sdim const TargetRegisterInfo &TRI) { 430353358Sdim yaml::SIArgumentInfo AI; 431353358Sdim 432353358Sdim auto convertArg = [&](Optional<yaml::SIArgument> &A, 433353358Sdim const ArgDescriptor &Arg) { 434353358Sdim if (!Arg) 435353358Sdim return false; 436353358Sdim 437353358Sdim // Create a register or stack argument. 438353358Sdim yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister()); 439353358Sdim if (Arg.isRegister()) { 440353358Sdim raw_string_ostream OS(SA.RegisterName.Value); 441353358Sdim OS << printReg(Arg.getRegister(), &TRI); 442353358Sdim } else 443353358Sdim SA.StackOffset = Arg.getStackOffset(); 444353358Sdim // Check and update the optional mask. 445353358Sdim if (Arg.isMasked()) 446353358Sdim SA.Mask = Arg.getMask(); 447353358Sdim 448353358Sdim A = SA; 449353358Sdim return true; 450353358Sdim }; 451353358Sdim 452353358Sdim bool Any = false; 453353358Sdim Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer); 454353358Sdim Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr); 455353358Sdim Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr); 456353358Sdim Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr); 457353358Sdim Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID); 458353358Sdim Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit); 459353358Sdim Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize); 460353358Sdim Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX); 461353358Sdim Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY); 462353358Sdim Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ); 463353358Sdim Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo); 464353358Sdim Any |= convertArg(AI.PrivateSegmentWaveByteOffset, 465353358Sdim ArgInfo.PrivateSegmentWaveByteOffset); 466353358Sdim Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr); 467353358Sdim Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr); 468353358Sdim Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX); 469353358Sdim Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY); 470353358Sdim Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ); 471353358Sdim 472353358Sdim if (Any) 473353358Sdim return AI; 474353358Sdim 475353358Sdim return None; 476353358Sdim} 477353358Sdim 478353358Sdimyaml::SIMachineFunctionInfo::SIMachineFunctionInfo( 479353358Sdim const llvm::SIMachineFunctionInfo& MFI, 480353358Sdim const TargetRegisterInfo &TRI) 481353358Sdim : ExplicitKernArgSize(MFI.getExplicitKernArgSize()), 482353358Sdim MaxKernArgAlign(MFI.getMaxKernArgAlign()), 483353358Sdim LDSSize(MFI.getLDSSize()), 484353358Sdim IsEntryFunction(MFI.isEntryFunction()), 485353358Sdim NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()), 486353358Sdim MemoryBound(MFI.isMemoryBound()), 487353358Sdim WaveLimiter(MFI.needsWaveLimiter()), 488360784Sdim HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()), 489353358Sdim ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)), 490353358Sdim ScratchWaveOffsetReg(regToString(MFI.getScratchWaveOffsetReg(), TRI)), 491353358Sdim FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)), 492353358Sdim StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)), 493353358Sdim ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), 494353358Sdim Mode(MFI.getMode()) {} 495353358Sdim 496353358Sdimvoid yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { 497353358Sdim MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this); 498353358Sdim} 499353358Sdim 500353358Sdimbool SIMachineFunctionInfo::initializeBaseYamlFields( 501353358Sdim const yaml::SIMachineFunctionInfo &YamlMFI) { 502353358Sdim ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize; 503360784Sdim MaxKernArgAlign = assumeAligned(YamlMFI.MaxKernArgAlign); 504353358Sdim LDSSize = YamlMFI.LDSSize; 505360784Sdim HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress; 506353358Sdim IsEntryFunction = YamlMFI.IsEntryFunction; 507353358Sdim NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath; 508353358Sdim MemoryBound = YamlMFI.MemoryBound; 509353358Sdim WaveLimiter = YamlMFI.WaveLimiter; 510353358Sdim return false; 511353358Sdim} 512