1292915Sdim//===----------------------- SIFrameLowering.cpp --------------------------===// 2292915Sdim// 3292915Sdim// The LLVM Compiler Infrastructure 4292915Sdim// 5292915Sdim// This file is distributed under the University of Illinois Open Source 6292915Sdim// License. See LICENSE.TXT for details. 7292915Sdim// 8292915Sdim//==-----------------------------------------------------------------------===// 9292915Sdim 10292915Sdim#include "SIFrameLowering.h" 11292915Sdim#include "SIInstrInfo.h" 12292915Sdim#include "SIMachineFunctionInfo.h" 13292915Sdim#include "SIRegisterInfo.h" 14292915Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 15292915Sdim#include "llvm/CodeGen/MachineFunction.h" 16292915Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 17292915Sdim#include "llvm/CodeGen/RegisterScavenging.h" 18292915Sdim 19292915Sdimusing namespace llvm; 20292915Sdim 21292915Sdim 22292915Sdimstatic bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo, 23292915Sdim const MachineFrameInfo *FrameInfo) { 24292915Sdim if (!FuncInfo->hasSpilledSGPRs()) 25292915Sdim return false; 26292915Sdim 27292915Sdim if (FuncInfo->hasSpilledVGPRs()) 28292915Sdim return false; 29292915Sdim 30292915Sdim for (int I = FrameInfo->getObjectIndexBegin(), 31292915Sdim E = FrameInfo->getObjectIndexEnd(); I != E; ++I) { 32292915Sdim if (!FrameInfo->isSpillSlotObjectIndex(I)) 33292915Sdim return false; 34292915Sdim } 35292915Sdim 36292915Sdim return true; 37292915Sdim} 38292915Sdim 39292915Sdimstatic ArrayRef<MCPhysReg> getAllSGPR128() { 40292915Sdim return makeArrayRef(AMDGPU::SReg_128RegClass.begin(), 41292915Sdim AMDGPU::SReg_128RegClass.getNumRegs()); 42292915Sdim} 43292915Sdim 44292915Sdimstatic ArrayRef<MCPhysReg> getAllSGPRs() { 45292915Sdim return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), 46292915Sdim AMDGPU::SGPR_32RegClass.getNumRegs()); 47292915Sdim} 48292915Sdim 49292915Sdimvoid SIFrameLowering::emitPrologue(MachineFunction &MF, 50292915Sdim MachineBasicBlock &MBB) const { 51292915Sdim if (!MF.getFrameInfo()->hasStackObjects()) 52292915Sdim return; 53292915Sdim 54292915Sdim assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); 55292915Sdim 56292915Sdim SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 57292915Sdim 58292915Sdim // If we only have SGPR spills, we won't actually be using scratch memory 59292915Sdim // since these spill to VGPRs. 60292915Sdim // 61292915Sdim // FIXME: We should be cleaning up these unused SGPR spill frame indices 62292915Sdim // somewhere. 63292915Sdim if (hasOnlySGPRSpills(MFI, MF.getFrameInfo())) 64292915Sdim return; 65292915Sdim 66292915Sdim const SIInstrInfo *TII = 67292915Sdim static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); 68292915Sdim const SIRegisterInfo *TRI = &TII->getRegisterInfo(); 69292915Sdim const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); 70292915Sdim 71292915Sdim // We need to insert initialization of the scratch resource descriptor. 72292915Sdim unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); 73292915Sdim assert(ScratchRsrcReg != AMDGPU::NoRegister); 74292915Sdim 75292915Sdim unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); 76292915Sdim assert(ScratchWaveOffsetReg != AMDGPU::NoRegister); 77292915Sdim 78292915Sdim unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue( 79292915Sdim MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); 80292915Sdim 81292915Sdim unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; 82292915Sdim if (ST.isAmdHsaOS()) { 83292915Sdim PreloadedPrivateBufferReg = TRI->getPreloadedValue( 84292915Sdim MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); 85292915Sdim } 86292915Sdim 87292915Sdim // If we reserved the original input registers, we don't need to copy to the 88292915Sdim // reserved registers. 89292915Sdim if (ScratchRsrcReg == PreloadedPrivateBufferReg) { 90292915Sdim // We should always reserve these 5 registers at the same time. 91292915Sdim assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg && 92292915Sdim "scratch wave offset and private segment buffer inconsistent"); 93292915Sdim return; 94292915Sdim } 95292915Sdim 96292915Sdim 97292915Sdim // We added live-ins during argument lowering, but since they were not used 98292915Sdim // they were deleted. We're adding the uses now, so add them back. 99292915Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 100292915Sdim MRI.addLiveIn(PreloadedScratchWaveOffsetReg); 101292915Sdim MBB.addLiveIn(PreloadedScratchWaveOffsetReg); 102292915Sdim 103292915Sdim if (ST.isAmdHsaOS()) { 104292915Sdim MRI.addLiveIn(PreloadedPrivateBufferReg); 105292915Sdim MBB.addLiveIn(PreloadedPrivateBufferReg); 106292915Sdim } 107292915Sdim 108293265Sdim if (!ST.hasSGPRInitBug()) { 109293265Sdim // We reserved the last registers for this. Shift it down to the end of those 110293265Sdim // which were actually used. 111293265Sdim // 112293265Sdim // FIXME: It might be safer to use a pseudoregister before replacement. 113292915Sdim 114293265Sdim // FIXME: We should be able to eliminate unused input registers. We only 115293265Sdim // cannot do this for the resources required for scratch access. For now we 116293265Sdim // skip over user SGPRs and may leave unused holes. 117292915Sdim 118293265Sdim // We find the resource first because it has an alignment requirement. 119293265Sdim if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) { 120293265Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 121292915Sdim 122293265Sdim unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4; 123293265Sdim // Skip the last 2 elements because the last one is reserved for VCC, and 124293265Sdim // this is the 2nd to last element already. 125293265Sdim for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) { 126293265Sdim // Pick the first unallocated one. Make sure we don't clobber the other 127293265Sdim // reserved input we needed. 128293265Sdim if (!MRI.isPhysRegUsed(Reg)) { 129293265Sdim assert(MRI.isAllocatable(Reg)); 130293265Sdim MRI.replaceRegWith(ScratchRsrcReg, Reg); 131293265Sdim ScratchRsrcReg = Reg; 132293265Sdim MFI->setScratchRSrcReg(ScratchRsrcReg); 133293265Sdim break; 134293265Sdim } 135292915Sdim } 136292915Sdim } 137292915Sdim 138293265Sdim if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) { 139293265Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 140293265Sdim // Skip the last 2 elements because the last one is reserved for VCC, and 141293265Sdim // this is the 2nd to last element already. 142293265Sdim unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); 143293265Sdim for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) { 144293265Sdim // Pick the first unallocated SGPR. Be careful not to pick an alias of the 145293265Sdim // scratch descriptor, since we haven���t added its uses yet. 146293265Sdim if (!MRI.isPhysRegUsed(Reg)) { 147293265Sdim assert(MRI.isAllocatable(Reg) && 148293265Sdim !TRI->isSubRegisterEq(ScratchRsrcReg, Reg)); 149292915Sdim 150293265Sdim MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); 151293265Sdim ScratchWaveOffsetReg = Reg; 152293265Sdim MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg); 153293265Sdim break; 154293265Sdim } 155292915Sdim } 156292915Sdim } 157292915Sdim } 158292915Sdim 159292915Sdim 160292915Sdim assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg)); 161292915Sdim 162292915Sdim const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); 163292915Sdim MachineBasicBlock::iterator I = MBB.begin(); 164292915Sdim DebugLoc DL; 165292915Sdim 166292915Sdim if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) { 167292915Sdim // Make sure we emit the copy for the offset first. We may have chosen to copy 168292915Sdim // the buffer resource into a register that aliases the input offset register. 169292915Sdim BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg) 170292915Sdim .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); 171292915Sdim } 172292915Sdim 173292915Sdim if (ST.isAmdHsaOS()) { 174292915Sdim // Insert copies from argument register. 175292915Sdim assert( 176292915Sdim !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) && 177292915Sdim !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg)); 178292915Sdim 179292915Sdim unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); 180292915Sdim unsigned Rsrc23 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2_sub3); 181292915Sdim 182292915Sdim unsigned Lo = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub0_sub1); 183292915Sdim unsigned Hi = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub2_sub3); 184292915Sdim 185292915Sdim const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64); 186292915Sdim 187292915Sdim BuildMI(MBB, I, DL, SMovB64, Rsrc01) 188292915Sdim .addReg(Lo, RegState::Kill); 189292915Sdim BuildMI(MBB, I, DL, SMovB64, Rsrc23) 190292915Sdim .addReg(Hi, RegState::Kill); 191292915Sdim } else { 192292915Sdim unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); 193292915Sdim unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); 194292915Sdim unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); 195292915Sdim unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); 196292915Sdim 197292915Sdim // Use relocations to get the pointer, and setup the other bits manually. 198292915Sdim uint64_t Rsrc23 = TII->getScratchRsrcWords23(); 199292915Sdim BuildMI(MBB, I, DL, SMovB32, Rsrc0) 200292915Sdim .addExternalSymbol("SCRATCH_RSRC_DWORD0") 201292915Sdim .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 202292915Sdim 203292915Sdim BuildMI(MBB, I, DL, SMovB32, Rsrc1) 204292915Sdim .addExternalSymbol("SCRATCH_RSRC_DWORD1") 205292915Sdim .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 206292915Sdim 207292915Sdim BuildMI(MBB, I, DL, SMovB32, Rsrc2) 208292915Sdim .addImm(Rsrc23 & 0xffffffff) 209292915Sdim .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 210292915Sdim 211292915Sdim BuildMI(MBB, I, DL, SMovB32, Rsrc3) 212292915Sdim .addImm(Rsrc23 >> 32) 213292915Sdim .addReg(ScratchRsrcReg, RegState::ImplicitDefine); 214292915Sdim } 215292915Sdim 216292915Sdim // Make the register selected live throughout the function. 217292915Sdim for (MachineBasicBlock &OtherBB : MF) { 218292915Sdim if (&OtherBB == &MBB) 219292915Sdim continue; 220292915Sdim 221292915Sdim OtherBB.addLiveIn(ScratchRsrcReg); 222292915Sdim OtherBB.addLiveIn(ScratchWaveOffsetReg); 223292915Sdim } 224292915Sdim} 225292915Sdim 226292915Sdimvoid SIFrameLowering::processFunctionBeforeFrameFinalized( 227292915Sdim MachineFunction &MF, 228292915Sdim RegScavenger *RS) const { 229292915Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 230292915Sdim 231292915Sdim if (!MFI->hasStackObjects()) 232292915Sdim return; 233292915Sdim 234292915Sdim bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects(); 235292915Sdim 236292915Sdim assert((RS || !MayNeedScavengingEmergencySlot) && 237292915Sdim "RegScavenger required if spilling"); 238292915Sdim 239292915Sdim if (MayNeedScavengingEmergencySlot) { 240292915Sdim int ScavengeFI = MFI->CreateSpillStackObject( 241292915Sdim AMDGPU::SGPR_32RegClass.getSize(), 242292915Sdim AMDGPU::SGPR_32RegClass.getAlignment()); 243292915Sdim RS->addScavengingFrameIndex(ScavengeFI); 244292915Sdim } 245292915Sdim} 246