//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10// SGPR spills, so must insert CSR SGPR spills as well as expand them.
11//
12// This pass must never create new SGPR virtual registers.
13//
14// FIXME: Must stop RegScavenger spills in later passes.
15//
16//===----------------------------------------------------------------------===//
17
18#include "AMDGPU.h"
19#include "GCNSubtarget.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "SIMachineFunctionInfo.h"
22#include "llvm/CodeGen/LiveIntervals.h"
23#include "llvm/CodeGen/MachineFrameInfo.h"
24#include "llvm/CodeGen/RegisterScavenging.h"
25#include "llvm/InitializePasses.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "si-lower-sgpr-spills"
30
/// Small vector of basic block pointers with inline storage for four entries;
/// used for the pass's save and restore block lists.
using MBBVector = SmallVector<MachineBasicBlock *, 4>;
32
33namespace {
34
35class SILowerSGPRSpills : public MachineFunctionPass {
36private:
37  const SIRegisterInfo *TRI = nullptr;
38  const SIInstrInfo *TII = nullptr;
39  LiveIntervals *LIS = nullptr;
40  SlotIndexes *Indexes = nullptr;
41
42  // Save and Restore blocks of the current function. Typically there is a
43  // single save block, unless Windows EH funclets are involved.
44  MBBVector SaveBlocks;
45  MBBVector RestoreBlocks;
46
47public:
48  static char ID;
49
50  SILowerSGPRSpills() : MachineFunctionPass(ID) {}
51
52  void calculateSaveRestoreBlocks(MachineFunction &MF);
53  bool spillCalleeSavedRegs(MachineFunction &MF);
54
55  bool runOnMachineFunction(MachineFunction &MF) override;
56
57  void getAnalysisUsage(AnalysisUsage &AU) const override {
58    AU.setPreservesAll();
59    MachineFunctionPass::getAnalysisUsage(AU);
60  }
61};
62
63} // end anonymous namespace
64
// Out-of-class definition of the static pass ID declared in SILowerSGPRSpills.
char SILowerSGPRSpills::ID = 0;

// Register the pass under the DEBUG_TYPE name ("si-lower-sgpr-spills") with
// its description string, and list the passes it declares as initialization
// dependencies.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// Reference to the pass ID exposed in the llvm namespace, so code outside this
// file can name the pass without seeing the class definition.
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
75
76/// Insert spill code for the callee-saved registers used in the function.
77static void insertCSRSaves(MachineBasicBlock &SaveBlock,
78                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
79                           LiveIntervals *LIS) {
80  MachineFunction &MF = *SaveBlock.getParent();
81  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
82  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
83  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
84  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
85  const SIRegisterInfo *RI = ST.getRegisterInfo();
86
87  MachineBasicBlock::iterator I = SaveBlock.begin();
88  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
89    const MachineRegisterInfo &MRI = MF.getRegInfo();
90
91    for (const CalleeSavedInfo &CS : CSI) {
92      // Insert the spill to the stack frame.
93      MCRegister Reg = CS.getReg();
94
95      MachineInstrSpan MIS(I, &SaveBlock);
96      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
97          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
98
99      // If this value was already livein, we probably have a direct use of the
100      // incoming register value, so don't kill at the spill point. This happens
101      // since we pass some special inputs (workgroup IDs) in the callee saved
102      // range.
103      const bool IsLiveIn = MRI.isLiveIn(Reg);
104      TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
105                              RC, TRI, Register());
106
107      if (Indexes) {
108        assert(std::distance(MIS.begin(), I) == 1);
109        MachineInstr &Inst = *std::prev(I);
110        Indexes->insertMachineInstrInMaps(Inst);
111      }
112
113      if (LIS)
114        LIS->removeAllRegUnitsForPhysReg(Reg);
115    }
116  }
117}
118
119/// Insert restore code for the callee-saved registers used in the function.
120static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
121                              MutableArrayRef<CalleeSavedInfo> CSI,
122                              SlotIndexes *Indexes, LiveIntervals *LIS) {
123  MachineFunction &MF = *RestoreBlock.getParent();
124  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
125  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
126  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
127  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
128  const SIRegisterInfo *RI = ST.getRegisterInfo();
129  // Restore all registers immediately before the return and any
130  // terminators that precede it.
131  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
132
133  // FIXME: Just emit the readlane/writelane directly
134  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
135    for (const CalleeSavedInfo &CI : reverse(CSI)) {
136      Register Reg = CI.getReg();
137      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
138          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
139
140      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
141                               Register());
142      assert(I != RestoreBlock.begin() &&
143             "loadRegFromStackSlot didn't insert any code!");
144      // Insert in reverse order.  loadRegFromStackSlot can insert
145      // multiple instructions.
146
147      if (Indexes) {
148        MachineInstr &Inst = *std::prev(I);
149        Indexes->insertMachineInstrInMaps(Inst);
150      }
151
152      if (LIS)
153        LIS->removeAllRegUnitsForPhysReg(Reg);
154    }
155  }
156}
157
158/// Compute the sets of entry and return blocks for saving and restoring
159/// callee-saved registers, and placing prolog and epilog code.
160void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
161  const MachineFrameInfo &MFI = MF.getFrameInfo();
162
163  // Even when we do not change any CSR, we still want to insert the
164  // prologue and epilogue of the function.
165  // So set the save points for those.
166
167  // Use the points found by shrink-wrapping, if any.
168  if (MFI.getSavePoint()) {
169    SaveBlocks.push_back(MFI.getSavePoint());
170    assert(MFI.getRestorePoint() && "Both restore and save must be set");
171    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
172    // If RestoreBlock does not have any successor and is not a return block
173    // then the end point is unreachable and we do not need to insert any
174    // epilogue.
175    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
176      RestoreBlocks.push_back(RestoreBlock);
177    return;
178  }
179
180  // Save refs to entry and return blocks.
181  SaveBlocks.push_back(&MF.front());
182  for (MachineBasicBlock &MBB : MF) {
183    if (MBB.isEHFuncletEntry())
184      SaveBlocks.push_back(&MBB);
185    if (MBB.isReturnBlock())
186      RestoreBlocks.push_back(&MBB);
187  }
188}
189
190// TODO: To support shrink wrapping, this would need to copy
191// PrologEpilogInserter's updateLiveness.
192static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
193  MachineBasicBlock &EntryBB = MF.front();
194
195  for (const CalleeSavedInfo &CSIReg : CSI)
196    EntryBB.addLiveIn(CSIReg.getReg());
197  EntryBB.sortUniqueLiveIns();
198}
199
200bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
201  MachineRegisterInfo &MRI = MF.getRegInfo();
202  const Function &F = MF.getFunction();
203  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
204  const SIFrameLowering *TFI = ST.getFrameLowering();
205  MachineFrameInfo &MFI = MF.getFrameInfo();
206  RegScavenger *RS = nullptr;
207
208  // Determine which of the registers in the callee save list should be saved.
209  BitVector SavedRegs;
210  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
211
212  // Add the code to save and restore the callee saved registers.
213  if (!F.hasFnAttribute(Attribute::Naked)) {
214    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
215    // necessary for verifier liveness checks.
216    MFI.setCalleeSavedInfoValid(true);
217
218    std::vector<CalleeSavedInfo> CSI;
219    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
220
221    for (unsigned I = 0; CSRegs[I]; ++I) {
222      MCRegister Reg = CSRegs[I];
223
224      if (SavedRegs.test(Reg)) {
225        const TargetRegisterClass *RC =
226          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
227        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
228                                           TRI->getSpillAlign(*RC), true);
229
230        CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
231      }
232    }
233
234    if (!CSI.empty()) {
235      for (MachineBasicBlock *SaveBlock : SaveBlocks)
236        insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
237
238      // Add live ins to save blocks.
239      assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
240      updateLiveness(MF, CSI);
241
242      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
243        insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
244      return true;
245    }
246  }
247
248  return false;
249}
250
251bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
252  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
253  TII = ST.getInstrInfo();
254  TRI = &TII->getRegisterInfo();
255
256  LIS = getAnalysisIfAvailable<LiveIntervals>();
257  Indexes = getAnalysisIfAvailable<SlotIndexes>();
258
259  assert(SaveBlocks.empty() && RestoreBlocks.empty());
260
261  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
262  // does, but somewhat simpler.
263  calculateSaveRestoreBlocks(MF);
264  bool HasCSRs = spillCalleeSavedRegs(MF);
265
266  MachineFrameInfo &MFI = MF.getFrameInfo();
267  MachineRegisterInfo &MRI = MF.getRegInfo();
268  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
269
270  if (!MFI.hasStackObjects() && !HasCSRs) {
271    SaveBlocks.clear();
272    RestoreBlocks.clear();
273    return false;
274  }
275
276  bool MadeChange = false;
277  bool NewReservedRegs = false;
278
279  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
280  // handled as SpilledToReg in regular PrologEpilogInserter.
281  const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
282                                  (HasCSRs || FuncInfo->hasSpilledSGPRs());
283  if (HasSGPRSpillToVGPR) {
284    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
285    // are spilled to VGPRs, in which case we can eliminate the stack usage.
286    //
287    // This operates under the assumption that only other SGPR spills are users
288    // of the frame index.
289
290    // To track the spill frame indices handled in this pass.
291    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
292
293    for (MachineBasicBlock &MBB : MF) {
294      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
295        if (!TII->isSGPRSpill(MI))
296          continue;
297
298        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
299        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
300        if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
301          NewReservedRegs = true;
302          bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
303              MI, FI, nullptr, Indexes, LIS);
304          (void)Spilled;
305          assert(Spilled && "failed to spill SGPR to VGPR when allocated");
306          SpillFIs.set(FI);
307        }
308      }
309    }
310
311    // FIXME: Adding to live-ins redundant with reserving registers.
312    for (MachineBasicBlock &MBB : MF) {
313      for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
314        MBB.addLiveIn(Reg);
315      MBB.sortUniqueLiveIns();
316
317      // FIXME: The dead frame indices are replaced with a null register from
318      // the debug value instructions. We should instead, update it with the
319      // correct register value. But not sure the register value alone is
320      // adequate to lower the DIExpression. It should be worked out later.
321      for (MachineInstr &MI : MBB) {
322        if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
323            !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
324            SpillFIs[MI.getOperand(0).getIndex()]) {
325          MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
326        }
327      }
328    }
329
330    // All those frame indices which are dead by now should be removed from the
331    // function frame. Otherwise, there is a side effect such as re-mapping of
332    // free frame index ids by the later pass(es) like "stack slot coloring"
333    // which in turn could mess-up with the book keeping of "frame index to VGPR
334    // lane".
335    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
336
337    MadeChange = true;
338  }
339
340  SaveBlocks.clear();
341  RestoreBlocks.clear();
342
343  // Updated the reserved registers with any VGPRs added for SGPR spills.
344  if (NewReservedRegs)
345    MRI.freezeReservedRegs(MF);
346
347  return MadeChange;
348}
349