1//===-- SILowerSGPRSPills.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10// SGPR spills, so must insert CSR SGPR spills as well as expand them.
11//
12// This pass must never create new SGPR virtual registers.
13//
14// FIXME: Must stop RegScavenger spills in later passes.
15//
16//===----------------------------------------------------------------------===//
17
18#include "AMDGPU.h"
19#include "GCNSubtarget.h"
20#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21#include "SIMachineFunctionInfo.h"
22#include "llvm/CodeGen/LiveIntervals.h"
23#include "llvm/CodeGen/MachineFrameInfo.h"
24#include "llvm/CodeGen/RegisterScavenging.h"
25#include "llvm/InitializePasses.h"
26
27using namespace llvm;
28
29#define DEBUG_TYPE "si-lower-sgpr-spills"
30
31using MBBVector = SmallVector<MachineBasicBlock *, 4>;
32
33namespace {
34
class SILowerSGPRSpills : public MachineFunctionPass {
private:
  const SIRegisterInfo *TRI = nullptr;
  const SIInstrInfo *TII = nullptr;
  // Optional analyses; null when unavailable. When present, they are kept up
  // to date as instructions are inserted/removed.
  LiveIntervals *LIS = nullptr;
  SlotIndexes *Indexes = nullptr;

  // Save and Restore blocks of the current function. Typically there is a
  // single save block, unless Windows EH funclets are involved.
  MBBVector SaveBlocks;
  MBBVector RestoreBlocks;

public:
  static char ID;

  SILowerSGPRSpills() : MachineFunctionPass(ID) {}

  // Populate SaveBlocks/RestoreBlocks for MF: either the shrink-wrapping
  // points recorded in MachineFrameInfo, or the entry block (plus EH funclet
  // entries) and all return blocks.
  void calculateSaveRestoreBlocks(MachineFunction &MF);
  // Insert CSR SGPR save/restore code. Returns true if any CSR spill was
  // emitted; the frame indices created for the CSRs are appended to
  // CalleeSavedFIs.
  bool spillCalleeSavedRegs(MachineFunction &MF,
                            SmallVectorImpl<int> &CalleeSavedFIs);
  // Conservatively extend the liveness of the wwm spill VGPRs across the
  // whole function (IMPLICIT_DEF at entry, KILL before returns).
  void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getClearedProperties() const override {
    // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
    return MachineFunctionProperties()
        .set(MachineFunctionProperties::Property::IsSSA)
        .set(MachineFunctionProperties::Property::NoVRegs);
  }
};
71
72} // end anonymous namespace
73
char SILowerSGPRSpills::ID = 0;

// Register the pass with the legacy pass manager, declaring the analyses it
// may use so they are scheduled before it when available.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// External handle used by the target pass pipeline to reference this pass.
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
84
85/// Insert spill code for the callee-saved registers used in the function.
86static void insertCSRSaves(MachineBasicBlock &SaveBlock,
87                           ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
88                           LiveIntervals *LIS) {
89  MachineFunction &MF = *SaveBlock.getParent();
90  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
91  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
92  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
93  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
94  const SIRegisterInfo *RI = ST.getRegisterInfo();
95
96  MachineBasicBlock::iterator I = SaveBlock.begin();
97  if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
98    const MachineRegisterInfo &MRI = MF.getRegInfo();
99
100    for (const CalleeSavedInfo &CS : CSI) {
101      // Insert the spill to the stack frame.
102      MCRegister Reg = CS.getReg();
103
104      MachineInstrSpan MIS(I, &SaveBlock);
105      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
106          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
107
108      // If this value was already livein, we probably have a direct use of the
109      // incoming register value, so don't kill at the spill point. This happens
110      // since we pass some special inputs (workgroup IDs) in the callee saved
111      // range.
112      const bool IsLiveIn = MRI.isLiveIn(Reg);
113      TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
114                              RC, TRI, Register());
115
116      if (Indexes) {
117        assert(std::distance(MIS.begin(), I) == 1);
118        MachineInstr &Inst = *std::prev(I);
119        Indexes->insertMachineInstrInMaps(Inst);
120      }
121
122      if (LIS)
123        LIS->removeAllRegUnitsForPhysReg(Reg);
124    }
125  }
126}
127
128/// Insert restore code for the callee-saved registers used in the function.
129static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
130                              MutableArrayRef<CalleeSavedInfo> CSI,
131                              SlotIndexes *Indexes, LiveIntervals *LIS) {
132  MachineFunction &MF = *RestoreBlock.getParent();
133  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
134  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
135  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
136  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
137  const SIRegisterInfo *RI = ST.getRegisterInfo();
138  // Restore all registers immediately before the return and any
139  // terminators that precede it.
140  MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
141
142  // FIXME: Just emit the readlane/writelane directly
143  if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
144    for (const CalleeSavedInfo &CI : reverse(CSI)) {
145      Register Reg = CI.getReg();
146      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
147          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
148
149      TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
150                               Register());
151      assert(I != RestoreBlock.begin() &&
152             "loadRegFromStackSlot didn't insert any code!");
153      // Insert in reverse order.  loadRegFromStackSlot can insert
154      // multiple instructions.
155
156      if (Indexes) {
157        MachineInstr &Inst = *std::prev(I);
158        Indexes->insertMachineInstrInMaps(Inst);
159      }
160
161      if (LIS)
162        LIS->removeAllRegUnitsForPhysReg(Reg);
163    }
164  }
165}
166
167/// Compute the sets of entry and return blocks for saving and restoring
168/// callee-saved registers, and placing prolog and epilog code.
169void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
170  const MachineFrameInfo &MFI = MF.getFrameInfo();
171
172  // Even when we do not change any CSR, we still want to insert the
173  // prologue and epilogue of the function.
174  // So set the save points for those.
175
176  // Use the points found by shrink-wrapping, if any.
177  if (MFI.getSavePoint()) {
178    SaveBlocks.push_back(MFI.getSavePoint());
179    assert(MFI.getRestorePoint() && "Both restore and save must be set");
180    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
181    // If RestoreBlock does not have any successor and is not a return block
182    // then the end point is unreachable and we do not need to insert any
183    // epilogue.
184    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
185      RestoreBlocks.push_back(RestoreBlock);
186    return;
187  }
188
189  // Save refs to entry and return blocks.
190  SaveBlocks.push_back(&MF.front());
191  for (MachineBasicBlock &MBB : MF) {
192    if (MBB.isEHFuncletEntry())
193      SaveBlocks.push_back(&MBB);
194    if (MBB.isReturnBlock())
195      RestoreBlocks.push_back(&MBB);
196  }
197}
198
199// TODO: To support shrink wrapping, this would need to copy
200// PrologEpilogInserter's updateLiveness.
201static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
202  MachineBasicBlock &EntryBB = MF.front();
203
204  for (const CalleeSavedInfo &CSIReg : CSI)
205    EntryBB.addLiveIn(CSIReg.getReg());
206  EntryBB.sortUniqueLiveIns();
207}
208
209bool SILowerSGPRSpills::spillCalleeSavedRegs(
210    MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
211  MachineRegisterInfo &MRI = MF.getRegInfo();
212  const Function &F = MF.getFunction();
213  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
214  const SIFrameLowering *TFI = ST.getFrameLowering();
215  MachineFrameInfo &MFI = MF.getFrameInfo();
216  RegScavenger *RS = nullptr;
217
218  // Determine which of the registers in the callee save list should be saved.
219  BitVector SavedRegs;
220  TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
221
222  // Add the code to save and restore the callee saved registers.
223  if (!F.hasFnAttribute(Attribute::Naked)) {
224    // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
225    // necessary for verifier liveness checks.
226    MFI.setCalleeSavedInfoValid(true);
227
228    std::vector<CalleeSavedInfo> CSI;
229    const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
230
231    for (unsigned I = 0; CSRegs[I]; ++I) {
232      MCRegister Reg = CSRegs[I];
233
234      if (SavedRegs.test(Reg)) {
235        const TargetRegisterClass *RC =
236          TRI->getMinimalPhysRegClass(Reg, MVT::i32);
237        int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
238                                           TRI->getSpillAlign(*RC), true);
239
240        CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
241        CalleeSavedFIs.push_back(JunkFI);
242      }
243    }
244
245    if (!CSI.empty()) {
246      for (MachineBasicBlock *SaveBlock : SaveBlocks)
247        insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
248
249      // Add live ins to save blocks.
250      assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
251      updateLiveness(MF, CSI);
252
253      for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
254        insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
255      return true;
256    }
257  }
258
259  return false;
260}
261
262void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
263                                                 LiveIntervals *LIS) {
264  // TODO: This is a workaround to avoid the unmodelled liveness computed with
265  // whole-wave virtual registers when allocated together with the regular VGPR
266  // virtual registers. Presently, the liveness computed during the regalloc is
267  // only uniform (or single lane aware) and it doesn't take account of the
268  // divergent control flow that exists for our GPUs. Since the WWM registers
269  // can modify inactive lanes, the wave-aware liveness should be computed for
270  // the virtual registers to accurately plot their interferences. Without
271  // having the divergent CFG for the function, it is difficult to implement the
272  // wave-aware liveness info. Until then, we conservatively extend the liveness
273  // of the wwm registers into the entire function so that they won't be reused
274  // without first spilling/splitting their liveranges.
275  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
276
277  // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks.
278  for (auto Reg : MFI->getSGPRSpillVGPRs()) {
279    for (MachineBasicBlock *SaveBlock : SaveBlocks) {
280      MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
281      auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
282                         TII->get(AMDGPU::IMPLICIT_DEF), Reg);
283      MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
284      // Set SGPR_SPILL asm printer flag
285      MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
286      if (LIS) {
287        LIS->InsertMachineInstrInMaps(*MIB);
288      }
289    }
290  }
291
292  // Insert the KILL in the return blocks to extend their liveness untill the
293  // end of function. Insert a separate KILL for each VGPR.
294  for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
295    MachineBasicBlock::iterator InsertBefore =
296        RestoreBlock->getFirstTerminator();
297    for (auto Reg : MFI->getSGPRSpillVGPRs()) {
298      auto MIB =
299          BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
300                  TII->get(TargetOpcode::KILL));
301      MIB.addReg(Reg);
302      if (LIS)
303        LIS->InsertMachineInstrInMaps(*MIB);
304    }
305  }
306}
307
// Pass entry point: insert CSR SGPR spills, then lower SGPR spill pseudos
// into VGPR lanes where possible. Returns true if the function was modified.
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();

  // Optional analyses; may be null. When present, they are updated in place
  // as spill code is inserted and frame indices eliminated.
  LIS = getAnalysisIfAvailable<LiveIntervals>();
  Indexes = getAnalysisIfAvailable<SlotIndexes>();

  assert(SaveBlocks.empty() && RestoreBlocks.empty());

  // First, expose any CSR SGPR spills. This is mostly the same as what PEI
  // does, but somewhat simpler.
  calculateSaveRestoreBlocks(MF);
  SmallVector<int> CalleeSavedFIs;
  bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Nothing to lower: no stack objects and no CSR spills were created.
  if (!MFI.hasStackObjects() && !HasCSRs) {
    SaveBlocks.clear();
    RestoreBlocks.clear();
    return false;
  }

  bool MadeChange = false;
  bool SpilledToVirtVGPRLanes = false;

  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
  // handled as SpilledToReg in regular PrologEpilogInserter.
  const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
                                  (HasCSRs || FuncInfo->hasSpilledSGPRs());
  if (HasSGPRSpillToVGPR) {
    // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
    // are spilled to VGPRs, in which case we can eliminate the stack usage.
    //
    // This operates under the assumption that only other SGPR spills are users
    // of the frame index.

    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    for (MachineBasicBlock &MBB : MF) {
      // early_inc_range: eliminating the frame index may erase/replace MI.
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        if (!TII->isSGPRSpill(MI))
          continue;

        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
        assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);

        // CSR spills go to physical VGPR lanes; everything else goes to
        // virtual VGPR lanes (allocated later by regalloc).
        bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
        if (IsCalleeSaveSGPRSpill) {
          // Spill callee-saved SGPRs into physical VGPR lanes.

          // TODO: This is to ensure the CFIs are static for efficient frame
          // unwinding in the debugger. Spilling them into virtual VGPR lanes
          // involve regalloc to allocate the physical VGPRs and that might
          // cause intermediate spill/split of such liveranges for successful
          // allocation. This would result in broken CFI encoding unless the
          // regalloc aware CFI generation to insert new CFIs along with the
          // intermediate spills is implemented. There is no such support
          // currently exist in the LLVM compiler.
          if (FuncInfo->allocateSGPRSpillToVGPRLane(
                  MF, FI, /*SpillToPhysVGPRLane=*/true)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS, true);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to physical VGPR lane when allocated");
          }
        } else {
          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
                MI, FI, nullptr, Indexes, LIS);
            if (!Spilled)
              llvm_unreachable(
                  "failed to spill SGPR to virtual VGPR lane when allocated");
            SpillFIs.set(FI);
            SpilledToVirtVGPRLanes = true;
          }
        }
      }
    }

    if (SpilledToVirtVGPRLanes) {
      extendWWMVirtRegLiveness(MF, LIS);
      if (LIS) {
        // Compute the LiveInterval for the newly created virtual registers.
        for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
          LIS->createAndComputeVirtRegInterval(Reg);
      }
    }

    for (MachineBasicBlock &MBB : MF) {
      // FIXME: The dead frame indices are replaced with a null register from
      // the debug value instructions. We should instead, update it with the
      // correct register value. But not sure the register value alone is
      // adequate to lower the DIExpression. It should be worked out later.
      for (MachineInstr &MI : MBB) {
        if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
            !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
            SpillFIs[MI.getOperand(0).getIndex()]) {
          MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
        }
      }
    }

    // All those frame indices which are dead by now should be removed from the
    // function frame. Otherwise, there is a side effect such as re-mapping of
    // free frame index ids by the later pass(es) like "stack slot coloring"
    // which in turn could mess-up with the book keeping of "frame index to VGPR
    // lane".
    FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);

    MadeChange = true;
  }

  if (SpilledToVirtVGPRLanes) {
    const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
    // Shift back the reserved SGPR for EXEC copy into the lowest range.
    // This SGPR is reserved to handle the whole-wave spill/copy operations
    // that might get inserted during vgpr regalloc.
    Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
    if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
                             TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
      FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
  } else {
    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
    // spills/copies. Reset the SGPR reserved for EXEC copy.
    FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
  }

  // Reset per-function state for the next run of this pass instance.
  SaveBlocks.clear();
  RestoreBlocks.clear();

  return MadeChange;
}
446