1//===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
11/// structures and waterfall loops.
12///
13/// When we do structurization, we usually transform an if-else into two
14/// successive if-then (with a flow block to do predicate inversion). Consider a
15/// simple case after structurization: A divergent value %a was defined before
16/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
17///    bb.if:
18///      %a = ...
19///      ...
20///    bb.then:
21///      ... = op %a
22///      ... // %a can be dead here
23///    bb.flow:
24///      ...
25///    bb.else:
26///      ... = %a
27///      ...
28///    bb.endif
29///
30///  As register allocator has no idea of the thread-control-flow, it will just
31///  assume %a would be alive in the whole range of bb.then because of a later
32///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
33///  to exec mask. For this if-else case, the lanes active in bb.then will be
34///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
35///  after the last use in bb.then until the end of the block. The reason is
36///  the instructions in bb.then will only overwrite lanes that will never be
37///  accessed in bb.else.
38///
39///  This pass aims to tell register allocator that %a is in-fact dead,
40///  through inserting a phi-node in bb.flow saying that %a is undef when coming
41///  from bb.then, and then replace the uses in the bb.else with the result of
42///  newly inserted phi.
43///
44///  Two key conditions must be met to ensure correctness:
45///  1.) The def-point should be in the same loop-level as if-else-endif to make
46///      sure the second loop iteration still get correct data.
47///  2.) There should be no further uses after the IF-ELSE region.
48///
49///
50/// Waterfall loops get inserted around instructions that use divergent values
51/// but can only be executed with a uniform value. For example an indirect call
52/// to a divergent address:
53///    bb.start:
54///      %a = ...
55///      %fun = ...
56///      ...
57///    bb.loop:
58///      call %fun (%a)
59///      ... // %a can be dead here
60///      loop %bb.loop
61///
62///  The loop block is executed multiple times, but it is run exactly once for
63///  each active lane. Similar to the if-else case, the register allocator
64///  assumes that %a is live throughout the loop as it is used again in the next
65///  iteration. If %a is a VGPR that is unused after the loop, it does not need
66///  to be live after its last use in the loop block. By inserting a phi-node at
67///  the start of bb.loop that is undef when coming from bb.loop, the register
68///  allocation knows that the value of %a does not need to be preserved through
69///  iterations of the loop.
70///
71//
72//===----------------------------------------------------------------------===//
73
74#include "AMDGPU.h"
75#include "GCNSubtarget.h"
76#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
77#include "SIMachineFunctionInfo.h"
78#include "llvm/CodeGen/LiveVariables.h"
79#include "llvm/CodeGen/MachineDominators.h"
80#include "llvm/CodeGen/MachineLoopInfo.h"
81#include "llvm/CodeGen/TargetRegisterInfo.h"
82#include "llvm/InitializePasses.h"
83
84using namespace llvm;
85
86#define DEBUG_TYPE "si-opt-vgpr-liverange"
87
88namespace {
89
90class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
91private:
92  const SIRegisterInfo *TRI = nullptr;
93  const SIInstrInfo *TII = nullptr;
94  LiveVariables *LV = nullptr;
95  MachineDominatorTree *MDT = nullptr;
96  const MachineLoopInfo *Loops = nullptr;
97  MachineRegisterInfo *MRI = nullptr;
98
99public:
100  static char ID;
101
102  MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
103
104  void collectElseRegionBlocks(MachineBasicBlock *Flow,
105                               MachineBasicBlock *Endif,
106                               SmallSetVector<MachineBasicBlock *, 16> &) const;
107
108  void
109  collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
110                            MachineBasicBlock *Endif,
111                            SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
112                            SmallVectorImpl<Register> &CandidateRegs) const;
113
114  void collectWaterfallCandidateRegisters(
115      MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
116      SmallSetVector<Register, 16> &CandidateRegs,
117      SmallSetVector<MachineBasicBlock *, 2> &Blocks,
118      SmallVectorImpl<MachineInstr *> &Instructions) const;
119
120  void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
121                             SmallVectorImpl<MachineInstr *> &Uses) const;
122
123  void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
124                                   MachineBasicBlock *Flow) const;
125
126  void updateLiveRangeInElseRegion(
127      Register Reg, Register NewReg, MachineBasicBlock *Flow,
128      MachineBasicBlock *Endif,
129      SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
130
131  void
132  optimizeLiveRange(Register Reg, MachineBasicBlock *If,
133                    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
134                    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
135
136  void optimizeWaterfallLiveRange(
137      Register Reg, MachineBasicBlock *LoopHeader,
138      SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks,
139      SmallVectorImpl<MachineInstr *> &Instructions) const;
140
141  SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
142
143  bool runOnMachineFunction(MachineFunction &MF) override;
144
145  StringRef getPassName() const override {
146    return "SI Optimize VGPR LiveRange";
147  }
148
149  void getAnalysisUsage(AnalysisUsage &AU) const override {
150    AU.addRequired<LiveVariables>();
151    AU.addRequired<MachineDominatorTree>();
152    AU.addRequired<MachineLoopInfo>();
153    AU.addPreserved<LiveVariables>();
154    AU.addPreserved<MachineDominatorTree>();
155    AU.addPreserved<MachineLoopInfo>();
156    MachineFunctionPass::getAnalysisUsage(AU);
157  }
158
159  MachineFunctionProperties getRequiredProperties() const override {
160    return MachineFunctionProperties().set(
161        MachineFunctionProperties::Property::IsSSA);
162  }
163
164  MachineFunctionProperties getClearedProperties() const override {
165    return MachineFunctionProperties().set(
166        MachineFunctionProperties::Property::NoPHIs);
167  }
168};
169
170} // end anonymous namespace
171
172// Check whether the MBB is a else flow block and get the branching target which
173// is the Endif block
174MachineBasicBlock *
175SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
176  for (auto &BR : MBB->terminators()) {
177    if (BR.getOpcode() == AMDGPU::SI_ELSE)
178      return BR.getOperand(2).getMBB();
179  }
180  return nullptr;
181}
182
183void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
184    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
185    SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
186  assert(Flow != Endif);
187
188  MachineBasicBlock *MBB = Endif;
189  unsigned Cur = 0;
190  while (MBB) {
191    for (auto *Pred : MBB->predecessors()) {
192      if (Pred != Flow && !Blocks.contains(Pred))
193        Blocks.insert(Pred);
194    }
195
196    if (Cur < Blocks.size())
197      MBB = Blocks[Cur++];
198    else
199      MBB = nullptr;
200  }
201
202  LLVM_DEBUG({
203    dbgs() << "Found Else blocks: ";
204    for (auto *MBB : Blocks)
205      dbgs() << printMBBReference(*MBB) << ' ';
206    dbgs() << '\n';
207  });
208}
209
210/// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
211void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
212    Register Reg, MachineBasicBlock *MBB,
213    SmallVectorImpl<MachineInstr *> &Uses) const {
214  for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
215    if (UseMI.getParent() == MBB && !UseMI.isPHI())
216      Uses.push_back(&UseMI);
217  }
218}
219
220/// Collect the killed registers in the ELSE region which are not alive through
221/// the whole THEN region.
222void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
223    MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
224    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
225    SmallVectorImpl<Register> &CandidateRegs) const {
226
227  SmallSet<Register, 8> KillsInElse;
228
229  for (auto *Else : ElseBlocks) {
230    for (auto &MI : Else->instrs()) {
231      if (MI.isDebugInstr())
232        continue;
233
234      for (auto &MO : MI.operands()) {
235        if (!MO.isReg() || !MO.getReg() || MO.isDef())
236          continue;
237
238        Register MOReg = MO.getReg();
239        // We can only optimize AGPR/VGPR virtual register
240        if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
241          continue;
242
243        if (MO.readsReg()) {
244          LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
245          const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
246          // Make sure two conditions are met:
247          // a.) the value is defined before/in the IF block
248          // b.) should be defined in the same loop-level.
249          if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
250              Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
251            // Check if the register is live into the endif block. If not,
252            // consider it killed in the else region.
253            LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
254            if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
255              KillsInElse.insert(MOReg);
256            } else {
257              LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
258                                << " as Live in Endif\n");
259            }
260          }
261        }
262      }
263    }
264  }
265
266  // Check the phis in the Endif, looking for value coming from the ELSE
267  // region. Make sure the phi-use is the last use.
268  for (auto &MI : Endif->phis()) {
269    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
270      auto &MO = MI.getOperand(Idx);
271      auto *Pred = MI.getOperand(Idx + 1).getMBB();
272      if (Pred == Flow)
273        continue;
274      assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
275
276      if (!MO.isReg() || !MO.getReg() || MO.isUndef())
277        continue;
278
279      Register Reg = MO.getReg();
280      if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
281        continue;
282
283      LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
284
285      if (VI.isLiveIn(*Endif, Reg, *MRI)) {
286        LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
287                          << " as Live in Endif\n");
288        continue;
289      }
290      // Make sure two conditions are met:
291      // a.) the value is defined before/in the IF block
292      // b.) should be defined in the same loop-level.
293      const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
294      if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
295          Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
296        KillsInElse.insert(Reg);
297    }
298  }
299
300  auto IsLiveThroughThen = [&](Register Reg) {
301    for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
302         ++I) {
303      if (!I->readsReg())
304        continue;
305      auto *UseMI = I->getParent();
306      auto *UseMBB = UseMI->getParent();
307      if (UseMBB == Flow || UseMBB == Endif) {
308        if (!UseMI->isPHI())
309          return true;
310
311        auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
312        // The register is live through the path If->Flow or Flow->Endif.
313        // we should not optimize for such cases.
314        if ((UseMBB == Flow && IncomingMBB != If) ||
315            (UseMBB == Endif && IncomingMBB == Flow))
316          return true;
317      }
318    }
319    return false;
320  };
321
322  for (auto Reg : KillsInElse) {
323    if (!IsLiveThroughThen(Reg))
324      CandidateRegs.push_back(Reg);
325  }
326}
327
328/// Collect the registers used in the waterfall loop block that are defined
329/// before.
330void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
331    MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd,
332    SmallSetVector<Register, 16> &CandidateRegs,
333    SmallSetVector<MachineBasicBlock *, 2> &Blocks,
334    SmallVectorImpl<MachineInstr *> &Instructions) const {
335
336  // Collect loop instructions, potentially spanning multiple blocks
337  auto *MBB = LoopHeader;
338  for (;;) {
339    Blocks.insert(MBB);
340    for (auto &MI : *MBB) {
341      if (MI.isDebugInstr())
342        continue;
343      Instructions.push_back(&MI);
344    }
345    if (MBB == LoopEnd)
346      break;
347
348    if ((MBB != LoopHeader && MBB->pred_size() != 1) ||
349        (MBB == LoopHeader && MBB->pred_size() != 2) || MBB->succ_size() != 1) {
350      LLVM_DEBUG(dbgs() << "Unexpected edges in CFG, ignoring loop\n");
351      return;
352    }
353
354    MBB = *MBB->succ_begin();
355  }
356
357  for (auto *I : Instructions) {
358    auto &MI = *I;
359
360    for (auto &MO : MI.all_uses()) {
361      if (!MO.getReg())
362        continue;
363
364      Register MOReg = MO.getReg();
365      // We can only optimize AGPR/VGPR virtual register
366      if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
367        continue;
368
369      if (MO.readsReg()) {
370        MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
371        // Make sure the value is defined before the LOOP block
372        if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) {
373          // If the variable is used after the loop, the register coalescer will
374          // merge the newly created register and remove the phi node again.
375          // Just do nothing in that case.
376          LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
377          bool IsUsed = false;
378          for (auto *Succ : LoopEnd->successors()) {
379            if (!Blocks.contains(Succ) &&
380                OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
381              IsUsed = true;
382              break;
383            }
384          }
385          if (!IsUsed) {
386            LLVM_DEBUG(dbgs() << "Found candidate reg: "
387                              << printReg(MOReg, TRI, 0, MRI) << '\n');
388            CandidateRegs.insert(MOReg);
389          } else {
390            LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
391                              << printReg(MOReg, TRI, 0, MRI) << '\n');
392          }
393        }
394      }
395    }
396  }
397}
398
399// Re-calculate the liveness of \p Reg in the THEN-region
400void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
401    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
402  SetVector<MachineBasicBlock *> Blocks;
403  SmallVector<MachineBasicBlock *> WorkList({If});
404
405  // Collect all successors until we see the flow block, where we should
406  // reconverge.
407  while (!WorkList.empty()) {
408    auto *MBB = WorkList.pop_back_val();
409    for (auto *Succ : MBB->successors()) {
410      if (Succ != Flow && !Blocks.contains(Succ)) {
411        WorkList.push_back(Succ);
412        Blocks.insert(Succ);
413      }
414    }
415  }
416
417  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
418  for (MachineBasicBlock *MBB : Blocks) {
419    // Clear Live bit, as we will recalculate afterwards
420    LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
421                      << '\n');
422    OldVarInfo.AliveBlocks.reset(MBB->getNumber());
423  }
424
425  SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
426
427  // Get the blocks the Reg should be alive through
428  for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
429       ++I) {
430    auto *UseMI = I->getParent();
431    if (UseMI->isPHI() && I->readsReg()) {
432      if (Blocks.contains(UseMI->getParent()))
433        PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
434    }
435  }
436
437  for (MachineBasicBlock *MBB : Blocks) {
438    SmallVector<MachineInstr *> Uses;
439    // PHI instructions has been processed before.
440    findNonPHIUsesInBlock(Reg, MBB, Uses);
441
442    if (Uses.size() == 1) {
443      LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
444                        << printMBBReference(*MBB) << '\n');
445      LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
446    } else if (Uses.size() > 1) {
447      // Process the instructions in-order
448      LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
449                        << printMBBReference(*MBB) << '\n');
450      for (MachineInstr &MI : *MBB) {
451        if (llvm::is_contained(Uses, &MI))
452          LV->HandleVirtRegUse(Reg, MBB, MI);
453      }
454    }
455
456    // Mark Reg alive through the block if this is a PHI incoming block
457    if (PHIIncoming.contains(MBB))
458      LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
459                                  MBB);
460  }
461
462  // Set the isKilled flag if we get new Kills in the THEN region.
463  for (auto *MI : OldVarInfo.Kills) {
464    if (Blocks.contains(MI->getParent()))
465      MI->addRegisterKilled(Reg, TRI);
466  }
467}
468
469void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
470    Register Reg, Register NewReg, MachineBasicBlock *Flow,
471    MachineBasicBlock *Endif,
472    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
473  LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
474  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
475
476  // Transfer aliveBlocks from Reg to NewReg
477  for (auto *MBB : ElseBlocks) {
478    unsigned BBNum = MBB->getNumber();
479    if (OldVarInfo.AliveBlocks.test(BBNum)) {
480      NewVarInfo.AliveBlocks.set(BBNum);
481      LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
482                        << '\n');
483      OldVarInfo.AliveBlocks.reset(BBNum);
484    }
485  }
486
487  // Transfer the possible Kills in ElseBlocks from Reg to NewReg
488  auto I = OldVarInfo.Kills.begin();
489  while (I != OldVarInfo.Kills.end()) {
490    if (ElseBlocks.contains((*I)->getParent())) {
491      NewVarInfo.Kills.push_back(*I);
492      I = OldVarInfo.Kills.erase(I);
493    } else {
494      ++I;
495    }
496  }
497}
498
499void SIOptimizeVGPRLiveRange::optimizeLiveRange(
500    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
501    MachineBasicBlock *Endif,
502    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
503  // Insert a new PHI, marking the value from the THEN region being
504  // undef.
505  LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
506  const auto *RC = MRI->getRegClass(Reg);
507  Register NewReg = MRI->createVirtualRegister(RC);
508  Register UndefReg = MRI->createVirtualRegister(RC);
509  MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
510                                    TII->get(TargetOpcode::PHI), NewReg);
511  for (auto *Pred : Flow->predecessors()) {
512    if (Pred == If)
513      PHI.addReg(Reg).addMBB(Pred);
514    else
515      PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
516  }
517
518  // Replace all uses in the ELSE region or the PHIs in ENDIF block
519  // Use early increment range because setReg() will update the linked list.
520  for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
521    auto *UseMI = O.getParent();
522    auto *UseBlock = UseMI->getParent();
523    // Replace uses in Endif block
524    if (UseBlock == Endif) {
525      if (UseMI->isPHI())
526        O.setReg(NewReg);
527      else if (UseMI->isDebugInstr())
528        continue;
529      else {
530        // DetectDeadLanes may mark register uses as undef without removing
531        // them, in which case a non-phi instruction using the original register
532        // may exist in the Endif block even though the register is not live
533        // into it.
534        assert(!O.readsReg());
535      }
536      continue;
537    }
538
539    // Replace uses in Else region
540    if (ElseBlocks.contains(UseBlock))
541      O.setReg(NewReg);
542  }
543
544  // The optimized Reg is not alive through Flow blocks anymore.
545  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
546  OldVarInfo.AliveBlocks.reset(Flow->getNumber());
547
548  updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
549  updateLiveRangeInThenRegion(Reg, If, Flow);
550}
551
552void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
553    Register Reg, MachineBasicBlock *LoopHeader,
554    SmallSetVector<MachineBasicBlock *, 2> &Blocks,
555    SmallVectorImpl<MachineInstr *> &Instructions) const {
556  // Insert a new PHI, marking the value from the last loop iteration undef.
557  LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
558  const auto *RC = MRI->getRegClass(Reg);
559  Register NewReg = MRI->createVirtualRegister(RC);
560  Register UndefReg = MRI->createVirtualRegister(RC);
561
562  // Replace all uses in the LOOP region
563  // Use early increment range because setReg() will update the linked list.
564  for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
565    auto *UseMI = O.getParent();
566    auto *UseBlock = UseMI->getParent();
567    // Replace uses in Loop blocks
568    if (Blocks.contains(UseBlock))
569      O.setReg(NewReg);
570  }
571
572  MachineInstrBuilder PHI =
573      BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(),
574              TII->get(TargetOpcode::PHI), NewReg);
575  for (auto *Pred : LoopHeader->predecessors()) {
576    if (Blocks.contains(Pred))
577      PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
578    else
579      PHI.addReg(Reg).addMBB(Pred);
580  }
581
582  LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
583  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
584
585  // Find last use and mark as kill
586  MachineInstr *Kill = nullptr;
587  for (auto *MI : reverse(Instructions)) {
588    if (MI->readsRegister(NewReg, TRI)) {
589      MI->addRegisterKilled(NewReg, TRI);
590      NewVarInfo.Kills.push_back(MI);
591      Kill = MI;
592      break;
593    }
594  }
595  assert(Kill && "Failed to find last usage of register in loop");
596
597  MachineBasicBlock *KillBlock = Kill->getParent();
598  bool PostKillBlock = false;
599  for (auto *Block : Blocks) {
600    auto BBNum = Block->getNumber();
601
602    // collectWaterfallCandidateRegisters only collects registers that are dead
603    // after the loop. So we know that the old reg is no longer live throughout
604    // the waterfall loop.
605    OldVarInfo.AliveBlocks.reset(BBNum);
606
607    // The new register is live up to (and including) the block that kills it.
608    PostKillBlock |= (Block == KillBlock);
609    if (PostKillBlock) {
610      NewVarInfo.AliveBlocks.reset(BBNum);
611    } else if (Block != LoopHeader) {
612      NewVarInfo.AliveBlocks.set(BBNum);
613    }
614  }
615}
616
617char SIOptimizeVGPRLiveRange::ID = 0;
618
619INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
620                      "SI Optimize VGPR LiveRange", false, false)
621INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
622INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
623INITIALIZE_PASS_DEPENDENCY(LiveVariables)
624INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
625                    "SI Optimize VGPR LiveRange", false, false)
626
627char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
628
629FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
630  return new SIOptimizeVGPRLiveRange();
631}
632
633bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
634
635  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
636  TII = ST.getInstrInfo();
637  TRI = &TII->getRegisterInfo();
638  MDT = &getAnalysis<MachineDominatorTree>();
639  Loops = &getAnalysis<MachineLoopInfo>();
640  LV = &getAnalysis<LiveVariables>();
641  MRI = &MF.getRegInfo();
642
643  if (skipFunction(MF.getFunction()))
644    return false;
645
646  bool MadeChange = false;
647
648  // TODO: we need to think about the order of visiting the blocks to get
649  // optimal result for nesting if-else cases.
650  for (MachineBasicBlock &MBB : MF) {
651    for (auto &MI : MBB.terminators()) {
652      // Detect the if-else blocks
653      if (MI.getOpcode() == AMDGPU::SI_IF) {
654        MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
655        auto *Endif = getElseTarget(IfTarget);
656        if (!Endif)
657          continue;
658
659        // Skip unexpected control flow.
660        if (!MDT->dominates(&MBB, IfTarget) || !MDT->dominates(IfTarget, Endif))
661          continue;
662
663        SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
664        SmallVector<Register> CandidateRegs;
665
666        LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
667                          << printMBBReference(MBB) << ' '
668                          << printMBBReference(*IfTarget) << ' '
669                          << printMBBReference(*Endif) << '\n');
670
671        // Collect all the blocks in the ELSE region
672        collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
673
674        // Collect the registers can be optimized
675        collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
676                                  CandidateRegs);
677        MadeChange |= !CandidateRegs.empty();
678        // Now we are safe to optimize.
679        for (auto Reg : CandidateRegs)
680          optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
681      } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
682        auto *LoopHeader = MI.getOperand(0).getMBB();
683        auto *LoopEnd = &MBB;
684
685        LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
686                          << printMBBReference(*LoopHeader) << '\n');
687
688        SmallSetVector<Register, 16> CandidateRegs;
689        SmallVector<MachineInstr *, 16> Instructions;
690        SmallSetVector<MachineBasicBlock *, 2> Blocks;
691
692        collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs,
693                                           Blocks, Instructions);
694        MadeChange |= !CandidateRegs.empty();
695        // Now we are safe to optimize.
696        for (auto Reg : CandidateRegs)
697          optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions);
698      }
699    }
700  }
701
702  return MadeChange;
703}
704