1//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass generates machine instructions for the CTR loops related pseudos:
10// 1: MTCTRloop/DecreaseCTRloop
11// 2: MTCTR8loop/DecreaseCTR8loop
12//
13// If a CTR loop can be generated:
14// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16//    its user branch instruction can be deleted.
17//
18// If a CTR loop can not be generated due to clobber of CTR:
19// 1: MTCTRloop/MTCTR8loop can be deleted.
20// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21//    a "cmplwi/cmpldi".
22//
23// This pass runs just before register allocation, because we don't want
24// register allocator to allocate register for DecreaseCTRloop if a CTR can be
25// generated or if a CTR loop can not be generated, we don't have any condition
26// register for the new added "cmplwi/cmpldi".
27//
28//===----------------------------------------------------------------------===//
29
30#include "PPC.h"
31#include "PPCInstrInfo.h"
32#include "PPCSubtarget.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFunction.h"
36#include "llvm/CodeGen/MachineFunctionPass.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineLoopInfo.h"
39#include "llvm/CodeGen/MachineOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/Register.h"
42#include "llvm/InitializePasses.h"
43#include "llvm/Pass.h"
44#include "llvm/PassRegistry.h"
45#include "llvm/Support/CodeGen.h"
46#include "llvm/Support/Debug.h"
47#include "llvm/Support/ErrorHandling.h"
48#include <cassert>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-ctrloops"
53
54STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56
57namespace {
58class PPCCTRLoops : public MachineFunctionPass {
59public:
60  static char ID;
61
62  PPCCTRLoops() : MachineFunctionPass(ID) {
63    initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
64  }
65
66  void getAnalysisUsage(AnalysisUsage &AU) const override {
67    AU.addRequired<MachineLoopInfo>();
68    MachineFunctionPass::getAnalysisUsage(AU);
69  }
70
71  bool runOnMachineFunction(MachineFunction &MF) override;
72
73private:
74  const PPCInstrInfo *TII = nullptr;
75  MachineRegisterInfo *MRI = nullptr;
76
77  bool processLoop(MachineLoop *ML);
78  bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79  void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80                         MachineInstr *Dec);
81  void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82};
83} // namespace
84
85char PPCCTRLoops::ID = 0;
86
87INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88                      false, false)
89INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
90INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91                    false, false)
92
93FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94
95bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96  bool Changed = false;
97
98  auto &MLI = getAnalysis<MachineLoopInfo>();
99  TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100  MRI = &MF.getRegInfo();
101
102  for (auto *ML : MLI) {
103    if (ML->isOutermost())
104      Changed |= processLoop(ML);
105  }
106
107#ifndef NDEBUG
108  for (const MachineBasicBlock &BB : MF) {
109    for (const MachineInstr &I : BB)
110      assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111              I.getOpcode() != PPC::DecreaseCTR8loop) &&
112             "CTR loop pseudo is not expanded!");
113  }
114#endif
115
116  return Changed;
117}
118
119bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120  if (!CheckReads) {
121    // If we are only checking for defs, that is we are going to find
122    // definitions before MTCTRloop, for this case:
123    // CTR defination inside the callee of a call instruction will not impact
124    // the defination of MTCTRloop, so we can use definesRegister() for the
125    // check, no need to check the regmask.
126    return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
127  }
128
129  if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
130    return true;
131
132  if (MI->getDesc().isCall())
133    return true;
134
135  // We define the CTR in the loop preheader, so if there is any CTR reader in
136  // the loop, we also can not use CTR loop form.
137  if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
138    return true;
139
140  return false;
141}
142
143bool PPCCTRLoops::processLoop(MachineLoop *ML) {
144  bool Changed = false;
145
146  // Align with HardwareLoop pass, process inner loops first.
147  for (MachineLoop *I : *ML)
148    Changed |= processLoop(I);
149
150  // If any inner loop is changed, outter loop must be without hardware loop
151  // intrinsics.
152  if (Changed)
153    return true;
154
155  auto IsLoopStart = [](MachineInstr &MI) {
156    return MI.getOpcode() == PPC::MTCTRloop ||
157           MI.getOpcode() == PPC::MTCTR8loop;
158  };
159
160  auto SearchForStart =
161      [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
162    for (auto &MI : *MBB) {
163      if (IsLoopStart(MI))
164        return &MI;
165    }
166    return nullptr;
167  };
168
169  MachineInstr *Start = nullptr;
170  MachineInstr *Dec = nullptr;
171  bool InvalidCTRLoop = false;
172
173  MachineBasicBlock *Preheader = ML->getLoopPreheader();
174  // If there is no preheader for this loop, there must be no MTCTRloop
175  // either.
176  if (!Preheader)
177    return false;
178
179  Start = SearchForStart(Preheader);
180  // This is not a CTR loop candidate.
181  if (!Start)
182    return false;
183
184  // If CTR is live to the preheader, we can not redefine the CTR register.
185  if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
186    InvalidCTRLoop = true;
187
188  // Make sure there is also no CTR clobber in the block preheader between the
189  // begin and MTCTR.
190  for (MachineBasicBlock::reverse_instr_iterator I =
191           std::next(Start->getReverseIterator());
192       I != Preheader->instr_rend(); ++I)
193    // Only check the definitions of CTR. If there is non-dead definition for
194    // the CTR, we conservatively don't generate a CTR loop.
195    if (isCTRClobber(&*I, /* CheckReads */ false)) {
196      InvalidCTRLoop = true;
197      break;
198    }
199
200  // Make sure there is also no CTR clobber/user in the block preheader between
201  // MTCTR and the end.
202  for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
203       I != Preheader->instr_end(); ++I)
204    if (isCTRClobber(&*I, /* CheckReads */ true)) {
205      InvalidCTRLoop = true;
206      break;
207    }
208
209  // Find the CTR loop components and decide whether or not to fall back to a
210  // normal loop.
211  for (auto *MBB : reverse(ML->getBlocks())) {
212    for (auto &MI : *MBB) {
213      if (MI.getOpcode() == PPC::DecreaseCTRloop ||
214          MI.getOpcode() == PPC::DecreaseCTR8loop)
215        Dec = &MI;
216      else if (!InvalidCTRLoop)
217        // If any instruction clobber CTR, then we can not generate a CTR loop.
218        InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
219    }
220    if (Dec && InvalidCTRLoop)
221      break;
222  }
223
224  assert(Dec && "CTR loop is not complete!");
225
226  if (InvalidCTRLoop) {
227    expandNormalLoops(ML, Start, Dec);
228    ++NumNormalLoops;
229  }
230  else {
231    expandCTRLoops(ML, Start, Dec);
232    ++NumCTRLoops;
233  }
234  return true;
235}
236
237void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
238                                    MachineInstr *Dec) {
239  bool Is64Bit =
240      Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
241
242  MachineBasicBlock *Preheader = Start->getParent();
243  MachineBasicBlock *Exiting = Dec->getParent();
244  assert((Preheader && Exiting) &&
245         "Preheader and exiting should exist for CTR loop!");
246
247  assert(Dec->getOperand(1).getImm() == 1 &&
248         "Loop decrement stride must be 1");
249
250  unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
251  unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
252
253  Register PHIDef =
254      MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
255                                         : &PPC::GPRC_and_GPRC_NOR0RegClass);
256
257  Start->getParent()->getParent()->getProperties().reset(
258      MachineFunctionProperties::Property::NoPHIs);
259
260  // Generate "PHI" in the header block.
261  auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
262                        DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
263  PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
264
265  Register ADDIDef =
266      MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
267                                         : &PPC::GPRC_and_GPRC_NOR0RegClass);
268  // Generate "addi -1" in the exiting block.
269  BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
270      .addReg(PHIDef)
271      .addImm(-1);
272
273  // Add other inputs for the PHI node.
274  if (ML->isLoopLatch(Exiting)) {
275    // There must be only two predecessors for the loop header, one is the
276    // Preheader and the other one is loop latch Exiting. In hardware loop
277    // insertion pass, the block containing DecreaseCTRloop must dominate all
278    // loop latches. So there must be only one latch.
279    assert(ML->getHeader()->pred_size() == 2 &&
280           "Loop header predecessor is not right!");
281    PHIMIB.addReg(ADDIDef).addMBB(Exiting);
282  } else {
283    // If the block containing DecreaseCTRloop is not a loop latch, we can use
284    // ADDIDef as the value for all other blocks for the PHI. In hardware loop
285    // insertion pass, the block containing DecreaseCTRloop must dominate all
286    // loop latches.
287    for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
288      if (ML->contains(P)) {
289        assert(ML->isLoopLatch(P) &&
290               "Loop's header in-loop predecessor is not loop latch!");
291        PHIMIB.addReg(ADDIDef).addMBB(P);
292      } else
293        assert(P == Preheader &&
294               "CTR loop should not be generated for irreducible loop!");
295    }
296  }
297
298  // Generate the compare in the exiting block.
299  Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
300  auto CMPMIB =
301      BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
302          .addReg(ADDIDef)
303          .addImm(0);
304
305  BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
306          Dec->getOperand(0).getReg())
307      .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
308
309  // Remove the pseudo instructions.
310  Start->eraseFromParent();
311  Dec->eraseFromParent();
312}
313
314void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
315                                 MachineInstr *Dec) {
316  bool Is64Bit =
317      Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
318
319  MachineBasicBlock *Preheader = Start->getParent();
320  MachineBasicBlock *Exiting = Dec->getParent();
321
322  (void)Preheader;
323  assert((Preheader && Exiting) &&
324         "Preheader and exiting should exist for CTR loop!");
325
326  assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
327
328  unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
329  unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
330  auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
331  assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
332         "There should be only one user for loop decrement pseudo!");
333
334  unsigned Opcode = 0;
335  switch (BrInstr->getOpcode()) {
336  case PPC::BC:
337    Opcode = BDNZOpcode;
338    (void) ML;
339    assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
340           "Invalid ctr loop!");
341    break;
342  case PPC::BCn:
343    Opcode = BDZOpcode;
344    assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
345           "Invalid ctr loop!");
346    break;
347  default:
348    llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
349  }
350
351  // Generate "bdnz/bdz" in the exiting block just before the terminator.
352  BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
353      .addMBB(BrInstr->getOperand(1).getMBB());
354
355  // Remove the pseudo instructions.
356  BrInstr->eraseFromParent();
357  Dec->eraseFromParent();
358}
359