//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer.  It runs at the end of
// the SSA phases, following VSX swap removal.  A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here.  Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects:  it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instructions fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");
STATISTIC(NumRotatesCollapsed,
          "Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
STATISTIC(NumLoadImmZeroFoldedAndRemoved,
          "Number of LI(8) reg, 0 that are folded to r0 and removed");

static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);

namespace {

struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *MPDT;
  MachineBlockFrequencyInfo *MBFI;
  uint64_t EntryFreq;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Individual peephole transformations.
  bool eliminateRedundantCompare(void);
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
  bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                      MachineInstr *MI);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addRequired<MachineBlockFrequencyInfo>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addPreserved<MachinePostDominatorTree>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    initialize(MF);
    // At this point, the TOC pointer should not be used in a function that
    // uses PC-Relative addressing.
    assert((MF.getRegInfo().use_empty(PPC::X2) ||
            !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
           "TOC pointer used in a function using PC-Relative addressing!");
    if (skipFunction(MF.getFunction()))
      return false;
    return simplifyCode();
  }
};

// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  MPDT = &getAnalysis<MachinePostDominatorTree>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  EntryFreq = MBFI->getEntryFreq();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  LLVM_DEBUG(MF->dump());
}

static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
                                      MachineRegisterInfo *MRI) {
  assert(Op && "Invalid Operand!");
  if (!Op->isReg())
    return nullptr;

  Register Reg = Op->getReg();
  if (!Register::isVirtualRegister(Reg))
    return nullptr;

  return MRI->getVRegDef(Reg);
}

// This function returns the number of known zero bits in the output of MI,
// starting from the most significant bit.
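// For example, RLDICL with a shift of 0 and a mask-begin (MB) of 48 clears
// the 48 most significant bits, so 48 leading zero bits are known; an LBZ
// produces a value in the range 0-255, so 56 leading zero bits are known.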
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
  unsigned Opcode = MI->getOpcode();
  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
      Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
    return MI->getOperand(3).getImm();

  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return MI->getOperand(3).getImm();

  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return 32 + MI->getOperand(3).getImm();

  if (Opcode == PPC::ANDI_rec) {
    uint16_t Imm = MI->getOperand(2).getImm();
    return 48 + countLeadingZeros(Imm);
  }

  if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
      Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
    // The result ranges from 0 to 32.
    return 58;

  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
    // The result ranges from 0 to 64.
    return 57;

  if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
      Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
      Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
    return 48;

  if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
      Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
      Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
    return 56;

  if (TII->isZeroExtended(*MI))
    return 32;

  return 0;
}

// This function maintains a map of the pairs <TOC Save Instr, Keep>.
// Each time a new TOC save is encountered, it checks if any of the existing
// ones are dominated by the new one. If so, it marks the existing one as
// redundant by setting its entry in the map to false. It then adds the new
// instruction to the map with either true or false depending on whether any
// existing instruction dominated the new one.
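// For instance, if an earlier TOC save recorded in the entry block dominates
// a newly encountered save, the new save is added with its flag set to false
// and is eventually erased by eliminateRedundantTOCSaves.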
void PPCMIPeephole::UpdateTOCSaves(
  std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
  assert(MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
         "TOC-save removal only supported on ELFv2");
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();

  MachineBasicBlock *Entry = &MF->front();
  uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();

  // If the block in which the TOC save resides post-dominates Entry, or is
  // hotter than Entry (keep in mind that early MachineLICM has already run,
  // so the TOC save won't be hoisted), we can just do the save in the
  // prologue.
  if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
    FI->setMustSaveTOC(true);

  // If we are saving the TOC in the prologue, all the TOC saves can be removed
  // from the code.
  if (FI->mustSaveTOC()) {
    for (auto &TOCSave : TOCSaves)
      TOCSave.second = false;
    // Add new instruction to map.
    TOCSaves[MI] = false;
    return;
  }

  bool Keep = true;
  for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++) {
    MachineInstr *CurrInst = It->first;
    // If the new instruction dominates an existing one, mark the existing one
    // as redundant.
    if (It->second && MDT->dominates(MI, CurrInst))
      It->second = false;
    // Check if the new instruction is redundant.
    if (MDT->dominates(CurrInst, MI)) {
      Keep = false;
      break;
    }
  }
  // Add new instruction to map.
  TOCSaves[MI] = Keep;
}

// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
  bool Simplified = false;
  MachineInstr* ToErase = nullptr;
  std::map<MachineInstr *, bool> TOCSaves;
  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
  NumFunctionsEnteredInMIPeephole++;
  if (ConvertRegReg) {
    // Fixed-point conversion of reg/reg instructions fed by load-immediate
    // into reg/imm instructions. FIXME: This is expensive, control it with
    // an option.
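    // A simple illustration (virtual register numbers are made up):
    //   %5:gprc = LI 17
    //   %6:gprc = ADD4 %4, %5
    // can be rewritten by convertToImmediateForm as %6:gprc = ADDI %4, 17.
    // The now-unused LI is left for the later DCE pass, and the new reg-imm
    // instruction may enable further conversions on the next iteration.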
    bool SomethingChanged = false;
    do {
      NumFixedPointIterations++;
      SomethingChanged = false;
      for (MachineBasicBlock &MBB : *MF) {
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugInstr())
            continue;

          if (TII->convertToImmediateForm(MI)) {
            // We don't erase anything in case the def has other uses. Let DCE
            // remove it if it can be removed.
            LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
            LLVM_DEBUG(MI.dump());
            NumConvertedToImmediateForm++;
            SomethingChanged = true;
            Simplified = true;
            continue;
          }
        }
      }
    } while (SomethingChanged && FixedPointRegToImm);
  }

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Ignore debug instructions.
      if (MI.isDebugInstr())
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;
      case PPC::LI:
      case PPC::LI8: {
        // If we are materializing a zero, look for any use operands for which
        // zero means immediate zero. All such operands can be replaced with
        // PPC::ZERO.
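        // For instance, when the zero feeds the RA (base address) operand of
        // an X-form load or store, that operand can read the ZERO/ZERO8
        // register as a literal 0; once all such uses are rewritten, the LI
        // is dead and erased below. (Illustrative case only; the exact set of
        // foldable operands is decided by TII->onlyFoldImmediate.)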
        if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
          break;
        unsigned MIDestReg = MI.getOperand(0).getReg();
        for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
          Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
        if (MRI->use_nodbg_empty(MIDestReg)) {
          ++NumLoadImmZeroFoldedAndRemoved;
          ToErase = &MI;
        }
        break;
      }
      case PPC::STD: {
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.hasVarSizedObjects() ||
            !MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
          break;
        // When encountering a TOC save instruction, call UpdateTOCSaves
        // to add it to the TOCSaves map and mark any existing TOC saves
        // it dominates as redundant.
        if (TII->isTOCSaveMI(MI))
          UpdateTOCSaves(TOCSaves, &MI);
        break;
      }
      case PPC::XXPERMDI: {
        // Perform simplifications of 2x64 vector swaps and splats.
        // A swap is identified by an immediate value of 2, and a splat
        // is identified by an immediate value of 0 or 3.
        int Immed = MI.getOperand(3).getImm();

        if (Immed == 1)
          break;

        // For each of these simplifications, we need the two source
        // regs to match.  Unfortunately, MachineCSE ignores COPY and
        // SUBREG_TO_REG, so for example we can see
        //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
        // We have to look through chains of COPY and SUBREG_TO_REG
        // to find the real source values for comparison.
        unsigned TrueReg1 =
          TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
        unsigned TrueReg2 =
          TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);

        if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)))
          break;

        MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);

        if (!DefMI)
          break;

        unsigned DefOpc = DefMI->getOpcode();

        // If this is a splat fed by a splatting load, the splat is
        // redundant. Replace with a copy. This doesn't happen directly due
        // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
        // a load of a double to a vector of 64-bit integers.
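        // For example (register numbers are made up):
        //   %2 = LXVDSX ...          ; already splats the loaded doubleword
        //   %3 = XXPERMDI %2, %2, 0  ; becomes %3 = COPY %2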
        auto isConversionOfLoadAndSplat = [=]() -> bool {
          if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
            return false;
          unsigned FeedReg1 =
            TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
          if (Register::isVirtualRegister(FeedReg1)) {
            MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
            if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
              return true;
          }
          return false;
        };
        if ((Immed == 0 || Immed == 3) &&
            (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
          LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
                               "to load-and-splat/copy: ");
          LLVM_DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(1));
          ToErase = &MI;
          Simplified = true;
        }

        // If this is a splat or a swap fed by another splat, we
        // can replace it with a copy.
        if (DefOpc == PPC::XXPERMDI) {
          unsigned DefReg1 = DefMI->getOperand(1).getReg();
          unsigned DefReg2 = DefMI->getOperand(2).getReg();
          unsigned DefImmed = DefMI->getOperand(3).getImm();

          // If the two inputs are not the same register, check to see if
          // they originate from the same virtual register after only
          // copy-like instructions.
          if (DefReg1 != DefReg2) {
            unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
            unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);

            if (!(FeedReg1 == FeedReg2 &&
                  Register::isVirtualRegister(FeedReg1)))
              break;
          }

          if (DefImmed == 0 || DefImmed == 3) {
            LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
                                 "to splat/copy: ");
            LLVM_DEBUG(MI.dump());
            BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                    MI.getOperand(0).getReg())
                .add(MI.getOperand(1));
            ToErase = &MI;
            Simplified = true;
          }

          // If this is a splat fed by a swap, we can simplify it by
          // modifying the splat to splat the other value from the swap's
          // input parameter.
          else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
            LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
            LLVM_DEBUG(MI.dump());
            MI.getOperand(1).setReg(DefReg1);
            MI.getOperand(2).setReg(DefReg2);
            MI.getOperand(3).setImm(3 - Immed);
            Simplified = true;
          }

          // If this is a swap fed by a swap, we can replace it
          // with a copy from the first swap's input.
          else if (Immed == 2 && DefImmed == 2) {
            LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
            LLVM_DEBUG(MI.dump());
            BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                    MI.getOperand(0).getReg())
                .add(DefMI->getOperand(1));
            ToErase = &MI;
            Simplified = true;
          }
        } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
                   (DefMI->getOperand(2).getImm() == 0 ||
                    DefMI->getOperand(2).getImm() == 3)) {
          // Splat fed by another splat - switch the output of the first
          // and remove the second.
          DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
          LLVM_DEBUG(MI.dump());
        }
        break;
      }
      case PPC::VSPLTB:
      case PPC::VSPLTH:
      case PPC::XXSPLTW: {
        unsigned MyOpcode = MI.getOpcode();
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
        unsigned TrueReg =
          TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
        if (!Register::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
        if (!DefMI)
          break;
        unsigned DefOpcode = DefMI->getOpcode();
        auto isConvertOfSplat = [=]() -> bool {
          if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
            return false;
          Register ConvReg = DefMI->getOperand(1).getReg();
          if (!Register::isVirtualRegister(ConvReg))
            return false;
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
            Splt->getOpcode() == PPC::XXSPLTW);
        };
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS) ||
          (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
        // If the instruction[s] that feed this splat have already splat
        // the value, this splat is redundant.
        if (AlreadySplat) {
          LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
          LLVM_DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(OpNo));
          ToErase = &MI;
          Simplified = true;
        }
        // Splat fed by a shift. This usually arises when we align a value
        // to splat into vector element zero.
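        // A small example of the rewrite below (register numbers are made up):
        //   %3 = XXSLDWI %2, %2, 1
        //   %4 = XXSPLTW %3, 0       ; becomes %4 = XXSPLTW %2, 1
        // since splatting element 0 of the rotated vector is the same as
        // splatting element (0 + 1) & 0x3 of the original input.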
        if (DefOpcode == PPC::XXSLDWI) {
          Register ShiftRes = DefMI->getOperand(0).getReg();
          Register ShiftOp1 = DefMI->getOperand(1).getReg();
          Register ShiftOp2 = DefMI->getOperand(2).getReg();
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
          unsigned SplatImm = MI.getOperand(2).getImm();
          if (ShiftOp1 == ShiftOp2) {
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
              LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
              LLVM_DEBUG(DefMI->dump());
              ToErase = DefMI;
            }
            Simplified = true;
            LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
                              << " to " << NewElem << " in instruction: ");
            LLVM_DEBUG(MI.dump());
            MI.getOperand(1).setReg(ShiftOp1);
            MI.getOperand(2).setImm(NewElem);
          }
        }
        break;
      }
      case PPC::XVCVDPSP: {
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
        unsigned TrueReg =
          TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
        if (!Register::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);

        // This can occur when building a vector of single precision or integer
        // values.
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
          unsigned DefsReg1 =
            TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
          unsigned DefsReg2 =
            TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
          if (!Register::isVirtualRegister(DefsReg1) ||
              !Register::isVirtualRegister(DefsReg2))
            break;
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);

          if (!P1 || !P2)
            break;

          // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
          // and set any uses of that FRSP/XSRSP (in this MI) to the source of
          // the FRSP/XSRSP.
          auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
            unsigned Opc = RoundInstr->getOpcode();
            if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
                MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
              Simplified = true;
              Register ConvReg1 = RoundInstr->getOperand(1).getReg();
              Register FRSPDefines = RoundInstr->getOperand(0).getReg();
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
              for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
                if (Use.getOperand(i).isReg() &&
                    Use.getOperand(i).getReg() == FRSPDefines)
                  Use.getOperand(i).setReg(ConvReg1);
              LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
              LLVM_DEBUG(RoundInstr->dump());
              LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
              LLVM_DEBUG(MI.dump());
              LLVM_DEBUG(dbgs() << "Through instruction:\n");
              LLVM_DEBUG(DefMI->dump());
              RoundInstr->eraseFromParent();
            }
          };

          // If the input to XVCVDPSP is a vector that was built (even
          // partially) out of FRSP's, the FRSP(s) can safely be removed
          // since this instruction performs the same operation.
          if (P1 != P2) {
            removeFRSPIfPossible(P1);
            removeFRSPIfPossible(P2);
            break;
          }
          removeFRSPIfPossible(P1);
        }
        break;
      }
      case PPC::EXTSH:
      case PPC::EXTSH8:
      case PPC::EXTSH8_32_64: {
        if (!EnableSExtElimination) break;
        Register NarrowReg = MI.getOperand(1).getReg();
        if (!Register::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
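        // For example (register numbers are made up):
        //   %2 = LHZ 0, %1
        //   %3 = EXTSH %2            ; becomes a single %3 = LHA 0, %1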
        if (SrcMI->getOpcode() == PPC::LHZ ||
            SrcMI->getOpcode() == PPC::LHZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSH8;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LHZX;
          };
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LHAX8;
              else         return PPC::LHA8;
            else
              if (isXForm) return PPC::LHAX;
              else         return PPC::LHA;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
          LLVM_DEBUG(SrcMI->dump());
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
          LLVM_DEBUG(MI.dump());
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::EXTSW:
      case PPC::EXTSW_32:
      case PPC::EXTSW_32_64: {
        if (!EnableSExtElimination) break;
        Register NarrowReg = MI.getOperand(1).getReg();
        if (!Register::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
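        // For example (register numbers are made up):
        //   %2 = LWZ 0, %1
        //   %3 = EXTSW_32_64 %2      ; becomes a single %3 = LWA 0, %1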
        if (SrcMI->getOpcode() == PPC::LWZ ||
            SrcMI->getOpcode() == PPC::LWZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LWZX;
          };
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LWAX;
              else         return PPC::LWA;
            else
              if (isXForm) return PPC::LWAX_32;
              else         return PPC::LWA_32;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
          LLVM_DEBUG(SrcMI->dump());
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
          LLVM_DEBUG(MI.dump());
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
                   TII->isSignExtended(*SrcMI)) {
          // We can eliminate EXTSW if the input is known to be already
          // sign-extended.
          LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
          Register TmpReg =
              MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
                  TmpReg);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
                  MI.getOperand(0).getReg())
              .addReg(TmpReg)
              .addReg(NarrowReg)
              .addImm(PPC::sub_32);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::RLDICL: {
        // We can eliminate RLDICL (e.g. for zero-extension)
        // if all bits to clear are already zero in the input.
        // This code assumes the following code sequence for zero-extension.
        //   %6 = COPY %5:sub_32; (optional)
        //   %8 = IMPLICIT_DEF;
        //   %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
        if (!EnableZExtElimination) break;

        if (MI.getOperand(2).getImm() != 0)
          break;

        Register SrcReg = MI.getOperand(1).getReg();
        if (!Register::isVirtualRegister(SrcReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
        if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
              SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
          break;

        MachineInstr *ImpDefMI, *SubRegMI;
        ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
        SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
        if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;

        SrcMI = SubRegMI;
        if (SubRegMI->getOpcode() == PPC::COPY) {
          Register CopyReg = SubRegMI->getOperand(1).getReg();
          if (Register::isVirtualRegister(CopyReg))
            SrcMI = MRI->getVRegDef(CopyReg);
        }

        unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
        if (MI.getOperand(3).getImm() <= KnownZeroCount) {
          LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .addReg(SrcReg);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedZExt++;
        }
        break;
      }

      // TODO: Any instruction that has an immediate form fed only by a PHI
      // whose operands are all load immediate can be folded away. We currently
      // do this for ADD instructions, but should expand it to arithmetic and
      // binary instructions with immediate forms in the future.
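      // A sketch of the ADD rewrite below (register numbers are made up, and
      // %7 must dominate both LIs):
      //   %4 = LI 1;  %5 = LI 2
      //   %6 = PHI %4, <%bb.1>, %5, <%bb.2>
      //   %8 = ADD4 %6, %7
      // becomes
      //   %4 = ADDI %7, 1;  %5 = ADDI %7, 2
      //   %6 = PHI %4, <%bb.1>, %5, <%bb.2>
      //   %8 = COPY %6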
      case PPC::ADD4:
      case PPC::ADD8: {
        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);

          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
        };

        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
                                            MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          assert(DominatorOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);

          // Note: the vregs only show up at odd-numbered operand positions of
          // the PHI node; the even positions hold the basic block info.
          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
            MachineInstr *LiMI =
                getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
            if (!LiMI ||
                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
                || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
                !MDT->dominates(DefDomMI, LiMI))
              return false;
          }

          return true;
        };

        MachineOperand Op1 = MI.getOperand(1);
        MachineOperand Op2 = MI.getOperand(2);
        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
          std::swap(Op1, Op2);
        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
          break; // We don't have an ADD fed by LIs that can be transformed.

        // Now we know that Op1 is the PHI node and Op2 is the dominator
        Register DominatorReg = Op2.getReg();

        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                             : &PPC::GPRC_and_GPRC_NOR0RegClass;
        MRI->setRegClass(DominatorReg, TRC);

        // replace LIs with ADDIs
        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
          LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
          LLVM_DEBUG(LiMI->dump());

          // There could be repeated registers in the PHI, e.g: %1 =
          // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
          // already replaced the def instruction, skip.
          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
            continue;

          assert((LiMI->getOpcode() == PPC::LI ||
                  LiMI->getOpcode() == PPC::LI8) &&
                 "Invalid Opcode!");
          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
          LiMI->RemoveOperand(1);                    // remove the imm of LI
          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
                                                              : PPC::ADDI8));
          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
              .addReg(DominatorReg)
              .addImm(LiImm); // restore the imm of LI
          LLVM_DEBUG(LiMI->dump());
        }

        // Replace ADD with COPY
        LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
        LLVM_DEBUG(MI.dump());
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                MI.getOperand(0).getReg())
            .add(Op1);
        ToErase = &MI;
        Simplified = true;
        NumOptADDLIs++;
        break;
      }
      case PPC::RLDICR: {
        Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
                      combineSEXTAndSHL(MI, ToErase);
        break;
      }
      case PPC::RLWINM:
      case PPC::RLWINM_rec:
      case PPC::RLWINM8:
      case PPC::RLWINM8_rec: {
        unsigned FoldingReg = MI.getOperand(1).getReg();
        if (!Register::isVirtualRegister(FoldingReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
        if (SrcMI->getOpcode() != PPC::RLWINM &&
            SrcMI->getOpcode() != PPC::RLWINM_rec &&
            SrcMI->getOpcode() != PPC::RLWINM8 &&
            SrcMI->getOpcode() != PPC::RLWINM8_rec)
          break;
        assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
                MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
                SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
               "Invalid PPC::RLWINM Instruction!");
        uint64_t SHSrc = SrcMI->getOperand(2).getImm();
        uint64_t SHMI = MI.getOperand(2).getImm();
        uint64_t MBSrc = SrcMI->getOperand(3).getImm();
        uint64_t MBMI = MI.getOperand(3).getImm();
        uint64_t MESrc = SrcMI->getOperand(4).getImm();
        uint64_t MEMI = MI.getOperand(4).getImm();

        assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&
               "Invalid PPC::RLWINM Instruction!");

        // If MBMI is bigger than MEMI, we can never get a run of ones.
        // RotatedSrcMask non-wrap:
        //                 0........31|32........63
        // RotatedSrcMask:   B---E        B---E
        // MaskMI:         -----------|--E  B------
        // Result:           -----          ---      (Bad candidate)
        //
        // RotatedSrcMask wrap:
        //                 0........31|32........63
        // RotatedSrcMask: --E   B----|--E    B----
        // MaskMI:         -----------|--E  B------
        // Result:         ---   -----|---    -----  (Bad candidate)
        //
        // One special case is when RotatedSrcMask is a full set mask.
        // RotatedSrcMask full:
        //                 0........31|32........63
        // RotatedSrcMask: ------EB---|-------EB---
        // MaskMI:         -----------|--E  B------
        // Result:         -----------|---  -------  (Good candidate)

        // Mark special case.
        bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);

        // For other MBMI > MEMI cases, just return.
        if ((MBMI > MEMI) && !SrcMaskFull)
          break;

        // Handle MBMI <= MEMI cases.
        APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);
        // In MI, we only need the low 32 bits of SrcMI, so consider only the
        // low 32 bits of the SrcMI mask. Note that in APInt the lowest bit is
        // at index 0, while in the PowerPC ISA the lowest bit is at index 63.
        APInt MaskSrc =
            APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);

        APInt RotatedSrcMask = MaskSrc.rotl(SHMI);
        APInt FinalMask = RotatedSrcMask & MaskMI;
        uint32_t NewMB, NewME;

        // If the final mask is 0, the result of MI should be 0 too.
        if (FinalMask.isNullValue()) {
          bool Is64Bit = (MI.getOpcode() == PPC::RLWINM8 ||
                          MI.getOpcode() == PPC::RLWINM8_rec);

          Simplified = true;

          LLVM_DEBUG(dbgs() << "Replace Instr: ");
          LLVM_DEBUG(MI.dump());

          if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {
            // Replace MI with "LI 0"
            MI.RemoveOperand(4);
            MI.RemoveOperand(3);
            MI.RemoveOperand(2);
            MI.getOperand(1).ChangeToImmediate(0);
            MI.setDesc(TII->get(Is64Bit ? PPC::LI8 : PPC::LI));
          } else {
            // Replace MI with "ANDI_rec reg, 0"
            MI.RemoveOperand(4);
            MI.RemoveOperand(3);
            MI.getOperand(2).setImm(0);
            MI.setDesc(TII->get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
            MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
            if (SrcMI->getOperand(1).isKill()) {
              MI.getOperand(1).setIsKill(true);
              SrcMI->getOperand(1).setIsKill(false);
            } else
              // About to replace MI.getOperand(1), clear its kill flag.
              MI.getOperand(1).setIsKill(false);
          }

          LLVM_DEBUG(dbgs() << "With: ");
          LLVM_DEBUG(MI.dump());
        } else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB,
                               NewME) && NewMB <= NewME) || SrcMaskFull) {
          // Here we only handle the MBMI <= MEMI case, so NewMB must be no
          // bigger than NewME. Otherwise we would get a 64-bit value after
          // folding, but MI returns a 32-bit value.

          Simplified = true;
          LLVM_DEBUG(dbgs() << "Converting Instr: ");
          LLVM_DEBUG(MI.dump());

          uint16_t NewSH = (SHSrc + SHMI) % 32;
          MI.getOperand(2).setImm(NewSH);
          // If SrcMI mask is full, no need to update MBMI and MEMI.
          if (!SrcMaskFull) {
            MI.getOperand(3).setImm(NewMB);
            MI.getOperand(4).setImm(NewME);
          }
          MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
          if (SrcMI->getOperand(1).isKill()) {
            MI.getOperand(1).setIsKill(true);
            SrcMI->getOperand(1).setIsKill(false);
          } else
            // About to replace MI.getOperand(1), clear its kill flag.
            MI.getOperand(1).setIsKill(false);

          LLVM_DEBUG(dbgs() << "To: ");
          LLVM_DEBUG(MI.dump());
        }
        if (Simplified) {
          // If FoldingReg has no non-debug use and it has no implicit def (it
          // is not RLWINM_rec or RLWINM8_rec), it is safe to delete its def,
          // SrcMI. Otherwise keep it.
          ++NumRotatesCollapsed;
          if (MRI->use_nodbg_empty(FoldingReg) && !SrcMI->hasImplicitDef()) {
            ToErase = SrcMI;
            LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
            LLVM_DEBUG(SrcMI->dump());
          }
        }
        break;
      }
      }
    }

    // If the last instruction was marked for elimination,
    // remove it now.
    if (ToErase) {
      ToErase->eraseFromParent();
      ToErase = nullptr;
    }
  }

  // Eliminate all the TOC save instructions which are redundant.
  Simplified |= eliminateRedundantTOCSaves(TOCSaves);
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  if (FI->mustSaveTOC())
    NumTOCSavesInPrologue++;

  // We try to eliminate redundant compare instructions.
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}

// Helper functions for eliminateRedundantCompare.
static bool isEqOrNe(MachineInstr *BI) {
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
}

static bool isSupportedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
          opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
          opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
}

static bool is64bitCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
}

static bool isSignedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
          opCode == PPC::CMPDI || opCode == PPC::CMPWI);
}

static unsigned getSignedCmpOpCode(unsigned opCode) {
  if (opCode == PPC::CMPLD)  return PPC::CMPD;
  if (opCode == PPC::CMPLW)  return PPC::CMPW;
  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
  return opCode;
}

// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
// (LT x) to (LE x-1).
static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GE)
    return PPC::getPredicate(PPC::PRED_GT, PredHint);
  if (PredCond == PPC::PRED_LT)
    return PPC::getPredicate(PPC::PRED_LE, PredHint);

  return 0;
}

// We can increment immediate x in (GT x) by changing it to (GE x+1) or
// (LE x) to (LT x+1).
static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GT)
    return PPC::getPredicate(PPC::PRED_GE, PredHint);
  if (PredCond == PPC::PRED_LE)
    return PPC::getPredicate(PPC::PRED_LT, PredHint);

  return 0;
}

// This takes a Phi node and returns a register value for the specified BB.
static unsigned getIncomingRegForBlock(MachineInstr *Phi,
                                       MachineBasicBlock *MBB) {
  for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
    MachineOperand &MO = Phi->getOperand(I);
    if (MO.getMBB() == MBB)
      return Phi->getOperand(I-1).getReg();
  }
  llvm_unreachable("invalid src basic block for this Phi node\n");
  return 0;
}

// This function tracks the source of the register through register copies.
// If BB1 and BB2 are non-NULL, we also track the PHI instruction in BB2,
// assuming that control flows from BB1 into BB2.
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
                           MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
  unsigned SrcReg = Reg;
  while (1) {
    unsigned NextReg = SrcReg;
    MachineInstr *Inst = MRI->getVRegDef(SrcReg);
    if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
      NextReg = getIncomingRegForBlock(Inst, BB1);
      // We track through PHI only once to avoid an infinite loop.
      BB1 = nullptr;
    }
    else if (Inst->isFullCopy())
      NextReg = Inst->getOperand(1).getReg();
    if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
      break;
    SrcReg = NextReg;
  }
  return SrcReg;
}

static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      Register CndReg = (*BII).getOperand(1).getReg();
      if (!Register::isVirtualRegister(CndReg) || !MRI->hasOneNonDBGUse(CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in the comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !Register::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  // If a candidate BB has more than one successor, we would have to create a
  // new BB to hold the moved compare instruction. So far, we move a compare
  // instruction only into a BB having a single successor, to avoid
  // potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Hence we can proceed as is.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(Pred1MBB, Pred2MBB);
    }
    else return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}

// This function iterates over the input map containing pairs of a TOC save
// instruction and a flag. The flag is set to false if the TOC save has been
// proven redundant. This function erases from the basic block all the TOC
// saves marked as redundant.
bool PPCMIPeephole::eliminateRedundantTOCSaves(
    std::map<MachineInstr *, bool> &TOCSaves) {
  bool Simplified = false;
  int NumKept = 0;
  for (auto TOCSave : TOCSaves) {
    if (!TOCSave.second) {
      TOCSave.first->eraseFromParent();
      RemoveTOCSave++;
      Simplified = true;
    } else {
      NumKept++;
    }
  }

  if (NumKept > 1)
    MultiTOCSaves++;

  return Simplified;
}

// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge compare instructions into one and make multiple
// conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
//   cmplwi r3, 0
//   beq    0, .LBB0_3
//   cmpwi  r3, -1
//   bgt    0, .LBB0_4
// So, before merging the two compares, we first modify these instructions to:
//   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
//   beq    0, .LBB0_3
//   cmpwi  r3, 0       ; greater than -1 means greater than or equal to 0
//   bge    0, .LBB0_4

bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For the fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - the compare takes no physical register operand in either MBB.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp to nullptr.
    //
    // As the partially redundant case, we additionally handle MBB2 having one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //      |    \                     (w/ 1 succ) \     |    \
    //      |     \                                 \    |     \
    //      |                                        \   |
    //   --------                                     --------
    //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                         -------- and 2 succ)
    //      |    \                                       |    \
    //      |     \                                      |     \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons.
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;
1305
1306    unsigned NewOpCode = 0;
1307    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1308    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1309    bool SwapOperands = false;
1310
    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, an unsigned comparison is used for an equality check, but
      // we replace it with a signed comparison if the comparison to be
      // merged is a signed comparison.
      // In any other case of an opcode mismatch, we cannot optimize this.

      // We cannot change the opcode when comparing against an immediate
      // if the most significant bit of the immediate is one,
      // due to the difference in sign extension.
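      // For instance (a purely illustrative immediate): with 0x8000, the
      // signed compare sees -32768 after sign extension while the unsigned
      // compare sees 32768, so even an equality test gives different answers
      // and the opcodes cannot be interchanged.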
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
        if (!I->getOperand(2).isImm())
          return false;
        int16_t Imm = (int16_t)I->getOperand(2).getImm();
        return Imm < 0;
      };

      if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // For comparisons between two registers, the two compares must use the
      // same pair of registers to be merged.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
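        // For instance (hypothetical registers): when merging `cmpw r3, r4`
        // with `cmpw r4, r3`, a `bgt` on the second compare must become `blt`
        // once it reuses the first compare; that is the swapped predicate
        // computed below.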
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In the partially redundant case, we also need to swap the operands
        // of the compare instruction that will be moved.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // For comparisons between a register and an immediate, the two compare
      // instructions must use the same source register.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If the immediates are not the same, we try to adjust them by changing
      // the predicate; e.g. GT imm means GE (imm+1).
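      // A purely illustrative example: with Imm1 == 5 and Imm2 == 4
      // (Diff == 1), a bgt on the second compare can become bge while its
      // immediate is bumped to 5, so both compares end up testing against 5.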
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge the two compares if the adjusted immediates are not
      // the same.
      if (NewImm2 != NewImm1)
        continue;
    }

    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(CMPI2->dump());
    LLVM_DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it into the
      // preceding block to handle the partially redundant case.
      if (SwapOperands) {
        Register Op1 = CMPI2->getOperand(1).getReg();
        Register Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

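      // If an operand of the compare is defined by a PHI in MBB2, the compare
      // cannot keep using that PHI once it is moved; rewrite the operand to
      // the value incoming from MBBtoMoveCmp.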
      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "A compare operand defined in this BB must be a PHI.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      DebugLoc DL = CMPI2->getDebugLoc();
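      // The branch remaining in MBB2 must see the compare result from
      // whichever predecessor it was reached through, so merge the two CR
      // results with a PHI and feed that to the branch.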
      Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate the compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
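    // Update the kill flags: the CR value consumed by BI2 is last used there,
    // while the CR value defined in MBB1 is still needed after BI1 (either by
    // BI2 directly or by the PHI created above), so BI1 must not kill it.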
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
                        << printMBBReference(*MBBtoMoveCmp)
                        << " to handle partial redundancy.\n");
      LLVM_DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}

// We miss the opportunity to emit an RLDIC when lowering jump tables
// since ISEL sees only a single basic block. When selecting, the clear
// and shift left will be in different blocks.
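// A hypothetical example of the kind of pair this collapses (virtual
// registers and immediates are illustrative only):
//   %1:g8rc = RLDICL %0:g8rc, 0, 32   ; clear bits [0, 32)
//   %2:g8rc = RLDICR %1:g8rc, 2, 61   ; shift left by 2, clear bits (61, 63]
// becomes
//   %2:g8rc = RLDIC  %0:g8rc, 2, 30   ; NewSH = 0 + 2, NewMB = 32 - 2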
bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
  if (MI.getOpcode() != PPC::RLDICR)
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  if (!Register::isVirtualRegister(SrcReg))
    return false;

  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
  if (SrcMI->getOpcode() != PPC::RLDICL)
    return false;

  MachineOperand MOpSHSrc = SrcMI->getOperand(2);
  MachineOperand MOpMBSrc = SrcMI->getOperand(3);
  MachineOperand MOpSHMI = MI.getOperand(2);
  MachineOperand MOpMEMI = MI.getOperand(3);
  if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
        MOpMEMI.isImm()))
    return false;

  uint64_t SHSrc = MOpSHSrc.getImm();
  uint64_t MBSrc = MOpMBSrc.getImm();
  uint64_t SHMI = MOpSHMI.getImm();
  uint64_t MEMI = MOpMEMI.getImm();
  uint64_t NewSH = SHSrc + SHMI;
  uint64_t NewMB = MBSrc - SHMI;
  if (NewMB > 63 || NewSH > 63)
    return false;

  // The bits cleared with RLDICL are [0, MBSrc).
  // The bits cleared with RLDICR are (MEMI, 63].
  // After the sequence, the bits cleared are:
  // [0, MBSrc-SHMI) and (MEMI, 63].
  //
  // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
  if ((63 - NewSH) != MEMI)
    return false;

  LLVM_DEBUG(dbgs() << "Converting pair: ");
  LLVM_DEBUG(SrcMI->dump());
  LLVM_DEBUG(MI.dump());

  MI.setDesc(TII->get(PPC::RLDIC));
  MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
  MI.getOperand(2).setImm(NewSH);
  MI.getOperand(3).setImm(NewMB);
  MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill());
  SrcMI->getOperand(1).setIsKill(false);

  LLVM_DEBUG(dbgs() << "To: ");
  LLVM_DEBUG(MI.dump());
  NumRotatesCollapsed++;
  // If SrcReg has no non-debug use left, it is safe to delete its def SrcMI.
  if (MRI->use_nodbg_empty(SrcReg)) {
    assert(!SrcMI->hasImplicitDef() &&
           "Not expecting an implicit def with this instr.");
    SrcMI->eraseFromParent();
  }
  return true;
}

// For LLVM IR such as:
// entry:
//   %iconv = sext i32 %index to i64
//   br i1 undef, label %true, label %false
// true:
//   %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
// ...
// PPCISelLowering::combineSHL fails to combine, because the sext and the shl
// are in different BBs during instruction selection. We can do a peephole
// optimization to combine these two instructions into extswsli after
// instruction selection.
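// A hypothetical machine-level sketch of the pair being combined (virtual
// registers and the shift amount are illustrative only):
//   %1:g8rc = EXTSW %0:g8rc            ; sign-extend word to doubleword
//   %2:g8rc = RLDICR %1:g8rc, 2, 61    ; SH + ME == 63, i.e. shift left by 2
// becomes
//   %2:g8rc = EXTSWSLI %0:g8rc, 2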
bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
                                      MachineInstr *&ToErase) {
  if (MI.getOpcode() != PPC::RLDICR)
    return false;

  if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
    return false;

  assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");

  MachineOperand MOpSHMI = MI.getOperand(2);
  MachineOperand MOpMEMI = MI.getOperand(3);
  if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
    return false;

  uint64_t SHMI = MOpSHMI.getImm();
  uint64_t MEMI = MOpMEMI.getImm();
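  // An RLDICR with SH + ME == 63 is the canonical encoding of sldi (a plain
  // shift left by SH); only that form can be folded into EXTSWSLI.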
  if (SHMI + MEMI != 63)
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  if (!Register::isVirtualRegister(SrcReg))
    return false;

  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
  if (SrcMI->getOpcode() != PPC::EXTSW &&
      SrcMI->getOpcode() != PPC::EXTSW_32_64)
    return false;

  // If the register defined by the extsw has more than one use, the
  // combination is not beneficial because the extsw must be kept.
  if (!MRI->hasOneNonDBGUse(SrcReg))
    return false;

  assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
  assert(SrcMI->getOperand(1).isReg() &&
         "EXTSW's second operand should be a register");
  if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg()))
    return false;

  LLVM_DEBUG(dbgs() << "Combining pair: ");
  LLVM_DEBUG(SrcMI->dump());
  LLVM_DEBUG(MI.dump());

  MachineInstr *NewInstr =
      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
              SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
                                               : TII->get(PPC::EXTSWSLI_32_64),
              MI.getOperand(0).getReg())
          .add(SrcMI->getOperand(1))
          .add(MOpSHMI);
  (void)NewInstr;

  LLVM_DEBUG(dbgs() << "TO: ");
  LLVM_DEBUG(NewInstr->dump());
  ++NumEXTSWAndSLDICombined;
  ToErase = &MI;
  // SrcMI, the extsw, is of no use now; erase it.
  SrcMI->eraseFromParent();
  return true;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }