PeepholeOptimizer.cpp revision 249423
1212793Sdim//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
2212793Sdim//
3212793Sdim//                     The LLVM Compiler Infrastructure
4212793Sdim//
5212793Sdim// This file is distributed under the University of Illinois Open Source
6212793Sdim// License. See LICENSE.TXT for details.
7212793Sdim//
8212793Sdim//===----------------------------------------------------------------------===//
9212793Sdim//
10212793Sdim// Perform peephole optimizations on the machine code:
11212793Sdim//
12212793Sdim// - Optimize Extensions
13212793Sdim//
14212793Sdim//     Optimization of sign / zero extension instructions. It may be extended to
15212793Sdim//     handle other instructions with similar properties.
16212793Sdim//
17212793Sdim//     On some targets, some instructions, e.g. X86 sign / zero extension, may
18212793Sdim//     leave the source value in the lower part of the result. This optimization
19212793Sdim//     will replace some uses of the pre-extension value with uses of the
20212793Sdim//     sub-register of the results.
21212793Sdim//
22212793Sdim// - Optimize Comparisons
23212793Sdim//
24212793Sdim//     Optimization of comparison instructions. For instance, in this code:
25212793Sdim//
26212793Sdim//       sub r1, 1
27212793Sdim//       cmp r1, 0
28212793Sdim//       bz  L1
29212793Sdim//
30212793Sdim//     If the "sub" instruction all ready sets (or could be modified to set) the
31212793Sdim//     same flag that the "cmp" instruction sets and that "bz" uses, then we can
32212793Sdim//     eliminate the "cmp" instruction.
33221345Sdim//
34239462Sdim//     Another instance, in this code:
35239462Sdim//
36239462Sdim//       sub r1, r3 | sub r1, imm
37239462Sdim//       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
38239462Sdim//       bge L1
39239462Sdim//
40239462Sdim//     If the branch instruction can use flag from "sub", then we can replace
41239462Sdim//     "sub" with "subs" and eliminate the "cmp" instruction.
42239462Sdim//
43221345Sdim// - Optimize Bitcast pairs:
44221345Sdim//
45221345Sdim//     v1 = bitcast v0
46221345Sdim//     v2 = bitcast v1
47221345Sdim//        = v2
48221345Sdim//   =>
49221345Sdim//     v1 = bitcast v0
50221345Sdim//        = v0
51234353Sdim//
52249423Sdim// - Optimize Loads:
53249423Sdim//
54249423Sdim//     Loads that can be folded into a later instruction. A load is foldable
55249423Sdim//     if it loads to virtual registers and the virtual register defined has
56249423Sdim//     a single use.
57212793Sdim//===----------------------------------------------------------------------===//
58212793Sdim
59212793Sdim#define DEBUG_TYPE "peephole-opt"
60212793Sdim#include "llvm/CodeGen/Passes.h"
61249423Sdim#include "llvm/ADT/DenseMap.h"
62249423Sdim#include "llvm/ADT/SmallPtrSet.h"
63249423Sdim#include "llvm/ADT/SmallSet.h"
64249423Sdim#include "llvm/ADT/Statistic.h"
65212793Sdim#include "llvm/CodeGen/MachineDominators.h"
66212793Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
67212793Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
68249423Sdim#include "llvm/Support/CommandLine.h"
69249423Sdim#include "llvm/Support/Debug.h"
70212793Sdim#include "llvm/Target/TargetInstrInfo.h"
71212793Sdim#include "llvm/Target/TargetRegisterInfo.h"
72212793Sdimusing namespace llvm;
73212793Sdim
74212793Sdim// Optimize Extensions
75212793Sdimstatic cl::opt<bool>
76212793SdimAggressive("aggressive-ext-opt", cl::Hidden,
77212793Sdim           cl::desc("Aggressive extension optimization"));
78212793Sdim
79218893Sdimstatic cl::opt<bool>
80218893SdimDisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
81218893Sdim                cl::desc("Disable the peephole optimizer"));
82218893Sdim
83212793SdimSTATISTIC(NumReuse,      "Number of extension results reused");
84221345SdimSTATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
85221345SdimSTATISTIC(NumCmps,       "Number of compares eliminated");
86234353SdimSTATISTIC(NumImmFold,    "Number of move immediate folded");
87239462SdimSTATISTIC(NumLoadFold,   "Number of loads folded");
88239462SdimSTATISTIC(NumSelects,    "Number of selects optimized");
89212793Sdim
90212793Sdimnamespace {
91212793Sdim  class PeepholeOptimizer : public MachineFunctionPass {
92212793Sdim    const TargetMachine   *TM;
93212793Sdim    const TargetInstrInfo *TII;
94212793Sdim    MachineRegisterInfo   *MRI;
95212793Sdim    MachineDominatorTree  *DT;  // Machine dominator tree
96212793Sdim
97212793Sdim  public:
98212793Sdim    static char ID; // Pass identification
99218893Sdim    PeepholeOptimizer() : MachineFunctionPass(ID) {
100218893Sdim      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
101218893Sdim    }
102212793Sdim
103212793Sdim    virtual bool runOnMachineFunction(MachineFunction &MF);
104212793Sdim
105212793Sdim    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
106212793Sdim      AU.setPreservesCFG();
107212793Sdim      MachineFunctionPass::getAnalysisUsage(AU);
108212793Sdim      if (Aggressive) {
109212793Sdim        AU.addRequired<MachineDominatorTree>();
110212793Sdim        AU.addPreserved<MachineDominatorTree>();
111212793Sdim      }
112212793Sdim    }
113212793Sdim
114212793Sdim  private:
115239462Sdim    bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
116239462Sdim    bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
117239462Sdim    bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
118212793Sdim                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
119239462Sdim    bool optimizeSelect(MachineInstr *MI);
120218893Sdim    bool isMoveImmediate(MachineInstr *MI,
121218893Sdim                         SmallSet<unsigned, 4> &ImmDefRegs,
122218893Sdim                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
123239462Sdim    bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
124218893Sdim                       SmallSet<unsigned, 4> &ImmDefRegs,
125218893Sdim                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
126239462Sdim    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
127212793Sdim  };
128212793Sdim}
129212793Sdim
130212793Sdimchar PeepholeOptimizer::ID = 0;
131234353Sdimchar &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
132218893SdimINITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
133218893Sdim                "Peephole Optimizations", false, false)
134218893SdimINITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
135218893SdimINITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
136218893Sdim                "Peephole Optimizations", false, false)
137212793Sdim
138239462Sdim/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
139212793Sdim/// a single register and writes a single register and it does not modify the
140212793Sdim/// source, and if the source value is preserved as a sub-register of the
141212793Sdim/// result, then replace all reachable uses of the source with the subreg of the
142212793Sdim/// result.
143234353Sdim///
144212793Sdim/// Do not generate an EXTRACT that is used only in a debug use, as this changes
145212793Sdim/// the code. Since this code does not currently share EXTRACTs, just ignore all
146212793Sdim/// debug uses.
147212793Sdimbool PeepholeOptimizer::
148239462SdimoptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
149212793Sdim                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
150212793Sdim  unsigned SrcReg, DstReg, SubIdx;
151212793Sdim  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
152212793Sdim    return false;
153234353Sdim
154212793Sdim  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
155212793Sdim      TargetRegisterInfo::isPhysicalRegister(SrcReg))
156212793Sdim    return false;
157212793Sdim
158239462Sdim  if (MRI->hasOneNonDBGUse(SrcReg))
159212793Sdim    // No other uses.
160212793Sdim    return false;
161212793Sdim
162239462Sdim  // Ensure DstReg can get a register class that actually supports
163239462Sdim  // sub-registers. Don't change the class until we commit.
164239462Sdim  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
165239462Sdim  DstRC = TM->getRegisterInfo()->getSubClassWithSubReg(DstRC, SubIdx);
166239462Sdim  if (!DstRC)
167239462Sdim    return false;
168239462Sdim
169239462Sdim  // The ext instr may be operating on a sub-register of SrcReg as well.
170239462Sdim  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
171239462Sdim  // register.
172239462Sdim  // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
173239462Sdim  // SrcReg:SubIdx should be replaced.
174239462Sdim  bool UseSrcSubIdx = TM->getRegisterInfo()->
175239462Sdim    getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;
176239462Sdim
177212793Sdim  // The source has other uses. See if we can replace the other uses with use of
178212793Sdim  // the result of the extension.
179212793Sdim  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
180239462Sdim  for (MachineRegisterInfo::use_nodbg_iterator
181239462Sdim       UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
182212793Sdim       UI != UE; ++UI)
183212793Sdim    ReachedBBs.insert(UI->getParent());
184212793Sdim
185212793Sdim  // Uses that are in the same BB of uses of the result of the instruction.
186212793Sdim  SmallVector<MachineOperand*, 8> Uses;
187212793Sdim
188212793Sdim  // Uses that the result of the instruction can reach.
189212793Sdim  SmallVector<MachineOperand*, 8> ExtendedUses;
190212793Sdim
191212793Sdim  bool ExtendLife = true;
192239462Sdim  for (MachineRegisterInfo::use_nodbg_iterator
193239462Sdim       UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end();
194212793Sdim       UI != UE; ++UI) {
195212793Sdim    MachineOperand &UseMO = UI.getOperand();
196212793Sdim    MachineInstr *UseMI = &*UI;
197212793Sdim    if (UseMI == MI)
198212793Sdim      continue;
199212793Sdim
200212793Sdim    if (UseMI->isPHI()) {
201212793Sdim      ExtendLife = false;
202212793Sdim      continue;
203212793Sdim    }
204212793Sdim
205239462Sdim    // Only accept uses of SrcReg:SubIdx.
206239462Sdim    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
207239462Sdim      continue;
208239462Sdim
209212793Sdim    // It's an error to translate this:
210212793Sdim    //
211212793Sdim    //    %reg1025 = <sext> %reg1024
212212793Sdim    //     ...
213212793Sdim    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
214212793Sdim    //
215212793Sdim    // into this:
216212793Sdim    //
217212793Sdim    //    %reg1025 = <sext> %reg1024
218212793Sdim    //     ...
219212793Sdim    //    %reg1027 = COPY %reg1025:4
220212793Sdim    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
221212793Sdim    //
222212793Sdim    // The problem here is that SUBREG_TO_REG is there to assert that an
223212793Sdim    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
224212793Sdim    // the COPY here, it will give us the value after the <sext>, not the
225212793Sdim    // original value of %reg1024 before <sext>.
226212793Sdim    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
227212793Sdim      continue;
228212793Sdim
229212793Sdim    MachineBasicBlock *UseMBB = UseMI->getParent();
230212793Sdim    if (UseMBB == MBB) {
231212793Sdim      // Local uses that come after the extension.
232212793Sdim      if (!LocalMIs.count(UseMI))
233212793Sdim        Uses.push_back(&UseMO);
234212793Sdim    } else if (ReachedBBs.count(UseMBB)) {
235212793Sdim      // Non-local uses where the result of the extension is used. Always
236212793Sdim      // replace these unless it's a PHI.
237212793Sdim      Uses.push_back(&UseMO);
238212793Sdim    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
239212793Sdim      // We may want to extend the live range of the extension result in order
240212793Sdim      // to replace these uses.
241212793Sdim      ExtendedUses.push_back(&UseMO);
242212793Sdim    } else {
243212793Sdim      // Both will be live out of the def MBB anyway. Don't extend live range of
244212793Sdim      // the extension result.
245212793Sdim      ExtendLife = false;
246212793Sdim      break;
247212793Sdim    }
248212793Sdim  }
249212793Sdim
250212793Sdim  if (ExtendLife && !ExtendedUses.empty())
251212793Sdim    // Extend the liveness of the extension result.
252212793Sdim    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
253212793Sdim              std::back_inserter(Uses));
254212793Sdim
255212793Sdim  // Now replace all uses.
256212793Sdim  bool Changed = false;
257212793Sdim  if (!Uses.empty()) {
258212793Sdim    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
259212793Sdim
260212793Sdim    // Look for PHI uses of the extended result, we don't want to extend the
261212793Sdim    // liveness of a PHI input. It breaks all kinds of assumptions down
262212793Sdim    // stream. A PHI use is expected to be the kill of its source values.
263212793Sdim    for (MachineRegisterInfo::use_nodbg_iterator
264239462Sdim         UI = MRI->use_nodbg_begin(DstReg), UE = MRI->use_nodbg_end();
265239462Sdim         UI != UE; ++UI)
266212793Sdim      if (UI->isPHI())
267212793Sdim        PHIBBs.insert(UI->getParent());
268212793Sdim
269212793Sdim    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
270212793Sdim    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
271212793Sdim      MachineOperand *UseMO = Uses[i];
272212793Sdim      MachineInstr *UseMI = UseMO->getParent();
273212793Sdim      MachineBasicBlock *UseMBB = UseMI->getParent();
274212793Sdim      if (PHIBBs.count(UseMBB))
275212793Sdim        continue;
276212793Sdim
277234353Sdim      // About to add uses of DstReg, clear DstReg's kill flags.
278239462Sdim      if (!Changed) {
279234353Sdim        MRI->clearKillFlags(DstReg);
280239462Sdim        MRI->constrainRegClass(DstReg, DstRC);
281239462Sdim      }
282234353Sdim
283212793Sdim      unsigned NewVR = MRI->createVirtualRegister(RC);
284239462Sdim      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
285239462Sdim                                   TII->get(TargetOpcode::COPY), NewVR)
286212793Sdim        .addReg(DstReg, 0, SubIdx);
287239462Sdim      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
288239462Sdim      if (UseSrcSubIdx) {
289239462Sdim        Copy->getOperand(0).setSubReg(SubIdx);
290239462Sdim        Copy->getOperand(0).setIsUndef();
291239462Sdim      }
292212793Sdim      UseMO->setReg(NewVR);
293212793Sdim      ++NumReuse;
294212793Sdim      Changed = true;
295212793Sdim    }
296212793Sdim  }
297212793Sdim
298212793Sdim  return Changed;
299212793Sdim}
300212793Sdim
301239462Sdim/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that
302221345Sdim/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
303221345Sdim/// a value cross register classes), and the source is defined by another
304221345Sdim/// bitcast instruction B. And if the register class of source of B matches
305221345Sdim/// the register class of instruction A, then it is legal to replace all uses
306221345Sdim/// of the def of A with source of B. e.g.
307221345Sdim///   %vreg0<def> = VMOVSR %vreg1
308221345Sdim///   %vreg3<def> = VMOVRS %vreg0
309221345Sdim///   Replace all uses of vreg3 with vreg1.
310221345Sdim
311239462Sdimbool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI,
312221345Sdim                                             MachineBasicBlock *MBB) {
313221345Sdim  unsigned NumDefs = MI->getDesc().getNumDefs();
314221345Sdim  unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
315221345Sdim  if (NumDefs != 1)
316221345Sdim    return false;
317221345Sdim
318221345Sdim  unsigned Def = 0;
319221345Sdim  unsigned Src = 0;
320221345Sdim  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
321221345Sdim    const MachineOperand &MO = MI->getOperand(i);
322221345Sdim    if (!MO.isReg())
323221345Sdim      continue;
324221345Sdim    unsigned Reg = MO.getReg();
325221345Sdim    if (!Reg)
326221345Sdim      continue;
327221345Sdim    if (MO.isDef())
328221345Sdim      Def = Reg;
329221345Sdim    else if (Src)
330221345Sdim      // Multiple sources?
331221345Sdim      return false;
332221345Sdim    else
333221345Sdim      Src = Reg;
334221345Sdim  }
335221345Sdim
336221345Sdim  assert(Def && Src && "Malformed bitcast instruction!");
337221345Sdim
338221345Sdim  MachineInstr *DefMI = MRI->getVRegDef(Src);
339234353Sdim  if (!DefMI || !DefMI->isBitcast())
340221345Sdim    return false;
341221345Sdim
342221345Sdim  unsigned SrcSrc = 0;
343221345Sdim  NumDefs = DefMI->getDesc().getNumDefs();
344221345Sdim  NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
345221345Sdim  if (NumDefs != 1)
346221345Sdim    return false;
347221345Sdim  for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
348221345Sdim    const MachineOperand &MO = DefMI->getOperand(i);
349221345Sdim    if (!MO.isReg() || MO.isDef())
350221345Sdim      continue;
351221345Sdim    unsigned Reg = MO.getReg();
352221345Sdim    if (!Reg)
353221345Sdim      continue;
354226633Sdim    if (!MO.isDef()) {
355226633Sdim      if (SrcSrc)
356226633Sdim        // Multiple sources?
357226633Sdim        return false;
358226633Sdim      else
359226633Sdim        SrcSrc = Reg;
360226633Sdim    }
361221345Sdim  }
362221345Sdim
363221345Sdim  if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
364221345Sdim    return false;
365221345Sdim
366221345Sdim  MRI->replaceRegWith(Def, SrcSrc);
367221345Sdim  MRI->clearKillFlags(SrcSrc);
368221345Sdim  MI->eraseFromParent();
369221345Sdim  ++NumBitcasts;
370221345Sdim  return true;
371221345Sdim}
372221345Sdim
373239462Sdim/// optimizeCmpInstr - If the instruction is a compare and the previous
374212793Sdim/// instruction it's comparing against all ready sets (or could be modified to
375212793Sdim/// set) the same flag as the compare, then we can remove the comparison and use
376212793Sdim/// the flag from the previous instruction.
377239462Sdimbool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
378221345Sdim                                         MachineBasicBlock *MBB) {
379212793Sdim  // If this instruction is a comparison against zero and isn't comparing a
380212793Sdim  // physical register, we can try to optimize it.
381239462Sdim  unsigned SrcReg, SrcReg2;
382218893Sdim  int CmpMask, CmpValue;
383239462Sdim  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
384239462Sdim      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
385239462Sdim      (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
386212793Sdim    return false;
387212793Sdim
388218893Sdim  // Attempt to optimize the comparison instruction.
389239462Sdim  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
390221345Sdim    ++NumCmps;
391212793Sdim    return true;
392212793Sdim  }
393212793Sdim
394212793Sdim  return false;
395212793Sdim}
396212793Sdim
397239462Sdim/// Optimize a select instruction.
398239462Sdimbool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
399239462Sdim  unsigned TrueOp = 0;
400239462Sdim  unsigned FalseOp = 0;
401239462Sdim  bool Optimizable = false;
402239462Sdim  SmallVector<MachineOperand, 4> Cond;
403239462Sdim  if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
404239462Sdim    return false;
405239462Sdim  if (!Optimizable)
406239462Sdim    return false;
407239462Sdim  if (!TII->optimizeSelect(MI))
408239462Sdim    return false;
409239462Sdim  MI->eraseFromParent();
410239462Sdim  ++NumSelects;
411239462Sdim  return true;
412239462Sdim}
413239462Sdim
414239462Sdim/// isLoadFoldable - Check whether MI is a candidate for folding into a later
415239462Sdim/// instruction. We only fold loads to virtual registers and the virtual
416239462Sdim/// register defined has a single use.
417239462Sdimbool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
418239462Sdim                                       unsigned &FoldAsLoadDefReg) {
419239462Sdim  if (!MI->canFoldAsLoad() || !MI->mayLoad())
420239462Sdim    return false;
421239462Sdim  const MCInstrDesc &MCID = MI->getDesc();
422239462Sdim  if (MCID.getNumDefs() != 1)
423239462Sdim    return false;
424239462Sdim
425239462Sdim  unsigned Reg = MI->getOperand(0).getReg();
426239462Sdim  // To reduce compilation time, we check MRI->hasOneUse when inserting
427239462Sdim  // loads. It should be checked when processing uses of the load, since
428239462Sdim  // uses can be removed during peephole.
429239462Sdim  if (!MI->getOperand(0).getSubReg() &&
430239462Sdim      TargetRegisterInfo::isVirtualRegister(Reg) &&
431239462Sdim      MRI->hasOneUse(Reg)) {
432239462Sdim    FoldAsLoadDefReg = Reg;
433239462Sdim    return true;
434239462Sdim  }
435239462Sdim  return false;
436239462Sdim}
437239462Sdim
438218893Sdimbool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
439218893Sdim                                        SmallSet<unsigned, 4> &ImmDefRegs,
440218893Sdim                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
441224145Sdim  const MCInstrDesc &MCID = MI->getDesc();
442234353Sdim  if (!MI->isMoveImmediate())
443218893Sdim    return false;
444224145Sdim  if (MCID.getNumDefs() != 1)
445218893Sdim    return false;
446218893Sdim  unsigned Reg = MI->getOperand(0).getReg();
447218893Sdim  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
448218893Sdim    ImmDefMIs.insert(std::make_pair(Reg, MI));
449218893Sdim    ImmDefRegs.insert(Reg);
450218893Sdim    return true;
451218893Sdim  }
452234353Sdim
453218893Sdim  return false;
454218893Sdim}
455218893Sdim
456239462Sdim/// foldImmediate - Try folding register operands that are defined by move
457218893Sdim/// immediate instructions, i.e. a trivial constant folding optimization, if
458218893Sdim/// and only if the def and use are in the same BB.
459239462Sdimbool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
460218893Sdim                                      SmallSet<unsigned, 4> &ImmDefRegs,
461218893Sdim                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
462218893Sdim  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
463218893Sdim    MachineOperand &MO = MI->getOperand(i);
464218893Sdim    if (!MO.isReg() || MO.isDef())
465218893Sdim      continue;
466218893Sdim    unsigned Reg = MO.getReg();
467218893Sdim    if (!TargetRegisterInfo::isVirtualRegister(Reg))
468218893Sdim      continue;
469218893Sdim    if (ImmDefRegs.count(Reg) == 0)
470218893Sdim      continue;
471218893Sdim    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
472218893Sdim    assert(II != ImmDefMIs.end());
473218893Sdim    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
474218893Sdim      ++NumImmFold;
475218893Sdim      return true;
476218893Sdim    }
477218893Sdim  }
478218893Sdim  return false;
479218893Sdim}
480218893Sdim
481212793Sdimbool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
482249423Sdim  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
483249423Sdim  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
484249423Sdim
485218893Sdim  if (DisablePeephole)
486218893Sdim    return false;
487234353Sdim
488212793Sdim  TM  = &MF.getTarget();
489212793Sdim  TII = TM->getInstrInfo();
490212793Sdim  MRI = &MF.getRegInfo();
491212793Sdim  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
492212793Sdim
493212793Sdim  bool Changed = false;
494212793Sdim
495212793Sdim  SmallPtrSet<MachineInstr*, 8> LocalMIs;
496218893Sdim  SmallSet<unsigned, 4> ImmDefRegs;
497218893Sdim  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
498239462Sdim  unsigned FoldAsLoadDefReg;
499212793Sdim  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
500212793Sdim    MachineBasicBlock *MBB = &*I;
501234353Sdim
502218893Sdim    bool SeenMoveImm = false;
503212793Sdim    LocalMIs.clear();
504218893Sdim    ImmDefRegs.clear();
505218893Sdim    ImmDefMIs.clear();
506239462Sdim    FoldAsLoadDefReg = 0;
507212793Sdim
508212793Sdim    for (MachineBasicBlock::iterator
509218893Sdim           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
510212793Sdim      MachineInstr *MI = &*MII;
511239462Sdim      // We may be erasing MI below, increment MII now.
512239462Sdim      ++MII;
513218893Sdim      LocalMIs.insert(MI);
514212793Sdim
515239462Sdim      // If there exists an instruction which belongs to the following
516239462Sdim      // categories, we will discard the load candidate.
517218893Sdim      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
518218893Sdim          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
519218893Sdim          MI->hasUnmodeledSideEffects()) {
520239462Sdim        FoldAsLoadDefReg = 0;
521218893Sdim        continue;
522218893Sdim      }
523239462Sdim      if (MI->mayStore() || MI->isCall())
524239462Sdim        FoldAsLoadDefReg = 0;
525218893Sdim
526239462Sdim      if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) ||
527239462Sdim          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
528239462Sdim          (MI->isSelect() && optimizeSelect(MI))) {
529239462Sdim        // MI is deleted.
530239462Sdim        LocalMIs.erase(MI);
531239462Sdim        Changed = true;
532239462Sdim        continue;
533218893Sdim      }
534218893Sdim
535218893Sdim      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
536218893Sdim        SeenMoveImm = true;
537212793Sdim      } else {
538239462Sdim        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
539243830Sdim        // optimizeExtInstr might have created new instructions after MI
540243830Sdim        // and before the already incremented MII. Adjust MII so that the
541243830Sdim        // next iteration sees the new instructions.
542243830Sdim        MII = MI;
543243830Sdim        ++MII;
544218893Sdim        if (SeenMoveImm)
545239462Sdim          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
546212793Sdim      }
547218893Sdim
548239462Sdim      // Check whether MI is a load candidate for folding into a later
549239462Sdim      // instruction. If MI is not a candidate, check whether we can fold an
550239462Sdim      // earlier load into MI.
551239462Sdim      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
552239462Sdim        // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
553239462Sdim        // can enable folding by converting SUB to CMP.
554239462Sdim        MachineInstr *DefMI = 0;
555239462Sdim        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
556239462Sdim                                                      FoldAsLoadDefReg, DefMI);
557239462Sdim        if (FoldMI) {
558239462Sdim          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
559249423Sdim          DEBUG(dbgs() << "Replacing: " << *MI);
560249423Sdim          DEBUG(dbgs() << "     With: " << *FoldMI);
561239462Sdim          LocalMIs.erase(MI);
562239462Sdim          LocalMIs.erase(DefMI);
563239462Sdim          LocalMIs.insert(FoldMI);
564239462Sdim          MI->eraseFromParent();
565239462Sdim          DefMI->eraseFromParent();
566239462Sdim          ++NumLoadFold;
567239462Sdim
568239462Sdim          // MI is replaced with FoldMI.
569239462Sdim          Changed = true;
570239462Sdim          continue;
571239462Sdim        }
572239462Sdim      }
573212793Sdim    }
574212793Sdim  }
575212793Sdim
576212793Sdim  return Changed;
577212793Sdim}
578