//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"

using namespace llvm;

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
           cl::desc("Widen ARM vmovs to vmovd when possible"));

static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
     cl::Hidden, cl::init(12),
     cl::desc("Clearance before partial register updates"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;     // MLA / MLS opcode
  uint16_t MulOpc;     // Expanded multiplication opcode
  uint16_t AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the acc is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};
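// For example, ARM::VMLAS computes Sd = Sd + Sn * Sm; when the fused form
// would cause a pipeline hazard, the MLx expansion logic can use this table to
// rewrite it as a VMULS followed by a VADDS (negating the accumulator first
// for the VNML* entries where NegAcc is set).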

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      assert(false && "Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrInfo
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
                             const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II = TM->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3 : {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
  TBB = 0;
  FBB = 0;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(I) || I->isTerminator()) {

    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up on the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values and predicated nonterminators.
    while (I->isDebugValue() || !I->isTerminator()) {
      if (I == MBB.begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = !isPredicated(I);
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    //                returns.
    if (!isPredicated(I) &&
          (isUncondBranchOpcode(I->getOpcode()) ||
           isIndirectBranchOpcode(I->getOpcode()) ||
           isJumpTableBranchOpcode(I->getOpcode()) ||
           I->isReturn())) {
      // Forget any previous conditional branch information - it no longer
      // applies.
      Cond.clear();
      FBB = 0;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = llvm::next(I);
        while (DI != MBB.end()) {
          MachineInstr *InstToDelete = DI;
          ++DI;
          InstToDelete->eraseFromParent();
        }
      }
    }

    if (CantAnalyze)
      return true;

    if (I == MBB.begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");

  if (FBB == 0) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
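  // A bundle is considered predicated if any instruction inside it carries a
  // real (non-AL) predicate.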
  if (MI->isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
      .addImm(Pred[0].getImm())
      .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

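  // CC1 subsumes CC2 when every flags state that satisfies CC2 also satisfies
  // CC1, e.g. HS (unsigned >=) subsumes HI (unsigned >).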
  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  if (!MI->isPredicable())
    return false;

  ARMFunctionInfo *AFI =
    MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();

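  // In Thumb2 with v8's restricted IT blocks, only instructions eligible for a
  // v8 IT block may be predicated; in ARM mode, NEON-domain instructions
  // cannot be predicated at all.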
  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(MI);
  } else { // non-Thumb
    if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
      return false;
  }

  return true;
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  if (MI->isLabel())
    return 0;
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
    // entry is one byte; for TBH, two bytes each.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != 0);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
    // aligned. The assembler / linker may add 2 byte padding just before
    // the JT entries.  The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) *2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }
  default:
    // Otherwise, pseudo-instruction sizes are zero.
    return 0;
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += GetInstSizeInBytes(&*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                    .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VORRq;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  }
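  // A Spacing of 2 steps the sub-register index by two, matching the spaced
  // tuple classes (DPairSpc and friends) that use every other D register.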

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = AddDefaultCC(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

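/// AddDReg - Append Reg (or its SubIdx sub-register) to MIB with the given
/// register state. Physical registers are narrowed to the concrete
/// sub-register; virtual registers keep the sub-register index on the operand.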
const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

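  // Select the store instruction by the register class's spill size in bytes:
  // 4 (GPR/SPR), 8 (DPR and GPR pairs), 16/24/32 (D/Q tuples), 64 (QQQQ).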
  switch (RC->getSize()) {
    case 4:
      if (ARM::GPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 8:
      if (ARM::DPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
        if (Subtarget.hasV5TEOps()) {
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

          AddDefaultPred(MIB);
        } else {
          // Fallback to STM instruction, which has existed since the dawn of
          // time.
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
                             .addFrameIndex(FI).addMemOperand(MMO));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 16:
      if (ARM::DPairRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 24:
      if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 32:
      if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
          ARM::DQuadRegClass.hasSubClassEq(RC)) {
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          // FIXME: It's possible to only store part of the QQ register if the
          // spilled def has a sub-register index.
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
                AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 64:
      if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                         .addFrameIndex(FI))
                         .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
              AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    default:
      llvm_unreachable("Unknown reg class!");
  }
}

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

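  // Mirrors storeRegToStackSlot: select the reload instruction by the register
  // class's spill size in bytes.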
991226890Sdim  switch (RC->getSize()) {
992226890Sdim  case 4:
993226890Sdim    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
994226890Sdim      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
995226890Sdim                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
996204642Srdivacky
997226890Sdim    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
998226890Sdim      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
999218893Sdim                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
1000226890Sdim    } else
1001226890Sdim      llvm_unreachable("Unknown reg class!");
1002210299Sed    break;
1003226890Sdim  case 8:
1004226890Sdim    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
1005226890Sdim      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
1006208599Srdivacky                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
1007245431Sdim    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
1008252723Sdim      MachineInstrBuilder MIB;
1009252723Sdim
1010252723Sdim      if (Subtarget.hasV5TEOps()) {
1011252723Sdim        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
1012252723Sdim        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1013252723Sdim        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1014252723Sdim        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
1015252723Sdim
1016252723Sdim        AddDefaultPred(MIB);
1017252723Sdim      } else {
1018252723Sdim        // Fallback to LDM instruction, which has existed since the dawn of
1019252723Sdim        // time.
1020252723Sdim        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
1021252723Sdim                                 .addFrameIndex(FI).addMemOperand(MMO));
1022252723Sdim        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
1023252723Sdim        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
1024252723Sdim      }
1025252723Sdim
1026245431Sdim      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1027245431Sdim        MIB.addReg(DestReg, RegState::ImplicitDefine);
1028226890Sdim    } else
1029226890Sdim      llvm_unreachable("Unknown reg class!");
1030210299Sed    break;
1031226890Sdim  case 16:
1032235633Sdim    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
1033235633Sdim      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1034235633Sdim        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
1035210299Sed                     .addFrameIndex(FI).addImm(16)
1036199989Srdivacky                     .addMemOperand(MMO));
1037226890Sdim      } else {
1038226890Sdim        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
1039226890Sdim                       .addFrameIndex(FI)
1040226890Sdim                       .addMemOperand(MMO));
1041226890Sdim      }
1042226890Sdim    } else
1043226890Sdim      llvm_unreachable("Unknown reg class!");
1044210299Sed    break;
1045245431Sdim  case 24:
1046245431Sdim    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
1047226890Sdim      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1048245431Sdim        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
1049245431Sdim                     .addFrameIndex(FI).addImm(16)
1050245431Sdim                     .addMemOperand(MMO));
1051245431Sdim      } else {
1052245431Sdim        MachineInstrBuilder MIB =
1053245431Sdim          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1054245431Sdim                         .addFrameIndex(FI)
1055245431Sdim                         .addMemOperand(MMO));
1056245431Sdim        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1057245431Sdim        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1058245431Sdim        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1059245431Sdim        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1060245431Sdim          MIB.addReg(DestReg, RegState::ImplicitDefine);
1061245431Sdim      }
1062245431Sdim    } else
1063245431Sdim      llvm_unreachable("Unknown reg class!");
1064245431Sdim    break;
1065245431Sdim   case 32:
1066245431Sdim    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
1067245431Sdim      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
1068226890Sdim        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
1069218893Sdim                     .addFrameIndex(FI).addImm(16)
1070218893Sdim                     .addMemOperand(MMO));
1071226890Sdim      } else {
1072226890Sdim        MachineInstrBuilder MIB =
1073218893Sdim        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1074218893Sdim                       .addFrameIndex(FI))
1075226890Sdim                       .addMemOperand(MMO);
1076235633Sdim        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1077235633Sdim        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1078235633Sdim        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1079235633Sdim        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1080235633Sdim        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1081235633Sdim          MIB.addReg(DestReg, RegState::ImplicitDefine);
1082226890Sdim      }
1083226890Sdim    } else
1084226890Sdim      llvm_unreachable("Unknown reg class!");
1085226890Sdim    break;
1086226890Sdim  case 64:
1087226890Sdim    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
1088226890Sdim      MachineInstrBuilder MIB =
1089226890Sdim      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
1090226890Sdim                     .addFrameIndex(FI))
1091226890Sdim                     .addMemOperand(MMO);
1092235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
1093235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
1094235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
1095235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
1096235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
1097235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
1098235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
1099235633Sdim      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
1100235633Sdim      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
1101235633Sdim        MIB.addReg(DestReg, RegState::ImplicitDefine);
1102226890Sdim    } else
1103226890Sdim      llvm_unreachable("Unknown reg class!");
1104210299Sed    break;
1105210299Sed  default:
1106210299Sed    llvm_unreachable("Unknown regclass!");
1107210299Sed  }
1108198090Srdivacky}
1109198090Srdivacky
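// A rough illustration of what this matcher accepts (MI syntax is only a
// sketch, not verbatim output): "%R0<def> = LDRi12 <fi#2>, 0, pred:14,
// pred:%noreg" is reported as a stack-slot load, returning %R0 and setting
// FrameIndex to 2. Loads with a register offset or a nonzero immediate
// offset are not treated as simple stack-slot loads.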
1110218893Sdimunsigned
1111218893SdimARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
1112218893Sdim                                      int &FrameIndex) const {
1113218893Sdim  switch (MI->getOpcode()) {
1114218893Sdim  default: break;
1115218893Sdim  case ARM::LDRrs:
1116218893Sdim  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
1117218893Sdim    if (MI->getOperand(1).isFI() &&
1118218893Sdim        MI->getOperand(2).isReg() &&
1119218893Sdim        MI->getOperand(3).isImm() &&
1120218893Sdim        MI->getOperand(2).getReg() == 0 &&
1121218893Sdim        MI->getOperand(3).getImm() == 0) {
1122218893Sdim      FrameIndex = MI->getOperand(1).getIndex();
1123218893Sdim      return MI->getOperand(0).getReg();
1124218893Sdim    }
1125218893Sdim    break;
1126218893Sdim  case ARM::LDRi12:
1127218893Sdim  case ARM::t2LDRi12:
1128224145Sdim  case ARM::tLDRspi:
1129218893Sdim  case ARM::VLDRD:
1130218893Sdim  case ARM::VLDRS:
1131218893Sdim    if (MI->getOperand(1).isFI() &&
1132218893Sdim        MI->getOperand(2).isImm() &&
1133218893Sdim        MI->getOperand(2).getImm() == 0) {
1134218893Sdim      FrameIndex = MI->getOperand(1).getIndex();
1135218893Sdim      return MI->getOperand(0).getReg();
1136218893Sdim    }
1137218893Sdim    break;
1138235633Sdim  case ARM::VLD1q64:
1139245431Sdim  case ARM::VLD1d64TPseudo:
1140245431Sdim  case ARM::VLD1d64QPseudo:
1141218893Sdim    if (MI->getOperand(1).isFI() &&
1142218893Sdim        MI->getOperand(0).getSubReg() == 0) {
1143218893Sdim      FrameIndex = MI->getOperand(1).getIndex();
1144218893Sdim      return MI->getOperand(0).getReg();
1145218893Sdim    }
1146218893Sdim    break;
1147218893Sdim  case ARM::VLDMQIA:
1148218893Sdim    if (MI->getOperand(1).isFI() &&
1149218893Sdim        MI->getOperand(0).getSubReg() == 0) {
1150218893Sdim      FrameIndex = MI->getOperand(1).getIndex();
1151218893Sdim      return MI->getOperand(0).getReg();
1152218893Sdim    }
1153218893Sdim    break;
1154218893Sdim  }
1155218893Sdim
1156218893Sdim  return 0;
1157218893Sdim}
1158218893Sdim
1159226890Sdimunsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
1160226890Sdim                                             int &FrameIndex) const {
1161226890Sdim  const MachineMemOperand *Dummy;
1162235633Sdim  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
1163226890Sdim}
1164226890Sdim
1165226890Sdimbool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
1166226890Sdim  // This hook gets to expand COPY instructions before they become
1167226890Sdim  // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
1168226890Sdim  // widened to VMOVD.  We prefer the VMOVD when possible because it may be
1169226890Sdim  // changed into a VORR that can go down the NEON pipeline.
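  // An illustrative sketch of the rewrite (not verbatim MI syntax): a copy
  // between even S-registers such as
  //   %S0<def> = COPY %S2
  // becomes
  //   %D0<def> = VMOVD %D1, pred:14, pred:%noreg, %S2<imp-use>
  // where the implicit use of %S2 records that only the low half of %D1 is
  // known to hold a meaningful value.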
1170252723Sdim  if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
1171226890Sdim    return false;
1172226890Sdim
1173226890Sdim  // Look for a copy between even S-registers.  That is where we keep floats
1174226890Sdim  // when using NEON v2f32 instructions for f32 arithmetic.
1175226890Sdim  unsigned DstRegS = MI->getOperand(0).getReg();
1176226890Sdim  unsigned SrcRegS = MI->getOperand(1).getReg();
1177226890Sdim  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
1178226890Sdim    return false;
1179226890Sdim
1180226890Sdim  const TargetRegisterInfo *TRI = &getRegisterInfo();
1181226890Sdim  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
1182226890Sdim                                              &ARM::DPRRegClass);
1183226890Sdim  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
1184226890Sdim                                              &ARM::DPRRegClass);
1185226890Sdim  if (!DstRegD || !SrcRegD)
1186226890Sdim    return false;
1187226890Sdim
1188226890Sdim  // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
1189226890Sdim  // legal if the COPY already defines the full DstRegD, and it isn't a
1190226890Sdim  // sub-register insertion.
1191226890Sdim  if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
1192226890Sdim    return false;
1193226890Sdim
1194226890Sdim  // A dead copy shouldn't show up here, but reject it just in case.
1195226890Sdim  if (MI->getOperand(0).isDead())
1196226890Sdim    return false;
1197226890Sdim
1198226890Sdim  // All clear, widen the COPY.
1199226890Sdim  DEBUG(dbgs() << "widening:    " << *MI);
1200252723Sdim  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
1201226890Sdim
1202226890Sdim  // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
1203226890Sdim  // or some other super-register.
1204226890Sdim  int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
1205226890Sdim  if (ImpDefIdx != -1)
1206226890Sdim    MI->RemoveOperand(ImpDefIdx);
1207226890Sdim
1208226890Sdim  // Change the opcode and operands.
1209226890Sdim  MI->setDesc(get(ARM::VMOVD));
1210226890Sdim  MI->getOperand(0).setReg(DstRegD);
1211226890Sdim  MI->getOperand(1).setReg(SrcRegD);
1212252723Sdim  AddDefaultPred(MIB);
1213226890Sdim
1214226890Sdim  // We are now reading SrcRegD instead of SrcRegS.  This may upset the
1215226890Sdim  // register scavenger and machine verifier, so we need to indicate that we
1216226890Sdim  // are reading an undefined value from SrcRegD, but a proper value from
1217226890Sdim  // SrcRegS.
1218226890Sdim  MI->getOperand(1).setIsUndef();
1219252723Sdim  MIB.addReg(SrcRegS, RegState::Implicit);
1220226890Sdim
1221226890Sdim  // SrcRegD may actually contain an unrelated value in the ssub_1
1222226890Sdim  // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
1223226890Sdim  if (MI->getOperand(1).isKill()) {
1224226890Sdim    MI->getOperand(1).setIsKill(false);
1225226890Sdim    MI->addRegisterKilled(SrcRegS, TRI, true);
1226226890Sdim  }
1227226890Sdim
1228226890Sdim  DEBUG(dbgs() << "replaced by: " << *MI);
1229226890Sdim  return true;
1230226890Sdim}
1231226890Sdim
1232202375Srdivacky/// Create a copy of a const pool value. Update CPI to the new index and return
1233202375Srdivacky/// the label UID.
1234202375Srdivackystatic unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
1235202375Srdivacky  MachineConstantPool *MCP = MF.getConstantPool();
1236202375Srdivacky  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1237202375Srdivacky
1238202375Srdivacky  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
1239202375Srdivacky  assert(MCPE.isMachineConstantPoolEntry() &&
1240202375Srdivacky         "Expecting a machine constantpool entry!");
1241202375Srdivacky  ARMConstantPoolValue *ACPV =
1242202375Srdivacky    static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
1243202375Srdivacky
1244218893Sdim  unsigned PCLabelId = AFI->createPICLabelUId();
1245202375Srdivacky  ARMConstantPoolValue *NewCPV = 0;
1246212904Sdim  // FIXME: The below assumes PIC relocation model and that the function
1247212904Sdim  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
1248212904Sdim  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
1249212904Sdim  // instructions, so that's probably OK, but is PIC always correct when
1250212904Sdim  // we get here?
1251202375Srdivacky  if (ACPV->isGlobalValue())
1252226890Sdim    NewCPV = ARMConstantPoolConstant::
1253226890Sdim      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
1254226890Sdim             ARMCP::CPValue, 4);
1255202375Srdivacky  else if (ACPV->isExtSymbol())
1256226890Sdim    NewCPV = ARMConstantPoolSymbol::
1257226890Sdim      Create(MF.getFunction()->getContext(),
1258226890Sdim             cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
1259202375Srdivacky  else if (ACPV->isBlockAddress())
1260226890Sdim    NewCPV = ARMConstantPoolConstant::
1261226890Sdim      Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
1262226890Sdim             ARMCP::CPBlockAddress, 4);
1263212904Sdim  else if (ACPV->isLSDA())
1264226890Sdim    NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
1265226890Sdim                                             ARMCP::CPLSDA, 4);
1266226890Sdim  else if (ACPV->isMachineBasicBlock())
1267226890Sdim    NewCPV = ARMConstantPoolMBB::
1268226890Sdim      Create(MF.getFunction()->getContext(),
1269226890Sdim             cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
1270202375Srdivacky  else
1271202375Srdivacky    llvm_unreachable("Unexpected ARM constantpool value type!!");
1272202375Srdivacky  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
1273202375Srdivacky  return PCLabelId;
1274202375Srdivacky}
1275202375Srdivacky
1276199481Srdivackyvoid ARMBaseInstrInfo::
1277199481SrdivackyreMaterialize(MachineBasicBlock &MBB,
1278199481Srdivacky              MachineBasicBlock::iterator I,
1279199481Srdivacky              unsigned DestReg, unsigned SubIdx,
1280199481Srdivacky              const MachineInstr *Orig,
1281210299Sed              const TargetRegisterInfo &TRI) const {
1282199481Srdivacky  unsigned Opcode = Orig->getOpcode();
1283199481Srdivacky  switch (Opcode) {
1284199481Srdivacky  default: {
1285199481Srdivacky    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
1286210299Sed    MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
1287199481Srdivacky    MBB.insert(I, MI);
1288199481Srdivacky    break;
1289199481Srdivacky  }
1290199481Srdivacky  case ARM::tLDRpci_pic:
1291199481Srdivacky  case ARM::t2LDRpci_pic: {
1292199481Srdivacky    MachineFunction &MF = *MBB.getParent();
1293199481Srdivacky    unsigned CPI = Orig->getOperand(1).getIndex();
1294202375Srdivacky    unsigned PCLabelId = duplicateCPV(MF, CPI);
1295199481Srdivacky    MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
1296199481Srdivacky                                      DestReg)
1297199481Srdivacky      .addConstantPoolIndex(CPI).addImm(PCLabelId);
1298221345Sdim    MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
1299199481Srdivacky    break;
1300199481Srdivacky  }
1301199481Srdivacky  }
1302199481Srdivacky}
1303199481Srdivacky
1304202375SrdivackyMachineInstr *
1305202375SrdivackyARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
1306252723Sdim  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
1307202375Srdivacky  switch(Orig->getOpcode()) {
1308202375Srdivacky  case ARM::tLDRpci_pic:
1309202375Srdivacky  case ARM::t2LDRpci_pic: {
1310202375Srdivacky    unsigned CPI = Orig->getOperand(1).getIndex();
1311202375Srdivacky    unsigned PCLabelId = duplicateCPV(MF, CPI);
1312202375Srdivacky    Orig->getOperand(1).setIndex(CPI);
1313202375Srdivacky    Orig->getOperand(2).setImm(PCLabelId);
1314202375Srdivacky    break;
1315202375Srdivacky  }
1316202375Srdivacky  }
1317202375Srdivacky  return MI;
1318202375Srdivacky}
1319202375Srdivacky
1320204642Srdivackybool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
1321218893Sdim                                        const MachineInstr *MI1,
1322218893Sdim                                        const MachineRegisterInfo *MRI) const {
1323199481Srdivacky  int Opcode = MI0->getOpcode();
1324199989Srdivacky  if (Opcode == ARM::t2LDRpci ||
1325199989Srdivacky      Opcode == ARM::t2LDRpci_pic ||
1326199989Srdivacky      Opcode == ARM::tLDRpci ||
1327218893Sdim      Opcode == ARM::tLDRpci_pic ||
1328218893Sdim      Opcode == ARM::MOV_ga_dyn ||
1329218893Sdim      Opcode == ARM::MOV_ga_pcrel ||
1330218893Sdim      Opcode == ARM::MOV_ga_pcrel_ldr ||
1331218893Sdim      Opcode == ARM::t2MOV_ga_dyn ||
1332218893Sdim      Opcode == ARM::t2MOV_ga_pcrel) {
1333199481Srdivacky    if (MI1->getOpcode() != Opcode)
1334199481Srdivacky      return false;
1335199481Srdivacky    if (MI0->getNumOperands() != MI1->getNumOperands())
1336199481Srdivacky      return false;
1337199481Srdivacky
1338199481Srdivacky    const MachineOperand &MO0 = MI0->getOperand(1);
1339199481Srdivacky    const MachineOperand &MO1 = MI1->getOperand(1);
1340199481Srdivacky    if (MO0.getOffset() != MO1.getOffset())
1341199481Srdivacky      return false;
1342199481Srdivacky
1343218893Sdim    if (Opcode == ARM::MOV_ga_dyn ||
1344218893Sdim        Opcode == ARM::MOV_ga_pcrel ||
1345218893Sdim        Opcode == ARM::MOV_ga_pcrel_ldr ||
1346218893Sdim        Opcode == ARM::t2MOV_ga_dyn ||
1347218893Sdim        Opcode == ARM::t2MOV_ga_pcrel)
1348218893Sdim      // Ignore the PC labels.
1349218893Sdim      return MO0.getGlobal() == MO1.getGlobal();
1350218893Sdim
1351199481Srdivacky    const MachineFunction *MF = MI0->getParent()->getParent();
1352199481Srdivacky    const MachineConstantPool *MCP = MF->getConstantPool();
1353199481Srdivacky    int CPI0 = MO0.getIndex();
1354199481Srdivacky    int CPI1 = MO1.getIndex();
1355199481Srdivacky    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
1356199481Srdivacky    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
1357221345Sdim    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
1358221345Sdim    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
1359221345Sdim    if (isARMCP0 && isARMCP1) {
1360221345Sdim      ARMConstantPoolValue *ACPV0 =
1361221345Sdim        static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
1362221345Sdim      ARMConstantPoolValue *ACPV1 =
1363221345Sdim        static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
1364221345Sdim      return ACPV0->hasSameValue(ACPV1);
1365221345Sdim    } else if (!isARMCP0 && !isARMCP1) {
1366221345Sdim      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
1367221345Sdim    }
1368221345Sdim    return false;
1369218893Sdim  } else if (Opcode == ARM::PICLDR) {
1370218893Sdim    if (MI1->getOpcode() != Opcode)
1371218893Sdim      return false;
1372218893Sdim    if (MI0->getNumOperands() != MI1->getNumOperands())
1373218893Sdim      return false;
1374218893Sdim
1375218893Sdim    unsigned Addr0 = MI0->getOperand(1).getReg();
1376218893Sdim    unsigned Addr1 = MI1->getOperand(1).getReg();
1377218893Sdim    if (Addr0 != Addr1) {
1378218893Sdim      if (!MRI ||
1379218893Sdim          !TargetRegisterInfo::isVirtualRegister(Addr0) ||
1380218893Sdim          !TargetRegisterInfo::isVirtualRegister(Addr1))
1381218893Sdim        return false;
1382218893Sdim
1383218893Sdim      // This assumes SSA form.
1384218893Sdim      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
1385218893Sdim      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
1386218893Sdim      // Check if the loaded values, e.g. a constantpool or a global address,
1387218893Sdim      // are the same.
1388218893Sdim      if (!produceSameValue(Def0, Def1, MRI))
1389218893Sdim        return false;
1390218893Sdim    }
1391218893Sdim
1392218893Sdim    for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
1393218893Sdim      // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
1394218893Sdim      const MachineOperand &MO0 = MI0->getOperand(i);
1395218893Sdim      const MachineOperand &MO1 = MI1->getOperand(i);
1396218893Sdim      if (!MO0.isIdenticalTo(MO1))
1397218893Sdim        return false;
1398218893Sdim    }
1399218893Sdim    return true;
1400199481Srdivacky  }
1401199481Srdivacky
1402204642Srdivacky  return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
1403199481Srdivacky}
1404199481Srdivacky
1405210299Sed/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
1406210299Sed/// determine if two loads are loading from the same base address. It should
1407210299Sed/// only return true if the base pointers are the same and the only differences
1408210299Sed/// between the two addresses is the offset. It also returns the offsets by
1409210299Sed/// reference.
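/// Illustrative example: two LDRi12 machine nodes that share the same base
/// pointer operand and carry constant offsets 0 and 8 cause this to return
/// true with Offset1 = 0 and Offset2 = 8.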
1410252723Sdim///
1411252723Sdim/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1412252723Sdim/// is permanently disabled.
1413210299Sedbool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
1414210299Sed                                               int64_t &Offset1,
1415210299Sed                                               int64_t &Offset2) const {
1416210299Sed  // Don't worry about Thumb: just ARM and Thumb2.
1417210299Sed  if (Subtarget.isThumb1Only()) return false;
1418210299Sed
1419210299Sed  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
1420210299Sed    return false;
1421210299Sed
1422210299Sed  switch (Load1->getMachineOpcode()) {
1423210299Sed  default:
1424210299Sed    return false;
1425218893Sdim  case ARM::LDRi12:
1426218893Sdim  case ARM::LDRBi12:
1427210299Sed  case ARM::LDRD:
1428210299Sed  case ARM::LDRH:
1429210299Sed  case ARM::LDRSB:
1430210299Sed  case ARM::LDRSH:
1431210299Sed  case ARM::VLDRD:
1432210299Sed  case ARM::VLDRS:
1433210299Sed  case ARM::t2LDRi8:
1434263509Sdim  case ARM::t2LDRBi8:
1435210299Sed  case ARM::t2LDRDi8:
1436210299Sed  case ARM::t2LDRSHi8:
1437210299Sed  case ARM::t2LDRi12:
1438263509Sdim  case ARM::t2LDRBi12:
1439210299Sed  case ARM::t2LDRSHi12:
1440210299Sed    break;
1441210299Sed  }
1442210299Sed
1443210299Sed  switch (Load2->getMachineOpcode()) {
1444210299Sed  default:
1445210299Sed    return false;
1446218893Sdim  case ARM::LDRi12:
1447218893Sdim  case ARM::LDRBi12:
1448210299Sed  case ARM::LDRD:
1449210299Sed  case ARM::LDRH:
1450210299Sed  case ARM::LDRSB:
1451210299Sed  case ARM::LDRSH:
1452210299Sed  case ARM::VLDRD:
1453210299Sed  case ARM::VLDRS:
1454210299Sed  case ARM::t2LDRi8:
1455263509Sdim  case ARM::t2LDRBi8:
1456210299Sed  case ARM::t2LDRSHi8:
1457210299Sed  case ARM::t2LDRi12:
1458263509Sdim  case ARM::t2LDRBi12:
1459210299Sed  case ARM::t2LDRSHi12:
1460210299Sed    break;
1461210299Sed  }
1462210299Sed
1463210299Sed  // Check if base addresses and chain operands match.
1464210299Sed  if (Load1->getOperand(0) != Load2->getOperand(0) ||
1465210299Sed      Load1->getOperand(4) != Load2->getOperand(4))
1466210299Sed    return false;
1467210299Sed
1468210299Sed  // Index should be Reg0.
1469210299Sed  if (Load1->getOperand(3) != Load2->getOperand(3))
1470210299Sed    return false;
1471210299Sed
1472210299Sed  // Determine the offsets.
1473210299Sed  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
1474210299Sed      isa<ConstantSDNode>(Load2->getOperand(1))) {
1475210299Sed    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
1476210299Sed    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
1477210299Sed    return true;
1478210299Sed  }
1479210299Sed
1480210299Sed  return false;
1481210299Sed}
1482210299Sed
1483210299Sed/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
1484221345Sdim/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
1485210299Sed/// be scheduled together. On some targets, if two loads are loading from
1486210299Sed/// addresses in the same cache line, it's better if they are scheduled
1487210299Sed/// together. This function takes two integers that represent the load offsets
1488210299Sed/// from the common base address. It returns true if it decides it's desirable
1489210299Sed/// to schedule the two loads together. "NumLoads" is the number of loads that
1490210299Sed/// have already been scheduled after Load1.
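/// Illustrative example: two loads with matching opcodes, Offset1 = 0 and
/// Offset2 = 16 (well within the roughly 512-byte window checked below), and
/// NumLoads = 1 are reported as worth scheduling together.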
1491252723Sdim///
1492252723Sdim/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
1493252723Sdim/// is permanently disabled.
1494210299Sedbool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
1495210299Sed                                               int64_t Offset1, int64_t Offset2,
1496210299Sed                                               unsigned NumLoads) const {
1497210299Sed  // Don't worry about Thumb: just ARM and Thumb2.
1498210299Sed  if (Subtarget.isThumb1Only()) return false;
1499210299Sed
1500210299Sed  assert(Offset2 > Offset1);
1501210299Sed
1502210299Sed  if ((Offset2 - Offset1) / 8 > 64)
1503210299Sed    return false;
1504210299Sed
1505263509Sdim  // Check if the machine opcodes are different. If they are different
1506263509Sdim  // then we consider them to not be of the same base address,
1507263509Sdim  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other
1508263509Sdim  // LDRBi12. In that case they are considered the same because they are just
1509263509Sdim  // different encoding forms of the same basic instruction.
1510263509Sdim  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
1511263509Sdim      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
1512263509Sdim         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
1513263509Sdim        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
1514263509Sdim         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
1515210299Sed    return false;  // FIXME: overly conservative?
1516210299Sed
1517210299Sed  // Four loads in a row should be sufficient.
1518210299Sed  if (NumLoads >= 3)
1519210299Sed    return false;
1520210299Sed
1521210299Sed  return true;
1522210299Sed}
1523210299Sed
1524210299Sedbool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
1525210299Sed                                            const MachineBasicBlock *MBB,
1526210299Sed                                            const MachineFunction &MF) const {
1527210299Sed  // Debug info is never a scheduling boundary. It's necessary to be explicit
1528210299Sed  // due to the special treatment of IT instructions below, otherwise a
1529210299Sed  // dbg_value followed by an IT will result in the IT instruction being
1530210299Sed  // considered a scheduling hazard, which is wrong. It should be the actual
1531210299Sed  // instruction preceding the dbg_value instruction(s), just like it is
1532210299Sed  // when debug info is not present.
1533210299Sed  if (MI->isDebugValue())
1534210299Sed    return false;
1535210299Sed
1536210299Sed  // Terminators and labels can't be scheduled around.
1537235633Sdim  if (MI->isTerminator() || MI->isLabel())
1538210299Sed    return true;
1539210299Sed
1540210299Sed  // Treat the start of the IT block as a scheduling boundary, but schedule
1541210299Sed  // t2IT along with all instructions following it.
1542210299Sed  // FIXME: This is a big hammer. But the alternative is to add all potential
1543210299Sed  // true and anti dependencies to IT block instructions as implicit operands
1544210299Sed  // to the t2IT instruction. The added compile time and complexity does not
1545210299Sed  // seem worth it.
1546210299Sed  MachineBasicBlock::const_iterator I = MI;
1547210299Sed  // Make sure to skip any dbg_value instructions
1548210299Sed  while (++I != MBB->end() && I->isDebugValue())
1549210299Sed    ;
1550210299Sed  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
1551210299Sed    return true;
1552210299Sed
1553210299Sed  // Don't attempt to schedule around any instruction that defines
1554210299Sed  // a stack-oriented pointer, as it's unlikely to be profitable. This
1555210299Sed  // saves compile time, because it doesn't require every single
1556210299Sed  // stack slot reference to depend on the instruction that does the
1557210299Sed  // modification.
1558235633Sdim  // Calls don't actually change the stack pointer, even if they have imp-defs.
1559235633Sdim  // No ARM calling conventions change the stack pointer. (X86 calling
1560235633Sdim  // conventions sometimes do).
1561235633Sdim  if (!MI->isCall() && MI->definesRegister(ARM::SP))
1562210299Sed    return true;
1563210299Sed
1564210299Sed  return false;
1565210299Sed}
1566210299Sed
1567224145Sdimbool ARMBaseInstrInfo::
1568224145SdimisProfitableToIfCvt(MachineBasicBlock &MBB,
1569224145Sdim                    unsigned NumCycles, unsigned ExtraPredCycles,
1570224145Sdim                    const BranchProbability &Probability) const {
1571221345Sdim  if (!NumCycles)
1572210299Sed    return false;
1573218893Sdim
1574218893Sdim  // Attempt to estimate the relative costs of predication versus branching.
1575224145Sdim  unsigned UnpredCost = Probability.getNumerator() * NumCycles;
1576224145Sdim  UnpredCost /= Probability.getDenominator();
1577224145Sdim  UnpredCost += 1; // The branch itself
1578224145Sdim  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1579218893Sdim
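  // Illustrative numbers: with NumCycles = 2, ExtraPredCycles = 1, a taken
  // probability of 1/2 and a 10-cycle misprediction penalty, UnpredCost is
  // 2*1/2 + 1 + 10/10 = 3, so the predicated cost of 2 + 1 = 3 is accepted.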
1580224145Sdim  return (NumCycles + ExtraPredCycles) <= UnpredCost;
1581210299Sed}
1582218893Sdim
1583210299Sedbool ARMBaseInstrInfo::
1584218893SdimisProfitableToIfCvt(MachineBasicBlock &TMBB,
1585218893Sdim                    unsigned TCycles, unsigned TExtra,
1586218893Sdim                    MachineBasicBlock &FMBB,
1587218893Sdim                    unsigned FCycles, unsigned FExtra,
1588224145Sdim                    const BranchProbability &Probability) const {
1589218893Sdim  if (!TCycles || !FCycles)
1590218893Sdim    return false;
1591218893Sdim
1592218893Sdim  // Attempt to estimate the relative costs of predication versus branching.
1593224145Sdim  unsigned TUnpredCost = Probability.getNumerator() * TCycles;
1594224145Sdim  TUnpredCost /= Probability.getDenominator();
1595226890Sdim
1596224145Sdim  uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
1597224145Sdim  unsigned FUnpredCost = Comp * FCycles;
1598224145Sdim  FUnpredCost /= Probability.getDenominator();
1599218893Sdim
1600224145Sdim  unsigned UnpredCost = TUnpredCost + FUnpredCost;
1601224145Sdim  UnpredCost += 1; // The branch itself
1602224145Sdim  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
1603224145Sdim
1604224145Sdim  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
1605210299Sed}
1606210299Sed
1607245431Sdimbool
1608245431SdimARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
1609245431Sdim                                            MachineBasicBlock &FMBB) const {
1610245431Sdim  // Reduce false anti-dependencies to let Swift's out-of-order execution
1611245431Sdim  // engine do its thing.
1612245431Sdim  return Subtarget.isSwift();
1613245431Sdim}
1614245431Sdim
1615198090Srdivacky/// getInstrPredicate - If instruction is predicated, returns its predicate
1616198090Srdivacky/// condition, otherwise returns AL. It also returns the condition code
1617198090Srdivacky/// register by reference.
1618198090SrdivackyARMCC::CondCodes
1619198090Srdivackyllvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
1620198090Srdivacky  int PIdx = MI->findFirstPredOperandIdx();
1621198090Srdivacky  if (PIdx == -1) {
1622198090Srdivacky    PredReg = 0;
1623198090Srdivacky    return ARMCC::AL;
1624198090Srdivacky  }
1625198090Srdivacky
1626198090Srdivacky  PredReg = MI->getOperand(PIdx+1).getReg();
1627198090Srdivacky  return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
1628198090Srdivacky}
1629198090Srdivacky
1630198090Srdivacky
1631198090Srdivackyint llvm::getMatchingCondBranchOpcode(int Opc) {
1632198090Srdivacky  if (Opc == ARM::B)
1633198090Srdivacky    return ARM::Bcc;
1634235633Sdim  if (Opc == ARM::tB)
1635198090Srdivacky    return ARM::tBcc;
1636235633Sdim  if (Opc == ARM::t2B)
1637235633Sdim    return ARM::t2Bcc;
1638198090Srdivacky
1639198090Srdivacky  llvm_unreachable("Unknown unconditional branch opcode!");
1640198090Srdivacky}
1641198090Srdivacky
1642235633Sdim/// commuteInstruction - Handle commutable instructions.
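/// For MOVCCr / t2MOVCCr this swaps the true and false operands and inverts
/// the condition, so (illustratively) "%r0 = MOVCCr %r1, %r2, pred:eq"
/// becomes "%r0 = MOVCCr %r2, %r1, pred:ne".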
1643235633SdimMachineInstr *
1644235633SdimARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
1645235633Sdim  switch (MI->getOpcode()) {
1646235633Sdim  case ARM::MOVCCr:
1647235633Sdim  case ARM::t2MOVCCr: {
1648235633Sdim    // MOVCC can be commuted by inverting the condition.
1649235633Sdim    unsigned PredReg = 0;
1650235633Sdim    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
1651235633Sdim    // MOVCC AL can't be inverted. Shouldn't happen.
1652235633Sdim    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
1653235633Sdim      return NULL;
1654252723Sdim    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
1655235633Sdim    if (!MI)
1656235633Sdim      return NULL;
1657235633Sdim    // After swapping the MOVCC operands, also invert the condition.
1658235633Sdim    MI->getOperand(MI->findFirstPredOperandIdx())
1659235633Sdim      .setImm(ARMCC::getOppositeCondition(CC));
1660235633Sdim    return MI;
1661235633Sdim  }
1662235633Sdim  }
1663252723Sdim  return TargetInstrInfo::commuteInstruction(MI, NewMI);
1664235633Sdim}
1665198090Srdivacky
1666245431Sdim/// Identify instructions that can be folded into a MOVCC instruction, and
1667245431Sdim/// return the defining instruction.
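/// Illustrative example: if the only use of "%vreg1 = ADDri %vreg2, 1, ..."
/// is the true operand of a MOVCCr, the ADDri is returned here so that
/// optimizeSelect can rewrite the pair as a single conditionally executed
/// ADDri.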
1668245431Sdimstatic MachineInstr *canFoldIntoMOVCC(unsigned Reg,
1669245431Sdim                                      const MachineRegisterInfo &MRI,
1670245431Sdim                                      const TargetInstrInfo *TII) {
1671245431Sdim  if (!TargetRegisterInfo::isVirtualRegister(Reg))
1672245431Sdim    return 0;
1673245431Sdim  if (!MRI.hasOneNonDBGUse(Reg))
1674245431Sdim    return 0;
1675245431Sdim  MachineInstr *MI = MRI.getVRegDef(Reg);
1676245431Sdim  if (!MI)
1677245431Sdim    return 0;
1678245431Sdim  // MI is folded into the MOVCC by predicating it.
1679245431Sdim  if (!MI->isPredicable())
1680245431Sdim    return 0;
1681245431Sdim  // Check if MI has any non-dead defs or physreg uses. This also detects
1682245431Sdim  // predicated instructions which will be reading CPSR.
1683245431Sdim  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
1684245431Sdim    const MachineOperand &MO = MI->getOperand(i);
1685245431Sdim    // Reject frame index operands, PEI can't handle the predicated pseudos.
1686245431Sdim    if (MO.isFI() || MO.isCPI() || MO.isJTI())
1687245431Sdim      return 0;
1688245431Sdim    if (!MO.isReg())
1689245431Sdim      continue;
1690245431Sdim    // MI can't have any tied operands, that would conflict with predication.
1691245431Sdim    if (MO.isTied())
1692245431Sdim      return 0;
1693245431Sdim    if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1694245431Sdim      return 0;
1695245431Sdim    if (MO.isDef() && !MO.isDead())
1696245431Sdim      return 0;
1697245431Sdim  }
1698245431Sdim  bool DontMoveAcrossStores = true;
1699245431Sdim  if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
1700245431Sdim    return 0;
1701245431Sdim  return MI;
1702245431Sdim}
1703245431Sdim
1704245431Sdimbool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
1705245431Sdim                                     SmallVectorImpl<MachineOperand> &Cond,
1706245431Sdim                                     unsigned &TrueOp, unsigned &FalseOp,
1707245431Sdim                                     bool &Optimizable) const {
1708245431Sdim  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
1709245431Sdim         "Unknown select instruction");
1710245431Sdim  // MOVCC operands:
1711245431Sdim  // 0: Def.
1712245431Sdim  // 1: True use.
1713245431Sdim  // 2: False use.
1714245431Sdim  // 3: Condition code.
1715245431Sdim  // 4: CPSR use.
1716245431Sdim  TrueOp = 1;
1717245431Sdim  FalseOp = 2;
1718245431Sdim  Cond.push_back(MI->getOperand(3));
1719245431Sdim  Cond.push_back(MI->getOperand(4));
1720245431Sdim  // We can always fold a def.
1721245431Sdim  Optimizable = true;
1722245431Sdim  return false;
1723245431Sdim}
1724245431Sdim
1725245431SdimMachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
1726245431Sdim                                               bool PreferFalse) const {
1727245431Sdim  assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
1728245431Sdim         "Unknown select instruction");
1729263509Sdim  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
1730245431Sdim  MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
1731245431Sdim  bool Invert = !DefMI;
1732245431Sdim  if (!DefMI)
1733245431Sdim    DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
1734245431Sdim  if (!DefMI)
1735245431Sdim    return 0;
1736245431Sdim
1737263509Sdim  // Find new register class to use.
1738263509Sdim  MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
1739263509Sdim  unsigned       DestReg  = MI->getOperand(0).getReg();
1740263509Sdim  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
1741263509Sdim  if (!MRI.constrainRegClass(DestReg, PreviousClass))
1742263509Sdim    return 0;
1743263509Sdim
1744245431Sdim  // Create a new predicated version of DefMI.
1745245431Sdim  // Rfalse is the first use.
1746245431Sdim  MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
1747263509Sdim                                      DefMI->getDesc(), DestReg);
1748245431Sdim
1749245431Sdim  // Copy all the DefMI operands, excluding its (null) predicate.
1750245431Sdim  const MCInstrDesc &DefDesc = DefMI->getDesc();
1751245431Sdim  for (unsigned i = 1, e = DefDesc.getNumOperands();
1752245431Sdim       i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
1753245431Sdim    NewMI.addOperand(DefMI->getOperand(i));
1754245431Sdim
1755245431Sdim  unsigned CondCode = MI->getOperand(3).getImm();
1756245431Sdim  if (Invert)
1757245431Sdim    NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
1758245431Sdim  else
1759245431Sdim    NewMI.addImm(CondCode);
1760245431Sdim  NewMI.addOperand(MI->getOperand(4));
1761245431Sdim
1762245431Sdim  // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
1763245431Sdim  if (NewMI->hasOptionalDef())
1764245431Sdim    AddDefaultCC(NewMI);
1765245431Sdim
1766245431Sdim  // The output register value when the predicate is false is an implicit
1767245431Sdim  // register operand tied to the first def.
1768245431Sdim  // The tie makes the register allocator ensure the FalseReg is allocated the
1769245431Sdim  // same register as operand 0.
1770245431Sdim  FalseReg.setImplicit();
1771252723Sdim  NewMI.addOperand(FalseReg);
1772245431Sdim  NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
1773245431Sdim
1774245431Sdim  // The caller will erase MI, but not DefMI.
1775245431Sdim  DefMI->eraseFromParent();
1776245431Sdim  return NewMI;
1777245431Sdim}
1778245431Sdim
1779226890Sdim/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
1780226890Sdim/// instruction is encoded with an 'S' bit is determined by the optional CPSR
1781226890Sdim/// def operand.
1782226890Sdim///
1783226890Sdim/// This will go away once we can teach tblgen how to set the optional CPSR def
1784226890Sdim/// operand itself.
1785226890Sdimstruct AddSubFlagsOpcodePair {
1786245431Sdim  uint16_t PseudoOpc;
1787245431Sdim  uint16_t MachineOpc;
1788226890Sdim};
1789226890Sdim
1790245431Sdimstatic const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
1791226890Sdim  {ARM::ADDSri, ARM::ADDri},
1792226890Sdim  {ARM::ADDSrr, ARM::ADDrr},
1793226890Sdim  {ARM::ADDSrsi, ARM::ADDrsi},
1794226890Sdim  {ARM::ADDSrsr, ARM::ADDrsr},
1795226890Sdim
1796226890Sdim  {ARM::SUBSri, ARM::SUBri},
1797226890Sdim  {ARM::SUBSrr, ARM::SUBrr},
1798226890Sdim  {ARM::SUBSrsi, ARM::SUBrsi},
1799226890Sdim  {ARM::SUBSrsr, ARM::SUBrsr},
1800226890Sdim
1801226890Sdim  {ARM::RSBSri, ARM::RSBri},
1802226890Sdim  {ARM::RSBSrsi, ARM::RSBrsi},
1803226890Sdim  {ARM::RSBSrsr, ARM::RSBrsr},
1804226890Sdim
1805226890Sdim  {ARM::t2ADDSri, ARM::t2ADDri},
1806226890Sdim  {ARM::t2ADDSrr, ARM::t2ADDrr},
1807226890Sdim  {ARM::t2ADDSrs, ARM::t2ADDrs},
1808226890Sdim
1809226890Sdim  {ARM::t2SUBSri, ARM::t2SUBri},
1810226890Sdim  {ARM::t2SUBSrr, ARM::t2SUBrr},
1811226890Sdim  {ARM::t2SUBSrs, ARM::t2SUBrs},
1812226890Sdim
1813226890Sdim  {ARM::t2RSBSri, ARM::t2RSBri},
1814226890Sdim  {ARM::t2RSBSrs, ARM::t2RSBrs},
1815226890Sdim};
1816226890Sdim
1817226890Sdimunsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
1818245431Sdim  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
1819245431Sdim    if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
1820245431Sdim      return AddSubFlagsOpcodeMap[i].MachineOpc;
1821226890Sdim  return 0;
1822226890Sdim}
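
// Usage sketch (illustrative): convertAddSubFlagsOpcode(ARM::t2ADDSri) yields
// ARM::t2ADDri, while opcodes that are not flag-setting pseudos yield 0 and
// are left untouched by the callers.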
1823226890Sdim
1824198090Srdivackyvoid llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
1825198090Srdivacky                               MachineBasicBlock::iterator &MBBI, DebugLoc dl,
1826198090Srdivacky                               unsigned DestReg, unsigned BaseReg, int NumBytes,
1827198090Srdivacky                               ARMCC::CondCodes Pred, unsigned PredReg,
1828221345Sdim                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
1829263509Sdim  if (NumBytes == 0 && DestReg != BaseReg) {
1830263509Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
1831263509Sdim      .addReg(BaseReg, RegState::Kill)
1832263509Sdim      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
1833263509Sdim      .setMIFlags(MIFlags);
1834263509Sdim    return;
1835263509Sdim  }
1836263509Sdim
1837198090Srdivacky  bool isSub = NumBytes < 0;
1838198090Srdivacky  if (isSub) NumBytes = -NumBytes;
1839198090Srdivacky
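  // Split the remaining offset into chunks that each fit an ARM so_imm
  // (an 8-bit value rotated right by an even amount). Illustrative example:
  // NumBytes = 0x10004 is materialised as an add of #4 followed by an add of
  // #0x10000, each a legal immediate.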
1840198090Srdivacky  while (NumBytes) {
1841198090Srdivacky    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
1842198090Srdivacky    unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
1843198090Srdivacky    assert(ThisVal && "Didn't extract field correctly");
1844198090Srdivacky
1845198090Srdivacky    // We will handle these bits from offset, clear them.
1846198090Srdivacky    NumBytes &= ~ThisVal;
1847198090Srdivacky
1848198090Srdivacky    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
1849198090Srdivacky
1850198090Srdivacky    // Build the new ADD / SUB.
1851198090Srdivacky    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
1852198090Srdivacky    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
1853198090Srdivacky      .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
1854221345Sdim      .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
1855221345Sdim      .setMIFlags(MIFlags);
1856198090Srdivacky    BaseReg = DestReg;
1857198090Srdivacky  }
1858198090Srdivacky}
1859198090Srdivacky
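// Try to fold an SP adjustment into the register list of an adjacent push or
// pop. Illustrative example (only attempted when minimising code size): an
// 8-byte SP decrement next to "push {r4, lr}" can be absorbed by rewriting it
// as "push {r2, r3, r4, lr}", with r2 and r3 pushed as undef values purely to
// move SP; for a pop, extra registers may only be added if they are dead.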
1860263509Sdimbool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
1861263509Sdim                                      MachineInstr *MI,
1862263509Sdim                                      unsigned NumBytes) {
1863263509Sdim  // This optimisation potentially adds lots of load and store
1864263509Sdim  // micro-operations, so it is really only a benefit to code size.
1865263509Sdim  if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
1866263509Sdim    return false;
1867263509Sdim
1868263509Sdim  // If only one register is pushed/popped, LLVM can use an LDR/STR
1869263509Sdim  // instead. We can't modify those so make sure we're dealing with an
1870263509Sdim  // instruction we understand.
1871263509Sdim  bool IsPop = isPopOpcode(MI->getOpcode());
1872263509Sdim  bool IsPush = isPushOpcode(MI->getOpcode());
1873263509Sdim  if (!IsPush && !IsPop)
1874263509Sdim    return false;
1875263509Sdim
1876263509Sdim  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
1877263509Sdim                      MI->getOpcode() == ARM::VLDMDIA_UPD;
1878263509Sdim  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
1879263509Sdim                     MI->getOpcode() == ARM::tPOP ||
1880263509Sdim                     MI->getOpcode() == ARM::tPOP_RET;
1881263509Sdim
1882263509Sdim  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
1883263509Sdim                          MI->getOperand(1).getReg() == ARM::SP)) &&
1884263509Sdim         "trying to fold sp update into non-sp-updating push/pop");
1885263509Sdim
1886263509Sdim  // The VFP push & pop act on D-registers, so we can only fold in an
1887263509Sdim  // adjustment that is a multiple of 8 bytes. Similarly, each rN register is
1888263509Sdim  // 4 bytes. Don't try if this is violated.
1889263509Sdim  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
1890263509Sdim    return false;
1891263509Sdim
1892263509Sdim  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
1893263509Sdim  // pred) so the list starts at 4. Thumb1 starts after the predicate.
1894263509Sdim  int RegListIdx = IsT1PushPop ? 2 : 4;
1895263509Sdim
1896263509Sdim  // Calculate the space we'll need in terms of registers.
1897263509Sdim  unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
1898263509Sdim  unsigned RD0Reg, RegsNeeded;
1899263509Sdim  if (IsVFPPushPop) {
1900263509Sdim    RD0Reg = ARM::D0;
1901263509Sdim    RegsNeeded = NumBytes / 8;
1902263509Sdim  } else {
1903263509Sdim    RD0Reg = ARM::R0;
1904263509Sdim    RegsNeeded = NumBytes / 4;
1905263509Sdim  }
1906263509Sdim
1907263509Sdim  // We're going to have to strip all list operands off before
1908263509Sdim  // re-adding them since the order matters, so save the existing ones
1909263509Sdim  // for later.
1910263509Sdim  SmallVector<MachineOperand, 4> RegList;
1911263509Sdim  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
1912263509Sdim    RegList.push_back(MI->getOperand(i));
1913263509Sdim
1914263509Sdim  MachineBasicBlock *MBB = MI->getParent();
1915263509Sdim  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
1916263509Sdim  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
1917263509Sdim
1918263509Sdim  // Now try to find enough space in the reglist to allocate NumBytes.
1919263509Sdim  for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
1920263509Sdim       --CurReg) {
1921263509Sdim    if (!IsPop) {
1922263509Sdim      // Pushing any register is completely harmless; mark the
1923263509Sdim      // register involved as undef since we don't care about its
1924263509Sdim      // value at all.
1925263509Sdim      RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
1926263509Sdim                                                  false, false, true));
1927263509Sdim      --RegsNeeded;
1928263509Sdim      continue;
1929263509Sdim    }
1930263509Sdim
1931263509Sdim    // However, we can only pop an extra register if it's not live. For
1932263509Sdim    // registers live within the function we might clobber a return value
1933263509Sdim    // register; the other way a register can be live here is if it's
1934263509Sdim    // callee-saved.
1935263509Sdim    if (isCalleeSavedRegister(CurReg, CSRegs) ||
1936263509Sdim        MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
1937263509Sdim            MachineBasicBlock::LQR_Dead) {
1938263509Sdim      // VFP pops don't allow holes in the register list, so any skip is fatal
1939263509Sdim      // for our transformation. GPR pops do, so we should just keep looking.
1940263509Sdim      if (IsVFPPushPop)
1941263509Sdim        return false;
1942263509Sdim      else
1943263509Sdim        continue;
1944263509Sdim    }
1945263509Sdim
1946263509Sdim    // Mark the unimportant registers as <def,dead> in the POP.
1947263509Sdim    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
1948263509Sdim                                                true));
1949263509Sdim    --RegsNeeded;
1950263509Sdim  }
1951263509Sdim
1952263509Sdim  if (RegsNeeded > 0)
1953263509Sdim    return false;
1954263509Sdim
1955263509Sdim  // Finally we know we can profitably perform the optimisation so go
1956263509Sdim  // ahead: strip all existing registers off and add them back again
1957263509Sdim  // in the right order.
1958263509Sdim  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
1959263509Sdim    MI->RemoveOperand(i);
1960263509Sdim
1961263509Sdim  // Add the complete list back in.
1962263509Sdim  MachineInstrBuilder MIB(MF, &*MI);
1963263509Sdim  for (int i = RegList.size() - 1; i >= 0; --i)
1964263509Sdim    MIB.addOperand(RegList[i]);
1965263509Sdim
1966263509Sdim  return true;
1967263509Sdim}
1968263509Sdim
1969198090Srdivackybool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
1970198090Srdivacky                                unsigned FrameReg, int &Offset,
1971198090Srdivacky                                const ARMBaseInstrInfo &TII) {
1972198090Srdivacky  unsigned Opcode = MI.getOpcode();
1973224145Sdim  const MCInstrDesc &Desc = MI.getDesc();
1974198090Srdivacky  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
1975198090Srdivacky  bool isSub = false;
1976198090Srdivacky
1977198090Srdivacky  // Memory operands in inline assembly always use AddrMode2.
1978198090Srdivacky  if (Opcode == ARM::INLINEASM)
1979198090Srdivacky    AddrMode = ARMII::AddrMode2;
1980198090Srdivacky
1981198090Srdivacky  if (Opcode == ARM::ADDri) {
1982198090Srdivacky    Offset += MI.getOperand(FrameRegIdx+1).getImm();
1983198090Srdivacky    if (Offset == 0) {
1984198090Srdivacky      // Turn it into a move.
1985198090Srdivacky      MI.setDesc(TII.get(ARM::MOVr));
1986198090Srdivacky      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
1987198090Srdivacky      MI.RemoveOperand(FrameRegIdx+1);
1988198090Srdivacky      Offset = 0;
1989198090Srdivacky      return true;
1990198090Srdivacky    } else if (Offset < 0) {
1991198090Srdivacky      Offset = -Offset;
1992198090Srdivacky      isSub = true;
1993198090Srdivacky      MI.setDesc(TII.get(ARM::SUBri));
1994198090Srdivacky    }
1995198090Srdivacky
1996198090Srdivacky    // Common case: small offset, fits into instruction.
1997198090Srdivacky    if (ARM_AM::getSOImmVal(Offset) != -1) {
1998198090Srdivacky      // Replace the FrameIndex with sp / fp
1999198090Srdivacky      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2000198090Srdivacky      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
2001198090Srdivacky      Offset = 0;
2002198090Srdivacky      return true;
2003198090Srdivacky    }
2004198090Srdivacky
2005198090Srdivacky    // Otherwise, pull as much of the immediate into this ADDri/SUBri
2006198090Srdivacky    // as possible.
2007198090Srdivacky    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
2008198090Srdivacky    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
2009198090Srdivacky
2010198090Srdivacky    // We will handle these bits from offset, clear them.
2011198090Srdivacky    Offset &= ~ThisImmVal;
2012198090Srdivacky
2013198090Srdivacky    // Get the properly encoded SOImmVal field.
2014198090Srdivacky    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
2015198090Srdivacky           "Bit extraction didn't work?");
2016198090Srdivacky    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
2017198090Srdivacky } else {
2018198090Srdivacky    unsigned ImmIdx = 0;
2019198090Srdivacky    int InstrOffs = 0;
2020198090Srdivacky    unsigned NumBits = 0;
2021198090Srdivacky    unsigned Scale = 1;
2022198090Srdivacky    switch (AddrMode) {
2023218893Sdim    case ARMII::AddrMode_i12: {
2024218893Sdim      ImmIdx = FrameRegIdx + 1;
2025218893Sdim      InstrOffs = MI.getOperand(ImmIdx).getImm();
2026218893Sdim      NumBits = 12;
2027218893Sdim      break;
2028218893Sdim    }
2029198090Srdivacky    case ARMII::AddrMode2: {
2030198090Srdivacky      ImmIdx = FrameRegIdx+2;
2031198090Srdivacky      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
2032198090Srdivacky      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2033198090Srdivacky        InstrOffs *= -1;
2034198090Srdivacky      NumBits = 12;
2035198090Srdivacky      break;
2036198090Srdivacky    }
2037198090Srdivacky    case ARMII::AddrMode3: {
2038198090Srdivacky      ImmIdx = FrameRegIdx+2;
2039198090Srdivacky      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
2040198090Srdivacky      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2041198090Srdivacky        InstrOffs *= -1;
2042198090Srdivacky      NumBits = 8;
2043198090Srdivacky      break;
2044198090Srdivacky    }
2045198090Srdivacky    case ARMII::AddrMode4:
2046199481Srdivacky    case ARMII::AddrMode6:
2047198090Srdivacky      // Can't fold any offset even if it's zero.
2048198090Srdivacky      return false;
2049198090Srdivacky    case ARMII::AddrMode5: {
2050198090Srdivacky      ImmIdx = FrameRegIdx+1;
2051198090Srdivacky      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
2052198090Srdivacky      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
2053198090Srdivacky        InstrOffs *= -1;
2054198090Srdivacky      NumBits = 8;
2055198090Srdivacky      Scale = 4;
2056198090Srdivacky      break;
2057198090Srdivacky    }
2058198090Srdivacky    default:
2059198090Srdivacky      llvm_unreachable("Unsupported addressing mode!");
2060198090Srdivacky    }
2061198090Srdivacky
2062198090Srdivacky    Offset += InstrOffs * Scale;
2063198090Srdivacky    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
2064198090Srdivacky    if (Offset < 0) {
2065198090Srdivacky      Offset = -Offset;
2066198090Srdivacky      isSub = true;
2067198090Srdivacky    }
2068198090Srdivacky
2069198090Srdivacky    // Attempt to fold address comp. if opcode has offset bits
2070198090Srdivacky    if (NumBits > 0) {
2071198090Srdivacky      // Common case: small offset, fits into instruction.
2072198090Srdivacky      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
2073198090Srdivacky      int ImmedOffset = Offset / Scale;
2074198090Srdivacky      unsigned Mask = (1 << NumBits) - 1;
2075198090Srdivacky      if ((unsigned)Offset <= Mask * Scale) {
2076198090Srdivacky        // Replace the FrameIndex with sp
2077198090Srdivacky        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
2078218893Sdim        // FIXME: When addrmode2 goes away, this will simplify (like the
2079218893Sdim        // T2 version), as the LDR.i12 versions don't need the encoding
2080218893Sdim        // tricks for the offset value.
2081218893Sdim        if (isSub) {
2082218893Sdim          if (AddrMode == ARMII::AddrMode_i12)
2083218893Sdim            ImmedOffset = -ImmedOffset;
2084218893Sdim          else
2085218893Sdim            ImmedOffset |= 1 << NumBits;
2086218893Sdim        }
2087198090Srdivacky        ImmOp.ChangeToImmediate(ImmedOffset);
2088198090Srdivacky        Offset = 0;
2089198090Srdivacky        return true;
2090198090Srdivacky      }
2091198090Srdivacky
2092198090Srdivacky      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
2093198090Srdivacky      ImmedOffset = ImmedOffset & Mask;
2094218893Sdim      if (isSub) {
2095218893Sdim        if (AddrMode == ARMII::AddrMode_i12)
2096218893Sdim          ImmedOffset = -ImmedOffset;
2097218893Sdim        else
2098218893Sdim          ImmedOffset |= 1 << NumBits;
2099218893Sdim      }
2100198090Srdivacky      ImmOp.ChangeToImmediate(ImmedOffset);
2101198090Srdivacky      Offset &= ~(Mask*Scale);
2102198090Srdivacky    }
2103198090Srdivacky  }
2104198090Srdivacky
2105198090Srdivacky  Offset = (isSub) ? -Offset : Offset;
2106198090Srdivacky  return Offset == 0;
2107198090Srdivacky}
2108212904Sdim
2109245431Sdim/// analyzeCompare - For a comparison instruction, return the source registers
2110245431Sdim/// in SrcReg and SrcReg2 (if the instruction has two register operands), and
2111245431Sdim/// the value it compares against in CmpValue. Return true if the comparison
2112245431Sdim/// instruction can be analyzed.
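/// Illustrative examples: "CMPri %r0, 42" yields SrcReg = r0, SrcReg2 = 0 and
/// CmpValue = 42, while "CMPrr %r0, %r1" yields SrcReg = r0, SrcReg2 = r1 and
/// CmpValue = 0.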
2113212904Sdimbool ARMBaseInstrInfo::
2114245431SdimanalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
2115245431Sdim               int &CmpMask, int &CmpValue) const {
2116212904Sdim  switch (MI->getOpcode()) {
2117212904Sdim  default: break;
2118212904Sdim  case ARM::CMPri:
2119212904Sdim  case ARM::t2CMPri:
2120212904Sdim    SrcReg = MI->getOperand(0).getReg();
2121245431Sdim    SrcReg2 = 0;
2122218893Sdim    CmpMask = ~0;
2123212904Sdim    CmpValue = MI->getOperand(1).getImm();
2124212904Sdim    return true;
2125245431Sdim  case ARM::CMPrr:
2126245431Sdim  case ARM::t2CMPrr:
2127245431Sdim    SrcReg = MI->getOperand(0).getReg();
2128245431Sdim    SrcReg2 = MI->getOperand(1).getReg();
2129245431Sdim    CmpMask = ~0;
2130245431Sdim    CmpValue = 0;
2131245431Sdim    return true;
2132218893Sdim  case ARM::TSTri:
2133218893Sdim  case ARM::t2TSTri:
2134218893Sdim    SrcReg = MI->getOperand(0).getReg();
2135245431Sdim    SrcReg2 = 0;
2136218893Sdim    CmpMask = MI->getOperand(1).getImm();
2137218893Sdim    CmpValue = 0;
2138218893Sdim    return true;
2139212904Sdim  }
2140212904Sdim
2141212904Sdim  return false;
2142212904Sdim}
2143212904Sdim
2144218893Sdim/// isSuitableForMask - Identify a suitable 'and' instruction that
2145218893Sdim/// operates on the given source register and applies the same mask
2146218893Sdim/// as a 'tst' instruction. Provide a limited look-through for copies.
2147218893Sdim/// When successful, MI will hold the found instruction.
2148218893Sdimstatic bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
2149218893Sdim                              int CmpMask, bool CommonUse) {
2150218893Sdim  switch (MI->getOpcode()) {
2151218893Sdim    case ARM::ANDri:
2152218893Sdim    case ARM::t2ANDri:
2153218893Sdim      if (CmpMask != MI->getOperand(2).getImm())
2154218893Sdim        return false;
2155218893Sdim      if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
2156218893Sdim        return true;
2157218893Sdim      break;
2158218893Sdim    case ARM::COPY: {
2159218893Sdim      // Walk down one instruction which is potentially an 'and'.
2160218893Sdim      const MachineInstr &Copy = *MI;
2161218893Sdim      MachineBasicBlock::iterator AND(
2162218893Sdim        llvm::next(MachineBasicBlock::iterator(MI)));
2163218893Sdim      if (AND == MI->getParent()->end()) return false;
2164218893Sdim      MI = AND;
2165218893Sdim      return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
2166218893Sdim                               CmpMask, true);
2167218893Sdim    }
2168218893Sdim  }
2169218893Sdim
2170218893Sdim  return false;
2171218893Sdim}
2172218893Sdim
2173245431Sdim/// getSwappedCondition - Assuming the flags are set by MI(a,b), return the
2174245431Sdim/// condition code to use if we modify the instructions such that the flags
2175245431Sdim/// are instead set by MI(b,a).
2176245431Sdiminline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
2177245431Sdim  switch (CC) {
2178245431Sdim  default: return ARMCC::AL;
2179245431Sdim  case ARMCC::EQ: return ARMCC::EQ;
2180245431Sdim  case ARMCC::NE: return ARMCC::NE;
2181245431Sdim  case ARMCC::HS: return ARMCC::LS;
2182245431Sdim  case ARMCC::LO: return ARMCC::HI;
2183245431Sdim  case ARMCC::HI: return ARMCC::LO;
2184245431Sdim  case ARMCC::LS: return ARMCC::HS;
2185245431Sdim  case ARMCC::GE: return ARMCC::LE;
2186245431Sdim  case ARMCC::LT: return ARMCC::GT;
2187245431Sdim  case ARMCC::GT: return ARMCC::LT;
2188245431Sdim  case ARMCC::LE: return ARMCC::GE;
2189245431Sdim  }
2190245431Sdim}
2191218893Sdim
2192245431Sdim/// isRedundantFlagInstr - check whether the first instruction, whose only
2193245431Sdim/// purpose is to update flags, can be made redundant.
2194245431Sdim/// CMPrr can be made redundant by SUBrr if the operands are the same.
2195245431Sdim/// CMPri can be made redundant by SUBri if the operands are the same.
2196245431Sdim/// This function can be extended later on.
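/// Illustrative example: "SUBrr %r2, %r0, %r1" makes a following
/// "CMPrr %r0, %r1" redundant once the SUB is converted to its flag-setting
/// form; with the operands swapped ("CMPrr %r1, %r0") the users of the flags
/// additionally need their condition codes adjusted via getSwappedCondition.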
2197245431Sdiminline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
2198245431Sdim                                        unsigned SrcReg2, int ImmValue,
2199245431Sdim                                        MachineInstr *OI) {
2200245431Sdim  if ((CmpI->getOpcode() == ARM::CMPrr ||
2201245431Sdim       CmpI->getOpcode() == ARM::t2CMPrr) &&
2202245431Sdim      (OI->getOpcode() == ARM::SUBrr ||
2203245431Sdim       OI->getOpcode() == ARM::t2SUBrr) &&
2204245431Sdim      ((OI->getOperand(1).getReg() == SrcReg &&
2205245431Sdim        OI->getOperand(2).getReg() == SrcReg2) ||
2206245431Sdim       (OI->getOperand(1).getReg() == SrcReg2 &&
2207245431Sdim        OI->getOperand(2).getReg() == SrcReg)))
2208245431Sdim    return true;
2209218893Sdim
2210245431Sdim  if ((CmpI->getOpcode() == ARM::CMPri ||
2211245431Sdim       CmpI->getOpcode() == ARM::t2CMPri) &&
2212245431Sdim      (OI->getOpcode() == ARM::SUBri ||
2213245431Sdim       OI->getOpcode() == ARM::t2SUBri) &&
2214245431Sdim      OI->getOperand(1).getReg() == SrcReg &&
2215245431Sdim      OI->getOperand(2).getImm() == ImmValue)
2216245431Sdim    return true;
2217245431Sdim  return false;
2218245431Sdim}
2219218893Sdim
2220245431Sdim/// optimizeCompareInstr - Convert the instruction supplying the argument to
2221245431Sdim/// the comparison into one that sets the zero bit in the flags register.
2222245431Sdim/// Remove a redundant Compare instruction if an earlier instruction can set
2223245431Sdim/// the flags in the same way as Compare.
2224245431Sdim/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
2225245431Sdim/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
2226245431Sdim/// condition code of instructions which use the flags.
2227245431Sdimbool ARMBaseInstrInfo::
2228245431SdimoptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
2229245431Sdim                     int CmpMask, int CmpValue,
2230245431Sdim                     const MachineRegisterInfo *MRI) const {
2231245431Sdim  // Get the unique definition of SrcReg.
2232245431Sdim  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
2233245431Sdim  if (!MI) return false;
2234245431Sdim
2235218893Sdim  // Masked compares sometimes use the same register as the corresponding 'and'.
2236218893Sdim  if (CmpMask != ~0) {
2237245431Sdim    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
2238218893Sdim      MI = 0;
2239218893Sdim      for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
2240218893Sdim           UE = MRI->use_end(); UI != UE; ++UI) {
2241218893Sdim        if (UI->getParent() != CmpInstr->getParent()) continue;
2242218893Sdim        MachineInstr *PotentialAND = &*UI;
2243245431Sdim        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
2244245431Sdim            isPredicated(PotentialAND))
2245218893Sdim          continue;
2246218893Sdim        MI = PotentialAND;
2247218893Sdim        break;
2248218893Sdim      }
2249218893Sdim      if (!MI) return false;
2250218893Sdim    }
2251218893Sdim  }
2252218893Sdim
2253245431Sdim  // Get ready to iterate backward from CmpInstr.
2254245431Sdim  MachineBasicBlock::iterator I = CmpInstr, E = MI,
2255245431Sdim                              B = CmpInstr->getParent()->begin();
2256212904Sdim
2257218893Sdim  // Early exit if CmpInstr is at the beginning of the BB.
2258218893Sdim  if (I == B) return false;
2259218893Sdim
2260245431Sdim  // There are two possible candidates which can be changed to set CPSR:
2261245431Sdim  // One is MI, the other is a SUB instruction.
2262245431Sdim  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2263245431Sdim  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
2264245431Sdim  MachineInstr *Sub = NULL;
2265245431Sdim  if (SrcReg2 != 0)
2266245431Sdim    // MI is not a candidate for CMPrr.
2267245431Sdim    MI = NULL;
2268245431Sdim  else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
2269245431Sdim    // Conservatively refuse to convert an instruction which isn't in the same
2270245431Sdim    // BB as the comparison.
2271245431Sdim    // For CMPri, we need to check Sub, thus we can't return here.
2272245431Sdim    if (CmpInstr->getOpcode() == ARM::CMPri ||
2273245431Sdim       CmpInstr->getOpcode() == ARM::t2CMPri)
2274245431Sdim      MI = NULL;
2275245431Sdim    else
2276245431Sdim      return false;
2277245431Sdim  }
2278245431Sdim
2279245431Sdim  // Check that CPSR isn't set between the comparison instruction and the one we
2280245431Sdim  // want to change. At the same time, search for Sub.
2281245431Sdim  const TargetRegisterInfo *TRI = &getRegisterInfo();
2282212904Sdim  --I;
2283212904Sdim  for (; I != E; --I) {
2284212904Sdim    const MachineInstr &Instr = *I;
2285212904Sdim
2286245431Sdim    if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
2287245431Sdim        Instr.readsRegister(ARM::CPSR, TRI))
2288218893Sdim      // This instruction modifies or uses CPSR after the one we want to
2289218893Sdim      // change. We can't do this transformation.
2290245431Sdim      return false;
2291245431Sdim
2292245431Sdim    // Check whether CmpInstr can be made redundant by the current instruction.
2293245431Sdim    if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
2294245431Sdim      Sub = &*I;
2295245431Sdim      break;
2296212904Sdim    }
2297213534Sdim
2298213534Sdim    if (I == B)
2299213534Sdim      // The 'and' is below the comparison instruction.
2300213534Sdim      return false;
2301212904Sdim  }
2302212904Sdim
2303245431Sdim  // Return false if no candidates exist.
2304245431Sdim  if (!MI && !Sub)
2305245431Sdim    return false;
2306245431Sdim
2307245431Sdim  // The single candidate is called MI.
2308245431Sdim  if (!MI) MI = Sub;
2309245431Sdim
2310245431Sdim  // We can't use a predicated instruction - it doesn't always write the flags.
2311245431Sdim  if (isPredicated(MI))
2312245431Sdim    return false;
2313245431Sdim
2314212904Sdim  switch (MI->getOpcode()) {
2315212904Sdim  default: break;
2316221345Sdim  case ARM::RSBrr:
2317221345Sdim  case ARM::RSBri:
2318221345Sdim  case ARM::RSCrr:
2319221345Sdim  case ARM::RSCri:
2320221345Sdim  case ARM::ADDrr:
2321212904Sdim  case ARM::ADDri:
2322221345Sdim  case ARM::ADCrr:
2323221345Sdim  case ARM::ADCri:
2324221345Sdim  case ARM::SUBrr:
2325212904Sdim  case ARM::SUBri:
2326221345Sdim  case ARM::SBCrr:
2327221345Sdim  case ARM::SBCri:
2328221345Sdim  case ARM::t2RSBri:
2329221345Sdim  case ARM::t2ADDrr:
2330212904Sdim  case ARM::t2ADDri:
2331221345Sdim  case ARM::t2ADCrr:
2332221345Sdim  case ARM::t2ADCri:
2333221345Sdim  case ARM::t2SUBrr:
2334212904Sdim  case ARM::t2SUBri:
2335221345Sdim  case ARM::t2SBCrr:
2336221345Sdim  case ARM::t2SBCri:
2337221345Sdim  case ARM::ANDrr:
2338221345Sdim  case ARM::ANDri:
2339221345Sdim  case ARM::t2ANDrr:
2340221345Sdim  case ARM::t2ANDri:
2341221345Sdim  case ARM::ORRrr:
2342221345Sdim  case ARM::ORRri:
2343221345Sdim  case ARM::t2ORRrr:
2344221345Sdim  case ARM::t2ORRri:
2345221345Sdim  case ARM::EORrr:
2346221345Sdim  case ARM::EORri:
2347221345Sdim  case ARM::t2EORrr:
2348221345Sdim  case ARM::t2EORri: {
2349245431Sdim    // Scan forward for the use of CPSR
2350245431Sdim    // When checking against MI: if the condition code requires
2351245431Sdim    // checking of the V bit, then this is not safe to do.
2352245431Sdim    // It is safe to remove CmpInstr if CPSR is redefined or killed.
2353245431Sdim    // If we are done with the basic block, we need to check whether CPSR is
2354245431Sdim    // live-out.
2355245431Sdim    SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
2356245431Sdim        OperandsToUpdate;
2357221345Sdim    bool isSafe = false;
2358221345Sdim    I = CmpInstr;
2359245431Sdim    E = CmpInstr->getParent()->end();
2360221345Sdim    while (!isSafe && ++I != E) {
2361221345Sdim      const MachineInstr &Instr = *I;
2362221345Sdim      for (unsigned IO = 0, EO = Instr.getNumOperands();
2363221345Sdim           !isSafe && IO != EO; ++IO) {
2364221345Sdim        const MachineOperand &MO = Instr.getOperand(IO);
2365235633Sdim        if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
2366235633Sdim          isSafe = true;
2367235633Sdim          break;
2368235633Sdim        }
2369221345Sdim        if (!MO.isReg() || MO.getReg() != ARM::CPSR)
2370221345Sdim          continue;
2371221345Sdim        if (MO.isDef()) {
2372221345Sdim          isSafe = true;
2373221345Sdim          break;
2374221345Sdim        }
2375263509Sdim        // The condition code operand is the one just before CPSR, except for VSELs.
2376263509Sdim        ARMCC::CondCodes CC;
2377263509Sdim        bool IsInstrVSel = true;
2378263509Sdim        switch (Instr.getOpcode()) {
2379263509Sdim        default:
2380263509Sdim          IsInstrVSel = false;
2381263509Sdim          CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
2382263509Sdim          break;
2383263509Sdim        case ARM::VSELEQD:
2384263509Sdim        case ARM::VSELEQS:
2385263509Sdim          CC = ARMCC::EQ;
2386263509Sdim          break;
2387263509Sdim        case ARM::VSELGTD:
2388263509Sdim        case ARM::VSELGTS:
2389263509Sdim          CC = ARMCC::GT;
2390263509Sdim          break;
2391263509Sdim        case ARM::VSELGED:
2392263509Sdim        case ARM::VSELGES:
2393263509Sdim          CC = ARMCC::GE;
2394263509Sdim          break;
2395263509Sdim        case ARM::VSELVSS:
2396263509Sdim        case ARM::VSELVSD:
2397263509Sdim          CC = ARMCC::VS;
2398263509Sdim          break;
2399263509Sdim        }
2400263509Sdim
2401245431Sdim        if (Sub) {
2402245431Sdim          ARMCC::CondCodes NewCC = getSwappedCondition(CC);
2403245431Sdim          if (NewCC == ARMCC::AL)
2404245431Sdim            return false;
2405245431Sdim          // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
2406245431Sdim          // on CMP needs to be updated to be based on SUB.
2407245431Sdim          // Push the condition code operands to OperandsToUpdate.
2408245431Sdim          // If it is safe to remove CmpInstr, the condition code of these
2409245431Sdim          // operands will be modified.
2410245431Sdim          if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
2411263509Sdim              Sub->getOperand(2).getReg() == SrcReg) {
2412263509Sdim            // VSel doesn't support condition code update.
2413263509Sdim            if (IsInstrVSel)
2414263509Sdim              return false;
2415263509Sdim            OperandsToUpdate.push_back(
2416263509Sdim                std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
2417263509Sdim          }
2418263509Sdim        } else
2419245431Sdim          switch (CC) {
2420245431Sdim          default:
2421245431Sdim            // CPSR can be used multiple times; keep scanning the remaining uses.
2422245431Sdim            break;
2423245431Sdim          case ARMCC::VS:
2424245431Sdim          case ARMCC::VC:
2425245431Sdim          case ARMCC::GE:
2426245431Sdim          case ARMCC::LT:
2427245431Sdim          case ARMCC::GT:
2428245431Sdim          case ARMCC::LE:
2429245431Sdim            return false;
2430245431Sdim          }
2431221345Sdim      }
2432221345Sdim    }
2433221345Sdim
2434245431Sdim    // If CPSR is neither killed nor re-defined, we should check whether it is
2435245431Sdim    // live-out. If it is live-out, do not optimize.
2436245431Sdim    if (!isSafe) {
2437245431Sdim      MachineBasicBlock *MBB = CmpInstr->getParent();
2438245431Sdim      for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
2439245431Sdim               SE = MBB->succ_end(); SI != SE; ++SI)
2440245431Sdim        if ((*SI)->isLiveIn(ARM::CPSR))
2441245431Sdim          return false;
2442245431Sdim    }
2443221345Sdim
2444218893Sdim    // Toggle the optional operand to CPSR.
2445218893Sdim    MI->getOperand(5).setReg(ARM::CPSR);
2446218893Sdim    MI->getOperand(5).setIsDef(true);
2447245431Sdim    assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
2448212904Sdim    CmpInstr->eraseFromParent();
2449245431Sdim
2450245431Sdim    // Modify the condition code of operands in OperandsToUpdate.
2451245431Sdim    // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2452245431Sdim    // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2453245431Sdim    for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
2454245431Sdim      OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
2455212904Sdim    return true;
2456212904Sdim  }
2457221345Sdim  }
2458212904Sdim
2459212904Sdim  return false;
2460212904Sdim}
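// Sketch of the transformation above, in simplified pseudo-MIR (an
// illustration, not verbatim compiler output):
//   %1 = SUBri %0, 42            ; optional cc_out operand currently unused
//   CMPri %0, 42                 ; sets CPSR
//   <users predicated on CPSR>
// becomes
//   %1 = SUBri %0, 42            ; cc_out toggled to define CPSR
//   <users predicated on CPSR>
// with the CMP erased; when the matched flag producer has swapped operands,
// the users' condition codes are additionally rewritten via
// getSwappedCondition.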
2461218893Sdim
2462218893Sdimbool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
2463218893Sdim                                     MachineInstr *DefMI, unsigned Reg,
2464218893Sdim                                     MachineRegisterInfo *MRI) const {
2465218893Sdim  // Fold large immediates into add, sub, or, xor.
2466218893Sdim  unsigned DefOpc = DefMI->getOpcode();
2467218893Sdim  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
2468218893Sdim    return false;
2469218893Sdim  if (!DefMI->getOperand(1).isImm())
2470218893Sdim    // Could be t2MOVi32imm <ga:xx>
2471218893Sdim    return false;
2472218893Sdim
2473218893Sdim  if (!MRI->hasOneNonDBGUse(Reg))
2474218893Sdim    return false;
2475218893Sdim
2476235633Sdim  const MCInstrDesc &DefMCID = DefMI->getDesc();
2477235633Sdim  if (DefMCID.hasOptionalDef()) {
2478235633Sdim    unsigned NumOps = DefMCID.getNumOperands();
2479235633Sdim    const MachineOperand &MO = DefMI->getOperand(NumOps-1);
2480235633Sdim    if (MO.getReg() == ARM::CPSR && !MO.isDead())
2481235633Sdim      // If DefMI defines CPSR and it is not dead, it's obviously not safe
2482235633Sdim      // to delete DefMI.
2483235633Sdim      return false;
2484235633Sdim  }
2485235633Sdim
2486235633Sdim  const MCInstrDesc &UseMCID = UseMI->getDesc();
2487235633Sdim  if (UseMCID.hasOptionalDef()) {
2488235633Sdim    unsigned NumOps = UseMCID.getNumOperands();
2489235633Sdim    if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
2490235633Sdim      // If the instruction sets the flag, do not attempt this optimization
2491235633Sdim      // since it may change the semantics of the code.
2492235633Sdim      return false;
2493235633Sdim  }
2494235633Sdim
2495218893Sdim  unsigned UseOpc = UseMI->getOpcode();
2496218893Sdim  unsigned NewUseOpc = 0;
2497218893Sdim  uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
2498218893Sdim  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
2499218893Sdim  bool Commute = false;
2500218893Sdim  switch (UseOpc) {
2501218893Sdim  default: return false;
2502218893Sdim  case ARM::SUBrr:
2503218893Sdim  case ARM::ADDrr:
2504218893Sdim  case ARM::ORRrr:
2505218893Sdim  case ARM::EORrr:
2506218893Sdim  case ARM::t2SUBrr:
2507218893Sdim  case ARM::t2ADDrr:
2508218893Sdim  case ARM::t2ORRrr:
2509218893Sdim  case ARM::t2EORrr: {
2510218893Sdim    Commute = UseMI->getOperand(2).getReg() != Reg;
2511218893Sdim    switch (UseOpc) {
2512218893Sdim    default: break;
2513218893Sdim    case ARM::SUBrr: {
2514218893Sdim      if (Commute)
2515218893Sdim        return false;
2516218893Sdim      ImmVal = -ImmVal;
2517218893Sdim      NewUseOpc = ARM::SUBri;
2518218893Sdim      // Fallthrough
2519218893Sdim    }
2520218893Sdim    case ARM::ADDrr:
2521218893Sdim    case ARM::ORRrr:
2522218893Sdim    case ARM::EORrr: {
2523218893Sdim      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
2524218893Sdim        return false;
2525218893Sdim      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
2526218893Sdim      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
2527218893Sdim      switch (UseOpc) {
2528218893Sdim      default: break;
2529218893Sdim      case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
2530218893Sdim      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
2531218893Sdim      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
2532218893Sdim      }
2533218893Sdim      break;
2534218893Sdim    }
2535218893Sdim    case ARM::t2SUBrr: {
2536218893Sdim      if (Commute)
2537218893Sdim        return false;
2538218893Sdim      ImmVal = -ImmVal;
2539218893Sdim      NewUseOpc = ARM::t2SUBri;
2540218893Sdim      // Fallthrough
2541218893Sdim    }
2542218893Sdim    case ARM::t2ADDrr:
2543218893Sdim    case ARM::t2ORRrr:
2544218893Sdim    case ARM::t2EORrr: {
2545218893Sdim      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
2546218893Sdim        return false;
2547218893Sdim      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
2548218893Sdim      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
2549218893Sdim      switch (UseOpc) {
2550218893Sdim      default: break;
2551218893Sdim      case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
2552218893Sdim      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
2553218893Sdim      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
2554218893Sdim      }
2555218893Sdim      break;
2556218893Sdim    }
2557218893Sdim    }
2558218893Sdim  }
2559218893Sdim  }
2560218893Sdim
2561218893Sdim  unsigned OpIdx = Commute ? 2 : 1;
2562218893Sdim  unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
2563218893Sdim  bool isKill = UseMI->getOperand(OpIdx).isKill();
2564218893Sdim  unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
2565218893Sdim  AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
2566235633Sdim                                      UseMI, UseMI->getDebugLoc(),
2567218893Sdim                                      get(NewUseOpc), NewReg)
2568218893Sdim                              .addReg(Reg1, getKillRegState(isKill))
2569218893Sdim                              .addImm(SOImmValV1)));
2570218893Sdim  UseMI->setDesc(get(NewUseOpc));
2571218893Sdim  UseMI->getOperand(1).setReg(NewReg);
2572218893Sdim  UseMI->getOperand(1).setIsKill();
2573218893Sdim  UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
2574218893Sdim  DefMI->eraseFromParent();
2575218893Sdim  return true;
2576218893Sdim}
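// Worked example (values are hypothetical): folding
//   %c = MOVi32imm 0x00ff00ff
//   %d = ADDrr %a, %c
// is possible because 0x00ff00ff splits into two valid so_imm pieces
// (0x00ff0000 and 0x000000ff), so the pair is rewritten as two immediate
// adds, e.g.
//   %t = ADDri %a, <first so_imm part>
//   %d = ADDri %t, <second so_imm part>
// and the MOVi32imm is erased once its single use is gone.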
2577218893Sdim
2578245431Sdimstatic unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
2579245431Sdim                                        const MachineInstr *MI) {
2580245431Sdim  switch (MI->getOpcode()) {
2581245431Sdim  default: {
2582245431Sdim    const MCInstrDesc &Desc = MI->getDesc();
2583245431Sdim    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
2584245431Sdim    assert(UOps >= 0 && "bad # UOps");
2585245431Sdim    return UOps;
2586245431Sdim  }
2587245431Sdim
2588245431Sdim  case ARM::LDRrs:
2589245431Sdim  case ARM::LDRBrs:
2590245431Sdim  case ARM::STRrs:
2591245431Sdim  case ARM::STRBrs: {
2592245431Sdim    unsigned ShOpVal = MI->getOperand(3).getImm();
2593245431Sdim    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2594245431Sdim    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2595245431Sdim    if (!isSub &&
2596245431Sdim        (ShImm == 0 ||
2597245431Sdim         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2598245431Sdim          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2599245431Sdim      return 1;
2600245431Sdim    return 2;
2601245431Sdim  }
2602245431Sdim
2603245431Sdim  case ARM::LDRH:
2604245431Sdim  case ARM::STRH: {
2605245431Sdim    if (!MI->getOperand(2).getReg())
2606245431Sdim      return 1;
2607245431Sdim
2608245431Sdim    unsigned ShOpVal = MI->getOperand(3).getImm();
2609245431Sdim    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2610245431Sdim    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2611245431Sdim    if (!isSub &&
2612245431Sdim        (ShImm == 0 ||
2613245431Sdim         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2614245431Sdim          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2615245431Sdim      return 1;
2616245431Sdim    return 2;
2617245431Sdim  }
2618245431Sdim
2619245431Sdim  case ARM::LDRSB:
2620245431Sdim  case ARM::LDRSH:
2621245431Sdim    return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
2622245431Sdim
2623245431Sdim  case ARM::LDRSB_POST:
2624245431Sdim  case ARM::LDRSH_POST: {
2625245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2626245431Sdim    unsigned Rm = MI->getOperand(3).getReg();
2627245431Sdim    return (Rt == Rm) ? 4 : 3;
2628245431Sdim  }
2629245431Sdim
2630245431Sdim  case ARM::LDR_PRE_REG:
2631245431Sdim  case ARM::LDRB_PRE_REG: {
2632245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2633245431Sdim    unsigned Rm = MI->getOperand(3).getReg();
2634245431Sdim    if (Rt == Rm)
2635245431Sdim      return 3;
2636245431Sdim    unsigned ShOpVal = MI->getOperand(4).getImm();
2637245431Sdim    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2638245431Sdim    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2639245431Sdim    if (!isSub &&
2640245431Sdim        (ShImm == 0 ||
2641245431Sdim         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2642245431Sdim          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2643245431Sdim      return 2;
2644245431Sdim    return 3;
2645245431Sdim  }
2646245431Sdim
2647245431Sdim  case ARM::STR_PRE_REG:
2648245431Sdim  case ARM::STRB_PRE_REG: {
2649245431Sdim    unsigned ShOpVal = MI->getOperand(4).getImm();
2650245431Sdim    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2651245431Sdim    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2652245431Sdim    if (!isSub &&
2653245431Sdim        (ShImm == 0 ||
2654245431Sdim         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2655245431Sdim          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2656245431Sdim      return 2;
2657245431Sdim    return 3;
2658245431Sdim  }
2659245431Sdim
2660245431Sdim  case ARM::LDRH_PRE:
2661245431Sdim  case ARM::STRH_PRE: {
2662245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2663245431Sdim    unsigned Rm = MI->getOperand(3).getReg();
2664245431Sdim    if (!Rm)
2665245431Sdim      return 2;
2666245431Sdim    if (Rt == Rm)
2667245431Sdim      return 3;
2668245431Sdim    return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
2669245431Sdim      ? 3 : 2;
2670245431Sdim  }
2671245431Sdim
2672245431Sdim  case ARM::LDR_POST_REG:
2673245431Sdim  case ARM::LDRB_POST_REG:
2674245431Sdim  case ARM::LDRH_POST: {
2675245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2676245431Sdim    unsigned Rm = MI->getOperand(3).getReg();
2677245431Sdim    return (Rt == Rm) ? 3 : 2;
2678245431Sdim  }
2679245431Sdim
2680245431Sdim  case ARM::LDR_PRE_IMM:
2681245431Sdim  case ARM::LDRB_PRE_IMM:
2682245431Sdim  case ARM::LDR_POST_IMM:
2683245431Sdim  case ARM::LDRB_POST_IMM:
2684245431Sdim  case ARM::STRB_POST_IMM:
2685245431Sdim  case ARM::STRB_POST_REG:
2686245431Sdim  case ARM::STRB_PRE_IMM:
2687245431Sdim  case ARM::STRH_POST:
2688245431Sdim  case ARM::STR_POST_IMM:
2689245431Sdim  case ARM::STR_POST_REG:
2690245431Sdim  case ARM::STR_PRE_IMM:
2691245431Sdim    return 2;
2692245431Sdim
2693245431Sdim  case ARM::LDRSB_PRE:
2694245431Sdim  case ARM::LDRSH_PRE: {
2695245431Sdim    unsigned Rm = MI->getOperand(3).getReg();
2696245431Sdim    if (Rm == 0)
2697245431Sdim      return 3;
2698245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2699245431Sdim    if (Rt == Rm)
2700245431Sdim      return 4;
2701245431Sdim    unsigned ShOpVal = MI->getOperand(4).getImm();
2702245431Sdim    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
2703245431Sdim    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
2704245431Sdim    if (!isSub &&
2705245431Sdim        (ShImm == 0 ||
2706245431Sdim         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
2707245431Sdim          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
2708245431Sdim      return 3;
2709245431Sdim    return 4;
2710245431Sdim  }
2711245431Sdim
2712245431Sdim  case ARM::LDRD: {
2713245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2714245431Sdim    unsigned Rn = MI->getOperand(2).getReg();
2715245431Sdim    unsigned Rm = MI->getOperand(3).getReg();
2716245431Sdim    if (Rm)
2717245431Sdim      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
2718245431Sdim    return (Rt == Rn) ? 3 : 2;
2719245431Sdim  }
2720245431Sdim
2721245431Sdim  case ARM::STRD: {
2722245431Sdim    unsigned Rm = MI->getOperand(3).getReg();
2723245431Sdim    if (Rm)
2724245431Sdim      return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
2725245431Sdim    return 2;
2726245431Sdim  }
2727245431Sdim
2728245431Sdim  case ARM::LDRD_POST:
2729245431Sdim  case ARM::t2LDRD_POST:
2730245431Sdim    return 3;
2731245431Sdim
2732245431Sdim  case ARM::STRD_POST:
2733245431Sdim  case ARM::t2STRD_POST:
2734245431Sdim    return 4;
2735245431Sdim
2736245431Sdim  case ARM::LDRD_PRE: {
2737245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2738245431Sdim    unsigned Rn = MI->getOperand(3).getReg();
2739245431Sdim    unsigned Rm = MI->getOperand(4).getReg();
2740245431Sdim    if (Rm)
2741245431Sdim      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
2742245431Sdim    return (Rt == Rn) ? 4 : 3;
2743245431Sdim  }
2744245431Sdim
2745245431Sdim  case ARM::t2LDRD_PRE: {
2746245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2747245431Sdim    unsigned Rn = MI->getOperand(3).getReg();
2748245431Sdim    return (Rt == Rn) ? 4 : 3;
2749245431Sdim  }
2750245431Sdim
2751245431Sdim  case ARM::STRD_PRE: {
2752245431Sdim    unsigned Rm = MI->getOperand(4).getReg();
2753245431Sdim    if (Rm)
2754245431Sdim      return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
2755245431Sdim    return 3;
2756245431Sdim  }
2757245431Sdim
2758245431Sdim  case ARM::t2STRD_PRE:
2759245431Sdim    return 3;
2760245431Sdim
2761245431Sdim  case ARM::t2LDR_POST:
2762245431Sdim  case ARM::t2LDRB_POST:
2763245431Sdim  case ARM::t2LDRB_PRE:
2764245431Sdim  case ARM::t2LDRSBi12:
2765245431Sdim  case ARM::t2LDRSBi8:
2766245431Sdim  case ARM::t2LDRSBpci:
2767245431Sdim  case ARM::t2LDRSBs:
2768245431Sdim  case ARM::t2LDRH_POST:
2769245431Sdim  case ARM::t2LDRH_PRE:
2770245431Sdim  case ARM::t2LDRSBT:
2771245431Sdim  case ARM::t2LDRSB_POST:
2772245431Sdim  case ARM::t2LDRSB_PRE:
2773245431Sdim  case ARM::t2LDRSH_POST:
2774245431Sdim  case ARM::t2LDRSH_PRE:
2775245431Sdim  case ARM::t2LDRSHi12:
2776245431Sdim  case ARM::t2LDRSHi8:
2777245431Sdim  case ARM::t2LDRSHpci:
2778245431Sdim  case ARM::t2LDRSHs:
2779245431Sdim    return 2;
2780245431Sdim
2781245431Sdim  case ARM::t2LDRDi8: {
2782245431Sdim    unsigned Rt = MI->getOperand(0).getReg();
2783245431Sdim    unsigned Rn = MI->getOperand(2).getReg();
2784245431Sdim    return (Rt == Rn) ? 3 : 2;
2785245431Sdim  }
2786245431Sdim
2787245431Sdim  case ARM::t2STRB_POST:
2788245431Sdim  case ARM::t2STRB_PRE:
2789245431Sdim  case ARM::t2STRBs:
2790245431Sdim  case ARM::t2STRDi8:
2791245431Sdim  case ARM::t2STRH_POST:
2792245431Sdim  case ARM::t2STRH_PRE:
2793245431Sdim  case ARM::t2STRHs:
2794245431Sdim  case ARM::t2STR_POST:
2795245431Sdim  case ARM::t2STR_PRE:
2796245431Sdim  case ARM::t2STRs:
2797245431Sdim    return 2;
2798245431Sdim  }
2799245431Sdim}
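// For instance (assumed syntax): "ldr r0, [r1, r2, lsl #2]" (LDRrs, no
// subtraction, lsl shift of 1-3) lands in the fast addressing bucket above
// and is modeled as a single micro-op on Swift, while "ldr r0, [r1, -r2]"
// or "ldr r0, [r1, r2, lsr #1]" is modeled as two.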
2800245431Sdim
2801245431Sdim// Return the number of 32-bit words loaded by LDM or stored by STM. If this
2802245431Sdim// can't be easily determined return 0 (missing MachineMemOperand).
2803245431Sdim//
2804245431Sdim// FIXME: The current MachineInstr design does not support relying on machine
2805245431Sdim// mem operands to determine the width of a memory access. Instead, we expect
2806245431Sdim// the target to provide this information based on the instruction opcode and
2807245431Sdim// operands. However, using MachineMemOperand is the best solution now for
2808245431Sdim// two reasons:
2809245431Sdim//
2810245431Sdim// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
2811245431Sdim// operands. This is much more dangerous than using the MachineMemOperand
2812245431Sdim// sizes because CodeGen passes can insert/remove optional machine operands. In
2813245431Sdim// fact, it's totally incorrect for preRA passes and appears to be wrong for
2814245431Sdim// postRA passes as well.
2815245431Sdim//
2816245431Sdim// 2) getNumLDMAddresses is only used by the scheduling machine model and any
2817245431Sdim// machine model that calls this should handle the unknown (zero size) case.
2818245431Sdim//
2819245431Sdim// Long term, we should require a target hook that verifies MachineMemOperand
2820245431Sdim// sizes during MC lowering. That target hook should be local to MC lowering
2821245431Sdim// because we can't ensure that it is aware of other MI forms. Doing this will
2822245431Sdim// ensure that MachineMemOperands are correctly propagated through all passes.
2823245431Sdimunsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
2824245431Sdim  unsigned Size = 0;
2825245431Sdim  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
2826245431Sdim         E = MI->memoperands_end(); I != E; ++I) {
2827245431Sdim    Size += (*I)->getSize();
2828245431Sdim  }
2829245431Sdim  return Size / 4;
2830245431Sdim}
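// Example: an LDMIA carrying two 4-byte MachineMemOperands yields 8 / 4 == 2
// addresses; an LDM with no memoperands attached returns 0 and the machine
// model must treat its width as unknown.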
2831245431Sdim
2832218893Sdimunsigned
2833218893SdimARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
2834218893Sdim                                 const MachineInstr *MI) const {
2835218893Sdim  if (!ItinData || ItinData->isEmpty())
2836218893Sdim    return 1;
2837218893Sdim
2838224145Sdim  const MCInstrDesc &Desc = MI->getDesc();
2839218893Sdim  unsigned Class = Desc.getSchedClass();
2840245431Sdim  int ItinUOps = ItinData->getNumMicroOps(Class);
2841245431Sdim  if (ItinUOps >= 0) {
2842245431Sdim    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
2843245431Sdim      return getNumMicroOpsSwiftLdSt(ItinData, MI);
2844218893Sdim
2845245431Sdim    return ItinUOps;
2846245431Sdim  }
2847245431Sdim
2848218893Sdim  unsigned Opc = MI->getOpcode();
2849218893Sdim  switch (Opc) {
2850218893Sdim  default:
2851218893Sdim    llvm_unreachable("Unexpected multi-uops instruction!");
2852218893Sdim  case ARM::VLDMQIA:
2853218893Sdim  case ARM::VSTMQIA:
2854218893Sdim    return 2;
2855218893Sdim
2856218893Sdim  // The number of uOps for load / store multiple is determined by the number
2857218893Sdim  // of registers.
2858218893Sdim  //
2859218893Sdim  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
2860218893Sdim  // same cycle. The scheduling for the first load / store must be done
2861245431Sdim  // separately by assuming the address is not 64-bit aligned.
2862218893Sdim  //
2863218893Sdim  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
2864218893Sdim  // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
2865218893Sdim  // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
2866218893Sdim  case ARM::VLDMDIA:
2867218893Sdim  case ARM::VLDMDIA_UPD:
2868218893Sdim  case ARM::VLDMDDB_UPD:
2869218893Sdim  case ARM::VLDMSIA:
2870218893Sdim  case ARM::VLDMSIA_UPD:
2871218893Sdim  case ARM::VLDMSDB_UPD:
2872218893Sdim  case ARM::VSTMDIA:
2873218893Sdim  case ARM::VSTMDIA_UPD:
2874218893Sdim  case ARM::VSTMDDB_UPD:
2875218893Sdim  case ARM::VSTMSIA:
2876218893Sdim  case ARM::VSTMSIA_UPD:
2877218893Sdim  case ARM::VSTMSDB_UPD: {
2878218893Sdim    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
2879218893Sdim    return (NumRegs / 2) + (NumRegs % 2) + 1;
2880218893Sdim  }
2881218893Sdim
2882218893Sdim  case ARM::LDMIA_RET:
2883218893Sdim  case ARM::LDMIA:
2884218893Sdim  case ARM::LDMDA:
2885218893Sdim  case ARM::LDMDB:
2886218893Sdim  case ARM::LDMIB:
2887218893Sdim  case ARM::LDMIA_UPD:
2888218893Sdim  case ARM::LDMDA_UPD:
2889218893Sdim  case ARM::LDMDB_UPD:
2890218893Sdim  case ARM::LDMIB_UPD:
2891218893Sdim  case ARM::STMIA:
2892218893Sdim  case ARM::STMDA:
2893218893Sdim  case ARM::STMDB:
2894218893Sdim  case ARM::STMIB:
2895218893Sdim  case ARM::STMIA_UPD:
2896218893Sdim  case ARM::STMDA_UPD:
2897218893Sdim  case ARM::STMDB_UPD:
2898218893Sdim  case ARM::STMIB_UPD:
2899218893Sdim  case ARM::tLDMIA:
2900218893Sdim  case ARM::tLDMIA_UPD:
2901218893Sdim  case ARM::tSTMIA_UPD:
2902218893Sdim  case ARM::tPOP_RET:
2903218893Sdim  case ARM::tPOP:
2904218893Sdim  case ARM::tPUSH:
2905218893Sdim  case ARM::t2LDMIA_RET:
2906218893Sdim  case ARM::t2LDMIA:
2907218893Sdim  case ARM::t2LDMDB:
2908218893Sdim  case ARM::t2LDMIA_UPD:
2909218893Sdim  case ARM::t2LDMDB_UPD:
2910218893Sdim  case ARM::t2STMIA:
2911218893Sdim  case ARM::t2STMDB:
2912218893Sdim  case ARM::t2STMIA_UPD:
2913218893Sdim  case ARM::t2STMDB_UPD: {
2914218893Sdim    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
2915245431Sdim    if (Subtarget.isSwift()) {
2916245431Sdim      int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
2917245431Sdim      switch (Opc) {
2918245431Sdim      default: break;
2919245431Sdim      case ARM::VLDMDIA_UPD:
2920245431Sdim      case ARM::VLDMDDB_UPD:
2921245431Sdim      case ARM::VLDMSIA_UPD:
2922245431Sdim      case ARM::VLDMSDB_UPD:
2923245431Sdim      case ARM::VSTMDIA_UPD:
2924245431Sdim      case ARM::VSTMDDB_UPD:
2925245431Sdim      case ARM::VSTMSIA_UPD:
2926245431Sdim      case ARM::VSTMSDB_UPD:
2927245431Sdim      case ARM::LDMIA_UPD:
2928245431Sdim      case ARM::LDMDA_UPD:
2929245431Sdim      case ARM::LDMDB_UPD:
2930245431Sdim      case ARM::LDMIB_UPD:
2931245431Sdim      case ARM::STMIA_UPD:
2932245431Sdim      case ARM::STMDA_UPD:
2933245431Sdim      case ARM::STMDB_UPD:
2934245431Sdim      case ARM::STMIB_UPD:
2935245431Sdim      case ARM::tLDMIA_UPD:
2936245431Sdim      case ARM::tSTMIA_UPD:
2937245431Sdim      case ARM::t2LDMIA_UPD:
2938245431Sdim      case ARM::t2LDMDB_UPD:
2939245431Sdim      case ARM::t2STMIA_UPD:
2940245431Sdim      case ARM::t2STMDB_UPD:
2941245431Sdim        ++UOps; // One for base register writeback.
2942245431Sdim        break;
2943245431Sdim      case ARM::LDMIA_RET:
2944245431Sdim      case ARM::tPOP_RET:
2945245431Sdim      case ARM::t2LDMIA_RET:
2946245431Sdim        UOps += 2; // One for base reg wb, one for write to pc.
2947245431Sdim        break;
2948245431Sdim      }
2949245431Sdim      return UOps;
2950245431Sdim    } else if (Subtarget.isCortexA8()) {
2951218893Sdim      if (NumRegs < 4)
2952218893Sdim        return 2;
2953218893Sdim      // 4 registers would be issued: 2, 2.
2954218893Sdim      // 5 registers would be issued: 2, 2, 1.
2955245431Sdim      int A8UOps = (NumRegs / 2);
2956218893Sdim      if (NumRegs % 2)
2957245431Sdim        ++A8UOps;
2958245431Sdim      return A8UOps;
2959245431Sdim    } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
2960245431Sdim      int A9UOps = (NumRegs / 2);
2961218893Sdim      // If there are odd number of registers or if it's not 64-bit aligned,
2962218893Sdim      // then it takes an extra AGU (Address Generation Unit) cycle.
2963218893Sdim      if ((NumRegs % 2) ||
2964218893Sdim          !MI->hasOneMemOperand() ||
2965218893Sdim          (*MI->memoperands_begin())->getAlignment() < 8)
2966245431Sdim        ++A9UOps;
2967245431Sdim      return A9UOps;
2968218893Sdim    } else {
2969218893Sdim      // Assume the worst.
2970218893Sdim      return NumRegs;
2971218893Sdim    }
2972218893Sdim  }
2973218893Sdim  }
2974218893Sdim}
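// Worked example (illustrative): a load-multiple of 5 registers is modeled
// above as (5 / 2) + (5 % 2) == 3 micro-ops on Cortex-A8 (issued 2, 2, 1);
// on Swift the same LDM costs 1 + 5 == 6 micro-ops, plus one more for a
// base-register writeback variant and two more for a pop-and-return.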
2975218893Sdim
2976218893Sdimint
2977218893SdimARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
2978224145Sdim                                  const MCInstrDesc &DefMCID,
2979218893Sdim                                  unsigned DefClass,
2980218893Sdim                                  unsigned DefIdx, unsigned DefAlign) const {
2981224145Sdim  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
2982218893Sdim  if (RegNo <= 0)
2983218893Sdim    // Def is the address writeback.
2984218893Sdim    return ItinData->getOperandCycle(DefClass, DefIdx);
2985218893Sdim
2986218893Sdim  int DefCycle;
2987218893Sdim  if (Subtarget.isCortexA8()) {
2988218893Sdim    // (regno / 2) + (regno % 2) + 1
2989218893Sdim    DefCycle = RegNo / 2 + 1;
2990218893Sdim    if (RegNo % 2)
2991218893Sdim      ++DefCycle;
2992245431Sdim  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
2993218893Sdim    DefCycle = RegNo;
2994218893Sdim    bool isSLoad = false;
2995218893Sdim
2996224145Sdim    switch (DefMCID.getOpcode()) {
2997218893Sdim    default: break;
2998218893Sdim    case ARM::VLDMSIA:
2999218893Sdim    case ARM::VLDMSIA_UPD:
3000218893Sdim    case ARM::VLDMSDB_UPD:
3001218893Sdim      isSLoad = true;
3002218893Sdim      break;
3003218893Sdim    }
3004218893Sdim
3005218893Sdim    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3006218893Sdim    // then it takes an extra cycle.
3007218893Sdim    if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
3008218893Sdim      ++DefCycle;
3009218893Sdim  } else {
3010218893Sdim    // Assume the worst.
3011218893Sdim    DefCycle = RegNo + 2;
3012218893Sdim  }
3013218893Sdim
3014218893Sdim  return DefCycle;
3015218893Sdim}
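// E.g. (illustrative): on Cortex-A8 the value written into the 3rd register
// of a VLDM becomes available at cycle (3 / 2) + (3 % 2) + 1 == 3, per the
// formula noted above.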
3016218893Sdim
3017218893Sdimint
3018218893SdimARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
3019224145Sdim                                 const MCInstrDesc &DefMCID,
3020218893Sdim                                 unsigned DefClass,
3021218893Sdim                                 unsigned DefIdx, unsigned DefAlign) const {
3022224145Sdim  int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
3023218893Sdim  if (RegNo <= 0)
3024218893Sdim    // Def is the address writeback.
3025218893Sdim    return ItinData->getOperandCycle(DefClass, DefIdx);
3026218893Sdim
3027218893Sdim  int DefCycle;
3028218893Sdim  if (Subtarget.isCortexA8()) {
3029218893Sdim    // 4 registers would be issued: 1, 2, 1.
3030218893Sdim    // 5 registers would be issued: 1, 2, 2.
3031218893Sdim    DefCycle = RegNo / 2;
3032218893Sdim    if (DefCycle < 1)
3033218893Sdim      DefCycle = 1;
3034218893Sdim    // Result latency is issue cycle + 2: E2.
3035218893Sdim    DefCycle += 2;
3036245431Sdim  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3037218893Sdim    DefCycle = (RegNo / 2);
3038218893Sdim    // If there are odd number of registers or if it's not 64-bit aligned,
3039218893Sdim    // then it takes an extra AGU (Address Generation Unit) cycle.
3040218893Sdim    if ((RegNo % 2) || DefAlign < 8)
3041218893Sdim      ++DefCycle;
3042218893Sdim    // Result latency is AGU cycles + 2.
3043218893Sdim    DefCycle += 2;
3044218893Sdim  } else {
3045218893Sdim    // Assume the worst.
3046218893Sdim    DefCycle = RegNo + 2;
3047218893Sdim  }
3048218893Sdim
3049218893Sdim  return DefCycle;
3050218893Sdim}
3051218893Sdim
3052218893Sdimint
3053218893SdimARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
3054224145Sdim                                  const MCInstrDesc &UseMCID,
3055218893Sdim                                  unsigned UseClass,
3056218893Sdim                                  unsigned UseIdx, unsigned UseAlign) const {
3057224145Sdim  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3058218893Sdim  if (RegNo <= 0)
3059218893Sdim    return ItinData->getOperandCycle(UseClass, UseIdx);
3060218893Sdim
3061218893Sdim  int UseCycle;
3062218893Sdim  if (Subtarget.isCortexA8()) {
3063218893Sdim    // (regno / 2) + (regno % 2) + 1
3064218893Sdim    UseCycle = RegNo / 2 + 1;
3065218893Sdim    if (RegNo % 2)
3066218893Sdim      ++UseCycle;
3067245431Sdim  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3068218893Sdim    UseCycle = RegNo;
3069218893Sdim    bool isSStore = false;
3070218893Sdim
3071224145Sdim    switch (UseMCID.getOpcode()) {
3072218893Sdim    default: break;
3073218893Sdim    case ARM::VSTMSIA:
3074218893Sdim    case ARM::VSTMSIA_UPD:
3075218893Sdim    case ARM::VSTMSDB_UPD:
3076218893Sdim      isSStore = true;
3077218893Sdim      break;
3078218893Sdim    }
3079218893Sdim
3080218893Sdim    // If there are odd number of 'S' registers or if it's not 64-bit aligned,
3081218893Sdim    // then it takes an extra cycle.
3082218893Sdim    if ((isSStore && (RegNo % 2)) || UseAlign < 8)
3083218893Sdim      ++UseCycle;
3084218893Sdim  } else {
3085218893Sdim    // Assume the worst.
3086218893Sdim    UseCycle = RegNo + 2;
3087218893Sdim  }
3088218893Sdim
3089218893Sdim  return UseCycle;
3090218893Sdim}
3091218893Sdim
3092218893Sdimint
3093218893SdimARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
3094224145Sdim                                 const MCInstrDesc &UseMCID,
3095218893Sdim                                 unsigned UseClass,
3096218893Sdim                                 unsigned UseIdx, unsigned UseAlign) const {
3097224145Sdim  int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
3098218893Sdim  if (RegNo <= 0)
3099218893Sdim    return ItinData->getOperandCycle(UseClass, UseIdx);
3100218893Sdim
3101218893Sdim  int UseCycle;
3102218893Sdim  if (Subtarget.isCortexA8()) {
3103218893Sdim    UseCycle = RegNo / 2;
3104218893Sdim    if (UseCycle < 2)
3105218893Sdim      UseCycle = 2;
3106218893Sdim    // Read in E3.
3107218893Sdim    UseCycle += 2;
3108245431Sdim  } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
3109218893Sdim    UseCycle = (RegNo / 2);
3110218893Sdim    // If there are odd number of registers or if it's not 64-bit aligned,
3111218893Sdim    // then it takes an extra AGU (Address Generation Unit) cycle.
3112218893Sdim    if ((RegNo % 2) || UseAlign < 8)
3113218893Sdim      ++UseCycle;
3114218893Sdim  } else {
3115218893Sdim    // Assume the worst.
3116218893Sdim    UseCycle = 1;
3117218893Sdim  }
3118218893Sdim  return UseCycle;
3119218893Sdim}
3120218893Sdim
3121218893Sdimint
3122218893SdimARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3123224145Sdim                                    const MCInstrDesc &DefMCID,
3124218893Sdim                                    unsigned DefIdx, unsigned DefAlign,
3125224145Sdim                                    const MCInstrDesc &UseMCID,
3126218893Sdim                                    unsigned UseIdx, unsigned UseAlign) const {
3127224145Sdim  unsigned DefClass = DefMCID.getSchedClass();
3128224145Sdim  unsigned UseClass = UseMCID.getSchedClass();
3129218893Sdim
3130224145Sdim  if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
3131218893Sdim    return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
3132218893Sdim
3133218893Sdim  // This may be a def / use of a variable_ops instruction, in which case the
3134218893Sdim  // operand latency might be determinable dynamically. Let the target try to
3135218893Sdim  // figure it out.
3136218893Sdim  int DefCycle = -1;
3137218893Sdim  bool LdmBypass = false;
3138224145Sdim  switch (DefMCID.getOpcode()) {
3139218893Sdim  default:
3140218893Sdim    DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3141218893Sdim    break;
3142218893Sdim
3143218893Sdim  case ARM::VLDMDIA:
3144218893Sdim  case ARM::VLDMDIA_UPD:
3145218893Sdim  case ARM::VLDMDDB_UPD:
3146218893Sdim  case ARM::VLDMSIA:
3147218893Sdim  case ARM::VLDMSIA_UPD:
3148218893Sdim  case ARM::VLDMSDB_UPD:
3149224145Sdim    DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3150218893Sdim    break;
3151218893Sdim
3152218893Sdim  case ARM::LDMIA_RET:
3153218893Sdim  case ARM::LDMIA:
3154218893Sdim  case ARM::LDMDA:
3155218893Sdim  case ARM::LDMDB:
3156218893Sdim  case ARM::LDMIB:
3157218893Sdim  case ARM::LDMIA_UPD:
3158218893Sdim  case ARM::LDMDA_UPD:
3159218893Sdim  case ARM::LDMDB_UPD:
3160218893Sdim  case ARM::LDMIB_UPD:
3161218893Sdim  case ARM::tLDMIA:
3162218893Sdim  case ARM::tLDMIA_UPD:
3163218893Sdim  case ARM::tPUSH:
3164218893Sdim  case ARM::t2LDMIA_RET:
3165218893Sdim  case ARM::t2LDMIA:
3166218893Sdim  case ARM::t2LDMDB:
3167218893Sdim  case ARM::t2LDMIA_UPD:
3168218893Sdim  case ARM::t2LDMDB_UPD:
3169218893Sdim    LdmBypass = 1;
3170224145Sdim    DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
3171218893Sdim    break;
3172218893Sdim  }
3173218893Sdim
3174218893Sdim  if (DefCycle == -1)
3175218893Sdim    // We can't seem to determine the result latency of the def, assume it's 2.
3176218893Sdim    DefCycle = 2;
3177218893Sdim
3178218893Sdim  int UseCycle = -1;
3179224145Sdim  switch (UseMCID.getOpcode()) {
3180218893Sdim  default:
3181218893Sdim    UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
3182218893Sdim    break;
3183218893Sdim
3184218893Sdim  case ARM::VSTMDIA:
3185218893Sdim  case ARM::VSTMDIA_UPD:
3186218893Sdim  case ARM::VSTMDDB_UPD:
3187218893Sdim  case ARM::VSTMSIA:
3188218893Sdim  case ARM::VSTMSIA_UPD:
3189218893Sdim  case ARM::VSTMSDB_UPD:
3190224145Sdim    UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3191218893Sdim    break;
3192218893Sdim
3193218893Sdim  case ARM::STMIA:
3194218893Sdim  case ARM::STMDA:
3195218893Sdim  case ARM::STMDB:
3196218893Sdim  case ARM::STMIB:
3197218893Sdim  case ARM::STMIA_UPD:
3198218893Sdim  case ARM::STMDA_UPD:
3199218893Sdim  case ARM::STMDB_UPD:
3200218893Sdim  case ARM::STMIB_UPD:
3201218893Sdim  case ARM::tSTMIA_UPD:
3202218893Sdim  case ARM::tPOP_RET:
3203218893Sdim  case ARM::tPOP:
3204218893Sdim  case ARM::t2STMIA:
3205218893Sdim  case ARM::t2STMDB:
3206218893Sdim  case ARM::t2STMIA_UPD:
3207218893Sdim  case ARM::t2STMDB_UPD:
3208224145Sdim    UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
3209218893Sdim    break;
3210218893Sdim  }
3211218893Sdim
3212218893Sdim  if (UseCycle == -1)
3213218893Sdim    // Assume it's read in the first stage.
3214218893Sdim    UseCycle = 1;
3215218893Sdim
3216218893Sdim  UseCycle = DefCycle - UseCycle + 1;
3217218893Sdim  if (UseCycle > 0) {
3218218893Sdim    if (LdmBypass) {
3219218893Sdim      // It's a variable_ops instruction so we can't use DefIdx here. Just use
3220218893Sdim      // first def operand.
3221224145Sdim      if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
3222218893Sdim                                          UseClass, UseIdx))
3223218893Sdim        --UseCycle;
3224218893Sdim    } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
3225218893Sdim                                               UseClass, UseIdx)) {
3226218893Sdim      --UseCycle;
3227218893Sdim    }
3228218893Sdim  }
3229218893Sdim
3230218893Sdim  return UseCycle;
3231218893Sdim}
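// Illustrative arithmetic for the tail end of this function: with a def
// ready in cycle 4 and a use read in cycle 2, the operand latency is
// 4 - 2 + 1 == 3 cycles, reduced by one if the itinerary records pipeline
// forwarding between the two stages.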
3232218893Sdim
3233235633Sdimstatic const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
3234235633Sdim                                           const MachineInstr *MI, unsigned Reg,
3235235633Sdim                                           unsigned &DefIdx, unsigned &Dist) {
3236235633Sdim  Dist = 0;
3237235633Sdim
3238235633Sdim  MachineBasicBlock::const_iterator I = MI; ++I;
3239235633Sdim  MachineBasicBlock::const_instr_iterator II =
3240235633Sdim    llvm::prior(I.getInstrIterator());
3241235633Sdim  assert(II->isInsideBundle() && "Empty bundle?");
3242235633Sdim
3243235633Sdim  int Idx = -1;
3244235633Sdim  while (II->isInsideBundle()) {
3245235633Sdim    Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
3246235633Sdim    if (Idx != -1)
3247235633Sdim      break;
3248235633Sdim    --II;
3249235633Sdim    ++Dist;
3250235633Sdim  }
3251235633Sdim
3252235633Sdim  assert(Idx != -1 && "Cannot find bundled definition!");
3253235633Sdim  DefIdx = Idx;
3254235633Sdim  return II;
3255235633Sdim}
3256235633Sdim
3257235633Sdimstatic const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
3258235633Sdim                                           const MachineInstr *MI, unsigned Reg,
3259235633Sdim                                           unsigned &UseIdx, unsigned &Dist) {
3260235633Sdim  Dist = 0;
3261235633Sdim
3262235633Sdim  MachineBasicBlock::const_instr_iterator II = MI; ++II;
3263235633Sdim  assert(II->isInsideBundle() && "Empty bundle?");
3264235633Sdim  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
3265235633Sdim
3266235633Sdim  // FIXME: This doesn't properly handle multiple uses.
3267235633Sdim  int Idx = -1;
3268235633Sdim  while (II != E && II->isInsideBundle()) {
3269235633Sdim    Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
3270235633Sdim    if (Idx != -1)
3271235633Sdim      break;
3272235633Sdim    if (II->getOpcode() != ARM::t2IT)
3273235633Sdim      ++Dist;
3274235633Sdim    ++II;
3275235633Sdim  }
3276235633Sdim
3277235633Sdim  if (Idx == -1) {
3278235633Sdim    Dist = 0;
3279235633Sdim    return 0;
3280235633Sdim  }
3281235633Sdim
3282235633Sdim  UseIdx = Idx;
3283235633Sdim  return II;
3284235633Sdim}
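// The two helpers above walk inside an instruction bundle (e.g. a finalized
// Thumb-2 IT block) to locate the bundled instruction that actually defines
// or uses Reg, returning its operand index and the distance from the bundle
// head; that distance later feeds the DefAdj/UseAdj latency adjustments in
// getOperandLatency below.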
3285235633Sdim
3286245431Sdim/// Return the number of cycles to add to (or subtract from) the static
3287245431Sdim/// itinerary based on the def opcode and alignment. The caller will ensure that
3288245431Sdim/// adjusted latency is at least one cycle.
3289245431Sdimstatic int adjustDefLatency(const ARMSubtarget &Subtarget,
3290245431Sdim                            const MachineInstr *DefMI,
3291245431Sdim                            const MCInstrDesc *DefMCID, unsigned DefAlign) {
3292245431Sdim  int Adjust = 0;
3293245431Sdim  if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
3294218893Sdim    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3295218893Sdim    // variants are one cycle cheaper.
3296235633Sdim    switch (DefMCID->getOpcode()) {
3297218893Sdim    default: break;
3298218893Sdim    case ARM::LDRrs:
3299218893Sdim    case ARM::LDRBrs: {
3300218893Sdim      unsigned ShOpVal = DefMI->getOperand(3).getImm();
3301218893Sdim      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3302218893Sdim      if (ShImm == 0 ||
3303218893Sdim          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3304245431Sdim        --Adjust;
3305218893Sdim      break;
3306218893Sdim    }
3307218893Sdim    case ARM::t2LDRs:
3308218893Sdim    case ARM::t2LDRBs:
3309218893Sdim    case ARM::t2LDRHs:
3310218893Sdim    case ARM::t2LDRSHs: {
3311218893Sdim      // Thumb2 mode: lsl only.
3312218893Sdim      unsigned ShAmt = DefMI->getOperand(3).getImm();
3313218893Sdim      if (ShAmt == 0 || ShAmt == 2)
3314245431Sdim        --Adjust;
3315218893Sdim      break;
3316218893Sdim    }
3317218893Sdim    }
3318245431Sdim  } else if (Subtarget.isSwift()) {
3319245431Sdim    // FIXME: Properly handle all of the latency adjustments for address
3320245431Sdim    // writeback.
3321245431Sdim    switch (DefMCID->getOpcode()) {
3322245431Sdim    default: break;
3323245431Sdim    case ARM::LDRrs:
3324245431Sdim    case ARM::LDRBrs: {
3325245431Sdim      unsigned ShOpVal = DefMI->getOperand(3).getImm();
3326245431Sdim      bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
3327245431Sdim      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3328245431Sdim      if (!isSub &&
3329245431Sdim          (ShImm == 0 ||
3330245431Sdim           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3331245431Sdim            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
3332245431Sdim        Adjust -= 2;
3333245431Sdim      else if (!isSub &&
3334245431Sdim               ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3335245431Sdim        --Adjust;
3336245431Sdim      break;
3337245431Sdim    }
3338245431Sdim    case ARM::t2LDRs:
3339245431Sdim    case ARM::t2LDRBs:
3340245431Sdim    case ARM::t2LDRHs:
3341245431Sdim    case ARM::t2LDRSHs: {
3342245431Sdim      // Thumb2 mode: lsl only.
3343245431Sdim      unsigned ShAmt = DefMI->getOperand(3).getImm();
3344245431Sdim      if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
3345245431Sdim        Adjust -= 2;
3346245431Sdim      break;
3347245431Sdim    }
3348245431Sdim    }
3349218893Sdim  }
3350218893Sdim
3351245431Sdim  if (DefAlign < 8 && Subtarget.isLikeA9()) {
3352235633Sdim    switch (DefMCID->getOpcode()) {
3353221345Sdim    default: break;
3354221345Sdim    case ARM::VLD1q8:
3355221345Sdim    case ARM::VLD1q16:
3356221345Sdim    case ARM::VLD1q32:
3357221345Sdim    case ARM::VLD1q64:
3358235633Sdim    case ARM::VLD1q8wb_fixed:
3359235633Sdim    case ARM::VLD1q16wb_fixed:
3360235633Sdim    case ARM::VLD1q32wb_fixed:
3361235633Sdim    case ARM::VLD1q64wb_fixed:
3362235633Sdim    case ARM::VLD1q8wb_register:
3363235633Sdim    case ARM::VLD1q16wb_register:
3364235633Sdim    case ARM::VLD1q32wb_register:
3365235633Sdim    case ARM::VLD1q64wb_register:
3366221345Sdim    case ARM::VLD2d8:
3367221345Sdim    case ARM::VLD2d16:
3368221345Sdim    case ARM::VLD2d32:
3369221345Sdim    case ARM::VLD2q8:
3370221345Sdim    case ARM::VLD2q16:
3371221345Sdim    case ARM::VLD2q32:
3372235633Sdim    case ARM::VLD2d8wb_fixed:
3373235633Sdim    case ARM::VLD2d16wb_fixed:
3374235633Sdim    case ARM::VLD2d32wb_fixed:
3375235633Sdim    case ARM::VLD2q8wb_fixed:
3376235633Sdim    case ARM::VLD2q16wb_fixed:
3377235633Sdim    case ARM::VLD2q32wb_fixed:
3378235633Sdim    case ARM::VLD2d8wb_register:
3379235633Sdim    case ARM::VLD2d16wb_register:
3380235633Sdim    case ARM::VLD2d32wb_register:
3381235633Sdim    case ARM::VLD2q8wb_register:
3382235633Sdim    case ARM::VLD2q16wb_register:
3383235633Sdim    case ARM::VLD2q32wb_register:
3384221345Sdim    case ARM::VLD3d8:
3385221345Sdim    case ARM::VLD3d16:
3386221345Sdim    case ARM::VLD3d32:
3387221345Sdim    case ARM::VLD1d64T:
3388221345Sdim    case ARM::VLD3d8_UPD:
3389221345Sdim    case ARM::VLD3d16_UPD:
3390221345Sdim    case ARM::VLD3d32_UPD:
3391235633Sdim    case ARM::VLD1d64Twb_fixed:
3392235633Sdim    case ARM::VLD1d64Twb_register:
3393221345Sdim    case ARM::VLD3q8_UPD:
3394221345Sdim    case ARM::VLD3q16_UPD:
3395221345Sdim    case ARM::VLD3q32_UPD:
3396221345Sdim    case ARM::VLD4d8:
3397221345Sdim    case ARM::VLD4d16:
3398221345Sdim    case ARM::VLD4d32:
3399221345Sdim    case ARM::VLD1d64Q:
3400221345Sdim    case ARM::VLD4d8_UPD:
3401221345Sdim    case ARM::VLD4d16_UPD:
3402221345Sdim    case ARM::VLD4d32_UPD:
3403235633Sdim    case ARM::VLD1d64Qwb_fixed:
3404235633Sdim    case ARM::VLD1d64Qwb_register:
3405221345Sdim    case ARM::VLD4q8_UPD:
3406221345Sdim    case ARM::VLD4q16_UPD:
3407221345Sdim    case ARM::VLD4q32_UPD:
3408221345Sdim    case ARM::VLD1DUPq8:
3409221345Sdim    case ARM::VLD1DUPq16:
3410221345Sdim    case ARM::VLD1DUPq32:
3411235633Sdim    case ARM::VLD1DUPq8wb_fixed:
3412235633Sdim    case ARM::VLD1DUPq16wb_fixed:
3413235633Sdim    case ARM::VLD1DUPq32wb_fixed:
3414235633Sdim    case ARM::VLD1DUPq8wb_register:
3415235633Sdim    case ARM::VLD1DUPq16wb_register:
3416235633Sdim    case ARM::VLD1DUPq32wb_register:
3417221345Sdim    case ARM::VLD2DUPd8:
3418221345Sdim    case ARM::VLD2DUPd16:
3419221345Sdim    case ARM::VLD2DUPd32:
3420235633Sdim    case ARM::VLD2DUPd8wb_fixed:
3421235633Sdim    case ARM::VLD2DUPd16wb_fixed:
3422235633Sdim    case ARM::VLD2DUPd32wb_fixed:
3423235633Sdim    case ARM::VLD2DUPd8wb_register:
3424235633Sdim    case ARM::VLD2DUPd16wb_register:
3425235633Sdim    case ARM::VLD2DUPd32wb_register:
3426221345Sdim    case ARM::VLD4DUPd8:
3427221345Sdim    case ARM::VLD4DUPd16:
3428221345Sdim    case ARM::VLD4DUPd32:
3429221345Sdim    case ARM::VLD4DUPd8_UPD:
3430221345Sdim    case ARM::VLD4DUPd16_UPD:
3431221345Sdim    case ARM::VLD4DUPd32_UPD:
3432221345Sdim    case ARM::VLD1LNd8:
3433221345Sdim    case ARM::VLD1LNd16:
3434221345Sdim    case ARM::VLD1LNd32:
3435221345Sdim    case ARM::VLD1LNd8_UPD:
3436221345Sdim    case ARM::VLD1LNd16_UPD:
3437221345Sdim    case ARM::VLD1LNd32_UPD:
3438221345Sdim    case ARM::VLD2LNd8:
3439221345Sdim    case ARM::VLD2LNd16:
3440221345Sdim    case ARM::VLD2LNd32:
3441221345Sdim    case ARM::VLD2LNq16:
3442221345Sdim    case ARM::VLD2LNq32:
3443221345Sdim    case ARM::VLD2LNd8_UPD:
3444221345Sdim    case ARM::VLD2LNd16_UPD:
3445221345Sdim    case ARM::VLD2LNd32_UPD:
3446221345Sdim    case ARM::VLD2LNq16_UPD:
3447221345Sdim    case ARM::VLD2LNq32_UPD:
3448221345Sdim    case ARM::VLD4LNd8:
3449221345Sdim    case ARM::VLD4LNd16:
3450221345Sdim    case ARM::VLD4LNd32:
3451221345Sdim    case ARM::VLD4LNq16:
3452221345Sdim    case ARM::VLD4LNq32:
3453221345Sdim    case ARM::VLD4LNd8_UPD:
3454221345Sdim    case ARM::VLD4LNd16_UPD:
3455221345Sdim    case ARM::VLD4LNd32_UPD:
3456221345Sdim    case ARM::VLD4LNq16_UPD:
3457221345Sdim    case ARM::VLD4LNq32_UPD:
3458221345Sdim      // If the address is not 64-bit aligned, the latencies of these
3459221345Sdim      // instructions increase by one.
3460245431Sdim      ++Adjust;
3461221345Sdim      break;
3462221345Sdim    }
3463245431Sdim  }
3464245431Sdim  return Adjust;
3465245431Sdim}
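// Example adjustments produced above (assumed syntax): on Swift,
// "ldr r0, [r1, r2, lsl #1]" is adjusted by -2 relative to the generic
// LDRrs itinerary entry while "ldr r0, [r1, r2, lsr #1]" only gets -1; on a
// Cortex-A9-like core, a VLD1q64 whose address is not known to be 64-bit
// aligned gets +1.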
3466221345Sdim
3467245431Sdim
3468245431Sdim
3469245431Sdimint
3470245431SdimARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3471245431Sdim                                    const MachineInstr *DefMI, unsigned DefIdx,
3472245431Sdim                                    const MachineInstr *UseMI,
3473245431Sdim                                    unsigned UseIdx) const {
3474245431Sdim  // No operand latency. The caller may fall back to getInstrLatency.
3475245431Sdim  if (!ItinData || ItinData->isEmpty())
3476245431Sdim    return -1;
3477245431Sdim
3478245431Sdim  const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
3479245431Sdim  unsigned Reg = DefMO.getReg();
3480245431Sdim  const MCInstrDesc *DefMCID = &DefMI->getDesc();
3481245431Sdim  const MCInstrDesc *UseMCID = &UseMI->getDesc();
3482245431Sdim
3483245431Sdim  unsigned DefAdj = 0;
3484245431Sdim  if (DefMI->isBundle()) {
3485245431Sdim    DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
3486245431Sdim    DefMCID = &DefMI->getDesc();
3487245431Sdim  }
3488245431Sdim  if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
3489245431Sdim      DefMI->isRegSequence() || DefMI->isImplicitDef()) {
3490245431Sdim    return 1;
3491245431Sdim  }
3492245431Sdim
3493245431Sdim  unsigned UseAdj = 0;
3494245431Sdim  if (UseMI->isBundle()) {
3495245431Sdim    unsigned NewUseIdx;
3496245431Sdim    const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
3497245431Sdim                                                   Reg, NewUseIdx, UseAdj);
3498245431Sdim    if (!NewUseMI)
3499245431Sdim      return -1;
3500245431Sdim
3501245431Sdim    UseMI = NewUseMI;
3502245431Sdim    UseIdx = NewUseIdx;
3503245431Sdim    UseMCID = &UseMI->getDesc();
3504245431Sdim  }
3505245431Sdim
3506245431Sdim  if (Reg == ARM::CPSR) {
3507245431Sdim    if (DefMI->getOpcode() == ARM::FMSTAT) {
3508245431Sdim      // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
3509245431Sdim      return Subtarget.isLikeA9() ? 1 : 20;
3510245431Sdim    }
3511245431Sdim
3512245431Sdim    // CPSR set and branch can be paired in the same cycle.
3513245431Sdim    if (UseMI->isBranch())
3514245431Sdim      return 0;
3515245431Sdim
3516245431Sdim    // Otherwise it takes the instruction latency (generally one).
3517245431Sdim    unsigned Latency = getInstrLatency(ItinData, DefMI);
3518245431Sdim
3519245431Sdim    // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction close
3520245431Sdim    // to its uses. Instructions which are otherwise scheduled between them may
3521245431Sdim    // incur a code size penalty (they prevent use of the CPSR-setting 16-bit
3522245431Sdim    // instructions).
3523245431Sdim    if (Latency > 0 && Subtarget.isThumb2()) {
3524245431Sdim      const MachineFunction *MF = DefMI->getParent()->getParent();
3525252723Sdim      if (MF->getFunction()->getAttributes().
3526252723Sdim            hasAttribute(AttributeSet::FunctionIndex,
3527252723Sdim                         Attribute::OptimizeForSize))
3528245431Sdim        --Latency;
3529245431Sdim    }
3530245431Sdim    return Latency;
3531245431Sdim  }
3532245431Sdim
3533245431Sdim  if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
3534245431Sdim    return -1;
3535245431Sdim
3536245431Sdim  unsigned DefAlign = DefMI->hasOneMemOperand()
3537245431Sdim    ? (*DefMI->memoperands_begin())->getAlignment() : 0;
3538245431Sdim  unsigned UseAlign = UseMI->hasOneMemOperand()
3539245431Sdim    ? (*UseMI->memoperands_begin())->getAlignment() : 0;
3540245431Sdim
3541245431Sdim  // Get the itinerary's latency if possible, and handle variable_ops.
3542245431Sdim  int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
3543245431Sdim                                  *UseMCID, UseIdx, UseAlign);
3544245431Sdim  // Unable to find operand latency. The caller may resort to getInstrLatency.
3545245431Sdim  if (Latency < 0)
3546245431Sdim    return Latency;
3547245431Sdim
3548245431Sdim  // Adjust for IT block position.
3549245431Sdim  int Adj = DefAdj + UseAdj;
3550245431Sdim
3551245431Sdim  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
3552245431Sdim  Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
3553245431Sdim  if (Adj >= 0 || (int)Latency > -Adj) {
3554245431Sdim    return Latency + Adj;
3555245431Sdim  }
3556245431Sdim  // Return the itinerary latency unadjusted; it may be zero but is never negative.
3557218893Sdim  return Latency;
3558218893Sdim}
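
// A minimal usage sketch (hypothetical caller; the variable names are only
// for illustration):
//
//   int Lat = TII->getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
//   if (Lat < 0) // No per-operand itinerary data for this def/use pair.
//     Lat = TII->getInstrLatency(ItinData, DefMI);
//
// This mirrors the fallback that hasHighOperandLatency() below performs when
// the operand latency cannot be determined.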
3559218893Sdim
3560218893Sdimint
3561218893SdimARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
3562218893Sdim                                    SDNode *DefNode, unsigned DefIdx,
3563218893Sdim                                    SDNode *UseNode, unsigned UseIdx) const {
3564218893Sdim  if (!DefNode->isMachineOpcode())
3565218893Sdim    return 1;
3566218893Sdim
3567224145Sdim  const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
3568218893Sdim
3569224145Sdim  if (isZeroCost(DefMCID.Opcode))
3570218893Sdim    return 0;
3571218893Sdim
3572218893Sdim  if (!ItinData || ItinData->isEmpty())
3573224145Sdim    return DefMCID.mayLoad() ? 3 : 1;
3574218893Sdim
3575218893Sdim  if (!UseNode->isMachineOpcode()) {
3576224145Sdim    int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
3577245431Sdim    if (Subtarget.isLikeA9() || Subtarget.isSwift())
3578218893Sdim      return Latency <= 2 ? 1 : Latency - 1;
3579218893Sdim    else
3580218893Sdim      return Latency <= 3 ? 1 : Latency - 2;
3581218893Sdim  }
3582218893Sdim
3583224145Sdim  const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
3584218893Sdim  const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
3585218893Sdim  unsigned DefAlign = !DefMN->memoperands_empty()
3586218893Sdim    ? (*DefMN->memoperands_begin())->getAlignment() : 0;
3587218893Sdim  const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
3588218893Sdim  unsigned UseAlign = !UseMN->memoperands_empty()
3589218893Sdim    ? (*UseMN->memoperands_begin())->getAlignment() : 0;
3590224145Sdim  int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
3591224145Sdim                                  UseMCID, UseIdx, UseAlign);
3592218893Sdim
3593218893Sdim  if (Latency > 1 &&
3594245431Sdim      (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
3595218893Sdim    // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
3596218893Sdim    // variants are one cycle cheaper.
3597224145Sdim    switch (DefMCID.getOpcode()) {
3598218893Sdim    default: break;
3599218893Sdim    case ARM::LDRrs:
3600218893Sdim    case ARM::LDRBrs: {
3601218893Sdim      unsigned ShOpVal =
3602218893Sdim        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3603218893Sdim      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3604218893Sdim      if (ShImm == 0 ||
3605218893Sdim          (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3606218893Sdim        --Latency;
3607218893Sdim      break;
3608218893Sdim    }
3609218893Sdim    case ARM::t2LDRs:
3610218893Sdim    case ARM::t2LDRBs:
3611218893Sdim    case ARM::t2LDRHs:
3612218893Sdim    case ARM::t2LDRSHs: {
3613218893Sdim      // Thumb2 mode: lsl only.
3614218893Sdim      unsigned ShAmt =
3615218893Sdim        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3616218893Sdim      if (ShAmt == 0 || ShAmt == 2)
3617218893Sdim        --Latency;
3618218893Sdim      break;
3619218893Sdim    }
3620218893Sdim    }
3621245431Sdim  } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
3622245431Sdim    // FIXME: Properly handle all of the latency adjustments for address
3623245431Sdim    // writeback.
3624245431Sdim    switch (DefMCID.getOpcode()) {
3625245431Sdim    default: break;
3626245431Sdim    case ARM::LDRrs:
3627245431Sdim    case ARM::LDRBrs: {
3628245431Sdim      unsigned ShOpVal =
3629245431Sdim        cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
3630245431Sdim      unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
3631245431Sdim      if (ShImm == 0 ||
3632245431Sdim          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
3633245431Sdim           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
3634245431Sdim        Latency -= 2;
3635245431Sdim      else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
3636245431Sdim        --Latency;
3637245431Sdim      break;
3638245431Sdim    }
3639245431Sdim    case ARM::t2LDRs:
3640245431Sdim    case ARM::t2LDRBs:
3641245431Sdim    case ARM::t2LDRHs:
3642245431Sdim    case ARM::t2LDRSHs: {
3643245431Sdim      // Thumb2 mode: lsl 0-3 only.
3644245431Sdim      Latency -= 2;
3645245431Sdim      break;
3646245431Sdim    }
3647245431Sdim    }
3648218893Sdim  }
3649218893Sdim
3650245431Sdim  if (DefAlign < 8 && Subtarget.isLikeA9())
3651224145Sdim    switch (DefMCID.getOpcode()) {
3652221345Sdim    default: break;
3653235633Sdim    case ARM::VLD1q8:
3654235633Sdim    case ARM::VLD1q16:
3655235633Sdim    case ARM::VLD1q32:
3656235633Sdim    case ARM::VLD1q64:
3657235633Sdim    case ARM::VLD1q8wb_register:
3658235633Sdim    case ARM::VLD1q16wb_register:
3659235633Sdim    case ARM::VLD1q32wb_register:
3660235633Sdim    case ARM::VLD1q64wb_register:
3661235633Sdim    case ARM::VLD1q8wb_fixed:
3662235633Sdim    case ARM::VLD1q16wb_fixed:
3663235633Sdim    case ARM::VLD1q32wb_fixed:
3664235633Sdim    case ARM::VLD1q64wb_fixed:
3665235633Sdim    case ARM::VLD2d8:
3666235633Sdim    case ARM::VLD2d16:
3667235633Sdim    case ARM::VLD2d32:
3668221345Sdim    case ARM::VLD2q8Pseudo:
3669221345Sdim    case ARM::VLD2q16Pseudo:
3670221345Sdim    case ARM::VLD2q32Pseudo:
3671235633Sdim    case ARM::VLD2d8wb_fixed:
3672235633Sdim    case ARM::VLD2d16wb_fixed:
3673235633Sdim    case ARM::VLD2d32wb_fixed:
3674235633Sdim    case ARM::VLD2q8PseudoWB_fixed:
3675235633Sdim    case ARM::VLD2q16PseudoWB_fixed:
3676235633Sdim    case ARM::VLD2q32PseudoWB_fixed:
3677235633Sdim    case ARM::VLD2d8wb_register:
3678235633Sdim    case ARM::VLD2d16wb_register:
3679235633Sdim    case ARM::VLD2d32wb_register:
3680235633Sdim    case ARM::VLD2q8PseudoWB_register:
3681235633Sdim    case ARM::VLD2q16PseudoWB_register:
3682235633Sdim    case ARM::VLD2q32PseudoWB_register:
3683221345Sdim    case ARM::VLD3d8Pseudo:
3684221345Sdim    case ARM::VLD3d16Pseudo:
3685221345Sdim    case ARM::VLD3d32Pseudo:
3686221345Sdim    case ARM::VLD1d64TPseudo:
3687266759Sdim    case ARM::VLD1d64TPseudoWB_fixed:
3688221345Sdim    case ARM::VLD3d8Pseudo_UPD:
3689221345Sdim    case ARM::VLD3d16Pseudo_UPD:
3690221345Sdim    case ARM::VLD3d32Pseudo_UPD:
3691221345Sdim    case ARM::VLD3q8Pseudo_UPD:
3692221345Sdim    case ARM::VLD3q16Pseudo_UPD:
3693221345Sdim    case ARM::VLD3q32Pseudo_UPD:
3694221345Sdim    case ARM::VLD3q8oddPseudo:
3695221345Sdim    case ARM::VLD3q16oddPseudo:
3696221345Sdim    case ARM::VLD3q32oddPseudo:
3697221345Sdim    case ARM::VLD3q8oddPseudo_UPD:
3698221345Sdim    case ARM::VLD3q16oddPseudo_UPD:
3699221345Sdim    case ARM::VLD3q32oddPseudo_UPD:
3700221345Sdim    case ARM::VLD4d8Pseudo:
3701221345Sdim    case ARM::VLD4d16Pseudo:
3702221345Sdim    case ARM::VLD4d32Pseudo:
3703221345Sdim    case ARM::VLD1d64QPseudo:
3704266759Sdim    case ARM::VLD1d64QPseudoWB_fixed:
3705221345Sdim    case ARM::VLD4d8Pseudo_UPD:
3706221345Sdim    case ARM::VLD4d16Pseudo_UPD:
3707221345Sdim    case ARM::VLD4d32Pseudo_UPD:
3708221345Sdim    case ARM::VLD4q8Pseudo_UPD:
3709221345Sdim    case ARM::VLD4q16Pseudo_UPD:
3710221345Sdim    case ARM::VLD4q32Pseudo_UPD:
3711221345Sdim    case ARM::VLD4q8oddPseudo:
3712221345Sdim    case ARM::VLD4q16oddPseudo:
3713221345Sdim    case ARM::VLD4q32oddPseudo:
3714221345Sdim    case ARM::VLD4q8oddPseudo_UPD:
3715221345Sdim    case ARM::VLD4q16oddPseudo_UPD:
3716221345Sdim    case ARM::VLD4q32oddPseudo_UPD:
3717235633Sdim    case ARM::VLD1DUPq8:
3718235633Sdim    case ARM::VLD1DUPq16:
3719235633Sdim    case ARM::VLD1DUPq32:
3720235633Sdim    case ARM::VLD1DUPq8wb_fixed:
3721235633Sdim    case ARM::VLD1DUPq16wb_fixed:
3722235633Sdim    case ARM::VLD1DUPq32wb_fixed:
3723235633Sdim    case ARM::VLD1DUPq8wb_register:
3724235633Sdim    case ARM::VLD1DUPq16wb_register:
3725235633Sdim    case ARM::VLD1DUPq32wb_register:
3726235633Sdim    case ARM::VLD2DUPd8:
3727235633Sdim    case ARM::VLD2DUPd16:
3728235633Sdim    case ARM::VLD2DUPd32:
3729235633Sdim    case ARM::VLD2DUPd8wb_fixed:
3730235633Sdim    case ARM::VLD2DUPd16wb_fixed:
3731235633Sdim    case ARM::VLD2DUPd32wb_fixed:
3732235633Sdim    case ARM::VLD2DUPd8wb_register:
3733235633Sdim    case ARM::VLD2DUPd16wb_register:
3734235633Sdim    case ARM::VLD2DUPd32wb_register:
3735221345Sdim    case ARM::VLD4DUPd8Pseudo:
3736221345Sdim    case ARM::VLD4DUPd16Pseudo:
3737221345Sdim    case ARM::VLD4DUPd32Pseudo:
3738221345Sdim    case ARM::VLD4DUPd8Pseudo_UPD:
3739221345Sdim    case ARM::VLD4DUPd16Pseudo_UPD:
3740221345Sdim    case ARM::VLD4DUPd32Pseudo_UPD:
3741221345Sdim    case ARM::VLD1LNq8Pseudo:
3742221345Sdim    case ARM::VLD1LNq16Pseudo:
3743221345Sdim    case ARM::VLD1LNq32Pseudo:
3744221345Sdim    case ARM::VLD1LNq8Pseudo_UPD:
3745221345Sdim    case ARM::VLD1LNq16Pseudo_UPD:
3746221345Sdim    case ARM::VLD1LNq32Pseudo_UPD:
3747221345Sdim    case ARM::VLD2LNd8Pseudo:
3748221345Sdim    case ARM::VLD2LNd16Pseudo:
3749221345Sdim    case ARM::VLD2LNd32Pseudo:
3750221345Sdim    case ARM::VLD2LNq16Pseudo:
3751221345Sdim    case ARM::VLD2LNq32Pseudo:
3752221345Sdim    case ARM::VLD2LNd8Pseudo_UPD:
3753221345Sdim    case ARM::VLD2LNd16Pseudo_UPD:
3754221345Sdim    case ARM::VLD2LNd32Pseudo_UPD:
3755221345Sdim    case ARM::VLD2LNq16Pseudo_UPD:
3756221345Sdim    case ARM::VLD2LNq32Pseudo_UPD:
3757221345Sdim    case ARM::VLD4LNd8Pseudo:
3758221345Sdim    case ARM::VLD4LNd16Pseudo:
3759221345Sdim    case ARM::VLD4LNd32Pseudo:
3760221345Sdim    case ARM::VLD4LNq16Pseudo:
3761221345Sdim    case ARM::VLD4LNq32Pseudo:
3762221345Sdim    case ARM::VLD4LNd8Pseudo_UPD:
3763221345Sdim    case ARM::VLD4LNd16Pseudo_UPD:
3764221345Sdim    case ARM::VLD4LNd32Pseudo_UPD:
3765221345Sdim    case ARM::VLD4LNq16Pseudo_UPD:
3766221345Sdim    case ARM::VLD4LNq32Pseudo_UPD:
3767221345Sdim      // If the address is not 64-bit aligned, the latencies of these
3768221345Sdim      // instructions increase by one.
3769221345Sdim      ++Latency;
3770221345Sdim      break;
3771221345Sdim    }
3772221345Sdim
3773218893Sdim  return Latency;
3774218893Sdim}
3775218893Sdim
3776263509Sdimunsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
3777263509Sdim  if (MI->isCopyLike() || MI->isInsertSubreg() ||
3778263509Sdim      MI->isRegSequence() || MI->isImplicitDef())
3779263509Sdim    return 0;
3780263509Sdim
3781263509Sdim  if (MI->isBundle())
3782263509Sdim    return 0;
3783263509Sdim
3784263509Sdim  const MCInstrDesc &MCID = MI->getDesc();
3785263509Sdim
3786263509Sdim  if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
3787263509Sdim    // When predicated, CPSR is an additional source operand for CPSR updating
3788263509Sdim    // instructions; this apparently increases their latencies.
3789263509Sdim    return 1;
3790263509Sdim  }
3791263509Sdim  return 0;
3792263509Sdim}
3793263509Sdim
3794245431Sdimunsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
3795245431Sdim                                           const MachineInstr *MI,
3796245431Sdim                                           unsigned *PredCost) const {
3797218893Sdim  if (MI->isCopyLike() || MI->isInsertSubreg() ||
3798218893Sdim      MI->isRegSequence() || MI->isImplicitDef())
3799218893Sdim    return 1;
3800218893Sdim
3801245431Sdim  // An instruction scheduler typically runs on unbundled instructions; however,
3802245431Sdim  // other passes may query the latency of a bundled instruction.
3803235633Sdim  if (MI->isBundle()) {
3804245431Sdim    unsigned Latency = 0;
3805235633Sdim    MachineBasicBlock::const_instr_iterator I = MI;
3806235633Sdim    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
3807235633Sdim    while (++I != E && I->isInsideBundle()) {
3808235633Sdim      if (I->getOpcode() != ARM::t2IT)
3809235633Sdim        Latency += getInstrLatency(ItinData, I, PredCost);
3810235633Sdim    }
3811235633Sdim    return Latency;
3812235633Sdim  }
3813235633Sdim
3814224145Sdim  const MCInstrDesc &MCID = MI->getDesc();
3815245431Sdim  if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
3816218893Sdim    // When predicated, CPSR is an additional source operand for CPSR updating
3817218893Sdim    // instructions; this apparently increases their latencies.
3818218893Sdim    *PredCost = 1;
3819245431Sdim  }
3820245431Sdim  // Be sure to call getStageLatency even for an empty itinerary, in case it
3821245431Sdim  // has a valid MinLatency property.
3822245431Sdim  if (!ItinData)
3823245431Sdim    return MI->mayLoad() ? 3 : 1;
3824245431Sdim
3825245431Sdim  unsigned Class = MCID.getSchedClass();
3826245431Sdim
3827245431Sdim  // For instructions with variable uops, use uops as latency.
3828245431Sdim  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
3829245431Sdim    return getNumMicroOps(ItinData, MI);
3830245431Sdim
3831245431Sdim  // For the common case, fall back on the itinerary's latency.
3832245431Sdim  unsigned Latency = ItinData->getStageLatency(Class);
3833245431Sdim
3834245431Sdim  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
3835245431Sdim  unsigned DefAlign = MI->hasOneMemOperand()
3836245431Sdim    ? (*MI->memoperands_begin())->getAlignment() : 0;
3837245431Sdim  int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
3838245431Sdim  if (Adj >= 0 || (int)Latency > -Adj) {
3839245431Sdim    return Latency + Adj;
3840245431Sdim  }
3841245431Sdim  return Latency;
3842218893Sdim}
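
// Illustrative sketch of the bundle case above (operands are made up; only
// the shape of the bundle matters): for a Thumb2 IT bundle along the lines of
//
//   BUNDLE
//     t2IT pred, mask
//     t2ADDri ...   (predicated)
//     t2MOVi  ...   (predicated)
//
// the loop skips the t2IT marker and returns the sum of the latencies of the
// two predicated instructions.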
3843218893Sdim
3844218893Sdimint ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
3845218893Sdim                                      SDNode *Node) const {
3846218893Sdim  if (!Node->isMachineOpcode())
3847218893Sdim    return 1;
3848218893Sdim
3849218893Sdim  if (!ItinData || ItinData->isEmpty())
3850218893Sdim    return 1;
3851218893Sdim
3852218893Sdim  unsigned Opcode = Node->getMachineOpcode();
3853218893Sdim  switch (Opcode) {
3854218893Sdim  default:
3855218893Sdim    return ItinData->getStageLatency(get(Opcode).getSchedClass());
3856218893Sdim  case ARM::VLDMQIA:
3857218893Sdim  case ARM::VSTMQIA:
3858218893Sdim    return 2;
3859218893Sdim  }
3860218893Sdim}
3861218893Sdim
3862218893Sdimbool ARMBaseInstrInfo::
3863218893SdimhasHighOperandLatency(const InstrItineraryData *ItinData,
3864218893Sdim                      const MachineRegisterInfo *MRI,
3865218893Sdim                      const MachineInstr *DefMI, unsigned DefIdx,
3866218893Sdim                      const MachineInstr *UseMI, unsigned UseIdx) const {
3867218893Sdim  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
3868218893Sdim  unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
3869218893Sdim  if (Subtarget.isCortexA8() &&
3870218893Sdim      (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
3871218893Sdim    // CortexA8 VFP instructions are not pipelined.
3872218893Sdim    return true;
3873218893Sdim
3874218893Sdim  // Hoist VFP / NEON instructions with 4 or higher latency.
3875263509Sdim  int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
3876245431Sdim  if (Latency < 0)
3877245431Sdim    Latency = getInstrLatency(ItinData, DefMI);
3878218893Sdim  if (Latency <= 3)
3879218893Sdim    return false;
3880218893Sdim  return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
3881218893Sdim         UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
3882218893Sdim}
3883218893Sdim
3884218893Sdimbool ARMBaseInstrInfo::
3885218893SdimhasLowDefLatency(const InstrItineraryData *ItinData,
3886218893Sdim                 const MachineInstr *DefMI, unsigned DefIdx) const {
3887218893Sdim  if (!ItinData || ItinData->isEmpty())
3888218893Sdim    return false;
3889218893Sdim
3890218893Sdim  unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
3891218893Sdim  if (DDomain == ARMII::DomainGeneral) {
3892218893Sdim    unsigned DefClass = DefMI->getDesc().getSchedClass();
3893218893Sdim    int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
3894218893Sdim    return (DefCycle != -1 && DefCycle <= 2);
3895218893Sdim  }
3896218893Sdim  return false;
3897218893Sdim}
3898218893Sdim
3899226890Sdimbool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
3900226890Sdim                                         StringRef &ErrInfo) const {
3901226890Sdim  if (convertAddSubFlagsOpcode(MI->getOpcode())) {
3902226890Sdim    ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
3903226890Sdim    return false;
3904226890Sdim  }
3905226890Sdim  return true;
3906226890Sdim}
3907226890Sdim
3908218893Sdimbool
3909218893SdimARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
3910218893Sdim                                     unsigned &AddSubOpc,
3911218893Sdim                                     bool &NegAcc, bool &HasLane) const {
3912218893Sdim  DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
3913218893Sdim  if (I == MLxEntryMap.end())
3914218893Sdim    return false;
3915218893Sdim
3916218893Sdim  const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
3917218893Sdim  MulOpc = Entry.MulOpc;
3918218893Sdim  AddSubOpc = Entry.AddSubOpc;
3919218893Sdim  NegAcc = Entry.NegAcc;
3920218893Sdim  HasLane = Entry.HasLane;
3921218893Sdim  return true;
3922218893Sdim}
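
// A sketch of how this query is typically used (names invented for
// illustration; the actual rewriting is done elsewhere in the backend, by the
// MLx expansion pass):
//
//   unsigned MulOpc, AddSubOpc;
//   bool NegAcc, HasLane;
//   if (TII->isFpMLxInstruction(MI->getOpcode(), MulOpc, AddSubOpc,
//                               NegAcc, HasLane)) {
//     // MI is a fused multiply-accumulate that can be split into a multiply
//     // (MulOpc) feeding an add/sub (AddSubOpc).  NegAcc says the
//     // accumulator is negated before the add/sub, and HasLane says the
//     // expanded instructions carry an extra lane operand.
//   }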
3923226890Sdim
3924226890Sdim//===----------------------------------------------------------------------===//
3925226890Sdim// Execution domains.
3926226890Sdim//===----------------------------------------------------------------------===//
3927226890Sdim//
3928226890Sdim// Some instructions go down the NEON pipeline, some go down the VFP pipeline,
3929226890Sdim// and some can go down both.  The vmov instructions go down the VFP pipeline,
3930226890Sdim// but they can be changed to vorr equivalents that are executed by the NEON
3931226890Sdim// pipeline.
3932226890Sdim//
3933226890Sdim// We use the following execution domain numbering:
3934226890Sdim//
3935226890Sdimenum ARMExeDomain {
3936226890Sdim  ExeGeneric = 0,
3937226890Sdim  ExeVFP = 1,
3938226890Sdim  ExeNEON = 2
3939226890Sdim};
3940226890Sdim//
3941226890Sdim// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
3942226890Sdim//
3943226890Sdimstd::pair<uint16_t, uint16_t>
3944226890SdimARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
3945245431Sdim  // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
3946245431Sdim  // if they are not predicated.
3947226890Sdim  if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
3948226890Sdim    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
3949226890Sdim
3950252723Sdim  // CortexA9 is particularly picky about mixing the two and wants these
3951245431Sdim  // converted.
3952252723Sdim  if (Subtarget.isCortexA9() && !isPredicated(MI) &&
3953245431Sdim      (MI->getOpcode() == ARM::VMOVRS ||
3954245431Sdim       MI->getOpcode() == ARM::VMOVSR ||
3955245431Sdim       MI->getOpcode() == ARM::VMOVS))
3956245431Sdim    return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
3957245431Sdim
3958226890Sdim  // No other instructions can be swizzled, so just determine their domain.
3959226890Sdim  unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
3960226890Sdim
3961226890Sdim  if (Domain & ARMII::DomainNEON)
3962226890Sdim    return std::make_pair(ExeNEON, 0);
3963226890Sdim
3964226890Sdim  // Certain instructions can go either way on Cortex-A8.
3965226890Sdim  // Treat them as NEON instructions.
3966226890Sdim  if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
3967226890Sdim    return std::make_pair(ExeNEON, 0);
3968226890Sdim
3969226890Sdim  if (Domain & ARMII::DomainVFP)
3970226890Sdim    return std::make_pair(ExeVFP, 0);
3971226890Sdim
3972226890Sdim  return std::make_pair(ExeGeneric, 0);
3973226890Sdim}
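
// How the returned pair is read (a sketch, not additional API):
//
//   std::pair<uint16_t, uint16_t> P = TII->getExecutionDomain(MI);
//   // P.first  - the domain MI currently executes in, e.g. ExeVFP for an
//   //            unpredicated VMOVD.
//   // P.second - a bitmask of domains MI could be rewritten into by
//   //            setExecutionDomain(); (1 << ExeVFP) | (1 << ExeNEON) for
//   //            that VMOVD, and 0 for instructions that cannot be swizzled.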
3974226890Sdim
3975245431Sdimstatic unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
3976245431Sdim                                            unsigned SReg, unsigned &Lane) {
3977245431Sdim  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
3978245431Sdim  Lane = 0;
3979245431Sdim
3980245431Sdim  if (DReg != ARM::NoRegister)
3981245431Sdim    return DReg;
3982245431Sdim
3983245431Sdim  Lane = 1;
3984245431Sdim  DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
3985245431Sdim
3986245431Sdim  assert(DReg && "S-register with no D super-register?");
3987245431Sdim  return DReg;
3988245431Sdim}
3989245431Sdim
3990245431Sdim/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
3991245431Sdim/// set ImplicitSReg to the register number that must be marked as an
3992245431Sdim/// implicit-use, or to zero if no implicit-use needs to be added.
3993245431Sdim///
3994245431Sdim/// If the function cannot determine whether an SPR should be marked as an
3995245431Sdim/// implicit-use, it returns false.
3996245431Sdim///
3997245431Sdim/// This function handles cases where an instruction is being modified from taking
3998245431Sdim/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict
3999245431Sdim/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other
4000245431Sdim/// lane of the DPR).
4001245431Sdim///
4002245431Sdim/// If the other SPR is defined, an implicit-use of it should be added.
4003245431Sdim/// Otherwise (including the case where the DPR itself is defined), it should not.
4004245431Sdim///
4005245431Sdimstatic bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
4006245431Sdim                                       MachineInstr *MI,
4007245431Sdim                                       unsigned DReg, unsigned Lane,
4008245431Sdim                                       unsigned &ImplicitSReg) {
4009245431Sdim  // If the DPR is defined or used already, the other SPR lane will be chained
4010245431Sdim  // correctly, so there is nothing to be done.
4011245431Sdim  if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
4012245431Sdim    ImplicitSReg = 0;
4013245431Sdim    return true;
4014245431Sdim  }
4015245431Sdim
4016245431Sdim  // Otherwise we need to go searching to see if the SPR is set explicitly.
4017245431Sdim  ImplicitSReg = TRI->getSubReg(DReg,
4018245431Sdim                                (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
4019245431Sdim  MachineBasicBlock::LivenessQueryResult LQR =
4020245431Sdim    MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
4021245431Sdim
4022245431Sdim  if (LQR == MachineBasicBlock::LQR_Live)
4023245431Sdim    return true;
4024245431Sdim  else if (LQR == MachineBasicBlock::LQR_Unknown)
4025245431Sdim    return false;
4026245431Sdim
4027245431Sdim  // If the register is known not to be live, there is no need to add an
4028245431Sdim  // implicit-use.
4029245431Sdim  ImplicitSReg = 0;
4030245431Sdim  return true;
4031245431Sdim}
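
// Worked example (register numbers chosen only for illustration): when a copy
// that originally wrote only %S1 is rewritten below into an instruction that
// clobbers all of %D0 and inserts into lane 1, the other lane's value lives
// in %S0.  If %S0 is live here, this helper returns it in ImplicitSReg so the
// caller can add an <imp-use> of %S0 and preserve the dependence on its
// earlier def; if %S0 is known dead (or %D0 itself is already read or
// defined), no extra operand is needed and ImplicitSReg is set to 0.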
4032245431Sdim
4033226890Sdimvoid
4034226890SdimARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
4035245431Sdim  unsigned DstReg, SrcReg, DReg;
4036245431Sdim  unsigned Lane;
4037252723Sdim  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
4038245431Sdim  const TargetRegisterInfo *TRI = &getRegisterInfo();
4039245431Sdim  switch (MI->getOpcode()) {
4040245431Sdim    default:
4041245431Sdim      llvm_unreachable("cannot handle opcode!");
4042245431Sdim      break;
4043245431Sdim    case ARM::VMOVD:
4044245431Sdim      if (Domain != ExeNEON)
4045245431Sdim        break;
4046226890Sdim
4047245431Sdim      // Zap the predicate operands.
4048245431Sdim      assert(!isPredicated(MI) && "Cannot predicate a VORRd");
4049226890Sdim
4050245431Sdim      // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
4051245431Sdim      DstReg = MI->getOperand(0).getReg();
4052245431Sdim      SrcReg = MI->getOperand(1).getReg();
4053226890Sdim
4054245431Sdim      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4055245431Sdim        MI->RemoveOperand(i-1);
4056245431Sdim
4057245431Sdim      // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
4058245431Sdim      MI->setDesc(get(ARM::VORRd));
4059245431Sdim      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
4060245431Sdim                        .addReg(SrcReg)
4061245431Sdim                        .addReg(SrcReg));
4062245431Sdim      break;
4063245431Sdim    case ARM::VMOVRS:
4064245431Sdim      if (Domain != ExeNEON)
4065245431Sdim        break;
4066245431Sdim      assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
4067245431Sdim
4068245431Sdim      // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
4069245431Sdim      DstReg = MI->getOperand(0).getReg();
4070245431Sdim      SrcReg = MI->getOperand(1).getReg();
4071245431Sdim
4072245431Sdim      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4073245431Sdim        MI->RemoveOperand(i-1);
4074245431Sdim
4075245431Sdim      DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
4076245431Sdim
4077245431Sdim      // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
4078245431Sdim      // Note that DSrc has been widened and the other lane may be undef, which
4079245431Sdim      // contaminates the entire register.
4080245431Sdim      MI->setDesc(get(ARM::VGETLNi32));
4081245431Sdim      AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
4082245431Sdim                        .addReg(DReg, RegState::Undef)
4083245431Sdim                        .addImm(Lane));
4084245431Sdim
4085245431Sdim      // The old source should be an implicit use, otherwise we might think it
4086245431Sdim      // was dead before here.
4087245431Sdim      MIB.addReg(SrcReg, RegState::Implicit);
4088245431Sdim      break;
4089245431Sdim    case ARM::VMOVSR: {
4090245431Sdim      if (Domain != ExeNEON)
4091245431Sdim        break;
4092245431Sdim      assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
4093245431Sdim
4094245431Sdim      // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
4095245431Sdim      DstReg = MI->getOperand(0).getReg();
4096245431Sdim      SrcReg = MI->getOperand(1).getReg();
4097245431Sdim
4098245431Sdim      DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
4099245431Sdim
4100245431Sdim      unsigned ImplicitSReg;
4101245431Sdim      if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
4102245431Sdim        break;
4103245431Sdim
4104245431Sdim      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4105245431Sdim        MI->RemoveOperand(i-1);
4106245431Sdim
4107245431Sdim      // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
4108245431Sdim      // Again DDst may be undefined at the beginning of this instruction.
4109245431Sdim      MI->setDesc(get(ARM::VSETLNi32));
4110245431Sdim      MIB.addReg(DReg, RegState::Define)
4111245431Sdim         .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
4112245431Sdim         .addReg(SrcReg)
4113245431Sdim         .addImm(Lane);
4114245431Sdim      AddDefaultPred(MIB);
4115245431Sdim
4116245431Sdim      // The narrower destination must be marked as set to keep previous chains
4117245431Sdim      // in place.
4118245431Sdim      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4119245431Sdim      if (ImplicitSReg != 0)
4120245431Sdim        MIB.addReg(ImplicitSReg, RegState::Implicit);
4121245431Sdim      break;
4122245431Sdim    }
4123245431Sdim    case ARM::VMOVS: {
4124245431Sdim      if (Domain != ExeNEON)
4125245431Sdim        break;
4126245431Sdim
4127245431Sdim      // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
4128245431Sdim      DstReg = MI->getOperand(0).getReg();
4129245431Sdim      SrcReg = MI->getOperand(1).getReg();
4130245431Sdim
4131245431Sdim      unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
4132245431Sdim      DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
4133245431Sdim      DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
4134245431Sdim
4135245431Sdim      unsigned ImplicitSReg;
4136245431Sdim      if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
4137245431Sdim        break;
4138245431Sdim
4139245431Sdim      for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
4140245431Sdim        MI->RemoveOperand(i-1);
4141245431Sdim
4142245431Sdim      if (DSrc == DDst) {
4143245431Sdim        // Destination can be:
4144245431Sdim        //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
4145245431Sdim        MI->setDesc(get(ARM::VDUPLN32d));
4146245431Sdim        MIB.addReg(DDst, RegState::Define)
4147245431Sdim           .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
4148245431Sdim           .addImm(SrcLane);
4149245431Sdim        AddDefaultPred(MIB);
4150245431Sdim
4151245431Sdim        // Neither the source nor the destination is naturally represented any
4152245431Sdim        // more, so add them in manually.
4153245431Sdim        MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
4154245431Sdim        MIB.addReg(SrcReg, RegState::Implicit);
4155245431Sdim        if (ImplicitSReg != 0)
4156245431Sdim          MIB.addReg(ImplicitSReg, RegState::Implicit);
4157245431Sdim        break;
4158245431Sdim      }
4159245431Sdim
4160245431Sdim      // In general there's no single instruction that can perform an S <-> S
4161245431Sdim      // move in NEON space, but a pair of VEXT instructions *can* do the
4162245431Sdim      // job. It turns out that the VEXTs needed will only use DSrc once, with
4163245431Sdim      // the position based purely on the combination of lane-0 and lane-1
4164245431Sdim      // involved. For example
4165245431Sdim      //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
4166245431Sdim      //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
4167245431Sdim      //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
4168245431Sdim      //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
4169245431Sdim      //
4170245431Sdim      // Pattern of the MachineInstrs is:
4171245431Sdim      //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
4172245431Sdim      MachineInstrBuilder NewMIB;
4173245431Sdim      NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
4174245431Sdim                       get(ARM::VEXTd32), DDst);
4175245431Sdim
4176245431Sdim      // On the first instruction, both DSrc and DDst may be <undef> if present.
4177245431Sdim      // Specifically when the original instruction didn't have them as an
4178245431Sdim      // <imp-use>.
4179245431Sdim      unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
4180245431Sdim      bool CurUndef = !MI->readsRegister(CurReg, TRI);
4181245431Sdim      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4182245431Sdim
4183245431Sdim      CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
4184245431Sdim      CurUndef = !MI->readsRegister(CurReg, TRI);
4185245431Sdim      NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
4186245431Sdim
4187245431Sdim      NewMIB.addImm(1);
4188245431Sdim      AddDefaultPred(NewMIB);
4189245431Sdim
4190245431Sdim      if (SrcLane == DstLane)
4191245431Sdim        NewMIB.addReg(SrcReg, RegState::Implicit);
4192245431Sdim
4193245431Sdim      MI->setDesc(get(ARM::VEXTd32));
4194245431Sdim      MIB.addReg(DDst, RegState::Define);
4195245431Sdim
4196245431Sdim      // On the second instruction, DDst has definitely been defined above, so
4197245431Sdim      // it is not <undef>. DSrc, if present, can be <undef> as above.
4198245431Sdim      CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
4199245431Sdim      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
4200245431Sdim      MIB.addReg(CurReg, getUndefRegState(CurUndef));
4201245431Sdim
4202245431Sdim      CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
4203245431Sdim      CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
4204245431Sdim      MIB.addReg(CurReg, getUndefRegState(CurUndef));
4205245431Sdim
4206245431Sdim      MIB.addImm(1);
4207245431Sdim      AddDefaultPred(MIB);
4208245431Sdim
4209245431Sdim      if (SrcLane != DstLane)
4210245431Sdim        MIB.addReg(SrcReg, RegState::Implicit);
4211245431Sdim
4212245431Sdim      // As before, the original destination is no longer represented, add it
4213245431Sdim      // implicitly.
4214245431Sdim      MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
4215245431Sdim      if (ImplicitSReg != 0)
4216245431Sdim        MIB.addReg(ImplicitSReg, RegState::Implicit);
4217245431Sdim      break;
4218245431Sdim    }
4219245431Sdim  }
4221226890Sdim}
4222235633Sdim
4223245431Sdim//===----------------------------------------------------------------------===//
4224245431Sdim// Partial register updates
4225245431Sdim//===----------------------------------------------------------------------===//
4226245431Sdim//
4227245431Sdim// Swift renames NEON registers with 64-bit granularity.  That means any
4228245431Sdim// instruction writing an S-reg implicitly reads the containing D-reg.  The
4229245431Sdim// problem is mostly avoided by translating f32 operations to v2f32 operations
4230245431Sdim// on D-registers, but f32 loads are still a problem.
4231245431Sdim//
4232245431Sdim// These instructions can load an f32 into a NEON register:
4233245431Sdim//
4234245431Sdim// VLDRS - Only writes S, partial D update.
4235245431Sdim// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
4236245431Sdim// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
4237245431Sdim//
4238245431Sdim// FCONSTD can be used as a dependency-breaking instruction.
4239245431Sdimunsigned ARMBaseInstrInfo::
4240245431SdimgetPartialRegUpdateClearance(const MachineInstr *MI,
4241245431Sdim                             unsigned OpNum,
4242245431Sdim                             const TargetRegisterInfo *TRI) const {
4243252723Sdim  if (!SwiftPartialUpdateClearance ||
4244252723Sdim      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
4245245431Sdim    return 0;
4246245431Sdim
4247245431Sdim  assert(TRI && "Need TRI instance");
4248245431Sdim
4249245431Sdim  const MachineOperand &MO = MI->getOperand(OpNum);
4250245431Sdim  if (MO.readsReg())
4251245431Sdim    return 0;
4252245431Sdim  unsigned Reg = MO.getReg();
4253245431Sdim  int UseOp = -1;
4254245431Sdim
4255245431Sdim  switch(MI->getOpcode()) {
4256245431Sdim    // Normal instructions writing only an S-register.
4257245431Sdim  case ARM::VLDRS:
4258245431Sdim  case ARM::FCONSTS:
4259245431Sdim  case ARM::VMOVSR:
4260245431Sdim  case ARM::VMOVv8i8:
4261245431Sdim  case ARM::VMOVv4i16:
4262245431Sdim  case ARM::VMOVv2i32:
4263245431Sdim  case ARM::VMOVv2f32:
4264245431Sdim  case ARM::VMOVv1i64:
4265245431Sdim    UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
4266245431Sdim    break;
4267245431Sdim
4268245431Sdim    // Explicitly reads the dependency.
4269245431Sdim  case ARM::VLD1LNd32:
4270252723Sdim    UseOp = 3;
4271245431Sdim    break;
4272245431Sdim  default:
4273245431Sdim    return 0;
4274245431Sdim  }
4275245431Sdim
4276245431Sdim  // If this instruction actually reads a value from Reg, there is no unwanted
4277245431Sdim  // dependency.
4278245431Sdim  if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
4279245431Sdim    return 0;
4280245431Sdim
4281245431Sdim  // We must be able to clobber the whole D-reg.
4282245431Sdim  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4283245431Sdim    // Virtual register must be a foo:ssub_0<def,undef> operand.
4284245431Sdim    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
4285245431Sdim      return 0;
4286245431Sdim  } else if (ARM::SPRRegClass.contains(Reg)) {
4287245431Sdim    // Physical register: MI must define the full D-reg.
4288245431Sdim    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
4289245431Sdim                                             &ARM::DPRRegClass);
4290245431Sdim    if (!DReg || !MI->definesRegister(DReg, TRI))
4291245431Sdim      return 0;
4292245431Sdim  }
4293245431Sdim
4294245431Sdim  // MI has an unwanted D-register dependency.
4295245431Sdim  // Avoid defs in the previous N instructions.
4296245431Sdim  return SwiftPartialUpdateClearance;
4297245431Sdim}
4298245431Sdim
4299245431Sdim// Break a partial register dependency after getPartialRegUpdateClearance
4300245431Sdim// returned non-zero.
4301245431Sdimvoid ARMBaseInstrInfo::
4302245431SdimbreakPartialRegDependency(MachineBasicBlock::iterator MI,
4303245431Sdim                          unsigned OpNum,
4304245431Sdim                          const TargetRegisterInfo *TRI) const {
4305245431Sdim  assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
4306245431Sdim  assert(TRI && "Need TRI instance");
4307245431Sdim
4308245431Sdim  const MachineOperand &MO = MI->getOperand(OpNum);
4309245431Sdim  unsigned Reg = MO.getReg();
4310245431Sdim  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
4311245431Sdim         "Can't break virtual register dependencies.");
4312245431Sdim  unsigned DReg = Reg;
4313245431Sdim
4314245431Sdim  // If MI defines an S-reg, find the corresponding D super-register.
4315245431Sdim  if (ARM::SPRRegClass.contains(Reg)) {
4316245431Sdim    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
4317245431Sdim    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
4318245431Sdim  }
4319245431Sdim
4320245431Sdim  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
4321245431Sdim  assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");
4322245431Sdim
4323245431Sdim  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
4324245431Sdim  // the full D-register by loading the same value to both lanes.  The
4325245431Sdim  // instruction is micro-coded with 2 uops, so don't do this until we can
4326263509Sdim  // properly schedule micro-coded instructions.  The dispatcher stalls cause
4327245431Sdim  // regressions that are too large.
4328245431Sdim
4329245431Sdim  // Insert the dependency-breaking FCONSTD before MI.
4330245431Sdim  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
4331245431Sdim  AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
4332245431Sdim                         get(ARM::FCONSTD), DReg).addImm(96));
4333245431Sdim  MI->addRegisterKilled(DReg, TRI, true);
4334245431Sdim}
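
// Illustrative before/after (the assembly is only a sketch): for a VLDRS that
// writes %S0 but, on Swift, also depends on the rest of %D0,
//
//     vldr     s0, [r0]
//
// a caller that got a non-zero clearance from getPartialRegUpdateClearance()
// and found no recent def of %D0 can use this hook to emit a
// dependency-breaking def of the full D-register first:
//
//     vmov.f64 d0, #0.5    ; FCONSTD; the constant value is irrelevant
//     vldr     s0, [r0]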
4335245431Sdim
4336235633Sdimbool ARMBaseInstrInfo::hasNOP() const {
4337235633Sdim  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
4338235633Sdim}
4339252723Sdim
4340252723Sdimbool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
4341263509Sdim  if (MI->getNumOperands() < 4)
4342263509Sdim    return true;
4343252723Sdim  unsigned ShOpVal = MI->getOperand(3).getImm();
4344252723Sdim  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
4345252723Sdim  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
4346252723Sdim  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
4347252723Sdim      ((ShImm == 1 || ShImm == 2) &&
4348252723Sdim       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
4349252723Sdim    return true;
4350252723Sdim
4351252723Sdim  return false;
4352252723Sdim}
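
// For illustration: an MI with fewer than four operands is treated as fast;
// otherwise a shifter operand of "lsl #1", "lsl #2", or "lsr #1" makes this
// return true, and anything else (e.g. "lsl #3" or "asr #1") returns false.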
4353