1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass that performs load / store related peephole
11// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "arm-ldst-opt"
16#include "ARM.h"
17#include "ARMBaseInstrInfo.h"
18#include "ARMBaseRegisterInfo.h"
19#include "ARMMachineFunctionInfo.h"
20#include "MCTargetDesc/ARMAddressingModes.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Function.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/CodeGen/MachineInstr.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/RegisterScavenging.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/Target/TargetData.h"
31#include "llvm/Target/TargetInstrInfo.h"
32#include "llvm/Target/TargetMachine.h"
33#include "llvm/Target/TargetRegisterInfo.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/raw_ostream.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/STLExtras.h"
39#include "llvm/ADT/SmallPtrSet.h"
40#include "llvm/ADT/SmallSet.h"
41#include "llvm/ADT/SmallVector.h"
42#include "llvm/ADT/Statistic.h"
43using namespace llvm;
44
45STATISTIC(NumLDMGened , "Number of ldm instructions generated");
46STATISTIC(NumSTMGened , "Number of stm instructions generated");
47STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
48STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
49STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
50STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
51STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
52STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
53STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
54STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
55STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
56
57/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
58/// load / store instructions to form ldm / stm instructions.
59
60namespace {
61  struct ARMLoadStoreOpt : public MachineFunctionPass {
62    static char ID;
63    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
64
65    const TargetInstrInfo *TII;
66    const TargetRegisterInfo *TRI;
67    const ARMSubtarget *STI;
68    ARMFunctionInfo *AFI;
69    RegScavenger *RS;
70    bool isThumb2;
71
72    virtual bool runOnMachineFunction(MachineFunction &Fn);
73
74    virtual const char *getPassName() const {
75      return "ARM load / store optimization pass";
76    }
77
78  private:
79    struct MemOpQueueEntry {
80      int Offset;
81      unsigned Reg;
82      bool isKill;
83      unsigned Position;
84      MachineBasicBlock::iterator MBBI;
85      bool Merged;
86      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
87                      MachineBasicBlock::iterator i)
88        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
89    };
90    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
91    typedef MemOpQueue::iterator MemOpQueueIter;
92
93    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
94                  int Offset, unsigned Base, bool BaseKill, int Opcode,
95                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
96                  DebugLoc dl,
97                  ArrayRef<std::pair<unsigned, bool> > Regs,
98                  ArrayRef<unsigned> ImpDefs);
99    void MergeOpsUpdate(MachineBasicBlock &MBB,
100                        MemOpQueue &MemOps,
101                        unsigned memOpsBegin,
102                        unsigned memOpsEnd,
103                        unsigned insertAfter,
104                        int Offset,
105                        unsigned Base,
106                        bool BaseKill,
107                        int Opcode,
108                        ARMCC::CondCodes Pred,
109                        unsigned PredReg,
110                        unsigned Scratch,
111                        DebugLoc dl,
112                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
113    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
114                      int Opcode, unsigned Size,
115                      ARMCC::CondCodes Pred, unsigned PredReg,
116                      unsigned Scratch, MemOpQueue &MemOps,
117                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
118
119    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
120    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
121                             MachineBasicBlock::iterator &MBBI);
122    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
123                                  MachineBasicBlock::iterator MBBI,
124                                  const TargetInstrInfo *TII,
125                                  bool &Advance,
126                                  MachineBasicBlock::iterator &I);
127    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
128                                   MachineBasicBlock::iterator MBBI,
129                                   bool &Advance,
130                                   MachineBasicBlock::iterator &I);
131    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
132    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
133  };
134  char ARMLoadStoreOpt::ID = 0;
135}
136
137static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
138  switch (Opcode) {
139  default: llvm_unreachable("Unhandled opcode!");
140  case ARM::LDRi12:
141    ++NumLDMGened;
142    switch (Mode) {
143    default: llvm_unreachable("Unhandled submode!");
144    case ARM_AM::ia: return ARM::LDMIA;
145    case ARM_AM::da: return ARM::LDMDA;
146    case ARM_AM::db: return ARM::LDMDB;
147    case ARM_AM::ib: return ARM::LDMIB;
148    }
149  case ARM::STRi12:
150    ++NumSTMGened;
151    switch (Mode) {
152    default: llvm_unreachable("Unhandled submode!");
153    case ARM_AM::ia: return ARM::STMIA;
154    case ARM_AM::da: return ARM::STMDA;
155    case ARM_AM::db: return ARM::STMDB;
156    case ARM_AM::ib: return ARM::STMIB;
157    }
158  case ARM::t2LDRi8:
159  case ARM::t2LDRi12:
160    ++NumLDMGened;
161    switch (Mode) {
162    default: llvm_unreachable("Unhandled submode!");
163    case ARM_AM::ia: return ARM::t2LDMIA;
164    case ARM_AM::db: return ARM::t2LDMDB;
165    }
166  case ARM::t2STRi8:
167  case ARM::t2STRi12:
168    ++NumSTMGened;
169    switch (Mode) {
170    default: llvm_unreachable("Unhandled submode!");
171    case ARM_AM::ia: return ARM::t2STMIA;
172    case ARM_AM::db: return ARM::t2STMDB;
173    }
174  case ARM::VLDRS:
175    ++NumVLDMGened;
176    switch (Mode) {
177    default: llvm_unreachable("Unhandled submode!");
178    case ARM_AM::ia: return ARM::VLDMSIA;
179    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
180    }
181  case ARM::VSTRS:
182    ++NumVSTMGened;
183    switch (Mode) {
184    default: llvm_unreachable("Unhandled submode!");
185    case ARM_AM::ia: return ARM::VSTMSIA;
186    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
187    }
188  case ARM::VLDRD:
189    ++NumVLDMGened;
190    switch (Mode) {
191    default: llvm_unreachable("Unhandled submode!");
192    case ARM_AM::ia: return ARM::VLDMDIA;
193    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
194    }
195  case ARM::VSTRD:
196    ++NumVSTMGened;
197    switch (Mode) {
198    default: llvm_unreachable("Unhandled submode!");
199    case ARM_AM::ia: return ARM::VSTMDIA;
200    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
201    }
202  }
203}
204
205namespace llvm {
206  namespace ARM_AM {
207
208AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
209  switch (Opcode) {
210  default: llvm_unreachable("Unhandled opcode!");
211  case ARM::LDMIA_RET:
212  case ARM::LDMIA:
213  case ARM::LDMIA_UPD:
214  case ARM::STMIA:
215  case ARM::STMIA_UPD:
216  case ARM::t2LDMIA_RET:
217  case ARM::t2LDMIA:
218  case ARM::t2LDMIA_UPD:
219  case ARM::t2STMIA:
220  case ARM::t2STMIA_UPD:
221  case ARM::VLDMSIA:
222  case ARM::VLDMSIA_UPD:
223  case ARM::VSTMSIA:
224  case ARM::VSTMSIA_UPD:
225  case ARM::VLDMDIA:
226  case ARM::VLDMDIA_UPD:
227  case ARM::VSTMDIA:
228  case ARM::VSTMDIA_UPD:
229    return ARM_AM::ia;
230
231  case ARM::LDMDA:
232  case ARM::LDMDA_UPD:
233  case ARM::STMDA:
234  case ARM::STMDA_UPD:
235    return ARM_AM::da;
236
237  case ARM::LDMDB:
238  case ARM::LDMDB_UPD:
239  case ARM::STMDB:
240  case ARM::STMDB_UPD:
241  case ARM::t2LDMDB:
242  case ARM::t2LDMDB_UPD:
243  case ARM::t2STMDB:
244  case ARM::t2STMDB_UPD:
245  case ARM::VLDMSDB_UPD:
246  case ARM::VSTMSDB_UPD:
247  case ARM::VLDMDDB_UPD:
248  case ARM::VSTMDDB_UPD:
249    return ARM_AM::db;
250
251  case ARM::LDMIB:
252  case ARM::LDMIB_UPD:
253  case ARM::STMIB:
254  case ARM::STMIB_UPD:
255    return ARM_AM::ib;
256  }
257}
258
259  } // end namespace ARM_AM
260} // end namespace llvm
261
262static bool isT2i32Load(unsigned Opc) {
263  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
264}
265
266static bool isi32Load(unsigned Opc) {
267  return Opc == ARM::LDRi12 || isT2i32Load(Opc);
268}
269
270static bool isT2i32Store(unsigned Opc) {
271  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
272}
273
274static bool isi32Store(unsigned Opc) {
275  return Opc == ARM::STRi12 || isT2i32Store(Opc);
276}
277
278/// MergeOps - Create and insert a LDM or STM with Base as base register and
279/// registers in Regs as the register operands that would be loaded / stored.
280/// It returns true if the transformation is done.
281bool
282ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
283                          MachineBasicBlock::iterator MBBI,
284                          int Offset, unsigned Base, bool BaseKill,
285                          int Opcode, ARMCC::CondCodes Pred,
286                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
287                          ArrayRef<std::pair<unsigned, bool> > Regs,
288                          ArrayRef<unsigned> ImpDefs) {
289  // Only a single register to load / store. Don't bother.
290  unsigned NumRegs = Regs.size();
291  if (NumRegs <= 1)
292    return false;
293
294  ARM_AM::AMSubMode Mode = ARM_AM::ia;
295  // VFP and Thumb2 do not support IB or DA modes.
296  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
297  bool haveIBAndDA = isNotVFP && !isThumb2;
298  if (Offset == 4 && haveIBAndDA)
299    Mode = ARM_AM::ib;
300  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
301    Mode = ARM_AM::da;
302  else if (Offset == -4 * (int)NumRegs && isNotVFP)
303    // VLDM/VSTM do not support DB mode without also updating the base reg.
304    Mode = ARM_AM::db;
305  else if (Offset != 0) {
306    // Check if this is a supported opcode before we insert instructions to
307    // calculate a new base register.
308    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
309
310    // If starting offset isn't zero, insert a MI to materialize a new base.
311    // But only do so if it is cost effective, i.e. merging more than two
312    // loads / stores.
313    if (NumRegs <= 2)
314      return false;
315
316    unsigned NewBase;
317    if (isi32Load(Opcode))
318      // If it is a load, then just use one of the destination register to
319      // use as the new base.
320      NewBase = Regs[NumRegs-1].first;
321    else {
322      // Use the scratch register to use as a new base.
323      NewBase = Scratch;
324      if (NewBase == 0)
325        return false;
326    }
327    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
328    if (Offset < 0) {
329      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
330      Offset = - Offset;
331    }
332    int ImmedOffset = isThumb2
333      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
334    if (ImmedOffset == -1)
335      // FIXME: Try t2ADDri12 or t2SUBri12?
336      return false;  // Probably not worth it then.
337
338    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
339      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
340      .addImm(Pred).addReg(PredReg).addReg(0);
341    Base = NewBase;
342    BaseKill = true;  // New base is always killed right its use.
343  }
344
345  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
346                Opcode == ARM::VLDRD);
347  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
348  if (!Opcode) return false;
349  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
350    .addReg(Base, getKillRegState(BaseKill))
351    .addImm(Pred).addReg(PredReg);
352  for (unsigned i = 0; i != NumRegs; ++i)
353    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
354                     | getKillRegState(Regs[i].second));
355
356  // Add implicit defs for super-registers.
357  for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
358    MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
359
360  return true;
361}
362
363// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
364// success.
365void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
366                                     MemOpQueue &memOps,
367                                     unsigned memOpsBegin, unsigned memOpsEnd,
368                                     unsigned insertAfter, int Offset,
369                                     unsigned Base, bool BaseKill,
370                                     int Opcode,
371                                     ARMCC::CondCodes Pred, unsigned PredReg,
372                                     unsigned Scratch,
373                                     DebugLoc dl,
374                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
375  // First calculate which of the registers should be killed by the merged
376  // instruction.
377  const unsigned insertPos = memOps[insertAfter].Position;
378  SmallSet<unsigned, 4> KilledRegs;
379  DenseMap<unsigned, unsigned> Killer;
380  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
381    if (i == memOpsBegin) {
382      i = memOpsEnd;
383      if (i == e)
384        break;
385    }
386    if (memOps[i].Position < insertPos && memOps[i].isKill) {
387      unsigned Reg = memOps[i].Reg;
388      KilledRegs.insert(Reg);
389      Killer[Reg] = i;
390    }
391  }
392
393  SmallVector<std::pair<unsigned, bool>, 8> Regs;
394  SmallVector<unsigned, 8> ImpDefs;
395  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
396    unsigned Reg = memOps[i].Reg;
397    // If we are inserting the merged operation after an operation that
398    // uses the same register, make sure to transfer any kill flag.
399    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
400    Regs.push_back(std::make_pair(Reg, isKill));
401
402    // Collect any implicit defs of super-registers. They must be preserved.
403    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
404      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
405        continue;
406      unsigned DefReg = MO->getReg();
407      if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
408        ImpDefs.push_back(DefReg);
409    }
410  }
411
412  // Try to do the merge.
413  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
414  ++Loc;
415  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
416                Pred, PredReg, Scratch, dl, Regs, ImpDefs))
417    return;
418
419  // Merge succeeded, update records.
420  Merges.push_back(prior(Loc));
421  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
422    // Remove kill flags from any memops that come before insertPos.
423    if (Regs[i-memOpsBegin].second) {
424      unsigned Reg = Regs[i-memOpsBegin].first;
425      if (KilledRegs.count(Reg)) {
426        unsigned j = Killer[Reg];
427        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
428        assert(Idx >= 0 && "Cannot find killing operand");
429        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
430        memOps[j].isKill = false;
431      }
432      memOps[i].isKill = true;
433    }
434    MBB.erase(memOps[i].MBBI);
435    // Update this memop to refer to the merged instruction.
436    // We may need to move kill flags again.
437    memOps[i].Merged = true;
438    memOps[i].MBBI = Merges.back();
439    memOps[i].Position = insertPos;
440  }
441}
442
443/// MergeLDR_STR - Merge a number of load / store instructions into one or more
444/// load / store multiple instructions.
445void
446ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
447                          unsigned Base, int Opcode, unsigned Size,
448                          ARMCC::CondCodes Pred, unsigned PredReg,
449                          unsigned Scratch, MemOpQueue &MemOps,
450                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
451  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
452  int Offset = MemOps[SIndex].Offset;
453  int SOffset = Offset;
454  unsigned insertAfter = SIndex;
455  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
456  DebugLoc dl = Loc->getDebugLoc();
457  const MachineOperand &PMO = Loc->getOperand(0);
458  unsigned PReg = PMO.getReg();
459  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
460  unsigned Count = 1;
461  unsigned Limit = ~0U;
462
463  // vldm / vstm limit are 32 for S variants, 16 for D variants.
464
465  switch (Opcode) {
466  default: break;
467  case ARM::VSTRS:
468    Limit = 32;
469    break;
470  case ARM::VSTRD:
471    Limit = 16;
472    break;
473  case ARM::VLDRD:
474    Limit = 16;
475    break;
476  case ARM::VLDRS:
477    Limit = 32;
478    break;
479  }
480
481  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
482    int NewOffset = MemOps[i].Offset;
483    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
484    unsigned Reg = MO.getReg();
485    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
486    // Register numbers must be in ascending order. For VFP / NEON load and
487    // store multiples, the registers must also be consecutive and within the
488    // limit on the number of registers per instruction.
489    if (Reg != ARM::SP &&
490        NewOffset == Offset + (int)Size &&
491        ((isNotVFP && RegNum > PRegNum) ||
492         ((Count < Limit) && RegNum == PRegNum+1))) {
493      Offset += Size;
494      PRegNum = RegNum;
495      ++Count;
496    } else {
497      // Can't merge this in. Try merge the earlier ones first.
498      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
499                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
500      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
501                   MemOps, Merges);
502      return;
503    }
504
505    if (MemOps[i].Position > MemOps[insertAfter].Position)
506      insertAfter = i;
507  }
508
509  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
510  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
511                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
512  return;
513}
514
515static bool definesCPSR(MachineInstr *MI) {
516  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
517    const MachineOperand &MO = MI->getOperand(i);
518    if (!MO.isReg())
519      continue;
520    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
521      // If the instruction has live CPSR def, then it's not safe to fold it
522      // into load / store.
523      return true;
524  }
525
526  return false;
527}
528
529static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
530                                unsigned Bytes, unsigned Limit,
531                                ARMCC::CondCodes Pred, unsigned PredReg) {
532  unsigned MyPredReg = 0;
533  if (!MI)
534    return false;
535
536  bool CheckCPSRDef = false;
537  switch (MI->getOpcode()) {
538  default: return false;
539  case ARM::t2SUBri:
540  case ARM::SUBri:
541    CheckCPSRDef = true;
542  // fallthrough
543  case ARM::tSUBspi:
544    break;
545  }
546
547  // Make sure the offset fits in 8 bits.
548  if (Bytes == 0 || (Limit && Bytes >= Limit))
549    return false;
550
551  unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
552  if (!(MI->getOperand(0).getReg() == Base &&
553        MI->getOperand(1).getReg() == Base &&
554        (MI->getOperand(2).getImm()*Scale) == Bytes &&
555        getInstrPredicate(MI, MyPredReg) == Pred &&
556        MyPredReg == PredReg))
557    return false;
558
559  return CheckCPSRDef ? !definesCPSR(MI) : true;
560}
561
562static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
563                                unsigned Bytes, unsigned Limit,
564                                ARMCC::CondCodes Pred, unsigned PredReg) {
565  unsigned MyPredReg = 0;
566  if (!MI)
567    return false;
568
569  bool CheckCPSRDef = false;
570  switch (MI->getOpcode()) {
571  default: return false;
572  case ARM::t2ADDri:
573  case ARM::ADDri:
574    CheckCPSRDef = true;
575  // fallthrough
576  case ARM::tADDspi:
577    break;
578  }
579
580  if (Bytes == 0 || (Limit && Bytes >= Limit))
581    // Make sure the offset fits in 8 bits.
582    return false;
583
584  unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
585  if (!(MI->getOperand(0).getReg() == Base &&
586        MI->getOperand(1).getReg() == Base &&
587        (MI->getOperand(2).getImm()*Scale) == Bytes &&
588        getInstrPredicate(MI, MyPredReg) == Pred &&
589        MyPredReg == PredReg))
590    return false;
591
592  return CheckCPSRDef ? !definesCPSR(MI) : true;
593}
594
595static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
596  switch (MI->getOpcode()) {
597  default: return 0;
598  case ARM::LDRi12:
599  case ARM::STRi12:
600  case ARM::t2LDRi8:
601  case ARM::t2LDRi12:
602  case ARM::t2STRi8:
603  case ARM::t2STRi12:
604  case ARM::VLDRS:
605  case ARM::VSTRS:
606    return 4;
607  case ARM::VLDRD:
608  case ARM::VSTRD:
609    return 8;
610  case ARM::LDMIA:
611  case ARM::LDMDA:
612  case ARM::LDMDB:
613  case ARM::LDMIB:
614  case ARM::STMIA:
615  case ARM::STMDA:
616  case ARM::STMDB:
617  case ARM::STMIB:
618  case ARM::t2LDMIA:
619  case ARM::t2LDMDB:
620  case ARM::t2STMIA:
621  case ARM::t2STMDB:
622  case ARM::VLDMSIA:
623  case ARM::VSTMSIA:
624    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
625  case ARM::VLDMDIA:
626  case ARM::VSTMDIA:
627    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
628  }
629}
630
631static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
632                                            ARM_AM::AMSubMode Mode) {
633  switch (Opc) {
634  default: llvm_unreachable("Unhandled opcode!");
635  case ARM::LDMIA:
636  case ARM::LDMDA:
637  case ARM::LDMDB:
638  case ARM::LDMIB:
639    switch (Mode) {
640    default: llvm_unreachable("Unhandled submode!");
641    case ARM_AM::ia: return ARM::LDMIA_UPD;
642    case ARM_AM::ib: return ARM::LDMIB_UPD;
643    case ARM_AM::da: return ARM::LDMDA_UPD;
644    case ARM_AM::db: return ARM::LDMDB_UPD;
645    }
646  case ARM::STMIA:
647  case ARM::STMDA:
648  case ARM::STMDB:
649  case ARM::STMIB:
650    switch (Mode) {
651    default: llvm_unreachable("Unhandled submode!");
652    case ARM_AM::ia: return ARM::STMIA_UPD;
653    case ARM_AM::ib: return ARM::STMIB_UPD;
654    case ARM_AM::da: return ARM::STMDA_UPD;
655    case ARM_AM::db: return ARM::STMDB_UPD;
656    }
657  case ARM::t2LDMIA:
658  case ARM::t2LDMDB:
659    switch (Mode) {
660    default: llvm_unreachable("Unhandled submode!");
661    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
662    case ARM_AM::db: return ARM::t2LDMDB_UPD;
663    }
664  case ARM::t2STMIA:
665  case ARM::t2STMDB:
666    switch (Mode) {
667    default: llvm_unreachable("Unhandled submode!");
668    case ARM_AM::ia: return ARM::t2STMIA_UPD;
669    case ARM_AM::db: return ARM::t2STMDB_UPD;
670    }
671  case ARM::VLDMSIA:
672    switch (Mode) {
673    default: llvm_unreachable("Unhandled submode!");
674    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
675    case ARM_AM::db: return ARM::VLDMSDB_UPD;
676    }
677  case ARM::VLDMDIA:
678    switch (Mode) {
679    default: llvm_unreachable("Unhandled submode!");
680    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
681    case ARM_AM::db: return ARM::VLDMDDB_UPD;
682    }
683  case ARM::VSTMSIA:
684    switch (Mode) {
685    default: llvm_unreachable("Unhandled submode!");
686    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
687    case ARM_AM::db: return ARM::VSTMSDB_UPD;
688    }
689  case ARM::VSTMDIA:
690    switch (Mode) {
691    default: llvm_unreachable("Unhandled submode!");
692    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
693    case ARM_AM::db: return ARM::VSTMDDB_UPD;
694    }
695  }
696}
697
698/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
699/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
700///
701/// stmia rn, <ra, rb, rc>
702/// rn := rn + 4 * 3;
703/// =>
704/// stmia rn!, <ra, rb, rc>
705///
706/// rn := rn - 4 * 3;
707/// ldmia rn, <ra, rb, rc>
708/// =>
709/// ldmdb rn!, <ra, rb, rc>
710bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
711                                               MachineBasicBlock::iterator MBBI,
712                                               bool &Advance,
713                                               MachineBasicBlock::iterator &I) {
714  MachineInstr *MI = MBBI;
715  unsigned Base = MI->getOperand(0).getReg();
716  bool BaseKill = MI->getOperand(0).isKill();
717  unsigned Bytes = getLSMultipleTransferSize(MI);
718  unsigned PredReg = 0;
719  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
720  int Opcode = MI->getOpcode();
721  DebugLoc dl = MI->getDebugLoc();
722
723  // Can't use an updating ld/st if the base register is also a dest
724  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
725  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
726    if (MI->getOperand(i).getReg() == Base)
727      return false;
728
729  bool DoMerge = false;
730  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
731
732  // Try merging with the previous instruction.
733  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
734  if (MBBI != BeginMBBI) {
735    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
736    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
737      --PrevMBBI;
738    if (Mode == ARM_AM::ia &&
739        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
740      Mode = ARM_AM::db;
741      DoMerge = true;
742    } else if (Mode == ARM_AM::ib &&
743               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
744      Mode = ARM_AM::da;
745      DoMerge = true;
746    }
747    if (DoMerge)
748      MBB.erase(PrevMBBI);
749  }
750
751  // Try merging with the next instruction.
752  MachineBasicBlock::iterator EndMBBI = MBB.end();
753  if (!DoMerge && MBBI != EndMBBI) {
754    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
755    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
756      ++NextMBBI;
757    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
758        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
759      DoMerge = true;
760    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
761               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
762      DoMerge = true;
763    }
764    if (DoMerge) {
765      if (NextMBBI == I) {
766        Advance = true;
767        ++I;
768      }
769      MBB.erase(NextMBBI);
770    }
771  }
772
773  if (!DoMerge)
774    return false;
775
776  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
777  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
778    .addReg(Base, getDefRegState(true)) // WB base register
779    .addReg(Base, getKillRegState(BaseKill))
780    .addImm(Pred).addReg(PredReg);
781
782  // Transfer the rest of operands.
783  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
784    MIB.addOperand(MI->getOperand(OpNum));
785
786  // Transfer memoperands.
787  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
788
789  MBB.erase(MBBI);
790  return true;
791}
792
793static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
794                                             ARM_AM::AddrOpc Mode) {
795  switch (Opc) {
796  case ARM::LDRi12:
797    return ARM::LDR_PRE_IMM;
798  case ARM::STRi12:
799    return ARM::STR_PRE_IMM;
800  case ARM::VLDRS:
801    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
802  case ARM::VLDRD:
803    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
804  case ARM::VSTRS:
805    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
806  case ARM::VSTRD:
807    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
808  case ARM::t2LDRi8:
809  case ARM::t2LDRi12:
810    return ARM::t2LDR_PRE;
811  case ARM::t2STRi8:
812  case ARM::t2STRi12:
813    return ARM::t2STR_PRE;
814  default: llvm_unreachable("Unhandled opcode!");
815  }
816}
817
818static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
819                                              ARM_AM::AddrOpc Mode) {
820  switch (Opc) {
821  case ARM::LDRi12:
822    return ARM::LDR_POST_IMM;
823  case ARM::STRi12:
824    return ARM::STR_POST_IMM;
825  case ARM::VLDRS:
826    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
827  case ARM::VLDRD:
828    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
829  case ARM::VSTRS:
830    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
831  case ARM::VSTRD:
832    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
833  case ARM::t2LDRi8:
834  case ARM::t2LDRi12:
835    return ARM::t2LDR_POST;
836  case ARM::t2STRi8:
837  case ARM::t2STRi12:
838    return ARM::t2STR_POST;
839  default: llvm_unreachable("Unhandled opcode!");
840  }
841}
842
843/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
844/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
845bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
846                                               MachineBasicBlock::iterator MBBI,
847                                               const TargetInstrInfo *TII,
848                                               bool &Advance,
849                                               MachineBasicBlock::iterator &I) {
850  MachineInstr *MI = MBBI;
851  unsigned Base = MI->getOperand(1).getReg();
852  bool BaseKill = MI->getOperand(1).isKill();
853  unsigned Bytes = getLSMultipleTransferSize(MI);
854  int Opcode = MI->getOpcode();
855  DebugLoc dl = MI->getDebugLoc();
856  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
857                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
858  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
859  if (isi32Load(Opcode) || isi32Store(Opcode))
860    if (MI->getOperand(2).getImm() != 0)
861      return false;
862  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
863    return false;
864
865  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
866  // Can't do the merge if the destination register is the same as the would-be
867  // writeback register.
868  if (isLd && MI->getOperand(0).getReg() == Base)
869    return false;
870
871  unsigned PredReg = 0;
872  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
873  bool DoMerge = false;
874  ARM_AM::AddrOpc AddSub = ARM_AM::add;
875  unsigned NewOpc = 0;
876  // AM2 - 12 bits, thumb2 - 8 bits.
877  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
878
879  // Try merging with the previous instruction.
880  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
881  if (MBBI != BeginMBBI) {
882    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
883    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
884      --PrevMBBI;
885    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
886      DoMerge = true;
887      AddSub = ARM_AM::sub;
888    } else if (!isAM5 &&
889               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
890      DoMerge = true;
891    }
892    if (DoMerge) {
893      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
894      MBB.erase(PrevMBBI);
895    }
896  }
897
898  // Try merging with the next instruction.
899  MachineBasicBlock::iterator EndMBBI = MBB.end();
900  if (!DoMerge && MBBI != EndMBBI) {
901    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
902    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
903      ++NextMBBI;
904    if (!isAM5 &&
905        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
906      DoMerge = true;
907      AddSub = ARM_AM::sub;
908    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
909      DoMerge = true;
910    }
911    if (DoMerge) {
912      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
913      if (NextMBBI == I) {
914        Advance = true;
915        ++I;
916      }
917      MBB.erase(NextMBBI);
918    }
919  }
920
921  if (!DoMerge)
922    return false;
923
924  if (isAM5) {
925    // VLDM[SD}_UPD, VSTM[SD]_UPD
926    // (There are no base-updating versions of VLDR/VSTR instructions, but the
927    // updating load/store-multiple instructions can be used with only one
928    // register.)
929    MachineOperand &MO = MI->getOperand(0);
930    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
931      .addReg(Base, getDefRegState(true)) // WB base register
932      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
933      .addImm(Pred).addReg(PredReg)
934      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
935                            getKillRegState(MO.isKill())));
936  } else if (isLd) {
937    if (isAM2) {
938      // LDR_PRE, LDR_POST
939      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
940        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
941        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
942          .addReg(Base, RegState::Define)
943          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
944      } else {
945        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
946        BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
947          .addReg(Base, RegState::Define)
948          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
949      }
950    } else {
951      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
952      // t2LDR_PRE, t2LDR_POST
953      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
954        .addReg(Base, RegState::Define)
955        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
956    }
957  } else {
958    MachineOperand &MO = MI->getOperand(0);
959    // FIXME: post-indexed stores use am2offset_imm, which still encodes
960    // the vestigal zero-reg offset register. When that's fixed, this clause
961    // can be removed entirely.
962    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
963      int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
964      // STR_PRE, STR_POST
965      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
966        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
967        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
968    } else {
969      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
970      // t2STR_PRE, t2STR_POST
971      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
972        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
973        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
974    }
975  }
976  MBB.erase(MBBI);
977
978  return true;
979}
980
981/// isMemoryOp - Returns true if instruction is a memory operation that this
982/// pass is capable of operating on.
983static bool isMemoryOp(const MachineInstr *MI) {
984  // When no memory operands are present, conservatively assume unaligned,
985  // volatile, unfoldable.
986  if (!MI->hasOneMemOperand())
987    return false;
988
989  const MachineMemOperand *MMO = *MI->memoperands_begin();
990
991  // Don't touch volatile memory accesses - we may be changing their order.
992  if (MMO->isVolatile())
993    return false;
994
995  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
996  // not.
997  if (MMO->getAlignment() < 4)
998    return false;
999
1000  // str <undef> could probably be eliminated entirely, but for now we just want
1001  // to avoid making a mess of it.
1002  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1003  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
1004      MI->getOperand(0).isUndef())
1005    return false;
1006
1007  // Likewise don't mess with references to undefined addresses.
1008  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
1009      MI->getOperand(1).isUndef())
1010    return false;
1011
1012  int Opcode = MI->getOpcode();
1013  switch (Opcode) {
1014  default: break;
1015  case ARM::VLDRS:
1016  case ARM::VSTRS:
1017    return MI->getOperand(1).isReg();
1018  case ARM::VLDRD:
1019  case ARM::VSTRD:
1020    return MI->getOperand(1).isReg();
1021  case ARM::LDRi12:
1022  case ARM::STRi12:
1023  case ARM::t2LDRi8:
1024  case ARM::t2LDRi12:
1025  case ARM::t2STRi8:
1026  case ARM::t2STRi12:
1027    return MI->getOperand(1).isReg();
1028  }
1029  return false;
1030}
1031
1032/// AdvanceRS - Advance register scavenger to just before the earliest memory
1033/// op that is being merged.
1034void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
1035  MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
1036  unsigned Position = MemOps[0].Position;
1037  for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
1038    if (MemOps[i].Position < Position) {
1039      Position = MemOps[i].Position;
1040      Loc = MemOps[i].MBBI;
1041    }
1042  }
1043
1044  if (Loc != MBB.begin())
1045    RS->forward(prior(Loc));
1046}
1047
1048static int getMemoryOpOffset(const MachineInstr *MI) {
1049  int Opcode = MI->getOpcode();
1050  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
1051  unsigned NumOperands = MI->getDesc().getNumOperands();
1052  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
1053
1054  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
1055      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
1056      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
1057      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
1058    return OffField;
1059
1060  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
1061    : ARM_AM::getAM5Offset(OffField) * 4;
1062  if (isAM3) {
1063    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
1064      Offset = -Offset;
1065  } else {
1066    if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
1067      Offset = -Offset;
1068  }
1069  return Offset;
1070}
1071
1072static void InsertLDR_STR(MachineBasicBlock &MBB,
1073                          MachineBasicBlock::iterator &MBBI,
1074                          int Offset, bool isDef,
1075                          DebugLoc dl, unsigned NewOpc,
1076                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1077                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1078                          bool OffKill, bool OffUndef,
1079                          ARMCC::CondCodes Pred, unsigned PredReg,
1080                          const TargetInstrInfo *TII, bool isT2) {
1081  if (isDef) {
1082    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1083                                      TII->get(NewOpc))
1084      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1085      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1086    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1087  } else {
1088    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1089                                      TII->get(NewOpc))
1090      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1091      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1092    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1093  }
1094}
1095
1096bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
1097                                          MachineBasicBlock::iterator &MBBI) {
1098  MachineInstr *MI = &*MBBI;
1099  unsigned Opcode = MI->getOpcode();
1100  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
1101      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
1102    const MachineOperand &BaseOp = MI->getOperand(2);
1103    unsigned BaseReg = BaseOp.getReg();
1104    unsigned EvenReg = MI->getOperand(0).getReg();
1105    unsigned OddReg  = MI->getOperand(1).getReg();
1106    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
1107    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
1108    // ARM errata 602117: LDRD with base in list may result in incorrect base
1109    // register when interrupted or faulted.
1110    bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
1111    if (!Errata602117 &&
1112        ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
1113      return false;
1114
1115    MachineBasicBlock::iterator NewBBI = MBBI;
1116    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
1117    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
1118    bool EvenDeadKill = isLd ?
1119      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
1120    bool EvenUndef = MI->getOperand(0).isUndef();
1121    bool OddDeadKill  = isLd ?
1122      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
1123    bool OddUndef = MI->getOperand(1).isUndef();
1124    bool BaseKill = BaseOp.isKill();
1125    bool BaseUndef = BaseOp.isUndef();
1126    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
1127    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
1128    int OffImm = getMemoryOpOffset(MI);
1129    unsigned PredReg = 0;
1130    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
1131
1132    if (OddRegNum > EvenRegNum && OffImm == 0) {
1133      // Ascending register numbers and no offset. It's safe to change it to a
1134      // ldm or stm.
1135      unsigned NewOpc = (isLd)
1136        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
1137        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
1138      if (isLd) {
1139        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1140          .addReg(BaseReg, getKillRegState(BaseKill))
1141          .addImm(Pred).addReg(PredReg)
1142          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
1143          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
1144        ++NumLDRD2LDM;
1145      } else {
1146        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
1147          .addReg(BaseReg, getKillRegState(BaseKill))
1148          .addImm(Pred).addReg(PredReg)
1149          .addReg(EvenReg,
1150                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
1151          .addReg(OddReg,
1152                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
1153        ++NumSTRD2STM;
1154      }
1155      NewBBI = llvm::prior(MBBI);
1156    } else {
1157      // Split into two instructions.
1158      unsigned NewOpc = (isLd)
1159        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1160        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1161      // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
1162      // so adjust and use t2LDRi12 here for that.
1163      unsigned NewOpc2 = (isLd)
1164        ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
1165        : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
1166      DebugLoc dl = MBBI->getDebugLoc();
1167      // If this is a load and base register is killed, it may have been
1168      // re-defed by the load, make sure the first load does not clobber it.
1169      if (isLd &&
1170          (BaseKill || OffKill) &&
1171          (TRI->regsOverlap(EvenReg, BaseReg))) {
1172        assert(!TRI->regsOverlap(OddReg, BaseReg));
1173        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1174                      OddReg, OddDeadKill, false,
1175                      BaseReg, false, BaseUndef, false, OffUndef,
1176                      Pred, PredReg, TII, isT2);
1177        NewBBI = llvm::prior(MBBI);
1178        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1179                      EvenReg, EvenDeadKill, false,
1180                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1181                      Pred, PredReg, TII, isT2);
1182      } else {
1183        if (OddReg == EvenReg && EvenDeadKill) {
1184          // If the two source operands are the same, the kill marker is
1185          // probably on the first one. e.g.
1186          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
1187          EvenDeadKill = false;
1188          OddDeadKill = true;
1189        }
1190        // Never kill the base register in the first instruction.
1191        // <rdar://problem/11101911>
1192        if (EvenReg == BaseReg)
1193          EvenDeadKill = false;
1194        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
1195                      EvenReg, EvenDeadKill, EvenUndef,
1196                      BaseReg, false, BaseUndef, false, OffUndef,
1197                      Pred, PredReg, TII, isT2);
1198        NewBBI = llvm::prior(MBBI);
1199        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
1200                      OddReg, OddDeadKill, OddUndef,
1201                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
1202                      Pred, PredReg, TII, isT2);
1203      }
1204      if (isLd)
1205        ++NumLDRD2LDR;
1206      else
1207        ++NumSTRD2STR;
1208    }
1209
1210    MBB.erase(MI);
1211    MBBI = NewBBI;
1212    return true;
1213  }
1214  return false;
1215}
1216
1217/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
1218/// ops of the same base and incrementing offset into LDM / STM ops.
1219bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
1220  unsigned NumMerges = 0;
1221  unsigned NumMemOps = 0;
1222  MemOpQueue MemOps;
1223  unsigned CurrBase = 0;
1224  int CurrOpc = -1;
1225  unsigned CurrSize = 0;
1226  ARMCC::CondCodes CurrPred = ARMCC::AL;
1227  unsigned CurrPredReg = 0;
1228  unsigned Position = 0;
1229  SmallVector<MachineBasicBlock::iterator,4> Merges;
1230
1231  RS->enterBasicBlock(&MBB);
1232  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1233  while (MBBI != E) {
1234    if (FixInvalidRegPairOp(MBB, MBBI))
1235      continue;
1236
1237    bool Advance  = false;
1238    bool TryMerge = false;
1239    bool Clobber  = false;
1240
1241    bool isMemOp = isMemoryOp(MBBI);
1242    if (isMemOp) {
1243      int Opcode = MBBI->getOpcode();
1244      unsigned Size = getLSMultipleTransferSize(MBBI);
1245      const MachineOperand &MO = MBBI->getOperand(0);
1246      unsigned Reg = MO.getReg();
1247      bool isKill = MO.isDef() ? false : MO.isKill();
1248      unsigned Base = MBBI->getOperand(1).getReg();
1249      unsigned PredReg = 0;
1250      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
1251      int Offset = getMemoryOpOffset(MBBI);
1252      // Watch out for:
1253      // r4 := ldr [r5]
1254      // r5 := ldr [r5, #4]
1255      // r6 := ldr [r5, #8]
1256      //
1257      // The second ldr has effectively broken the chain even though it
1258      // looks like the later ldr(s) use the same base register. Try to
1259      // merge the ldr's so far, including this one. But don't try to
1260      // combine the following ldr(s).
1261      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
1262      if (CurrBase == 0 && !Clobber) {
1263        // Start of a new chain.
1264        CurrBase = Base;
1265        CurrOpc  = Opcode;
1266        CurrSize = Size;
1267        CurrPred = Pred;
1268        CurrPredReg = PredReg;
1269        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
1270        ++NumMemOps;
1271        Advance = true;
1272      } else {
1273        if (Clobber) {
1274          TryMerge = true;
1275          Advance = true;
1276        }
1277
1278        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
1279          // No need to match PredReg.
1280          // Continue adding to the queue.
1281          if (Offset > MemOps.back().Offset) {
1282            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
1283                                             Position, MBBI));
1284            ++NumMemOps;
1285            Advance = true;
1286          } else {
1287            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
1288                 I != E; ++I) {
1289              if (Offset < I->Offset) {
1290                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
1291                                                 Position, MBBI));
1292                ++NumMemOps;
1293                Advance = true;
1294                break;
1295              } else if (Offset == I->Offset) {
1296                // Collision! This can't be merged!
1297                break;
1298              }
1299            }
1300          }
1301        }
1302      }
1303    }
1304
1305    if (MBBI->isDebugValue()) {
1306      ++MBBI;
1307      if (MBBI == E)
1308        // Reach the end of the block, try merging the memory instructions.
1309        TryMerge = true;
1310    } else if (Advance) {
1311      ++Position;
1312      ++MBBI;
1313      if (MBBI == E)
1314        // Reach the end of the block, try merging the memory instructions.
1315        TryMerge = true;
1316    } else
1317      TryMerge = true;
1318
1319    if (TryMerge) {
1320      if (NumMemOps > 1) {
1321        // Try to find a free register to use as a new base in case it's needed.
1322        // First advance to the instruction just before the start of the chain.
1323        AdvanceRS(MBB, MemOps);
1324        // Find a scratch register.
1325        unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
1326        // Process the load / store instructions.
1327        RS->forward(prior(MBBI));
1328
1329        // Merge ops.
1330        Merges.clear();
1331        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
1332                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
1333
1334        // Try folding preceding/trailing base inc/dec into the generated
1335        // LDM/STM ops.
1336        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
1337          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
1338            ++NumMerges;
1339        NumMerges += Merges.size();
1340
1341        // Try folding preceding/trailing base inc/dec into those load/store
1342        // that were not merged to form LDM/STM ops.
1343        for (unsigned i = 0; i != NumMemOps; ++i)
1344          if (!MemOps[i].Merged)
1345            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
1346              ++NumMerges;
1347
1348        // RS may be pointing to an instruction that's deleted.
1349        RS->skipTo(prior(MBBI));
1350      } else if (NumMemOps == 1) {
1351        // Try folding preceding/trailing base inc/dec into the single
1352        // load/store.
1353        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
1354          ++NumMerges;
1355          RS->forward(prior(MBBI));
1356        }
1357      }
1358
1359      CurrBase = 0;
1360      CurrOpc = -1;
1361      CurrSize = 0;
1362      CurrPred = ARMCC::AL;
1363      CurrPredReg = 0;
1364      if (NumMemOps) {
1365        MemOps.clear();
1366        NumMemOps = 0;
1367      }
1368
1369      // If iterator hasn't been advanced and this is not a memory op, skip it.
1370      // It can't start a new chain anyway.
1371      if (!Advance && !isMemOp && MBBI != E) {
1372        ++Position;
1373        ++MBBI;
1374      }
1375    }
1376  }
1377  return NumMerges > 0;
1378}
1379
1380/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
1381/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
1382/// directly restore the value of LR into pc.
1383///   ldmfd sp!, {..., lr}
1384///   bx lr
1385/// or
1386///   ldmfd sp!, {..., lr}
1387///   mov pc, lr
1388/// =>
1389///   ldmfd sp!, {..., pc}
1390bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
1391  if (MBB.empty()) return false;
1392
1393  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1394  if (MBBI != MBB.begin() &&
1395      (MBBI->getOpcode() == ARM::BX_RET ||
1396       MBBI->getOpcode() == ARM::tBX_RET ||
1397       MBBI->getOpcode() == ARM::MOVPCLR)) {
1398    MachineInstr *PrevMI = prior(MBBI);
1399    unsigned Opcode = PrevMI->getOpcode();
1400    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
1401        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
1402        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
1403      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
1404      if (MO.getReg() != ARM::LR)
1405        return false;
1406      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
1407      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
1408              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
1409      PrevMI->setDesc(TII->get(NewOpc));
1410      MO.setReg(ARM::PC);
1411      PrevMI->copyImplicitOps(&*MBBI);
1412      MBB.erase(MBBI);
1413      return true;
1414    }
1415  }
1416  return false;
1417}
1418
1419bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1420  const TargetMachine &TM = Fn.getTarget();
1421  AFI = Fn.getInfo<ARMFunctionInfo>();
1422  TII = TM.getInstrInfo();
1423  TRI = TM.getRegisterInfo();
1424  STI = &TM.getSubtarget<ARMSubtarget>();
1425  RS = new RegScavenger();
1426  isThumb2 = AFI->isThumb2Function();
1427
1428  bool Modified = false;
1429  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1430       ++MFI) {
1431    MachineBasicBlock &MBB = *MFI;
1432    Modified |= LoadStoreMultipleOpti(MBB);
1433    if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
1434      Modified |= MergeReturnIntoLDM(MBB);
1435  }
1436
1437  delete RS;
1438  return Modified;
1439}
1440
1441
1442/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
1443/// load / stores from consecutive locations close to make it more
1444/// likely they will be combined later.
1445
1446namespace {
1447  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1448    static char ID;
1449    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
1450
1451    const TargetData *TD;
1452    const TargetInstrInfo *TII;
1453    const TargetRegisterInfo *TRI;
1454    const ARMSubtarget *STI;
1455    MachineRegisterInfo *MRI;
1456    MachineFunction *MF;
1457
1458    virtual bool runOnMachineFunction(MachineFunction &Fn);
1459
1460    virtual const char *getPassName() const {
1461      return "ARM pre- register allocation load / store optimization pass";
1462    }
1463
1464  private:
1465    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1466                          unsigned &NewOpc, unsigned &EvenReg,
1467                          unsigned &OddReg, unsigned &BaseReg,
1468                          int &Offset,
1469                          unsigned &PredReg, ARMCC::CondCodes &Pred,
1470                          bool &isT2);
1471    bool RescheduleOps(MachineBasicBlock *MBB,
1472                       SmallVector<MachineInstr*, 4> &Ops,
1473                       unsigned Base, bool isLd,
1474                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1475    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1476  };
1477  char ARMPreAllocLoadStoreOpt::ID = 0;
1478}
1479
1480bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1481  TD  = Fn.getTarget().getTargetData();
1482  TII = Fn.getTarget().getInstrInfo();
1483  TRI = Fn.getTarget().getRegisterInfo();
1484  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
1485  MRI = &Fn.getRegInfo();
1486  MF  = &Fn;
1487
1488  bool Modified = false;
1489  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1490       ++MFI)
1491    Modified |= RescheduleLoadStoreInstrs(MFI);
1492
1493  return Modified;
1494}
1495
1496static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1497                                      MachineBasicBlock::iterator I,
1498                                      MachineBasicBlock::iterator E,
1499                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
1500                                      SmallSet<unsigned, 4> &MemRegs,
1501                                      const TargetRegisterInfo *TRI) {
1502  // Are there stores / loads / calls between them?
1503  // FIXME: This is overly conservative. We should make use of alias information
1504  // some day.
1505  SmallSet<unsigned, 4> AddedRegPressure;
1506  while (++I != E) {
1507    if (I->isDebugValue() || MemOps.count(&*I))
1508      continue;
1509    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1510      return false;
1511    if (isLd && I->mayStore())
1512      return false;
1513    if (!isLd) {
1514      if (I->mayLoad())
1515        return false;
1516      // It's not safe to move the first 'str' down.
1517      // str r1, [r0]
1518      // strh r5, [r0]
1519      // str r4, [r0, #+4]
1520      if (I->mayStore())
1521        return false;
1522    }
1523    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1524      MachineOperand &MO = I->getOperand(j);
1525      if (!MO.isReg())
1526        continue;
1527      unsigned Reg = MO.getReg();
1528      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1529        return false;
1530      if (Reg != Base && !MemRegs.count(Reg))
1531        AddedRegPressure.insert(Reg);
1532    }
1533  }
1534
1535  // Estimate register pressure increase due to the transformation.
1536  if (MemRegs.size() <= 4)
1537    // Ok if we are moving small number of instructions.
1538    return true;
1539  return AddedRegPressure.size() <= MemRegs.size() * 2;
1540}
1541
1542
1543/// Copy Op0 and Op1 operands into a new array assigned to MI.
1544static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
1545                                   MachineInstr *Op1) {
1546  assert(MI->memoperands_empty() && "expected a new machineinstr");
1547  size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
1548    + (Op1->memoperands_end() - Op1->memoperands_begin());
1549
1550  MachineFunction *MF = MI->getParent()->getParent();
1551  MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
1552  MachineSDNode::mmo_iterator MemEnd =
1553    std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
1554  MemEnd =
1555    std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
1556  MI->setMemRefs(MemBegin, MemEnd);
1557}
1558
1559bool
1560ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1561                                          DebugLoc &dl,
1562                                          unsigned &NewOpc, unsigned &EvenReg,
1563                                          unsigned &OddReg, unsigned &BaseReg,
1564                                          int &Offset, unsigned &PredReg,
1565                                          ARMCC::CondCodes &Pred,
1566                                          bool &isT2) {
1567  // Make sure we're allowed to generate LDRD/STRD.
1568  if (!STI->hasV5TEOps())
1569    return false;
1570
1571  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
1572  unsigned Scale = 1;
1573  unsigned Opcode = Op0->getOpcode();
1574  if (Opcode == ARM::LDRi12)
1575    NewOpc = ARM::LDRD;
1576  else if (Opcode == ARM::STRi12)
1577    NewOpc = ARM::STRD;
1578  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
1579    NewOpc = ARM::t2LDRDi8;
1580    Scale = 4;
1581    isT2 = true;
1582  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
1583    NewOpc = ARM::t2STRDi8;
1584    Scale = 4;
1585    isT2 = true;
1586  } else
1587    return false;
1588
1589  // Make sure the base address satisfies i64 ld / st alignment requirement.
1590  if (!Op0->hasOneMemOperand() ||
1591      !(*Op0->memoperands_begin())->getValue() ||
1592      (*Op0->memoperands_begin())->isVolatile())
1593    return false;
1594
1595  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
1596  const Function *Func = MF->getFunction();
1597  unsigned ReqAlign = STI->hasV6Ops()
1598    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
1599    : 8;  // Pre-v6 need 8-byte align
1600  if (Align < ReqAlign)
1601    return false;
1602
1603  // Then make sure the immediate offset fits.
1604  int OffImm = getMemoryOpOffset(Op0);
1605  if (isT2) {
1606    int Limit = (1 << 8) * Scale;
1607    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
1608      return false;
1609    Offset = OffImm;
1610  } else {
1611    ARM_AM::AddrOpc AddSub = ARM_AM::add;
1612    if (OffImm < 0) {
1613      AddSub = ARM_AM::sub;
1614      OffImm = - OffImm;
1615    }
1616    int Limit = (1 << 8) * Scale;
1617    if (OffImm >= Limit || (OffImm & (Scale-1)))
1618      return false;
1619    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
1620  }
1621  EvenReg = Op0->getOperand(0).getReg();
1622  OddReg  = Op1->getOperand(0).getReg();
1623  if (EvenReg == OddReg)
1624    return false;
1625  BaseReg = Op0->getOperand(1).getReg();
1626  Pred = getInstrPredicate(Op0, PredReg);
1627  dl = Op0->getDebugLoc();
1628  return true;
1629}
1630
1631namespace {
1632  struct OffsetCompare {
1633    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
1634      int LOffset = getMemoryOpOffset(LHS);
1635      int ROffset = getMemoryOpOffset(RHS);
1636      assert(LHS == RHS || LOffset != ROffset);
1637      return LOffset > ROffset;
1638    }
1639  };
1640}
1641
1642bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
1643                                 SmallVector<MachineInstr*, 4> &Ops,
1644                                 unsigned Base, bool isLd,
1645                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
1646  bool RetVal = false;
1647
1648  // Sort by offset (in reverse order).
1649  std::sort(Ops.begin(), Ops.end(), OffsetCompare());
1650
1651  // The loads / stores of the same base are in order. Scan them from first to
1652  // last and check for the following:
1653  // 1. Any def of base.
1654  // 2. Any gaps.
1655  while (Ops.size() > 1) {
1656    unsigned FirstLoc = ~0U;
1657    unsigned LastLoc = 0;
1658    MachineInstr *FirstOp = 0;
1659    MachineInstr *LastOp = 0;
1660    int LastOffset = 0;
1661    unsigned LastOpcode = 0;
1662    unsigned LastBytes = 0;
1663    unsigned NumMove = 0;
1664    for (int i = Ops.size() - 1; i >= 0; --i) {
1665      MachineInstr *Op = Ops[i];
1666      unsigned Loc = MI2LocMap[Op];
1667      if (Loc <= FirstLoc) {
1668        FirstLoc = Loc;
1669        FirstOp = Op;
1670      }
1671      if (Loc >= LastLoc) {
1672        LastLoc = Loc;
1673        LastOp = Op;
1674      }
1675
1676      unsigned LSMOpcode
1677        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
1678      if (LastOpcode && LSMOpcode != LastOpcode)
1679        break;
1680
1681      int Offset = getMemoryOpOffset(Op);
1682      unsigned Bytes = getLSMultipleTransferSize(Op);
1683      if (LastBytes) {
1684        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
1685          break;
1686      }
1687      LastOffset = Offset;
1688      LastBytes = Bytes;
1689      LastOpcode = LSMOpcode;
1690      if (++NumMove == 8) // FIXME: Tune this limit.
1691        break;
1692    }
1693
1694    if (NumMove <= 1)
1695      Ops.pop_back();
1696    else {
1697      SmallPtrSet<MachineInstr*, 4> MemOps;
1698      SmallSet<unsigned, 4> MemRegs;
1699      for (int i = NumMove-1; i >= 0; --i) {
1700        MemOps.insert(Ops[i]);
1701        MemRegs.insert(Ops[i]->getOperand(0).getReg());
1702      }
1703
1704      // Be conservative, if the instructions are too far apart, don't
1705      // move them. We want to limit the increase of register pressure.
1706      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
1707      if (DoMove)
1708        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
1709                                           MemOps, MemRegs, TRI);
1710      if (!DoMove) {
1711        for (unsigned i = 0; i != NumMove; ++i)
1712          Ops.pop_back();
1713      } else {
1714        // This is the new location for the loads / stores.
1715        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
1716        while (InsertPos != MBB->end()
1717               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
1718          ++InsertPos;
1719
1720        // If we are moving a pair of loads / stores, see if it makes sense
1721        // to try to allocate a pair of registers that can form register pairs.
1722        MachineInstr *Op0 = Ops.back();
1723        MachineInstr *Op1 = Ops[Ops.size()-2];
1724        unsigned EvenReg = 0, OddReg = 0;
1725        unsigned BaseReg = 0, PredReg = 0;
1726        ARMCC::CondCodes Pred = ARMCC::AL;
1727        bool isT2 = false;
1728        unsigned NewOpc = 0;
1729        int Offset = 0;
1730        DebugLoc dl;
1731        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
1732                                             EvenReg, OddReg, BaseReg,
1733                                             Offset, PredReg, Pred, isT2)) {
1734          Ops.pop_back();
1735          Ops.pop_back();
1736
1737          const MCInstrDesc &MCID = TII->get(NewOpc);
1738          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
1739          MRI->constrainRegClass(EvenReg, TRC);
1740          MRI->constrainRegClass(OddReg, TRC);
1741
1742          // Form the pair instruction.
1743          if (isLd) {
1744            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1745              .addReg(EvenReg, RegState::Define)
1746              .addReg(OddReg, RegState::Define)
1747              .addReg(BaseReg);
1748            // FIXME: We're converting from LDRi12 to an insn that still
1749            // uses addrmode2, so we need an explicit offset reg. It should
1750            // always by reg0 since we're transforming LDRi12s.
1751            if (!isT2)
1752              MIB.addReg(0);
1753            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1754            concatenateMemOperands(MIB, Op0, Op1);
1755            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1756            ++NumLDRDFormed;
1757          } else {
1758            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
1759              .addReg(EvenReg)
1760              .addReg(OddReg)
1761              .addReg(BaseReg);
1762            // FIXME: We're converting from LDRi12 to an insn that still
1763            // uses addrmode2, so we need an explicit offset reg. It should
1764            // always by reg0 since we're transforming STRi12s.
1765            if (!isT2)
1766              MIB.addReg(0);
1767            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1768            concatenateMemOperands(MIB, Op0, Op1);
1769            DEBUG(dbgs() << "Formed " << *MIB << "\n");
1770            ++NumSTRDFormed;
1771          }
1772          MBB->erase(Op0);
1773          MBB->erase(Op1);
1774
1775          // Add register allocation hints to form register pairs.
1776          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
1777          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
1778        } else {
1779          for (unsigned i = 0; i != NumMove; ++i) {
1780            MachineInstr *Op = Ops.back();
1781            Ops.pop_back();
1782            MBB->splice(InsertPos, MBB, Op);
1783          }
1784        }
1785
1786        NumLdStMoved += NumMove;
1787        RetVal = true;
1788      }
1789    }
1790  }
1791
1792  return RetVal;
1793}
1794
1795bool
1796ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
1797  bool RetVal = false;
1798
1799  DenseMap<MachineInstr*, unsigned> MI2LocMap;
1800  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
1801  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
1802  SmallVector<unsigned, 4> LdBases;
1803  SmallVector<unsigned, 4> StBases;
1804
1805  unsigned Loc = 0;
1806  MachineBasicBlock::iterator MBBI = MBB->begin();
1807  MachineBasicBlock::iterator E = MBB->end();
1808  while (MBBI != E) {
1809    for (; MBBI != E; ++MBBI) {
1810      MachineInstr *MI = MBBI;
1811      if (MI->isCall() || MI->isTerminator()) {
1812        // Stop at barriers.
1813        ++MBBI;
1814        break;
1815      }
1816
1817      if (!MI->isDebugValue())
1818        MI2LocMap[MI] = ++Loc;
1819
1820      if (!isMemoryOp(MI))
1821        continue;
1822      unsigned PredReg = 0;
1823      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
1824        continue;
1825
1826      int Opc = MI->getOpcode();
1827      bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
1828      unsigned Base = MI->getOperand(1).getReg();
1829      int Offset = getMemoryOpOffset(MI);
1830
1831      bool StopHere = false;
1832      if (isLd) {
1833        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1834          Base2LdsMap.find(Base);
1835        if (BI != Base2LdsMap.end()) {
1836          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1837            if (Offset == getMemoryOpOffset(BI->second[i])) {
1838              StopHere = true;
1839              break;
1840            }
1841          }
1842          if (!StopHere)
1843            BI->second.push_back(MI);
1844        } else {
1845          SmallVector<MachineInstr*, 4> MIs;
1846          MIs.push_back(MI);
1847          Base2LdsMap[Base] = MIs;
1848          LdBases.push_back(Base);
1849        }
1850      } else {
1851        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
1852          Base2StsMap.find(Base);
1853        if (BI != Base2StsMap.end()) {
1854          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
1855            if (Offset == getMemoryOpOffset(BI->second[i])) {
1856              StopHere = true;
1857              break;
1858            }
1859          }
1860          if (!StopHere)
1861            BI->second.push_back(MI);
1862        } else {
1863          SmallVector<MachineInstr*, 4> MIs;
1864          MIs.push_back(MI);
1865          Base2StsMap[Base] = MIs;
1866          StBases.push_back(Base);
1867        }
1868      }
1869
1870      if (StopHere) {
1871        // Found a duplicate (a base+offset combination that's seen earlier).
1872        // Backtrack.
1873        --Loc;
1874        break;
1875      }
1876    }
1877
1878    // Re-schedule loads.
1879    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
1880      unsigned Base = LdBases[i];
1881      SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
1882      if (Lds.size() > 1)
1883        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
1884    }
1885
1886    // Re-schedule stores.
1887    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
1888      unsigned Base = StBases[i];
1889      SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
1890      if (Sts.size() > 1)
1891        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
1892    }
1893
1894    if (MBBI != E) {
1895      Base2LdsMap.clear();
1896      Base2StsMap.clear();
1897      LdBases.clear();
1898      StBases.clear();
1899    }
1900  }
1901
1902  return RetVal;
1903}
1904
1905
1906/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
1907/// optimization pass.
1908FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
1909  if (PreAlloc)
1910    return new ARMPreAllocLoadStoreOpt();
1911  return new ARMLoadStoreOpt();
1912}
1913