1//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file This file contains a pass that performs load / store related peephole
11/// optimizations. This pass should be run after register allocation.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARM.h"
16#include "ARMBaseInstrInfo.h"
17#include "ARMBaseRegisterInfo.h"
18#include "ARMISelLowering.h"
19#include "ARMMachineFunctionInfo.h"
20#include "ARMSubtarget.h"
21#include "MCTargetDesc/ARMAddressingModes.h"
22#include "ThumbRegisterInfo.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallPtrSet.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/CodeGen/MachineBasicBlock.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
31#include "llvm/CodeGen/MachineInstr.h"
32#include "llvm/CodeGen/MachineInstrBuilder.h"
33#include "llvm/CodeGen/MachineRegisterInfo.h"
34#include "llvm/CodeGen/RegisterClassInfo.h"
35#include "llvm/CodeGen/SelectionDAGNodes.h"
36#include "llvm/CodeGen/LivePhysRegs.h"
37#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/DerivedTypes.h"
39#include "llvm/IR/Function.h"
40#include "llvm/Support/Allocator.h"
41#include "llvm/Support/Debug.h"
42#include "llvm/Support/ErrorHandling.h"
43#include "llvm/Support/raw_ostream.h"
44#include "llvm/Target/TargetInstrInfo.h"
45#include "llvm/Target/TargetMachine.h"
46#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

// Counters for the merges performed by this pass; reported with -stats.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");

// Forward declaration of the initializer so the INITIALIZE_PASS macro below
// can register the pass with the PassRegistry.
namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
68
namespace {
  /// Post-register-allocation pass that combines load / store instructions
  /// with a common base register into ldm / stm instructions.
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {
      initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
    }

    // Per-function state, cached by runOnMachineFunction.
    const MachineFunction *MF;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    const TargetLowering *TL;
    ARMFunctionInfo *AFI;
    // Lazily computed liveness, maintained by moveLiveRegsBefore.
    LivePhysRegs LiveRegs;
    RegisterClassInfo RegClassInfo;
    // Position up to which LiveRegs has been stepped backwards.
    MachineBasicBlock::const_iterator LiveRegPos;
    bool LiveRegsValid;
    bool RegClassInfoValid;
    // Subtarget mode flags for the current function.
    bool isThumb1, isThumb2;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    const char *getPassName() const override {
      return ARM_LOAD_STORE_OPT_NAME;
    }

  private:
    /// A set of load/store MachineInstrs with same base register sorted by
    /// offset.
    struct MemOpQueueEntry {
      MachineInstr *MI;
      int Offset;        ///< Load/Store offset.
      unsigned Position; ///< Position as counted from end of basic block.
      MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

    /// A set of MachineInstrs that fulfill (nearly all) conditions to get
    /// merged into a LDM/STM.
    struct MergeCandidate {
      /// List of instructions ordered by load/store offset.
      SmallVector<MachineInstr*, 4> Instrs;
      /// Index in Instrs of the instruction being latest in the schedule.
      unsigned LatestMIIdx;
      /// Index in Instrs of the instruction being earliest in the schedule.
      unsigned EarliestMIIdx;
      /// Index into the basic block where the merged instruction will be
      /// inserted. (See MemOpQueueEntry.Position)
      unsigned InsertPos;
      /// Whether the instructions can be merged into a ldm/stm instruction.
      bool CanMergeToLSMulti;
      /// Whether the instructions can be merged into a ldrd/strd instruction.
      bool CanMergeToLSDouble;
    };
    SpecificBumpPtrAllocator<MergeCandidate> Allocator;
    SmallVector<const MergeCandidate*,4> Candidates;
    SmallVector<MachineInstr*,4> MergeBaseCandidates;

    void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                            MachineBasicBlock::const_iterator Before);
    unsigned findFreeReg(const TargetRegisterClass &RegClass);
    void UpdateBaseRegUses(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           DebugLoc DL, unsigned Base, unsigned WordOffset,
                           ARMCC::CondCodes Pred, unsigned PredReg);
    MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
    MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
        MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
        bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
        DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
    void FormCandidates(const MemOpQueue &MemOps);
    MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineInstr *MI);
    bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
    bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    bool CombineMovBx(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
160
161static bool definesCPSR(const MachineInstr *MI) {
162  for (const auto &MO : MI->operands()) {
163    if (!MO.isReg())
164      continue;
165    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
166      // If the instruction has live CPSR def, then it's not safe to fold it
167      // into load / store.
168      return true;
169  }
170
171  return false;
172}
173
174static int getMemoryOpOffset(const MachineInstr *MI) {
175  unsigned Opcode = MI->getOpcode();
176  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
177  unsigned NumOperands = MI->getDesc().getNumOperands();
178  unsigned OffField = MI->getOperand(NumOperands-3).getImm();
179
180  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
181      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
182      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
183      Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
184    return OffField;
185
186  // Thumb1 immediate offsets are scaled by 4
187  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
188      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
189    return OffField * 4;
190
191  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
192    : ARM_AM::getAM5Offset(OffField) * 4;
193  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
194    : ARM_AM::getAM5Op(OffField);
195
196  if (Op == ARM_AM::sub)
197    return -Offset;
198
199  return Offset;
200}
201
202static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
203  return MI.getOperand(1);
204}
205
206static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
207  return MI.getOperand(0);
208}
209
/// Map a single load/store opcode to the corresponding load/store-multiple
/// opcode for addressing sub-mode \p Mode. Returns 0 for VFP db sub-modes,
/// where only a writeback (_UPD) variant exists. Note that this increments
/// the LDM/STM statistics counters as a side effect.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    // Thumb2 supports only IA and DB multiple-load sub-modes.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
294
/// Return the addressing sub-mode (increment/decrement after/before) encoded
/// by load/store-multiple opcode \p Opcode.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  // Increment-after forms (including writeback and return variants).
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  // Decrement-after forms.
  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  // Decrement-before forms.
  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  // Increment-before forms.
  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
348
349static bool isT1i32Load(unsigned Opc) {
350  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
351}
352
353static bool isT2i32Load(unsigned Opc) {
354  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
355}
356
357static bool isi32Load(unsigned Opc) {
358  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
359}
360
361static bool isT1i32Store(unsigned Opc) {
362  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
363}
364
365static bool isT2i32Store(unsigned Opc) {
366  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
367}
368
369static bool isi32Store(unsigned Opc) {
370  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
371}
372
373static bool isLoadSingle(unsigned Opc) {
374  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
375}
376
377static unsigned getImmScale(unsigned Opc) {
378  switch (Opc) {
379  default: llvm_unreachable("Unhandled opcode!");
380  case ARM::tLDRi:
381  case ARM::tSTRi:
382  case ARM::tLDRspi:
383  case ARM::tSTRspi:
384    return 1;
385  case ARM::tLDRHi:
386  case ARM::tSTRHi:
387    return 2;
388  case ARM::tLDRBi:
389  case ARM::tSTRBi:
390    return 4;
391  }
392}
393
/// Return the number of bytes transferred by load/store instruction \p MI,
/// or 0 for opcodes this pass does not recognize.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  // Single 32-bit transfers.
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  // Single 64-bit (doubleword) transfers.
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  // Load/store multiple: 4 bytes for each register in the variadic register
  // list (total operand count minus the fixed operands, plus one).
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  // VFP doubleword multiple: 8 bytes per listed register.
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
436
/// Update future uses of the base register with the offset introduced
/// due to writeback, i.e. \p Base now points \p WordOffset words beyond the
/// value downstream users expect. Immediate-offset loads/stores and ADDS/SUBS
/// of \p Base are patched in place where possible; otherwise a SUBS is
/// inserted to reset the base register. This function only works on Thumb1.
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
533
534/// Return the first register of class \p RegClass that is not in \p Regs.
535unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
536  if (!RegClassInfoValid) {
537    RegClassInfo.runOnMachineFunction(*MF);
538    RegClassInfoValid = true;
539  }
540
541  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
542    if (!LiveRegs.contains(Reg))
543      return Reg;
544  return 0;
545}
546
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order. The result is left in the LiveRegs member; the
/// LiveRegPos member caches how far backwards we have already stepped.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
    MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}
565
566static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
567                        unsigned Reg) {
568  for (const std::pair<unsigned, bool> &R : Regs)
569    if (R.first == Reg)
570      return true;
571  return false;
572}
573
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if the merge could not be performed
/// (unsupported opcode/offset combination, not cost effective, or unsafe on
/// Thumb1 because of CPSR liveness).
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    // Pick the add/sub opcode that can materialize NewBase = Base + Offset
    // for the current target mode.
    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8  : ARM::ADDri;

    if (Offset < 0) {
      Offset = - Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8  : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode); // Loads define the transferred regs.

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only inThumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  // Finally append the register list (defs for loads, uses for stores).
  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
772
773MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
774    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
775    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
776    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
777  bool IsLoad = isi32Load(Opcode);
778  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
779  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
780
781  assert(Regs.size() == 2);
782  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
783                                    TII->get(LoadStoreOpcode));
784  if (IsLoad) {
785    MIB.addReg(Regs[0].first, RegState::Define)
786       .addReg(Regs[1].first, RegState::Define);
787  } else {
788    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
789       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
790  }
791  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
792  return MIB.getInstr();
793}
794
/// Call MergeOps and update MemOps and merges accordingly on success.
/// Builds a single merged LDRD/STRD or load/store-multiple for \p Cand,
/// erases the instructions it replaces and repairs liveness flags (kill /
/// undef / implicit super-register defs) in the affected range.
/// Returns the merged instruction, or nullptr if no merge was possible.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers, after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        // Each super-register def is recorded only once.
        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge. The replacement is inserted directly after the latest
  // instruction of the candidate.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  // Prefer the LDRD/STRD form when the candidate allows it, otherwise fall
  // back to a load/store multiple.
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    // Stores never collect implicit super-register defs above.
    assert(ImpDefs.empty());
  }

  return Merged;
}
911
/// Return true if \p Offset is encodable as the immediate of a t2LDRDi8 or
/// t2STRDi8: a multiple of 4 whose magnitude fits the scaled 8 bit field.
static bool isValidLSDoubleOffset(int Offset) {
  // t2LDRDi8/t2STRDi8 encode an 8 bit immediate which is internally
  // multiplied by 4.
  unsigned Magnitude = abs(Offset);
  return Magnitude < 1024 && (Magnitude & 3) == 0;
}
918
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
/// MemOps holds memory operations on a common base register; each maximal run
/// of entries at consecutive memory offsets that also satisfies the LDM/STM
/// (or t2LDRD/t2STRD) register constraints becomes one MergeCandidate.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    // Undef values get UINT_MAX so they can never pass the ascending
    // register-number check below and thus never join a multiple.
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid vldm/vstm starting with an odd register number as that
    // needs more uops than single vldrs.
    if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;

    // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
    // deprecated; LDM to PC is fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      // Only instructions at strictly consecutive memory offsets can merge.
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      // LDRD/STRD transfers exactly two registers, hence Count <= 1.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    // A single instruction is never an actual merge.
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
1015
/// Map a load/store-multiple opcode to its base-updating (writeback) form for
/// the given addressing submode. Note that the Thumb2 and VFP variants only
/// provide the IA and DB submodes; other combinations are unreachable.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}
1082
/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
/// The result is the signed byte offset applied to \p Reg; it is 0 whenever
/// \p MI is not a plain "Reg = Reg +/- imm" carrying the same predicate
/// (\p Pred, \p PredReg) as the memory operation being considered.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  // NOTE(review): the Thumb1 entries (tADDi8/tSUBi8/tADDspi/tSUBspi) use a
  // scale of 4 — presumably their immediate operand is held in words here;
  // confirm against the instruction definitions before relying on this.
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  // The add/sub must read and write exactly \p Reg and be predicated the same
  // way as the memory operation.
  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(&MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  // Reject flag-setting forms: erasing them could drop CPSR values that a
  // later instruction reads.
  if (CheckCPSRDef && definesCPSR(&MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}
1113
1114/// Searches for an increment or decrement of \p Reg before \p MBBI.
1115static MachineBasicBlock::iterator
1116findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
1117                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1118  Offset = 0;
1119  MachineBasicBlock &MBB = *MBBI->getParent();
1120  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
1121  MachineBasicBlock::iterator EndMBBI = MBB.end();
1122  if (MBBI == BeginMBBI)
1123    return EndMBBI;
1124
1125  // Skip debug values.
1126  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
1127  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
1128    --PrevMBBI;
1129
1130  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
1131  return Offset == 0 ? EndMBBI : PrevMBBI;
1132}
1133
1134/// Searches for a increment or decrement of \p Reg after \p MBBI.
1135static MachineBasicBlock::iterator
1136findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
1137                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
1138  Offset = 0;
1139  MachineBasicBlock &MBB = *MBBI->getParent();
1140  MachineBasicBlock::iterator EndMBBI = MBB.end();
1141  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
1142  // Skip debug values.
1143  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
1144    ++NextMBBI;
1145  if (NextMBBI == EndMBBI)
1146    return EndMBBI;
1147
1148  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
1149  return Offset == 0 ? EndMBBI : NextMBBI;
1150}
1151
/// Fold proceeding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
///
/// Returns true if \p MI was replaced by a writeback form.
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  // Thumb1 is already using updating loads/stores.
  if (isThumb1) return false;

  const MachineOperand &BaseOP = MI->getOperand(0);
  unsigned Base = BaseOP.getReg();
  bool BaseKill = BaseOP.isKill();
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  // (Operands from index 2 onward are registers: the predicate register
  // followed by the transfer list — see the OpNum = 3 copy loop below.)
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // First try an inc/dec immediately before the memory op: a decrement by the
  // transfer size can be folded by flipping the submode (ia->db, ib->da)...
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  if (Mode == ARM_AM::ia && Offset == -Bytes) {
    Mode = ARM_AM::db;
  } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
    Mode = ARM_AM::da;
  } else {
    // ...otherwise try an inc/dec immediately after it, which must match the
    // submode's direction to become a plain writeback.
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
        ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
      return false;
  }
  // The add/sub is now folded into the memory op; remove it.
  MBB.erase(MergeInstr);

  // Rebuild the instruction in its updating (writeback) form.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
1217
1218static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
1219                                             ARM_AM::AddrOpc Mode) {
1220  switch (Opc) {
1221  case ARM::LDRi12:
1222    return ARM::LDR_PRE_IMM;
1223  case ARM::STRi12:
1224    return ARM::STR_PRE_IMM;
1225  case ARM::VLDRS:
1226    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1227  case ARM::VLDRD:
1228    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1229  case ARM::VSTRS:
1230    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1231  case ARM::VSTRD:
1232    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1233  case ARM::t2LDRi8:
1234  case ARM::t2LDRi12:
1235    return ARM::t2LDR_PRE;
1236  case ARM::t2STRi8:
1237  case ARM::t2STRi12:
1238    return ARM::t2STR_PRE;
1239  default: llvm_unreachable("Unhandled opcode!");
1240  }
1241}
1242
1243static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
1244                                              ARM_AM::AddrOpc Mode) {
1245  switch (Opc) {
1246  case ARM::LDRi12:
1247    return ARM::LDR_POST_IMM;
1248  case ARM::STRi12:
1249    return ARM::STR_POST_IMM;
1250  case ARM::VLDRS:
1251    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
1252  case ARM::VLDRD:
1253    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
1254  case ARM::VSTRS:
1255    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
1256  case ARM::VSTRD:
1257    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
1258  case ARM::t2LDRi8:
1259  case ARM::t2LDRi12:
1260    return ARM::t2LDR_POST;
1261  case ARM::t2STRi8:
1262  case ARM::t2STRi12:
1263    return ARM::t2STR_POST;
1264  default: llvm_unreachable("Unhandled opcode!");
1265  }
1266}
1267
/// Fold proceeding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
/// an inc/dec before the memory op yields a pre-indexed form, one after it a
/// post-indexed form. Returns true if \p MI was replaced.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only an existing immediate offset of zero can be turned into an indexed
  // form.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  // Prefer folding an inc/dec found before the memory op (pre-indexed form),
  // falling back to one found after it (post-indexed form).
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  // AM5 (VLDR/VSTR) is replaced by a one-register VLDM/VSTM, whose writeback
  // forms only model increment-after (IA) and decrement-before (DB); hence
  // add-before and sub-after are excluded for isAM5.
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  // The add/sub is now folded into the memory op; remove it.
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        // The post-indexed form encodes its offset via am2offset_imm, which
        // also carries a (vestigial) zero offset register.
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigal zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  // Remove the original, non-updating memory operation.
  MBB.erase(MBBI);

  return true;
}
1373
/// Fold a preceding or trailing inc/dec of the base register into a
/// t2LDRDi8/t2STRDi8, producing the pre- or post-indexed t2LDRD/t2STRD form.
/// Returns true if \p MI was replaced.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  // Only an existing immediate offset of zero can be turned into an indexed
  // form.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if base register is the same as one
  // of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  // An inc/dec of 8 (the LDRD/STRD transfer size) before the memory op gives
  // a pre-indexed access; one after it gives a post-indexed access.
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  // The add/sub is now folded into the memory op; remove it.
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  // The loads place the base (writeback) def after the transfer registers,
  // the stores place it first.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}
1433
1434/// Returns true if instruction is a memory operation that this pass is capable
1435/// of operating on.
1436static bool isMemoryOp(const MachineInstr &MI) {
1437  unsigned Opcode = MI.getOpcode();
1438  switch (Opcode) {
1439  case ARM::VLDRS:
1440  case ARM::VSTRS:
1441  case ARM::VLDRD:
1442  case ARM::VSTRD:
1443  case ARM::LDRi12:
1444  case ARM::STRi12:
1445  case ARM::tLDRi:
1446  case ARM::tSTRi:
1447  case ARM::tLDRspi:
1448  case ARM::tSTRspi:
1449  case ARM::t2LDRi8:
1450  case ARM::t2LDRi12:
1451  case ARM::t2STRi8:
1452  case ARM::t2STRi12:
1453    break;
1454  default:
1455    return false;
1456  }
1457  if (!MI.getOperand(1).isReg())
1458    return false;
1459
1460  // When no memory operands are present, conservatively assume unaligned,
1461  // volatile, unfoldable.
1462  if (!MI.hasOneMemOperand())
1463    return false;
1464
1465  const MachineMemOperand &MMO = **MI.memoperands_begin();
1466
1467  // Don't touch volatile memory accesses - we may be changing their order.
1468  if (MMO.isVolatile())
1469    return false;
1470
1471  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
1472  // not.
1473  if (MMO.getAlignment() < 4)
1474    return false;
1475
1476  // str <undef> could probably be eliminated entirely, but for now we just want
1477  // to avoid making a mess of it.
1478  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
1479  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
1480    return false;
1481
1482  // Likewise don't mess with references to undefined addresses.
1483  if (MI.getOperand(1).isUndef())
1484    return false;
1485
1486  return true;
1487}
1488
1489static void InsertLDR_STR(MachineBasicBlock &MBB,
1490                          MachineBasicBlock::iterator &MBBI,
1491                          int Offset, bool isDef,
1492                          DebugLoc DL, unsigned NewOpc,
1493                          unsigned Reg, bool RegDeadKill, bool RegUndef,
1494                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
1495                          bool OffKill, bool OffUndef,
1496                          ARMCC::CondCodes Pred, unsigned PredReg,
1497                          const TargetInstrInfo *TII, bool isT2) {
1498  if (isDef) {
1499    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1500                                      TII->get(NewOpc))
1501      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
1502      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1503    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1504  } else {
1505    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
1506                                      TII->get(NewOpc))
1507      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
1508      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
1509    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
1510  }
1511}
1512
/// Rewrite an LDRD/STRD that either triggers the Cortex-M3 errata 602117 or
/// uses a register pair the ARM-mode LDRD/STRD encoding cannot express
/// (non-consecutive, or first register odd) into an equivalent LDM/STM or a
/// pair of single loads/stores. Returns true if a rewrite happened; \p MBBI
/// is then updated to the instruction following the erased one.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg  = MI->getOperand(1).getReg();
  // NOTE(review): register parity/adjacency is derived from the Dwarf
  // register numbers here — assumed to match the architectural GPR numbering;
  // confirm against the register info tables.
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  // For loads the flag of interest is dead, for stores it is kill.
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill  = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to a
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and base register is killed, it may have been
    // re-defed by the load, make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      // Emit the odd (non-clobbering) half first, then the even half that
      // overlaps the base.
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  // Erase the original LDRD/STRD; MBBI now points at the next instruction.
  MBBI = MBB.erase(MBBI);
  return true;
}
1631
/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops. Walks the block bottom-up,
/// collecting chains of mergeable single loads/stores into MemOps, converts
/// each finished chain into merge candidates (FormCandidates), then performs
/// the merges and folds base-register add/sub updates into the merged ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  // State of the chain currently being collected; CurrBase == 0 means no
  // chain is open. Ops are only appended while opcode, base register and
  // predicate all match.
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  // Iterate bottom-up; I is always one past the instruction under inspection,
  // so erasures performed by FixInvalidRegPairOp stay safe.
  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(*MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          // MemOps is kept sorted by ascending offset.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; The op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
      // remember them because we may still be able to merge add/sub into them.
      MergeBaseCandidates.push_back(MBBI);
    }


    // If we are here then the chain is broken; Extract candidates for a merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  // Flush the final chain, if any.
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from end to begin of the basic
  // block later; This is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        // Merging failed; still try to fold a base update into each
        // individual instruction.
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      // Single-instruction candidate: only a base-update fold is possible.
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}
1783
/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
/// into the preceding stack restore so it directly restore the value of LR
/// into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  // The block must end in one of the recognized return instructions, with at
  // least one instruction in front of it.
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    // Ignore any DBG_VALUE instructions.
    while (PrevI->isDebugValue() && PrevI != MBB.begin())
      --PrevI;
    MachineInstr *PrevMI = PrevI;
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // The pop can only feed the return if LR is the last register in the
      // register list (the trailing operand).
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      // NOTE(review): the guarding 'if' above also admits DA/DB/IB variants,
      // yet only the IA forms survive this assert -- presumably LR can never
      // be the trailing register of those variants once it reached here;
      // confirm before relying on it.
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite the pop to load straight into PC and delete the return.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
1828
1829bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
1830  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1831  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
1832      MBBI->getOpcode() != ARM::tBX_RET)
1833    return false;
1834
1835  MachineBasicBlock::iterator Prev = MBBI;
1836  --Prev;
1837  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
1838    return false;
1839
1840  for (auto Use : Prev->uses())
1841    if (Use.isKill()) {
1842      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
1843          .addReg(Use.getReg(), RegState::Kill))
1844          .copyImplicitOps(&*MBBI);
1845      MBB.erase(MBBI);
1846      MBB.erase(Prev);
1847      return true;
1848    }
1849
1850  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
1851}
1852
1853bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1854  MF = &Fn;
1855  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1856  TL = STI->getTargetLowering();
1857  AFI = Fn.getInfo<ARMFunctionInfo>();
1858  TII = STI->getInstrInfo();
1859  TRI = STI->getRegisterInfo();
1860
1861  RegClassInfoValid = false;
1862  isThumb2 = AFI->isThumb2Function();
1863  isThumb1 = AFI->isThumbFunction() && !isThumb2;
1864
1865  bool Modified = false;
1866  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
1867       ++MFI) {
1868    MachineBasicBlock &MBB = *MFI;
1869    Modified |= LoadStoreMultipleOpti(MBB);
1870    if (STI->hasV5TOps())
1871      Modified |= MergeReturnIntoLDM(MBB);
1872    if (isThumb1)
1873      Modified |= CombineMovBx(MBB);
1874  }
1875
1876  Allocator.DestroyAll();
1877  return Modified;
1878}
1879
namespace llvm {
// Forward declaration of the pass initializer; its definition is emitted by
// the INITIALIZE_PASS macro below, and it is called from the
// ARMPreAllocLoadStoreOpt constructor.
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
}

// Human-readable pass name, shared by getPassName() and pass registration.
#define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
  "ARM pre- register allocation load / store optimization pass"
1886
1887namespace {
1888  /// Pre- register allocation pass that move load / stores from consecutive
1889  /// locations close to make it more likely they will be combined later.
1890  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
1891    static char ID;
1892    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
1893      initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
1894    }
1895
1896    const DataLayout *TD;
1897    const TargetInstrInfo *TII;
1898    const TargetRegisterInfo *TRI;
1899    const ARMSubtarget *STI;
1900    MachineRegisterInfo *MRI;
1901    MachineFunction *MF;
1902
1903    bool runOnMachineFunction(MachineFunction &Fn) override;
1904
1905    const char *getPassName() const override {
1906      return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
1907    }
1908
1909  private:
1910    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
1911                          unsigned &NewOpc, unsigned &EvenReg,
1912                          unsigned &OddReg, unsigned &BaseReg,
1913                          int &Offset,
1914                          unsigned &PredReg, ARMCC::CondCodes &Pred,
1915                          bool &isT2);
1916    bool RescheduleOps(MachineBasicBlock *MBB,
1917                       SmallVectorImpl<MachineInstr *> &Ops,
1918                       unsigned Base, bool isLd,
1919                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
1920    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
1921  };
1922  char ARMPreAllocLoadStoreOpt::ID = 0;
1923}
1924
// Register the pre-RA pass and define the initializer declared above.
INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
1927
1928bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
1929  TD = &Fn.getDataLayout();
1930  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
1931  TII = STI->getInstrInfo();
1932  TRI = STI->getRegisterInfo();
1933  MRI = &Fn.getRegInfo();
1934  MF  = &Fn;
1935
1936  bool Modified = false;
1937  for (MachineBasicBlock &MFI : Fn)
1938    Modified |= RescheduleLoadStoreInstrs(&MFI);
1939
1940  return Modified;
1941}
1942
1943static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
1944                                      MachineBasicBlock::iterator I,
1945                                      MachineBasicBlock::iterator E,
1946                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
1947                                      SmallSet<unsigned, 4> &MemRegs,
1948                                      const TargetRegisterInfo *TRI) {
1949  // Are there stores / loads / calls between them?
1950  // FIXME: This is overly conservative. We should make use of alias information
1951  // some day.
1952  SmallSet<unsigned, 4> AddedRegPressure;
1953  while (++I != E) {
1954    if (I->isDebugValue() || MemOps.count(&*I))
1955      continue;
1956    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
1957      return false;
1958    if (isLd && I->mayStore())
1959      return false;
1960    if (!isLd) {
1961      if (I->mayLoad())
1962        return false;
1963      // It's not safe to move the first 'str' down.
1964      // str r1, [r0]
1965      // strh r5, [r0]
1966      // str r4, [r0, #+4]
1967      if (I->mayStore())
1968        return false;
1969    }
1970    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
1971      MachineOperand &MO = I->getOperand(j);
1972      if (!MO.isReg())
1973        continue;
1974      unsigned Reg = MO.getReg();
1975      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
1976        return false;
1977      if (Reg != Base && !MemRegs.count(Reg))
1978        AddedRegPressure.insert(Reg);
1979    }
1980  }
1981
1982  // Estimate register pressure increase due to the transformation.
1983  if (MemRegs.size() <= 4)
1984    // Ok if we are moving small number of instructions.
1985    return true;
1986  return AddedRegPressure.size() <= MemRegs.size() * 2;
1987}
1988
1989bool
1990ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
1991                                          DebugLoc &dl, unsigned &NewOpc,
1992                                          unsigned &FirstReg,
1993                                          unsigned &SecondReg,
1994                                          unsigned &BaseReg, int &Offset,
1995                                          unsigned &PredReg,
1996                                          ARMCC::CondCodes &Pred,
1997                                          bool &isT2) {
1998  // Make sure we're allowed to generate LDRD/STRD.
1999  if (!STI->hasV5TEOps())
2000    return false;
2001
2002  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
2003  unsigned Scale = 1;
2004  unsigned Opcode = Op0->getOpcode();
2005  if (Opcode == ARM::LDRi12) {
2006    NewOpc = ARM::LDRD;
2007  } else if (Opcode == ARM::STRi12) {
2008    NewOpc = ARM::STRD;
2009  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
2010    NewOpc = ARM::t2LDRDi8;
2011    Scale = 4;
2012    isT2 = true;
2013  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
2014    NewOpc = ARM::t2STRDi8;
2015    Scale = 4;
2016    isT2 = true;
2017  } else {
2018    return false;
2019  }
2020
2021  // Make sure the base address satisfies i64 ld / st alignment requirement.
2022  // At the moment, we ignore the memoryoperand's value.
2023  // If we want to use AliasAnalysis, we should check it accordingly.
2024  if (!Op0->hasOneMemOperand() ||
2025      (*Op0->memoperands_begin())->isVolatile())
2026    return false;
2027
2028  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
2029  const Function *Func = MF->getFunction();
2030  unsigned ReqAlign = STI->hasV6Ops()
2031    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
2032    : 8;  // Pre-v6 need 8-byte align
2033  if (Align < ReqAlign)
2034    return false;
2035
2036  // Then make sure the immediate offset fits.
2037  int OffImm = getMemoryOpOffset(Op0);
2038  if (isT2) {
2039    int Limit = (1 << 8) * Scale;
2040    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
2041      return false;
2042    Offset = OffImm;
2043  } else {
2044    ARM_AM::AddrOpc AddSub = ARM_AM::add;
2045    if (OffImm < 0) {
2046      AddSub = ARM_AM::sub;
2047      OffImm = - OffImm;
2048    }
2049    int Limit = (1 << 8) * Scale;
2050    if (OffImm >= Limit || (OffImm & (Scale-1)))
2051      return false;
2052    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
2053  }
2054  FirstReg = Op0->getOperand(0).getReg();
2055  SecondReg = Op1->getOperand(0).getReg();
2056  if (FirstReg == SecondReg)
2057    return false;
2058  BaseReg = Op0->getOperand(1).getReg();
2059  Pred = getInstrPredicate(Op0, PredReg);
2060  dl = Op0->getDebugLoc();
2061  return true;
2062}
2063
/// Try to move the loads / stores in Ops (all using the same base register
/// but different offsets) next to each other so a later pass can merge them.
/// Entries are consumed (popped) from Ops as they are handled. MI2LocMap
/// supplies each instruction's original position, used for ordering and
/// distance checks. Returns true if anything was moved or combined.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVectorImpl<MachineInstr *> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
    int LOffset = getMemoryOpOffset(LHS);
    int ROffset = getMemoryOpOffset(RHS);
    assert(LHS == RHS || LOffset != ROffset);
    return LOffset > ROffset;
  });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Walk Ops from the back (increasing offsets) and grow a run of
    // same-opcode, gap-free, uniformly-sized accesses. FirstOp/LastOp track
    // the run's extremes in original program order.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      // Every op in a run must map onto the same LDM/STM opcode.
      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      // Offsets must be consecutive: previous offset plus transfer size.
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      // Collect the run's instructions and their transfer registers.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Give up on this run: drop its entries from the worklist.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        // Loads gather at the earliest op; stores sink to the latest one.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          // Constrain both virtual registers to the class LDRD/STRD allows.
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          // The new pair replaces both originals.
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg,  ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          // No pairing possible: just move the whole run to InsertPos.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
2224
/// Scan one basic block, bucket its unconditional loads and stores by base
/// register, and run RescheduleOps() on every bucket with more than one
/// entry. The bucketing restarts at barriers (calls / terminators) and
/// whenever the same base+offset combination is seen a second time.
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  // Maps each (non-debug) instruction to its position in the block.
  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  // Base register -> loads (resp. stores) seen so far with that base.
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  // Base registers in the order they were first seen.
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  // Outer loop: one iteration per barrier-delimited region.
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr *MI = MBBI;
      if (MI->isCall() || MI->isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI->isDebugValue())
        MI2LocMap[MI] = ++Loc;

      // Only unpredicated single loads/stores are candidates.
      if (!isMemoryOp(*MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI->getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI->getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          // A second access to the same base+offset ends the region.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2LdsMap[Base].push_back(MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          // Same duplicate-offset check as for loads above.
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(MI);
        } else {
          Base2StsMap[Base].push_back(MI);
          StBases.push_back(MI->getOperand(1).getReg());
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    // If a barrier ended this region, reset the per-region state and carry
    // on with the remainder of the block.
    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}
2330
2331
2332/// Returns an instance of the load / store optimization pass.
2333FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
2334  if (PreAlloc)
2335    return new ARMPreAllocLoadStoreOpt();
2336  return new ARMLoadStoreOpt();
2337}
2338
2339