1327952Sdim//===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===//
2218885Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6218885Sdim//
7218885Sdim//===----------------------------------------------------------------------===//
8218885Sdim//
9218885Sdim// This file contains the Thumb1 implementation of TargetFrameLowering class.
10218885Sdim//
11218885Sdim//===----------------------------------------------------------------------===//
12218885Sdim
13218885Sdim#include "Thumb1FrameLowering.h"
14321369Sdim#include "ARMBaseInstrInfo.h"
15321369Sdim#include "ARMBaseRegisterInfo.h"
16218885Sdim#include "ARMMachineFunctionInfo.h"
17321369Sdim#include "ARMSubtarget.h"
18321369Sdim#include "Thumb1InstrInfo.h"
19321369Sdim#include "ThumbRegisterInfo.h"
20327952Sdim#include "Utils/ARMBaseInfo.h"
21321369Sdim#include "llvm/ADT/BitVector.h"
22321369Sdim#include "llvm/ADT/STLExtras.h"
23321369Sdim#include "llvm/ADT/SmallVector.h"
24296417Sdim#include "llvm/CodeGen/LivePhysRegs.h"
25321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h"
26218885Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
27218885Sdim#include "llvm/CodeGen/MachineFunction.h"
28321369Sdim#include "llvm/CodeGen/MachineInstr.h"
29218885Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
30276479Sdim#include "llvm/CodeGen/MachineModuleInfo.h"
31321369Sdim#include "llvm/CodeGen/MachineOperand.h"
32218885Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
33327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h"
34327952Sdim#include "llvm/CodeGen/TargetOpcodes.h"
35327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h"
36321369Sdim#include "llvm/IR/DebugLoc.h"
37327952Sdim#include "llvm/MC/MCContext.h"
38321369Sdim#include "llvm/MC/MCDwarf.h"
39327952Sdim#include "llvm/MC/MCRegisterInfo.h"
40321369Sdim#include "llvm/Support/Compiler.h"
41321369Sdim#include "llvm/Support/ErrorHandling.h"
42327952Sdim#include "llvm/Support/MathExtras.h"
43327952Sdim#include <bitset>
44321369Sdim#include <cassert>
45321369Sdim#include <iterator>
46321369Sdim#include <vector>
47218885Sdim
48218885Sdimusing namespace llvm;
49218885Sdim
// Constructs the Thumb1 frame lowering by forwarding the subtarget to the
// ARMFrameLowering base class; no additional initialization is done here.
50276479SdimThumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
51276479Sdim    : ARMFrameLowering(sti) {}
52276479Sdim
53226633Sdimbool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
54314564Sdim  const MachineFrameInfo &MFI = MF.getFrameInfo();
55314564Sdim  unsigned CFSize = MFI.getMaxCallFrameSize();
56218885Sdim  // It's not always a good idea to include the call frame as part of the
57218885Sdim  // stack frame. ARM (especially Thumb) has small immediate offset to
58218885Sdim  // address the stack frame. So a large call frame can cause poor codegen
59218885Sdim  // and may even makes it impossible to scavenge a register.
60218885Sdim  if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
61218885Sdim    return false;
62218885Sdim
63314564Sdim  return !MFI.hasVarSizedObjects();
64218885Sdim}
65218885Sdim
66353358Sdimstatic void
67353358SdimemitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
68353358Sdim                             MachineBasicBlock::iterator &MBBI,
69353358Sdim                             const TargetInstrInfo &TII, const DebugLoc &dl,
70353358Sdim                             const ThumbRegisterInfo &MRI, int NumBytes,
71353358Sdim                             unsigned ScratchReg, unsigned MIFlags) {
72353358Sdim  // If it would take more than three instructions to adjust the stack pointer
73353358Sdim  // using tADDspi/tSUBspi, load an immediate instead.
74353358Sdim  if (std::abs(NumBytes) > 508 * 3) {
75353358Sdim    // We use a different codepath here from the normal
76353358Sdim    // emitThumbRegPlusImmediate so we don't have to deal with register
77353358Sdim    // scavenging. (Scavenging could try to use the emergency spill slot
78353358Sdim    // before we've actually finished setting up the stack.)
79353358Sdim    if (ScratchReg == ARM::NoRegister)
80353358Sdim      report_fatal_error("Failed to emit Thumb1 stack adjustment");
81353358Sdim    MachineFunction &MF = *MBB.getParent();
82353358Sdim    const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
83353358Sdim    if (ST.genExecuteOnly()) {
84353358Sdim      BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg)
85353358Sdim        .addImm(NumBytes).setMIFlags(MIFlags);
86353358Sdim    } else {
87353358Sdim      MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL,
88353358Sdim                            0, MIFlags);
89353358Sdim    }
90353358Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP)
91353358Sdim      .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill)
92353358Sdim      .add(predOps(ARMCC::AL));
93353358Sdim    return;
94353358Sdim  }
95353358Sdim  // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
96353358Sdim  // won't change.
97221345Sdim  emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
98221345Sdim                            MRI, MIFlags);
99353358Sdim
100218885Sdim}
101218885Sdim
102353358Sdimstatic void emitCallSPUpdate(MachineBasicBlock &MBB,
103353358Sdim                             MachineBasicBlock::iterator &MBBI,
104353358Sdim                             const TargetInstrInfo &TII, const DebugLoc &dl,
105353358Sdim                             const ThumbRegisterInfo &MRI, int NumBytes,
106353358Sdim                             unsigned MIFlags = MachineInstr::NoFlags) {
107353358Sdim  emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
108353358Sdim                            MRI, MIFlags);
109353358Sdim}
110353358Sdim
111353358Sdim
112309124SdimMachineBasicBlock::iterator Thumb1FrameLowering::
113249423SdimeliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
114249423Sdim                              MachineBasicBlock::iterator I) const {
115249423Sdim  const Thumb1InstrInfo &TII =
116288943Sdim      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
117288943Sdim  const ThumbRegisterInfo *RegInfo =
118288943Sdim      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
119249423Sdim  if (!hasReservedCallFrame(MF)) {
120249423Sdim    // If we have alloca, convert as follows:
121249423Sdim    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
122249423Sdim    // ADJCALLSTACKUP   -> add, sp, sp, amount
123309124Sdim    MachineInstr &Old = *I;
124309124Sdim    DebugLoc dl = Old.getDebugLoc();
125321369Sdim    unsigned Amount = TII.getFrameSize(Old);
126249423Sdim    if (Amount != 0) {
127249423Sdim      // We need to keep the stack aligned properly.  To do this, we round the
128249423Sdim      // amount of space needed for the outgoing arguments up to the next
129249423Sdim      // alignment boundary.
130321369Sdim      Amount = alignTo(Amount, getStackAlignment());
131249423Sdim
132249423Sdim      // Replace the pseudo instruction with a new instruction...
133309124Sdim      unsigned Opc = Old.getOpcode();
134249423Sdim      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
135353358Sdim        emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount);
136249423Sdim      } else {
137249423Sdim        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
138353358Sdim        emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount);
139249423Sdim      }
140249423Sdim    }
141249423Sdim  }
142309124Sdim  return MBB.erase(I);
143249423Sdim}
144249423Sdim
/// Emit the Thumb1 function prologue at the start of \p MBB.
///
/// In order, this: allocates the argument-register save area; returns early
/// (after at most one further SP adjustment) when the function has no stack
/// frame; sizes the callee-saved spill areas; emits CFI for the saved
/// registers; sets up the frame pointer when hasFP(MF); allocates the
/// remaining local area; optionally realigns SP through R4; and sets up the
/// base pointer when one is required. The computed area sizes and offsets
/// are recorded in ARMFunctionInfo.
145288943Sdimvoid Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
146288943Sdim                                       MachineBasicBlock &MBB) const {
147218885Sdim  MachineBasicBlock::iterator MBBI = MBB.begin();
148314564Sdim  MachineFrameInfo &MFI = MF.getFrameInfo();
149218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
150276479Sdim  MachineModuleInfo &MMI = MF.getMMI();
151276479Sdim  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
152288943Sdim  const ThumbRegisterInfo *RegInfo =
153288943Sdim      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
154218885Sdim  const Thumb1InstrInfo &TII =
155288943Sdim      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
156218885Sdim
157288943Sdim  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
158314564Sdim  unsigned NumBytes = MFI.getStackSize();
159276479Sdim  assert(NumBytes >= ArgRegsSaveSize &&
160276479Sdim         "ArgRegsSaveSize is included in NumBytes");
161314564Sdim  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
162296417Sdim
163296417Sdim  // Debug location must be unknown since the first debug location is used
164296417Sdim  // to determine the end of the prologue.
165296417Sdim  DebugLoc dl;
166341825Sdim
167360784Sdim  Register FramePtr = RegInfo->getFrameRegister(MF);
168218885Sdim  unsigned BasePtr = RegInfo->getBaseRegister();
  // Running CFA offset; decreased as the prologue grows the stack.
169276479Sdim  int CFAOffset = 0;
170218885Sdim
171218885Sdim  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
172218885Sdim  NumBytes = (NumBytes + 3) & ~3;
173314564Sdim  MFI.setStackSize(NumBytes);
174218885Sdim
175218885Sdim  // Determine the sizes of each callee-save spill areas and record which frame
176218885Sdim  // belongs to which callee-save spill areas.
177218885Sdim  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
178218885Sdim  int FramePtrSpillFI = 0;
179218885Sdim
  // Allocate the argument-register save area first and describe it in CFI.
180276479Sdim  if (ArgRegsSaveSize) {
181353358Sdim    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
182353358Sdim                                 ARM::NoRegister, MachineInstr::FrameSetup);
183276479Sdim    CFAOffset -= ArgRegsSaveSize;
184314564Sdim    unsigned CFIIndex = MF.addFrameInst(
185276479Sdim        MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
186276479Sdim    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
187280031Sdim        .addCFIIndex(CFIIndex)
188280031Sdim        .setMIFlags(MachineInstr::FrameSetup);
189276479Sdim  }
190218885Sdim
  // Without a stack frame, the only remaining work is a single SP adjustment
  // for whatever part of the stack size is not the argument save area.
191218885Sdim  if (!AFI->hasStackFrame()) {
192276479Sdim    if (NumBytes - ArgRegsSaveSize != 0) {
193353358Sdim      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
194353358Sdim                                   -(NumBytes - ArgRegsSaveSize),
195353358Sdim                                   ARM::NoRegister, MachineInstr::FrameSetup);
196276479Sdim      CFAOffset -= NumBytes - ArgRegsSaveSize;
197314564Sdim      unsigned CFIIndex = MF.addFrameInst(
198276479Sdim          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
199276479Sdim      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
200280031Sdim          .addCFIIndex(CFIIndex)
201280031Sdim          .setMIFlags(MachineInstr::FrameSetup);
202276479Sdim    }
203218885Sdim    return;
204218885Sdim  }
205218885Sdim
  // Classify each callee-saved register into a spill area and remember the
  // frame index of the slot that holds the frame pointer.
206218885Sdim  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
207218885Sdim    unsigned Reg = CSI[i].getReg();
208218885Sdim    int FI = CSI[i].getFrameIdx();
209218885Sdim    switch (Reg) {
210276479Sdim    case ARM::R8:
211276479Sdim    case ARM::R9:
212276479Sdim    case ARM::R10:
213276479Sdim    case ARM::R11:
214314564Sdim      if (STI.splitFramePushPop(MF)) {
215276479Sdim        GPRCS2Size += 4;
216276479Sdim        break;
217276479Sdim      }
218314564Sdim      LLVM_FALLTHROUGH;
219218885Sdim    case ARM::R4:
220218885Sdim    case ARM::R5:
221218885Sdim    case ARM::R6:
222218885Sdim    case ARM::R7:
223218885Sdim    case ARM::LR:
224218885Sdim      if (Reg == FramePtr)
225218885Sdim        FramePtrSpillFI = FI;
226218885Sdim      GPRCS1Size += 4;
227218885Sdim      break;
228218885Sdim    default:
229218885Sdim      DPRCSSize += 8;
230218885Sdim    }
231218885Sdim  }
232218885Sdim
  // Step over a tPUSH at the insertion point, if there is one, so that the
  // instructions emitted below are placed after it.
233218885Sdim  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
234218885Sdim    ++MBBI;
235218885Sdim  }
236218885Sdim
237218885Sdim  // Determine starting offsets of spill areas.
238276479Sdim  unsigned DPRCSOffset  = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize);
239218885Sdim  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
240218885Sdim  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
241249423Sdim  bool HasFP = hasFP(MF);
242249423Sdim  if (HasFP)
243314564Sdim    AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
244249423Sdim                                NumBytes);
245218885Sdim  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
246218885Sdim  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
247218885Sdim  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
  // From here on NumBytes is only the part of the frame below the
  // callee-saved areas.
248218885Sdim  NumBytes = DPRCSOffset;
249218885Sdim
  // When tryFoldSPUpdateIntoPushPop succeeds on the instruction before MBBI,
  // the remaining NumBytes are accounted to GPR area 1 for CFI purposes and
  // no separate SP update is emitted later (NumBytes becomes 0).
250261991Sdim  int FramePtrOffsetInBlock = 0;
251276479Sdim  unsigned adjustedGPRCS1Size = GPRCS1Size;
252314564Sdim  if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
253314564Sdim      tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
254261991Sdim    FramePtrOffsetInBlock = NumBytes;
255276479Sdim    adjustedGPRCS1Size += NumBytes;
256261991Sdim    NumBytes = 0;
257261991Sdim  }
258261991Sdim
259276479Sdim  if (adjustedGPRCS1Size) {
260276479Sdim    CFAOffset -= adjustedGPRCS1Size;
261314564Sdim    unsigned CFIIndex = MF.addFrameInst(
262276479Sdim        MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
263276479Sdim    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
264280031Sdim        .addCFIIndex(CFIIndex)
265280031Sdim        .setMIFlags(MachineInstr::FrameSetup);
266276479Sdim  }
  // Emit CFI offsets for R0-R7 and LR, and also for R8-R12 when the pushes
  // are not split (splitFramePushPop is false).
267276479Sdim  for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
268276479Sdim         E = CSI.end(); I != E; ++I) {
269276479Sdim    unsigned Reg = I->getReg();
270276479Sdim    int FI = I->getFrameIdx();
271276479Sdim    switch (Reg) {
272276479Sdim    case ARM::R8:
273276479Sdim    case ARM::R9:
274276479Sdim    case ARM::R10:
275276479Sdim    case ARM::R11:
276276479Sdim    case ARM::R12:
277314564Sdim      if (STI.splitFramePushPop(MF))
278276479Sdim        break;
279321369Sdim      LLVM_FALLTHROUGH;
280276479Sdim    case ARM::R0:
281276479Sdim    case ARM::R1:
282276479Sdim    case ARM::R2:
283276479Sdim    case ARM::R3:
284276479Sdim    case ARM::R4:
285276479Sdim    case ARM::R5:
286276479Sdim    case ARM::R6:
287276479Sdim    case ARM::R7:
288276479Sdim    case ARM::LR:
289314564Sdim      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
290314564Sdim          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
291276479Sdim      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
292280031Sdim          .addCFIIndex(CFIIndex)
293280031Sdim          .setMIFlags(MachineInstr::FrameSetup);
294276479Sdim      break;
295276479Sdim    }
296276479Sdim  }
297276479Sdim
298218885Sdim  // Adjust FP so it points to the stack slot that contains the previous FP.
299249423Sdim  if (HasFP) {
300296417Sdim    FramePtrOffsetInBlock +=
301314564Sdim        MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
    // The immediate is passed in units of 4 bytes, hence the division.
302321369Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
303321369Sdim        .addReg(ARM::SP)
304321369Sdim        .addImm(FramePtrOffsetInBlock / 4)
305321369Sdim        .setMIFlags(MachineInstr::FrameSetup)
306321369Sdim        .add(predOps(ARMCC::AL));
    // The CFA is now described relative to the frame pointer: with an offset
    // when FP is not at the current SP, as a plain register change otherwise.
307276479Sdim    if(FramePtrOffsetInBlock) {
308276479Sdim      CFAOffset += FramePtrOffsetInBlock;
309314564Sdim      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
310276479Sdim          nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
311276479Sdim      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
312280031Sdim          .addCFIIndex(CFIIndex)
313280031Sdim          .setMIFlags(MachineInstr::FrameSetup);
314276479Sdim    } else {
315276479Sdim      unsigned CFIIndex =
316314564Sdim          MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
317276479Sdim              nullptr, MRI->getDwarfRegNum(FramePtr, true)));
318276479Sdim      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
319280031Sdim          .addCFIIndex(CFIIndex)
320280031Sdim          .setMIFlags(MachineInstr::FrameSetup);
321276479Sdim    }
322224145Sdim    if (NumBytes > 508)
323224145Sdim      // If offset is > 508 then sp cannot be adjusted in a single instruction,
324218885Sdim      // try restoring from fp instead.
325218885Sdim      AFI->setShouldRestoreSPFromFP(true);
326218885Sdim  }
327218885Sdim
328314564Sdim  // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
329314564Sdim  // and tMOVr instructions. We don't need to add any call frame information
330314564Sdim  // in-between these instructions, because they do not modify the high
331314564Sdim  // registers.
332314564Sdim  while (true) {
333314564Sdim    MachineBasicBlock::iterator OldMBBI = MBBI;
334314564Sdim    // Skip a run of tMOVr instructions
335314564Sdim    while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
336314564Sdim      MBBI++;
337314564Sdim    if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
338314564Sdim      MBBI++;
339314564Sdim    } else {
340314564Sdim      // We have reached an instruction which is not a push, so the previous
341314564Sdim      // run of tMOVr instructions (which may have been empty) was not part of
342314564Sdim      // the prologue. Reset MBBI back to the last PUSH of the prologue.
343314564Sdim      MBBI = OldMBBI;
344314564Sdim      break;
345314564Sdim    }
346314564Sdim  }
347314564Sdim
348314564Sdim  // Emit call frame information for the callee-saved high registers.
349314564Sdim  for (auto &I : CSI) {
350314564Sdim    unsigned Reg = I.getReg();
351314564Sdim    int FI = I.getFrameIdx();
352314564Sdim    switch (Reg) {
353314564Sdim    case ARM::R8:
354314564Sdim    case ARM::R9:
355314564Sdim    case ARM::R10:
356314564Sdim    case ARM::R11:
357314564Sdim    case ARM::R12: {
358314564Sdim      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
359314564Sdim          nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
360314564Sdim      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
361314564Sdim          .addCFIIndex(CFIIndex)
362314564Sdim          .setMIFlags(MachineInstr::FrameSetup);
363314564Sdim      break;
364314564Sdim    }
365314564Sdim    default:
366314564Sdim      break;
367314564Sdim    }
368314564Sdim  }
369314564Sdim
370276479Sdim  if (NumBytes) {
371218885Sdim    // Insert it after all the callee-save spills.
372353358Sdim    //
373353358Sdim    // For a large stack frame, we might need a scratch register to store
374353358Sdim    // the size of the frame.  We know all callee-save registers are free
375353358Sdim    // at this point in the prologue, so pick one.
376353358Sdim    unsigned ScratchRegister = ARM::NoRegister;
377353358Sdim    for (auto &I : CSI) {
378353358Sdim      unsigned Reg = I.getReg();
      // Only low registers qualify, and the frame pointer must be left alone
      // when it is in use.
379353358Sdim      if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
380353358Sdim        ScratchRegister = Reg;
381353358Sdim        break;
382353358Sdim      }
383353358Sdim    }
384353358Sdim    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
385353358Sdim                                 ScratchRegister, MachineInstr::FrameSetup);
    // With a frame pointer the CFA is already FP-relative, so only the
    // SP-relative case needs its CFA offset updated.
386276479Sdim    if (!HasFP) {
387276479Sdim      CFAOffset -= NumBytes;
388314564Sdim      unsigned CFIIndex = MF.addFrameInst(
389276479Sdim          MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
390276479Sdim      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
391280031Sdim          .addCFIIndex(CFIIndex)
392280031Sdim          .setMIFlags(MachineInstr::FrameSetup);
393276479Sdim    }
394276479Sdim  }
395218885Sdim
396249423Sdim  if (STI.isTargetELF() && HasFP)
397314564Sdim    MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
398314564Sdim                            AFI->getFramePtrSpillOffset());
399218885Sdim
400218885Sdim  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
401218885Sdim  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
402218885Sdim  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
403218885Sdim
404327952Sdim  if (RegInfo->needsStackRealignment(MF)) {
405327952Sdim    const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment());
406327952Sdim    // Emit the following sequence, using R4 as a temporary, since we cannot use
407327952Sdim    // SP as a source or destination register for the shifts:
408327952Sdim    // mov  r4, sp
409327952Sdim    // lsrs r4, r4, #NrBitsToZero
410327952Sdim    // lsls r4, r4, #NrBitsToZero
411327952Sdim    // mov  sp, r4
412327952Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
413327952Sdim      .addReg(ARM::SP, RegState::Kill)
414327952Sdim      .add(predOps(ARMCC::AL));
415226633Sdim
416327952Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
417327952Sdim      .addDef(ARM::CPSR)
418327952Sdim      .addReg(ARM::R4, RegState::Kill)
419327952Sdim      .addImm(NrBitsToZero)
420327952Sdim      .add(predOps(ARMCC::AL));
421327952Sdim
422327952Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
423327952Sdim      .addDef(ARM::CPSR)
424327952Sdim      .addReg(ARM::R4, RegState::Kill)
425327952Sdim      .addImm(NrBitsToZero)
426327952Sdim      .add(predOps(ARMCC::AL));
427327952Sdim
428327952Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
429327952Sdim      .addReg(ARM::R4, RegState::Kill)
430327952Sdim      .add(predOps(ARMCC::AL));
431327952Sdim
    // After realignment the epilogue is told to restore SP from FP.
432327952Sdim    AFI->setShouldRestoreSPFromFP(true);
433327952Sdim  }
434327952Sdim
435218885Sdim  // If we need a base pointer, set it up here. It's whatever the value
436218885Sdim  // of the stack pointer is at this point. Any variable size objects
437218885Sdim  // will be allocated after this, so we can still use the base pointer
438218885Sdim  // to reference locals.
439218885Sdim  if (RegInfo->hasBasePointer(MF))
440321369Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
441321369Sdim        .addReg(ARM::SP)
442321369Sdim        .add(predOps(ARMCC::AL));
443221345Sdim
444218885Sdim  // If the frame has variable sized objects then the epilogue must restore
445218885Sdim  // the sp from fp. We can assume there's an FP here since hasFP already
446218885Sdim  // checks for hasVarSizedObjects.
447314564Sdim  if (MFI.hasVarSizedObjects())
448218885Sdim    AFI->setShouldRestoreSPFromFP(true);
449321369Sdim
450321369Sdim  // In some cases, virtual registers have been introduced, e.g. by uses of
451321369Sdim  // emitThumbRegPlusImmInReg.
452321369Sdim  MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
453218885Sdim}
454218885Sdim
455309124Sdimstatic bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
456309124Sdim  if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() &&
457309124Sdim      isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
458218885Sdim    return true;
459309124Sdim  else if (MI.getOpcode() == ARM::tPOP) {
460218885Sdim    return true;
461314564Sdim  } else if (MI.getOpcode() == ARM::tMOVr) {
462360784Sdim    Register Dst = MI.getOperand(0).getReg();
463360784Sdim    Register Src = MI.getOperand(1).getReg();
464314564Sdim    return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
465314564Sdim            ARM::hGPRRegClass.contains(Dst));
466218885Sdim  }
467218885Sdim  return false;
468218885Sdim}
469218885Sdim
/// Emit the Thumb1 function epilogue before the first terminator of \p MBB.
///
/// Without a stack frame this only undoes the local allocation. Otherwise it
/// walks back over the callee-saved restores, then either recomputes SP from
/// the frame pointer (when ARMFunctionInfo requests it) or adds the local
/// area size back to SP, folding that into a pop when
/// tryFoldSPUpdateIntoPushPop allows it. Finally, the pop special fix-up is
/// emitted when needPopSpecialFixUp(MF) says one is required.
470218885Sdimvoid Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
471218885Sdim                                   MachineBasicBlock &MBB) const {
472296417Sdim  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
473296417Sdim  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
474314564Sdim  MachineFrameInfo &MFI = MF.getFrameInfo();
475218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
476288943Sdim  const ThumbRegisterInfo *RegInfo =
477288943Sdim      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
478218885Sdim  const Thumb1InstrInfo &TII =
479288943Sdim      *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
480218885Sdim
481288943Sdim  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
482314564Sdim  int NumBytes = (int)MFI.getStackSize();
483276479Sdim  assert((unsigned)NumBytes >= ArgRegsSaveSize &&
484276479Sdim         "ArgRegsSaveSize is included in NumBytes");
485288943Sdim  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
486360784Sdim  Register FramePtr = RegInfo->getFrameRegister(MF);
487218885Sdim
488218885Sdim  if (!AFI->hasStackFrame()) {
    // No stack frame: release everything except the argument save area.
489276479Sdim    if (NumBytes - ArgRegsSaveSize != 0)
490353358Sdim      emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
491353358Sdim                                   NumBytes - ArgRegsSaveSize, ARM::NoRegister,
492353358Sdim                                   MachineInstr::NoFlags);
493218885Sdim  } else {
494218885Sdim    // Unwind MBBI to point to first LDR / VLDRD.
495218885Sdim    if (MBBI != MBB.begin()) {
496218885Sdim      do
497218885Sdim        --MBBI;
498309124Sdim      while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs));
      // The walk stops on the first non-restore instruction (or at the block
      // start); step forward again unless that instruction is a restore.
499309124Sdim      if (!isCSRestore(*MBBI, CSRegs))
500218885Sdim        ++MBBI;
501218885Sdim    }
502218885Sdim
503218885Sdim    // Move SP to start of FP callee save spill area.
504218885Sdim    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
505218885Sdim                 AFI->getGPRCalleeSavedArea2Size() +
506276479Sdim                 AFI->getDPRCalleeSavedAreaSize() +
507276479Sdim                 ArgRegsSaveSize);
508218885Sdim
509218885Sdim    if (AFI->shouldRestoreSPFromFP()) {
510218885Sdim      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
511218885Sdim      // Reset SP based on frame pointer only if the stack frame extends beyond
512218885Sdim      // frame pointer stack slot, the target is ELF and the function has FP, or
513218885Sdim      // the target uses var sized objects.
514218885Sdim      if (NumBytes) {
515314564Sdim        assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
516218885Sdim               "No scratch register to restore SP from FP!");
        // Compute FP - NumBytes into R4, then copy R4 into SP.
517221345Sdim        emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
518221345Sdim                                  TII, *RegInfo);
519321369Sdim        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
520321369Sdim            .addReg(ARM::R4)
521321369Sdim            .add(predOps(ARMCC::AL));
522218885Sdim      } else
523321369Sdim        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
524321369Sdim            .addReg(FramePtr)
525321369Sdim            .add(predOps(ARMCC::AL));
526218885Sdim    } else {
527353358Sdim      // For a large stack frame, we might need a scratch register to store
528353358Sdim      // the size of the frame.  We know all callee-save registers are free
529353358Sdim      // at this point in the epilogue, so pick one.
530353358Sdim      unsigned ScratchRegister = ARM::NoRegister;
531353358Sdim      bool HasFP = hasFP(MF);
532353358Sdim      for (auto &I : MFI.getCalleeSavedInfo()) {
533353358Sdim        unsigned Reg = I.getReg();
534353358Sdim        if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
535353358Sdim          ScratchRegister = Reg;
536353358Sdim          break;
537353358Sdim        }
538353358Sdim      }
      // If MBBI is a tBX_RET immediately preceded by a tPOP, the SP update is
      // folded into that tPOP or, failing that, inserted before it. Otherwise
      // the same is attempted at MBBI itself.
539296417Sdim      if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
540309124Sdim          &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
541276479Sdim        MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
542309124Sdim        if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
543353358Sdim          emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
544353358Sdim                                       ScratchRegister, MachineInstr::NoFlags);
545309124Sdim      } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
546353358Sdim        emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
547353358Sdim                                     ScratchRegister, MachineInstr::NoFlags);
548218885Sdim    }
549218885Sdim  }
550218885Sdim
551296417Sdim  if (needPopSpecialFixUp(MF)) {
552296417Sdim    bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
    // Done is only read by the assert; the cast keeps release builds quiet.
553296417Sdim    (void)Done;
554296417Sdim    assert(Done && "Emission of the special fixup failed!?");
555296417Sdim  }
556296417Sdim}
557296417Sdim
558296417Sdimbool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
559296417Sdim  if (!needPopSpecialFixUp(*MBB.getParent()))
560296417Sdim    return true;
561296417Sdim
562296417Sdim  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
563296417Sdim  return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false);
564296417Sdim}
565296417Sdim
566296417Sdimbool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
567296417Sdim  ARMFunctionInfo *AFI =
568296417Sdim      const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>();
569296417Sdim  if (AFI->getArgRegsSaveSize())
570296417Sdim    return true;
571296417Sdim
572296417Sdim  // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
573314564Sdim  for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
574280031Sdim    if (CSI.getReg() == ARM::LR)
575296417Sdim      return true;
576218885Sdim
577296417Sdim  return false;
578296417Sdim}
579218885Sdim
580327952Sdimstatic void findTemporariesForLR(const BitVector &GPRsNoLRSP,
581327952Sdim                                 const BitVector &PopFriendly,
582327952Sdim                                 const LivePhysRegs &UsedRegs, unsigned &PopReg,
583327952Sdim                                 unsigned &TmpReg) {
584327952Sdim  PopReg = TmpReg = 0;
585327952Sdim  for (auto Reg : GPRsNoLRSP.set_bits()) {
586327952Sdim    if (!UsedRegs.contains(Reg)) {
587327952Sdim      // Remember the first pop-friendly register and exit.
588327952Sdim      if (PopFriendly.test(Reg)) {
589327952Sdim        PopReg = Reg;
590327952Sdim        TmpReg = 0;
591327952Sdim        break;
592327952Sdim      }
593327952Sdim      // Otherwise, remember that the register will be available to
594327952Sdim      // save a pop-friendly register.
595327952Sdim      TmpReg = Reg;
596327952Sdim    }
597327952Sdim  }
598327952Sdim}
599327952Sdim
600296417Sdimbool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
601296417Sdim                                              bool DoIt) const {
602296417Sdim  MachineFunction &MF = *MBB.getParent();
603296417Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
604296417Sdim  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
605296417Sdim  const TargetInstrInfo &TII = *STI.getInstrInfo();
606296417Sdim  const ThumbRegisterInfo *RegInfo =
607296417Sdim      static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
608218885Sdim
609296417Sdim  // If MBBI is a return instruction, or is a tPOP followed by a return
610296417Sdim  // instruction in the successor BB, we may be able to directly restore
611296417Sdim  // LR in the PC.
612296417Sdim  // This is only possible with v5T ops (v4T can't change the Thumb bit via
613296417Sdim  // a POP PC instruction), and only if we do not need to emit any SP update.
614296417Sdim  // Otherwise, we need a temporary register to pop the value
615296417Sdim  // and copy that value into LR.
616296417Sdim  auto MBBI = MBB.getFirstTerminator();
617296417Sdim  bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
618296417Sdim  if (CanRestoreDirectly) {
619296417Sdim    if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
620296417Sdim      CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
621296417Sdim                            MBBI->getOpcode() == ARM::tPOP_RET);
622296417Sdim    else {
623296417Sdim      auto MBBI_prev = MBBI;
624296417Sdim      MBBI_prev--;
625296417Sdim      assert(MBBI_prev->getOpcode() == ARM::tPOP);
626296417Sdim      assert(MBB.succ_size() == 1);
627296417Sdim      if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
628296417Sdim        MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
629296417Sdim      else
630296417Sdim        CanRestoreDirectly = false;
631296417Sdim    }
632296417Sdim  }
633280031Sdim
634296417Sdim  if (CanRestoreDirectly) {
635296417Sdim    if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
636296417Sdim      return true;
637296417Sdim    MachineInstrBuilder MIB =
638321369Sdim        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
639321369Sdim            .add(predOps(ARMCC::AL));
640296417Sdim    // Copy implicit ops and popped registers, if any.
641296417Sdim    for (auto MO: MBBI->operands())
642296417Sdim      if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
643321369Sdim        MIB.add(MO);
644296417Sdim    MIB.addReg(ARM::PC, RegState::Define);
645296417Sdim    // Erase the old instruction (tBX_RET or tPOP).
646296417Sdim    MBB.erase(MBBI);
647296417Sdim    return true;
648296417Sdim  }
649280031Sdim
650296417Sdim  // Look for a temporary register to use.
651296417Sdim  // First, compute the liveness information.
652321369Sdim  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
653321369Sdim  LivePhysRegs UsedRegs(TRI);
654309124Sdim  UsedRegs.addLiveOuts(MBB);
655296417Sdim  // The semantic of pristines changed recently and now,
656296417Sdim  // the callee-saved registers that are touched in the function
657296417Sdim  // are not part of the pristines set anymore.
658296417Sdim  // Add those callee-saved now.
659321369Sdim  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
660296417Sdim  for (unsigned i = 0; CSRegs[i]; ++i)
661296417Sdim    UsedRegs.addReg(CSRegs[i]);
662280031Sdim
663296417Sdim  DebugLoc dl = DebugLoc();
664296417Sdim  if (MBBI != MBB.end()) {
665296417Sdim    dl = MBBI->getDebugLoc();
666296417Sdim    auto InstUpToMBBI = MBB.end();
667296417Sdim    while (InstUpToMBBI != MBBI)
668296417Sdim      // The pre-decrement is on purpose here.
669296417Sdim      // We want to have the liveness right before MBBI.
670296417Sdim      UsedRegs.stepBackward(*--InstUpToMBBI);
671296417Sdim  }
672280031Sdim
673296417Sdim  // Look for a register that can be directly use in the POP.
674296417Sdim  unsigned PopReg = 0;
675296417Sdim  // And some temporary register, just in case.
676296417Sdim  unsigned TemporaryReg = 0;
677296417Sdim  BitVector PopFriendly =
678321369Sdim      TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID));
679341825Sdim  // R7 may be used as a frame pointer, hence marked as not generally
680341825Sdim  // allocatable, however there's no reason to not use it as a temporary for
681341825Sdim  // restoring LR.
682341825Sdim  if (STI.useR7AsFramePointer())
683341825Sdim    PopFriendly.set(ARM::R7);
684341825Sdim
685296417Sdim  assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
686296417Sdim  // Rebuild the GPRs from the high registers because they are removed
687296417Sdim  // form the GPR reg class for thumb1.
688296417Sdim  BitVector GPRsNoLRSP =
689321369Sdim      TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID));
690296417Sdim  GPRsNoLRSP |= PopFriendly;
691296417Sdim  GPRsNoLRSP.reset(ARM::LR);
692296417Sdim  GPRsNoLRSP.reset(ARM::SP);
693296417Sdim  GPRsNoLRSP.reset(ARM::PC);
694327952Sdim  findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
695327952Sdim
696341825Sdim  // If we couldn't find a pop-friendly register, try restoring LR before
697341825Sdim  // popping the other callee-saved registers, so we could use one of them as a
698341825Sdim  // temporary.
699327952Sdim  bool UseLDRSP = false;
700327952Sdim  if (!PopReg && MBBI != MBB.begin()) {
701327952Sdim    auto PrevMBBI = MBBI;
702327952Sdim    PrevMBBI--;
703327952Sdim    if (PrevMBBI->getOpcode() == ARM::tPOP) {
704341825Sdim      UsedRegs.stepBackward(*PrevMBBI);
705327952Sdim      findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
706341825Sdim      if (PopReg) {
707341825Sdim        MBBI = PrevMBBI;
708341825Sdim        UseLDRSP = true;
709341825Sdim      }
710280031Sdim    }
711218885Sdim  }
712296417Sdim
713296417Sdim  if (!DoIt && !PopReg && !TemporaryReg)
714296417Sdim    return false;
715296417Sdim
716296417Sdim  assert((PopReg || TemporaryReg) && "Cannot get LR");
717296417Sdim
718327952Sdim  if (UseLDRSP) {
719327952Sdim    assert(PopReg && "Do not know how to get LR");
720327952Sdim    // Load the LR via LDR tmp, [SP, #off]
721327952Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi))
722327952Sdim      .addReg(PopReg, RegState::Define)
723327952Sdim      .addReg(ARM::SP)
724327952Sdim      .addImm(MBBI->getNumExplicitOperands() - 2)
725327952Sdim      .add(predOps(ARMCC::AL));
726327952Sdim    // Move from the temporary register to the LR.
727327952Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
728327952Sdim      .addReg(ARM::LR, RegState::Define)
729327952Sdim      .addReg(PopReg, RegState::Kill)
730327952Sdim      .add(predOps(ARMCC::AL));
731327952Sdim    // Advance past the pop instruction.
732327952Sdim    MBBI++;
733327952Sdim    // Increment the SP.
734353358Sdim    emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo,
735353358Sdim                                 ArgRegsSaveSize + 4, ARM::NoRegister,
736353358Sdim                                 MachineInstr::NoFlags);
737327952Sdim    return true;
738327952Sdim  }
739327952Sdim
740296417Sdim  if (TemporaryReg) {
741296417Sdim    assert(!PopReg && "Unnecessary MOV is about to be inserted");
742296417Sdim    PopReg = PopFriendly.find_first();
743321369Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
744321369Sdim        .addReg(TemporaryReg, RegState::Define)
745321369Sdim        .addReg(PopReg, RegState::Kill)
746321369Sdim        .add(predOps(ARMCC::AL));
747296417Sdim  }
748296417Sdim
749296417Sdim  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
750296417Sdim    // We couldn't use the direct restoration above, so
751296417Sdim    // perform the opposite conversion: tPOP_RET to tPOP.
752296417Sdim    MachineInstrBuilder MIB =
753321369Sdim        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
754321369Sdim            .add(predOps(ARMCC::AL));
755296417Sdim    bool Popped = false;
756296417Sdim    for (auto MO: MBBI->operands())
757296417Sdim      if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
758296417Sdim          MO.getReg() != ARM::PC) {
759321369Sdim        MIB.add(MO);
760296417Sdim        if (!MO.isImplicit())
761296417Sdim          Popped = true;
762296417Sdim      }
763296417Sdim    // Is there anything left to pop?
764296417Sdim    if (!Popped)
765296417Sdim      MBB.erase(MIB.getInstr());
766296417Sdim    // Erase the old instruction.
767296417Sdim    MBB.erase(MBBI);
768321369Sdim    MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
769321369Sdim               .add(predOps(ARMCC::AL));
770296417Sdim  }
771296417Sdim
772296417Sdim  assert(PopReg && "Do not know how to get LR");
773321369Sdim  BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
774321369Sdim      .add(predOps(ARMCC::AL))
775296417Sdim      .addReg(PopReg, RegState::Define);
776296417Sdim
777353358Sdim  emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize,
778353358Sdim                               ARM::NoRegister, MachineInstr::NoFlags);
779296417Sdim
780321369Sdim  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
781321369Sdim      .addReg(ARM::LR, RegState::Define)
782321369Sdim      .addReg(PopReg, RegState::Kill)
783321369Sdim      .add(predOps(ARMCC::AL));
784296417Sdim
785296417Sdim  if (TemporaryReg)
786321369Sdim    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
787321369Sdim        .addReg(PopReg, RegState::Define)
788321369Sdim        .addReg(TemporaryReg, RegState::Kill)
789321369Sdim        .add(predOps(ARMCC::AL));
790296417Sdim
791296417Sdim  return true;
792218885Sdim}
793218885Sdim
794327952Sdimusing ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>;
795327952Sdim
796314564Sdim// Return the first iteraror after CurrentReg which is present in EnabledRegs,
797314564Sdim// or OrderEnd if no further registers are in that set. This does not advance
798314564Sdim// the iterator fiorst, so returns CurrentReg if it is in EnabledRegs.
799327952Sdimstatic const unsigned *findNextOrderedReg(const unsigned *CurrentReg,
800327952Sdim                                          const ARMRegSet &EnabledRegs,
801327952Sdim                                          const unsigned *OrderEnd) {
802327952Sdim  while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg])
803314564Sdim    ++CurrentReg;
804314564Sdim  return CurrentReg;
805314564Sdim}
806314564Sdim
807218885Sdimbool Thumb1FrameLowering::
808218885SdimspillCalleeSavedRegisters(MachineBasicBlock &MBB,
809218885Sdim                          MachineBasicBlock::iterator MI,
810218885Sdim                          const std::vector<CalleeSavedInfo> &CSI,
811218885Sdim                          const TargetRegisterInfo *TRI) const {
812218885Sdim  if (CSI.empty())
813218885Sdim    return false;
814218885Sdim
815218885Sdim  DebugLoc DL;
816288943Sdim  const TargetInstrInfo &TII = *STI.getInstrInfo();
817314564Sdim  MachineFunction &MF = *MBB.getParent();
818314564Sdim  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
819314564Sdim      MF.getSubtarget().getRegisterInfo());
820218885Sdim
821327952Sdim  ARMRegSet LoRegsToSave; // r0-r7, lr
822327952Sdim  ARMRegSet HiRegsToSave; // r8-r11
823327952Sdim  ARMRegSet CopyRegs;     // Registers which can be used after pushing
824327952Sdim                          // LoRegs for saving HiRegs.
825314564Sdim
826218885Sdim  for (unsigned i = CSI.size(); i != 0; --i) {
827218885Sdim    unsigned Reg = CSI[i-1].getReg();
828218885Sdim
829314564Sdim    if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
830327952Sdim      LoRegsToSave[Reg] = true;
831314564Sdim    } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
832327952Sdim      HiRegsToSave[Reg] = true;
833314564Sdim    } else {
834314564Sdim      llvm_unreachable("callee-saved register of unexpected class");
835218885Sdim    }
836218885Sdim
837314564Sdim    if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
838314564Sdim        !MF.getRegInfo().isLiveIn(Reg) &&
839314564Sdim        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
840327952Sdim      CopyRegs[Reg] = true;
841314564Sdim  }
842218885Sdim
843314564Sdim  // Unused argument registers can be used for the high register saving.
844314564Sdim  for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
845314564Sdim    if (!MF.getRegInfo().isLiveIn(ArgReg))
846327952Sdim      CopyRegs[ArgReg] = true;
847314564Sdim
848314564Sdim  // Push the low registers and lr
849321369Sdim  const MachineRegisterInfo &MRI = MF.getRegInfo();
850327952Sdim  if (!LoRegsToSave.none()) {
851321369Sdim    MachineInstrBuilder MIB =
852321369Sdim        BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
853314564Sdim    for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
854327952Sdim      if (LoRegsToSave[Reg]) {
855321369Sdim        bool isKill = !MRI.isLiveIn(Reg);
856321369Sdim        if (isKill && !MRI.isReserved(Reg))
857314564Sdim          MBB.addLiveIn(Reg);
858314564Sdim
859314564Sdim        MIB.addReg(Reg, getKillRegState(isKill));
860314564Sdim      }
861314564Sdim    }
862314564Sdim    MIB.setMIFlags(MachineInstr::FrameSetup);
863218885Sdim  }
864314564Sdim
865314564Sdim  // Push the high registers. There are no store instructions that can access
866314564Sdim  // these registers directly, so we have to move them to low registers, and
867314564Sdim  // push them. This might take multiple pushes, as it is possible for there to
868314564Sdim  // be fewer low registers available than high registers which need saving.
869314564Sdim
870314564Sdim  // These are in reverse order so that in the case where we need to use
871314564Sdim  // multiple PUSH instructions, the order of the registers on the stack still
872314564Sdim  // matches the unwind info. They need to be swicthed back to ascending order
873314564Sdim  // before adding to the PUSH instruction.
874314564Sdim  static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
875314564Sdim                                         ARM::R5, ARM::R4, ARM::R3,
876314564Sdim                                         ARM::R2, ARM::R1, ARM::R0};
877314564Sdim  static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
878314564Sdim
879314564Sdim  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
880314564Sdim  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
881314564Sdim
882314564Sdim  // Find the first register to save.
883314564Sdim  const unsigned *HiRegToSave = findNextOrderedReg(
884314564Sdim      std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
885314564Sdim
886314564Sdim  while (HiRegToSave != AllHighRegsEnd) {
887314564Sdim    // Find the first low register to use.
888314564Sdim    const unsigned *CopyReg =
889314564Sdim        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
890314564Sdim
891314564Sdim    // Create the PUSH, but don't insert it yet (the MOVs need to come first).
892353358Sdim    MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH))
893353358Sdim                                      .add(predOps(ARMCC::AL))
894353358Sdim                                      .setMIFlags(MachineInstr::FrameSetup);
895314564Sdim
896314564Sdim    SmallVector<unsigned, 4> RegsToPush;
897314564Sdim    while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
898327952Sdim      if (HiRegsToSave[*HiRegToSave]) {
899321369Sdim        bool isKill = !MRI.isLiveIn(*HiRegToSave);
900321369Sdim        if (isKill && !MRI.isReserved(*HiRegToSave))
901314564Sdim          MBB.addLiveIn(*HiRegToSave);
902314564Sdim
903314564Sdim        // Emit a MOV from the high reg to the low reg.
904321369Sdim        BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
905321369Sdim            .addReg(*CopyReg, RegState::Define)
906321369Sdim            .addReg(*HiRegToSave, getKillRegState(isKill))
907353358Sdim            .add(predOps(ARMCC::AL))
908353358Sdim            .setMIFlags(MachineInstr::FrameSetup);
909314564Sdim
910314564Sdim        // Record the register that must be added to the PUSH.
911314564Sdim        RegsToPush.push_back(*CopyReg);
912314564Sdim
913314564Sdim        CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
914314564Sdim        HiRegToSave =
915314564Sdim            findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
916314564Sdim      }
917314564Sdim    }
918314564Sdim
919314564Sdim    // Add the low registers to the PUSH, in ascending order.
920321369Sdim    for (unsigned Reg : llvm::reverse(RegsToPush))
921314564Sdim      PushMIB.addReg(Reg, RegState::Kill);
922314564Sdim
923314564Sdim    // Insert the PUSH instruction after the MOVs.
924314564Sdim    MBB.insert(MI, PushMIB);
925314564Sdim  }
926314564Sdim
927218885Sdim  return true;
928218885Sdim}
929218885Sdim
930218885Sdimbool Thumb1FrameLowering::
931218885SdimrestoreCalleeSavedRegisters(MachineBasicBlock &MBB,
932218885Sdim                            MachineBasicBlock::iterator MI,
933327952Sdim                            std::vector<CalleeSavedInfo> &CSI,
934218885Sdim                            const TargetRegisterInfo *TRI) const {
935218885Sdim  if (CSI.empty())
936218885Sdim    return false;
937218885Sdim
938218885Sdim  MachineFunction &MF = *MBB.getParent();
939218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
940288943Sdim  const TargetInstrInfo &TII = *STI.getInstrInfo();
941314564Sdim  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
942314564Sdim      MF.getSubtarget().getRegisterInfo());
943218885Sdim
944251662Sdim  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
945296417Sdim  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
946314564Sdim
947327952Sdim  ARMRegSet LoRegsToRestore;
948327952Sdim  ARMRegSet HiRegsToRestore;
949314564Sdim  // Low registers (r0-r7) which can be used to restore the high registers.
950327952Sdim  ARMRegSet CopyRegs;
951314564Sdim
952314564Sdim  for (CalleeSavedInfo I : CSI) {
953314564Sdim    unsigned Reg = I.getReg();
954314564Sdim
955314564Sdim    if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
956327952Sdim      LoRegsToRestore[Reg] = true;
957314564Sdim    } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
958327952Sdim      HiRegsToRestore[Reg] = true;
959314564Sdim    } else {
960314564Sdim      llvm_unreachable("callee-saved register of unexpected class");
961314564Sdim    }
962314564Sdim
963314564Sdim    // If this is a low register not used as the frame pointer, we may want to
964314564Sdim    // use it for restoring the high registers.
965314564Sdim    if ((ARM::tGPRRegClass.contains(Reg)) &&
966314564Sdim        !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
967327952Sdim      CopyRegs[Reg] = true;
968314564Sdim  }
969314564Sdim
970314564Sdim  // If this is a return block, we may be able to use some unused return value
971314564Sdim  // registers for restoring the high regs.
972314564Sdim  auto Terminator = MBB.getFirstTerminator();
973314564Sdim  if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
974327952Sdim    CopyRegs[ARM::R0] = true;
975327952Sdim    CopyRegs[ARM::R1] = true;
976327952Sdim    CopyRegs[ARM::R2] = true;
977327952Sdim    CopyRegs[ARM::R3] = true;
978314564Sdim    for (auto Op : Terminator->implicit_operands()) {
979314564Sdim      if (Op.isReg())
980327952Sdim        CopyRegs[Op.getReg()] = false;
981314564Sdim    }
982314564Sdim  }
983314564Sdim
984314564Sdim  static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
985314564Sdim                                         ARM::R4, ARM::R5, ARM::R6, ARM::R7};
986314564Sdim  static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
987314564Sdim
988314564Sdim  const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
989314564Sdim  const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
990314564Sdim
991314564Sdim  // Find the first register to restore.
992314564Sdim  auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
993314564Sdim                                           HiRegsToRestore, AllHighRegsEnd);
994314564Sdim
995314564Sdim  while (HiRegToRestore != AllHighRegsEnd) {
996327952Sdim    assert(!CopyRegs.none());
997314564Sdim    // Find the first low register to use.
998314564Sdim    auto CopyReg =
999314564Sdim        findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
1000314564Sdim
1001314564Sdim    // Create the POP instruction.
1002321369Sdim    MachineInstrBuilder PopMIB =
1003321369Sdim        BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
1004314564Sdim
1005314564Sdim    while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
1006314564Sdim      // Add the low register to the POP.
1007314564Sdim      PopMIB.addReg(*CopyReg, RegState::Define);
1008314564Sdim
1009314564Sdim      // Create the MOV from low to high register.
1010321369Sdim      BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
1011321369Sdim          .addReg(*HiRegToRestore, RegState::Define)
1012321369Sdim          .addReg(*CopyReg, RegState::Kill)
1013321369Sdim          .add(predOps(ARMCC::AL));
1014314564Sdim
1015314564Sdim      CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
1016314564Sdim      HiRegToRestore =
1017314564Sdim          findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
1018314564Sdim    }
1019314564Sdim  }
1020314564Sdim
1021321369Sdim  MachineInstrBuilder MIB =
1022321369Sdim      BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
1023314564Sdim
1024296417Sdim  bool NeedsPop = false;
1025218885Sdim  for (unsigned i = CSI.size(); i != 0; --i) {
1026327952Sdim    CalleeSavedInfo &Info = CSI[i-1];
1027327952Sdim    unsigned Reg = Info.getReg();
1028314564Sdim
1029314564Sdim    // High registers (excluding lr) have already been dealt with
1030314564Sdim    if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
1031314564Sdim      continue;
1032314564Sdim
1033218885Sdim    if (Reg == ARM::LR) {
1034327952Sdim      Info.setRestored(false);
1035327952Sdim      if (!MBB.succ_empty() ||
1036327952Sdim          MI->getOpcode() == ARM::TCRETURNdi ||
1037327952Sdim          MI->getOpcode() == ARM::TCRETURNri)
1038296417Sdim        // LR may only be popped into PC, as part of return sequence.
1039296417Sdim        // If this isn't the return sequence, we'll need emitPopSpecialFixUp
1040296417Sdim        // to restore LR the hard way.
1041327952Sdim        // FIXME: if we don't pass any stack arguments it would be actually
1042327952Sdim        // advantageous *and* correct to do the conversion to an ordinary call
1043327952Sdim        // instruction here.
1044218885Sdim        continue;
1045327952Sdim      // Special epilogue for vararg functions. See emitEpilogue
1046327952Sdim      if (isVarArg)
1047327952Sdim        continue;
1048327952Sdim      // ARMv4T requires BX, see emitEpilogue
1049327952Sdim      if (!STI.hasV5TOps())
1050327952Sdim        continue;
1051327952Sdim
1052327952Sdim      // Pop LR into PC.
1053327952Sdim      Reg = ARM::PC;
1054327952Sdim      (*MIB).setDesc(TII.get(ARM::tPOP_RET));
1055327952Sdim      if (MI != MBB.end())
1056327952Sdim        MIB.copyImplicitOps(*MI);
1057327952Sdim      MI = MBB.erase(MI);
1058218885Sdim    }
1059218885Sdim    MIB.addReg(Reg, getDefRegState(true));
1060296417Sdim    NeedsPop = true;
1061218885Sdim  }
1062218885Sdim
1063218885Sdim  // It's illegal to emit pop instruction without operands.
1064296417Sdim  if (NeedsPop)
1065218885Sdim    MBB.insert(MI, &*MIB);
1066218885Sdim  else
1067218885Sdim    MF.DeleteMachineInstr(MIB);
1068218885Sdim
1069218885Sdim  return true;
1070218885Sdim}
1071