1235633Sdim//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
2218885Sdim//
3218885Sdim//                     The LLVM Compiler Infrastructure
4218885Sdim//
5218885Sdim// This file is distributed under the University of Illinois Open Source
6218885Sdim// License. See LICENSE.TXT for details.
7218885Sdim//
8218885Sdim//===----------------------------------------------------------------------===//
9218885Sdim//
10218885Sdim// This file contains the ARM implementation of TargetFrameLowering class.
11218885Sdim//
12218885Sdim//===----------------------------------------------------------------------===//
13218885Sdim
14218885Sdim#include "ARMFrameLowering.h"
15218885Sdim#include "ARMBaseInstrInfo.h"
16218885Sdim#include "ARMBaseRegisterInfo.h"
17218885Sdim#include "ARMMachineFunctionInfo.h"
18235633Sdim#include "MCTargetDesc/ARMAddressingModes.h"
19218885Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
20218885Sdim#include "llvm/CodeGen/MachineFunction.h"
21218885Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
22218885Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
23218885Sdim#include "llvm/CodeGen/RegisterScavenging.h"
24252723Sdim#include "llvm/IR/CallingConv.h"
25252723Sdim#include "llvm/IR/Function.h"
26252723Sdim#include "llvm/Support/CommandLine.h"
27218885Sdim#include "llvm/Target/TargetOptions.h"
28218885Sdim
29218885Sdimusing namespace llvm;
30218885Sdim
31235633Sdimstatic cl::opt<bool>
32235633SdimSpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
33235633Sdim                     cl::desc("Align ARM NEON spills in prolog and epilog"));
34235633Sdim
35235633Sdimstatic MachineBasicBlock::iterator
36235633SdimskipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
37235633Sdim                        unsigned NumAlignedDPRCS2Regs);
38235633Sdim
39218885Sdim/// hasFP - Return true if the specified function should have a dedicated frame
40218885Sdim/// pointer register.  This is true if the function has variable sized allocas
41218885Sdim/// or if frame pointer elimination is disabled.
42218885Sdimbool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
43218885Sdim  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
44218885Sdim
45235633Sdim  // iOS requires FP not to be clobbered for backtracing purpose.
46235633Sdim  if (STI.isTargetIOS())
47218885Sdim    return true;
48218885Sdim
49218885Sdim  const MachineFrameInfo *MFI = MF.getFrameInfo();
50218885Sdim  // Always eliminate non-leaf frame pointers.
51235633Sdim  return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
52235633Sdim           MFI->hasCalls()) ||
53218885Sdim          RegInfo->needsStackRealignment(MF) ||
54218885Sdim          MFI->hasVarSizedObjects() ||
55218885Sdim          MFI->isFrameAddressTaken());
56218885Sdim}
57218885Sdim
58218885Sdim/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
59218885Sdim/// not required, we reserve argument space for call sites in the function
60218885Sdim/// immediately on entry to the current function.  This eliminates the need for
61218885Sdim/// add/sub sp brackets around call sites.  Returns true if the call frame is
62218885Sdim/// included as part of the stack frame.
63218885Sdimbool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
64218885Sdim  const MachineFrameInfo *FFI = MF.getFrameInfo();
65218885Sdim  unsigned CFSize = FFI->getMaxCallFrameSize();
66218885Sdim  // It's not always a good idea to include the call frame as part of the
67218885Sdim  // stack frame. ARM (especially Thumb) has small immediate offset to
68218885Sdim  // address the stack frame. So a large call frame can cause poor codegen
69218885Sdim  // and may even makes it impossible to scavenge a register.
70218885Sdim  if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
71218885Sdim    return false;
72218885Sdim
73218885Sdim  return !MF.getFrameInfo()->hasVarSizedObjects();
74218885Sdim}
75218885Sdim
76218885Sdim/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
77218885Sdim/// call frame pseudos can be simplified.  Unlike most targets, having a FP
78218885Sdim/// is not sufficient here since we still may reference some objects via SP
79218885Sdim/// even when FP is available in Thumb2 mode.
80218885Sdimbool
81218885SdimARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
82218885Sdim  return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
83218885Sdim}
84218885Sdim
85218885Sdimstatic bool isCSRestore(MachineInstr *MI,
86218885Sdim                        const ARMBaseInstrInfo &TII,
87235633Sdim                        const uint16_t *CSRegs) {
88218885Sdim  // Integer spill area is handled with "pop".
89263509Sdim  if (isPopOpcode(MI->getOpcode())) {
90218885Sdim    // The first two operands are predicates. The last two are
91218885Sdim    // imp-def and imp-use of SP. Check everything in between.
92218885Sdim    for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
93218885Sdim      if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
94218885Sdim        return false;
95218885Sdim    return true;
96218885Sdim  }
97226890Sdim  if ((MI->getOpcode() == ARM::LDR_POST_IMM ||
98226890Sdim       MI->getOpcode() == ARM::LDR_POST_REG ||
99218885Sdim       MI->getOpcode() == ARM::t2LDR_POST) &&
100218885Sdim      isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) &&
101218885Sdim      MI->getOperand(1).getReg() == ARM::SP)
102218885Sdim    return true;
103218885Sdim
104218885Sdim  return false;
105218885Sdim}
106218885Sdim
107263509Sdimstatic void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
108263509Sdim                                 MachineBasicBlock::iterator &MBBI, DebugLoc dl,
109263509Sdim                                 const ARMBaseInstrInfo &TII, unsigned DestReg,
110263509Sdim                                 unsigned SrcReg, int NumBytes,
111263509Sdim                                 unsigned MIFlags = MachineInstr::NoFlags,
112263509Sdim                                 ARMCC::CondCodes Pred = ARMCC::AL,
113263509Sdim                                 unsigned PredReg = 0) {
114218885Sdim  if (isARM)
115263509Sdim    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
116252723Sdim                            Pred, PredReg, TII, MIFlags);
117218885Sdim  else
118263509Sdim    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
119252723Sdim                           Pred, PredReg, TII, MIFlags);
120218885Sdim}
121218885Sdim
122263509Sdimstatic void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
123263509Sdim                         MachineBasicBlock::iterator &MBBI, DebugLoc dl,
124263509Sdim                         const ARMBaseInstrInfo &TII, int NumBytes,
125263509Sdim                         unsigned MIFlags = MachineInstr::NoFlags,
126263509Sdim                         ARMCC::CondCodes Pred = ARMCC::AL,
127263509Sdim                         unsigned PredReg = 0) {
128263509Sdim  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
129263509Sdim                       MIFlags, Pred, PredReg);
130263509Sdim}
131263509Sdim
132218885Sdimvoid ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
133218885Sdim  MachineBasicBlock &MBB = MF.front();
134218885Sdim  MachineBasicBlock::iterator MBBI = MBB.begin();
135218885Sdim  MachineFrameInfo  *MFI = MF.getFrameInfo();
136218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
137218885Sdim  const ARMBaseRegisterInfo *RegInfo =
138218885Sdim    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
139218885Sdim  const ARMBaseInstrInfo &TII =
140218885Sdim    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
141218885Sdim  assert(!AFI->isThumb1OnlyFunction() &&
142218885Sdim         "This emitPrologue does not support Thumb1!");
143218885Sdim  bool isARM = !AFI->isThumbFunction();
144263509Sdim  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
145263509Sdim  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
146218885Sdim  unsigned NumBytes = MFI->getStackSize();
147218885Sdim  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
148218885Sdim  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
149218885Sdim  unsigned FramePtr = RegInfo->getFrameRegister(MF);
150218885Sdim
151218885Sdim  // Determine the sizes of each callee-save spill areas and record which frame
152218885Sdim  // belongs to which callee-save spill areas.
153218885Sdim  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
154218885Sdim  int FramePtrSpillFI = 0;
155235633Sdim  int D8SpillFI = 0;
156218885Sdim
157245431Sdim  // All calls are tail calls in GHC calling conv, and functions have no
158245431Sdim  // prologue/epilogue.
159245431Sdim  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
160245431Sdim    return;
161245431Sdim
162218885Sdim  // Allocate the vararg register save area. This is not counted in NumBytes.
163252723Sdim  if (ArgRegsSaveSize)
164252723Sdim    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
165221345Sdim                 MachineInstr::FrameSetup);
166218885Sdim
167218885Sdim  if (!AFI->hasStackFrame()) {
168218885Sdim    if (NumBytes != 0)
169221345Sdim      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
170221345Sdim                   MachineInstr::FrameSetup);
171218885Sdim    return;
172218885Sdim  }
173218885Sdim
174218885Sdim  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
175218885Sdim    unsigned Reg = CSI[i].getReg();
176218885Sdim    int FI = CSI[i].getFrameIdx();
177218885Sdim    switch (Reg) {
178263509Sdim    case ARM::R0:
179263509Sdim    case ARM::R1:
180263509Sdim    case ARM::R2:
181263509Sdim    case ARM::R3:
182218885Sdim    case ARM::R4:
183218885Sdim    case ARM::R5:
184218885Sdim    case ARM::R6:
185218885Sdim    case ARM::R7:
186218885Sdim    case ARM::LR:
187218885Sdim      if (Reg == FramePtr)
188218885Sdim        FramePtrSpillFI = FI;
189218885Sdim      GPRCS1Size += 4;
190218885Sdim      break;
191218885Sdim    case ARM::R8:
192218885Sdim    case ARM::R9:
193218885Sdim    case ARM::R10:
194218885Sdim    case ARM::R11:
195263509Sdim    case ARM::R12:
196218885Sdim      if (Reg == FramePtr)
197218885Sdim        FramePtrSpillFI = FI;
198263509Sdim      if (STI.isTargetIOS())
199218885Sdim        GPRCS2Size += 4;
200263509Sdim      else
201218885Sdim        GPRCS1Size += 4;
202218885Sdim      break;
203218885Sdim    default:
204235633Sdim      // This is a DPR. Exclude the aligned DPRCS2 spills.
205235633Sdim      if (Reg == ARM::D8)
206235633Sdim        D8SpillFI = FI;
207263509Sdim      if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
208235633Sdim        DPRCSSize += 8;
209218885Sdim    }
210218885Sdim  }
211218885Sdim
212218885Sdim  // Move past area 1.
213263509Sdim  MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush;
214263509Sdim  if (GPRCS1Size > 0)
215263509Sdim    FramePtrPush = LastPush = MBBI++;
216218885Sdim
217263509Sdim  // Determine starting offsets of spill areas.
218218885Sdim  bool HasFP = hasFP(MF);
219218885Sdim  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
220218885Sdim  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
221218885Sdim  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
222263509Sdim  int FramePtrOffsetInPush = 0;
223263509Sdim  if (HasFP) {
224263509Sdim    FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
225218885Sdim    AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
226218885Sdim                                NumBytes);
227263509Sdim  }
228218885Sdim  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
229218885Sdim  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
230218885Sdim  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
231218885Sdim
232263509Sdim  // Move past area 2.
233263509Sdim  if (GPRCS2Size > 0) {
234263509Sdim    LastPush = MBBI++;
235263509Sdim  }
236263509Sdim
237218885Sdim  // Move past area 3.
238219077Sdim  if (DPRCSSize > 0) {
239263509Sdim    LastPush = MBBI++;
240219077Sdim    // Since vpush register list cannot have gaps, there may be multiple vpush
241219077Sdim    // instructions in the prologue.
242219077Sdim    while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
243263509Sdim      LastPush = MBBI++;
244219077Sdim  }
245218885Sdim
246235633Sdim  // Move past the aligned DPRCS2 area.
247235633Sdim  if (AFI->getNumAlignedDPRCS2Regs() > 0) {
248235633Sdim    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
249235633Sdim    // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
250235633Sdim    // leaves the stack pointer pointing to the DPRCS2 area.
251235633Sdim    //
252235633Sdim    // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
253235633Sdim    NumBytes += MFI->getObjectOffset(D8SpillFI);
254235633Sdim  } else
255235633Sdim    NumBytes = DPRCSOffset;
256235633Sdim
257218885Sdim  if (NumBytes) {
258218885Sdim    // Adjust SP after all the callee-save spills.
259263509Sdim    if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) {
260263509Sdim      if (LastPush == FramePtrPush)
261263509Sdim        FramePtrOffsetInPush += NumBytes;
262263509Sdim    } else
263263509Sdim      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
264263509Sdim                   MachineInstr::FrameSetup);
265263509Sdim
266218885Sdim    if (HasFP && isARM)
267218885Sdim      // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
268218885Sdim      // Note it's not safe to do this in Thumb2 mode because it would have
269218885Sdim      // taken two instructions:
270218885Sdim      // mov sp, r7
271218885Sdim      // sub sp, #24
272218885Sdim      // If an interrupt is taken between the two instructions, then sp is in
273218885Sdim      // an inconsistent state (pointing to the middle of callee-saved area).
274218885Sdim      // The interrupt handler can end up clobbering the registers.
275218885Sdim      AFI->setShouldRestoreSPFromFP(true);
276218885Sdim  }
277218885Sdim
278263509Sdim  // Set FP to point to the stack slot that contains the previous FP.
279263509Sdim  // For iOS, FP is R7, which has now been stored in spill area 1.
280263509Sdim  // Otherwise, if this is not iOS, all the callee-saved registers go
281263509Sdim  // into spill area 1, including the FP in R11.  In either case, it
282263509Sdim  // is in area one and the adjustment needs to take place just after
283263509Sdim  // that push.
284263509Sdim  if (HasFP)
285263509Sdim    emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII,
286263509Sdim                         FramePtr, ARM::SP, FramePtrOffsetInPush,
287263509Sdim                         MachineInstr::FrameSetup);
288263509Sdim
289263509Sdim
290218885Sdim  if (STI.isTargetELF() && hasFP(MF))
291218885Sdim    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
292218885Sdim                             AFI->getFramePtrSpillOffset());
293218885Sdim
294218885Sdim  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
295218885Sdim  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
296218885Sdim  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
297218885Sdim
298218885Sdim  // If we need dynamic stack realignment, do it here. Be paranoid and make
299218885Sdim  // sure if we also have VLAs, we have a base pointer for frame access.
300235633Sdim  // If aligned NEON registers were spilled, the stack has already been
301235633Sdim  // realigned.
302235633Sdim  if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
303218885Sdim    unsigned MaxAlign = MFI->getMaxAlignment();
304218885Sdim    assert (!AFI->isThumb1OnlyFunction());
305218885Sdim    if (!AFI->isThumbFunction()) {
306218885Sdim      // Emit bic sp, sp, MaxAlign
307218885Sdim      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
308218885Sdim                                          TII.get(ARM::BICri), ARM::SP)
309218885Sdim                                  .addReg(ARM::SP, RegState::Kill)
310218885Sdim                                  .addImm(MaxAlign-1)));
311218885Sdim    } else {
312218885Sdim      // We cannot use sp as source/dest register here, thus we're emitting the
313218885Sdim      // following sequence:
314218885Sdim      // mov r4, sp
315218885Sdim      // bic r4, r4, MaxAlign
316218885Sdim      // mov sp, r4
317218885Sdim      // FIXME: It will be better just to find spare register here.
318224145Sdim      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
319224145Sdim        .addReg(ARM::SP, RegState::Kill));
320218885Sdim      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
321218885Sdim                                          TII.get(ARM::t2BICri), ARM::R4)
322218885Sdim                                  .addReg(ARM::R4, RegState::Kill)
323218885Sdim                                  .addImm(MaxAlign-1)));
324224145Sdim      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
325224145Sdim        .addReg(ARM::R4, RegState::Kill));
326218885Sdim    }
327218885Sdim
328218885Sdim    AFI->setShouldRestoreSPFromFP(true);
329218885Sdim  }
330218885Sdim
331218885Sdim  // If we need a base pointer, set it up here. It's whatever the value
332218885Sdim  // of the stack pointer is at this point. Any variable size objects
333218885Sdim  // will be allocated after this, so we can still use the base pointer
334218885Sdim  // to reference locals.
335221345Sdim  // FIXME: Clarify FrameSetup flags here.
336218885Sdim  if (RegInfo->hasBasePointer(MF)) {
337218885Sdim    if (isARM)
338218885Sdim      BuildMI(MBB, MBBI, dl,
339218885Sdim              TII.get(ARM::MOVr), RegInfo->getBaseRegister())
340218885Sdim        .addReg(ARM::SP)
341218885Sdim        .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
342218885Sdim    else
343224145Sdim      AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
344224145Sdim                             RegInfo->getBaseRegister())
345224145Sdim        .addReg(ARM::SP));
346218885Sdim  }
347218885Sdim
348218885Sdim  // If the frame has variable sized objects then the epilogue must restore
349218885Sdim  // the sp from fp. We can assume there's an FP here since hasFP already
350218885Sdim  // checks for hasVarSizedObjects.
351218885Sdim  if (MFI->hasVarSizedObjects())
352218885Sdim    AFI->setShouldRestoreSPFromFP(true);
353218885Sdim}
354218885Sdim
355218885Sdimvoid ARMFrameLowering::emitEpilogue(MachineFunction &MF,
356218885Sdim                                    MachineBasicBlock &MBB) const {
357218885Sdim  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
358235633Sdim  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
359218885Sdim  unsigned RetOpcode = MBBI->getOpcode();
360218885Sdim  DebugLoc dl = MBBI->getDebugLoc();
361218885Sdim  MachineFrameInfo *MFI = MF.getFrameInfo();
362218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
363218885Sdim  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
364218885Sdim  const ARMBaseInstrInfo &TII =
365218885Sdim    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
366218885Sdim  assert(!AFI->isThumb1OnlyFunction() &&
367218885Sdim         "This emitEpilogue does not support Thumb1!");
368218885Sdim  bool isARM = !AFI->isThumbFunction();
369218885Sdim
370263509Sdim  unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
371263509Sdim  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
372218885Sdim  int NumBytes = (int)MFI->getStackSize();
373218885Sdim  unsigned FramePtr = RegInfo->getFrameRegister(MF);
374218885Sdim
375245431Sdim  // All calls are tail calls in GHC calling conv, and functions have no
376245431Sdim  // prologue/epilogue.
377245431Sdim  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
378245431Sdim    return;
379245431Sdim
380218885Sdim  if (!AFI->hasStackFrame()) {
381218885Sdim    if (NumBytes != 0)
382218885Sdim      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
383218885Sdim  } else {
384218885Sdim    // Unwind MBBI to point to first LDR / VLDRD.
385263509Sdim    const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
386218885Sdim    if (MBBI != MBB.begin()) {
387263509Sdim      do {
388218885Sdim        --MBBI;
389263509Sdim      } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
390218885Sdim      if (!isCSRestore(MBBI, TII, CSRegs))
391218885Sdim        ++MBBI;
392218885Sdim    }
393218885Sdim
394218885Sdim    // Move SP to start of FP callee save spill area.
395218885Sdim    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
396218885Sdim                 AFI->getGPRCalleeSavedArea2Size() +
397218885Sdim                 AFI->getDPRCalleeSavedAreaSize());
398218885Sdim
399218885Sdim    // Reset SP based on frame pointer only if the stack frame extends beyond
400218885Sdim    // frame pointer stack slot or target is ELF and the function has FP.
401218885Sdim    if (AFI->shouldRestoreSPFromFP()) {
402218885Sdim      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
403218885Sdim      if (NumBytes) {
404218885Sdim        if (isARM)
405218885Sdim          emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
406218885Sdim                                  ARMCC::AL, 0, TII);
407218885Sdim        else {
408218885Sdim          // It's not possible to restore SP from FP in a single instruction.
409235633Sdim          // For iOS, this looks like:
410218885Sdim          // mov sp, r7
411218885Sdim          // sub sp, #24
412218885Sdim          // This is bad, if an interrupt is taken after the mov, sp is in an
413218885Sdim          // inconsistent state.
414218885Sdim          // Use the first callee-saved register as a scratch register.
415218885Sdim          assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) &&
416218885Sdim                 "No scratch register to restore SP from FP!");
417218885Sdim          emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
418218885Sdim                                 ARMCC::AL, 0, TII);
419224145Sdim          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
420224145Sdim                                 ARM::SP)
421224145Sdim            .addReg(ARM::R4));
422218885Sdim        }
423218885Sdim      } else {
424218885Sdim        // Thumb2 or ARM.
425218885Sdim        if (isARM)
426218885Sdim          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
427218885Sdim            .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
428218885Sdim        else
429224145Sdim          AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
430224145Sdim                                 ARM::SP)
431224145Sdim            .addReg(FramePtr));
432218885Sdim      }
433263509Sdim    } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
434263509Sdim        emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
435218885Sdim
436218885Sdim    // Increment past our save areas.
437219077Sdim    if (AFI->getDPRCalleeSavedAreaSize()) {
438219077Sdim      MBBI++;
439219077Sdim      // Since vpop register list cannot have gaps, there may be multiple vpop
440219077Sdim      // instructions in the epilogue.
441219077Sdim      while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
442219077Sdim        MBBI++;
443219077Sdim    }
444218885Sdim    if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
445218885Sdim    if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
446218885Sdim  }
447218885Sdim
448235633Sdim  if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
449218885Sdim    // Tail call return: adjust the stack pointer and jump to callee.
450218885Sdim    MBBI = MBB.getLastNonDebugInstr();
451218885Sdim    MachineOperand &JumpTarget = MBBI->getOperand(0);
452218885Sdim
453218885Sdim    // Jump to label or value in register.
454235633Sdim    if (RetOpcode == ARM::TCRETURNdi) {
455235633Sdim      unsigned TCOpcode = STI.isThumb() ?
456235633Sdim               (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
457235633Sdim               ARM::TAILJMPd;
458218885Sdim      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
459218885Sdim      if (JumpTarget.isGlobal())
460218885Sdim        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
461218885Sdim                             JumpTarget.getTargetFlags());
462218885Sdim      else {
463218885Sdim        assert(JumpTarget.isSymbol());
464218885Sdim        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
465218885Sdim                              JumpTarget.getTargetFlags());
466218885Sdim      }
467226890Sdim
468226890Sdim      // Add the default predicate in Thumb mode.
469226890Sdim      if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
470218885Sdim    } else if (RetOpcode == ARM::TCRETURNri) {
471221345Sdim      BuildMI(MBB, MBBI, dl,
472221345Sdim              TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
473218885Sdim        addReg(JumpTarget.getReg(), RegState::Kill);
474218885Sdim    }
475218885Sdim
476218885Sdim    MachineInstr *NewMI = prior(MBBI);
477218885Sdim    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
478218885Sdim      NewMI->addOperand(MBBI->getOperand(i));
479218885Sdim
480218885Sdim    // Delete the pseudo instruction TCRETURN.
481218885Sdim    MBB.erase(MBBI);
482224145Sdim    MBBI = NewMI;
483218885Sdim  }
484218885Sdim
485252723Sdim  if (ArgRegsSaveSize)
486252723Sdim    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
487218885Sdim}
488218885Sdim
489218885Sdim/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
490218885Sdim/// debug info.  It's the same as what we use for resolving the code-gen
491218885Sdim/// references for now.  FIXME: This can go wrong when references are
492218885Sdim/// SP-relative and simple call frames aren't used.
493218885Sdimint
494218885SdimARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
495218885Sdim                                         unsigned &FrameReg) const {
496218885Sdim  return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
497218885Sdim}
498218885Sdim
499218885Sdimint
500218885SdimARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
501221345Sdim                                             int FI, unsigned &FrameReg,
502218885Sdim                                             int SPAdj) const {
503218885Sdim  const MachineFrameInfo *MFI = MF.getFrameInfo();
504218885Sdim  const ARMBaseRegisterInfo *RegInfo =
505218885Sdim    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
506218885Sdim  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
507218885Sdim  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
508218885Sdim  int FPOffset = Offset - AFI->getFramePtrSpillOffset();
509218885Sdim  bool isFixed = MFI->isFixedObjectIndex(FI);
510218885Sdim
511218885Sdim  FrameReg = ARM::SP;
512218885Sdim  Offset += SPAdj;
513218885Sdim
514235633Sdim  // SP can move around if there are allocas.  We may also lose track of SP
515235633Sdim  // when emergency spilling inside a non-reserved call frame setup.
516235633Sdim  bool hasMovingSP = !hasReservedCallFrame(MF);
517235633Sdim
518218885Sdim  // When dynamically realigning the stack, use the frame pointer for
519218885Sdim  // parameters, and the stack/base pointer for locals.
520218885Sdim  if (RegInfo->needsStackRealignment(MF)) {
521218885Sdim    assert (hasFP(MF) && "dynamic stack realignment without a FP!");
522218885Sdim    if (isFixed) {
523218885Sdim      FrameReg = RegInfo->getFrameRegister(MF);
524218885Sdim      Offset = FPOffset;
525235633Sdim    } else if (hasMovingSP) {
526218885Sdim      assert(RegInfo->hasBasePointer(MF) &&
527218885Sdim             "VLAs and dynamic stack alignment, but missing base pointer!");
528218885Sdim      FrameReg = RegInfo->getBaseRegister();
529218885Sdim    }
530218885Sdim    return Offset;
531218885Sdim  }
532218885Sdim
533218885Sdim  // If there is a frame pointer, use it when we can.
534218885Sdim  if (hasFP(MF) && AFI->hasStackFrame()) {
535218885Sdim    // Use frame pointer to reference fixed objects. Use it for locals if
536218885Sdim    // there are VLAs (and thus the SP isn't reliable as a base).
537235633Sdim    if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
538218885Sdim      FrameReg = RegInfo->getFrameRegister(MF);
539218885Sdim      return FPOffset;
540235633Sdim    } else if (hasMovingSP) {
541218885Sdim      assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
542218885Sdim      if (AFI->isThumb2Function()) {
543221345Sdim        // Try to use the frame pointer if we can, else use the base pointer
544221345Sdim        // since it's available. This is handy for the emergency spill slot, in
545221345Sdim        // particular.
546218885Sdim        if (FPOffset >= -255 && FPOffset < 0) {
547218885Sdim          FrameReg = RegInfo->getFrameRegister(MF);
548218885Sdim          return FPOffset;
549218885Sdim        }
550221345Sdim      }
551218885Sdim    } else if (AFI->isThumb2Function()) {
552226890Sdim      // Use  add <rd>, sp, #<imm8>
553221345Sdim      //      ldr <rd>, [sp, #<imm8>]
554221345Sdim      // if at all possible to save space.
555221345Sdim      if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
556221345Sdim        return Offset;
557218885Sdim      // In Thumb2 mode, the negative offset is very limited. Try to avoid
558221345Sdim      // out of range references. ldr <rt>,[<rn>, #-<imm8>]
559218885Sdim      if (FPOffset >= -255 && FPOffset < 0) {
560218885Sdim        FrameReg = RegInfo->getFrameRegister(MF);
561218885Sdim        return FPOffset;
562218885Sdim      }
563218885Sdim    } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
564218885Sdim      // Otherwise, use SP or FP, whichever is closer to the stack slot.
565218885Sdim      FrameReg = RegInfo->getFrameRegister(MF);
566218885Sdim      return FPOffset;
567218885Sdim    }
568218885Sdim  }
569218885Sdim  // Use the base pointer if we have one.
570218885Sdim  if (RegInfo->hasBasePointer(MF))
571218885Sdim    FrameReg = RegInfo->getBaseRegister();
572218885Sdim  return Offset;
573218885Sdim}
574218885Sdim
575218885Sdimint ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
576218885Sdim                                          int FI) const {
577218885Sdim  unsigned FrameReg;
578218885Sdim  return getFrameIndexReference(MF, FI, FrameReg);
579218885Sdim}
580218885Sdim
/// emitPushInst - Insert, before MI, the stores that push the callee-saved
/// registers in CSI for which Func returns true.
///
/// CSI is scanned from its last entry to its first.  The registers collected
/// by one pass of the inner loop form a group: a group of two or more
/// registers (or any group when StrOpc is 0) becomes a single StmOpc
/// instruction that updates SP, while a lone register becomes a StrOpc
/// instruction with a -4 immediate.  With NoGap set, a group is closed as soon
/// as the next register is not numbered exactly one above the previous one.
/// D-registers in [D8, D8 + NumAlignedDPRCS2Regs) are skipped here, and
/// MIFlags is attached to every instruction that is built.
581218885Sdimvoid ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
582218885Sdim                                    MachineBasicBlock::iterator MI,
583218885Sdim                                    const std::vector<CalleeSavedInfo> &CSI,
584218885Sdim                                    unsigned StmOpc, unsigned StrOpc,
585218885Sdim                                    bool NoGap,
586221345Sdim                                    bool(*Func)(unsigned, bool),
587235633Sdim                                    unsigned NumAlignedDPRCS2Regs,
588221345Sdim                                    unsigned MIFlags) const {
589218885Sdim  MachineFunction &MF = *MBB.getParent();
590218885Sdim  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
591218885Sdim
  // Inserting at the end of the block leaves DL default-constructed.
592218885Sdim  DebugLoc DL;
593218885Sdim  if (MI != MBB.end()) DL = MI->getDebugLoc();
594218885Sdim
  // Each entry pairs a register with the kill flag to use on its operand.
595218885Sdim  SmallVector<std::pair<unsigned,bool>, 4> Regs;
596218885Sdim  unsigned i = CSI.size();
597218885Sdim  while (i != 0) {
598218885Sdim    unsigned LastReg = 0;
599218885Sdim    for (; i != 0; --i) {
600218885Sdim      unsigned Reg = CSI[i-1].getReg();
601235633Sdim      if (!(Func)(Reg, STI.isTargetIOS())) continue;
602218885Sdim
603235633Sdim      // D-registers in the aligned area DPRCS2 are NOT spilled here.
604235633Sdim      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
605235633Sdim        continue;
606235633Sdim
607218885Sdim      // Add the callee-saved register as live-in unless it's LR and
608218885Sdim      // @llvm.returnaddress is called. If LR is returned for
609218885Sdim      // @llvm.returnaddress then it's already added to the function and
610218885Sdim      // entry block live-in sets.
611218885Sdim      bool isKill = true;
612218885Sdim      if (Reg == ARM::LR) {
613218885Sdim        if (MF.getFrameInfo()->isReturnAddressTaken() &&
614218885Sdim            MF.getRegInfo().isLiveIn(Reg))
615218885Sdim          isKill = false;
616218885Sdim      }
617218885Sdim
      // NOTE(review): when the NoGap check below breaks out, this register is
      // revisited by the next pass and addLiveIn is called for it a second
      // time -- confirm that is harmless.
618218885Sdim      if (isKill)
619218885Sdim        MBB.addLiveIn(Reg);
620218885Sdim
621218885Sdim      // If NoGap is true, push consecutive registers and then leave the rest
622218885Sdim      // for other instructions. e.g.
623218885Sdim      // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
624218885Sdim      if (NoGap && LastReg && LastReg != Reg-1)
625218885Sdim        break;
626218885Sdim      LastReg = Reg;
627218885Sdim      Regs.push_back(std::make_pair(Reg, isKill));
628218885Sdim    }
629218885Sdim
    // An empty group means the scan reached the start of CSI without
    // collecting anything, so the outer loop terminates as well.
630218885Sdim    if (Regs.empty())
631218885Sdim      continue;
632218885Sdim    if (Regs.size() > 1 || StrOpc== 0) {
      // Store-multiple form: SP is both the written-back result and the base.
633218885Sdim      MachineInstrBuilder MIB =
634218885Sdim        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
635221345Sdim                       .addReg(ARM::SP).setMIFlags(MIFlags));
      // This index shadows the outer scan position `i`; the outer value is
      // untouched.
636218885Sdim      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
637218885Sdim        MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
638218885Sdim    } else if (Regs.size() == 1) {
      // Single-register form: StrOpc with an immediate of -4.
639218885Sdim      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
640218885Sdim                                        ARM::SP)
641218885Sdim        .addReg(Regs[0].first, getKillRegState(Regs[0].second))
642226890Sdim        .addReg(ARM::SP).setMIFlags(MIFlags)
643226890Sdim        .addImm(-4);
644218885Sdim      AddDefaultPred(MIB);
645218885Sdim    }
646218885Sdim    Regs.clear();
647218885Sdim  }
648218885Sdim}
649218885Sdim
/// emitPopInst - Insert, before MI, the loads that pop the callee-saved
/// registers in CSI for which Func returns true.
///
/// CSI is scanned from its last entry to its first and registers are grouped
/// the same way as in emitPushInst: a group of two or more registers (or any
/// group when LdrOpc is 0) becomes one LdmOpc instruction, a lone register
/// becomes one LdrOpc instruction.  D-registers in
/// [D8, D8 + NumAlignedDPRCS2Regs) are skipped.
///
/// MI is dereferenced unconditionally, so it must point at a real
/// instruction.  When LR is being restored and the instruction at MI is not a
/// tail call or an interrupt return, the function is not flagged isVarArg and
/// the subtarget has v5T ops, LR is replaced by PC and the return at MI is
/// folded into the load-multiple.
650218885Sdimvoid ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
651218885Sdim                                   MachineBasicBlock::iterator MI,
652218885Sdim                                   const std::vector<CalleeSavedInfo> &CSI,
653218885Sdim                                   unsigned LdmOpc, unsigned LdrOpc,
654218885Sdim                                   bool isVarArg, bool NoGap,
655235633Sdim                                   bool(*Func)(unsigned, bool),
656235633Sdim                                   unsigned NumAlignedDPRCS2Regs) const {
657218885Sdim  MachineFunction &MF = *MBB.getParent();
658218885Sdim  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
659218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
660218885Sdim  DebugLoc DL = MI->getDebugLoc();
  // Classify the instruction at MI by opcode; both flags below block the
  // LR -> PC folding.
661218885Sdim  unsigned RetOpcode = MI->getOpcode();
662218885Sdim  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
663235633Sdim                     RetOpcode == ARM::TCRETURNri);
664263509Sdim  bool isInterrupt =
665263509Sdim      RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
666218885Sdim
667218885Sdim  SmallVector<unsigned, 4> Regs;
668218885Sdim  unsigned i = CSI.size();
669218885Sdim  while (i != 0) {
670218885Sdim    unsigned LastReg = 0;
671218885Sdim    bool DeleteRet = false;
672218885Sdim    for (; i != 0; --i) {
673218885Sdim      unsigned Reg = CSI[i-1].getReg();
674235633Sdim      if (!(Func)(Reg, STI.isTargetIOS())) continue;
675218885Sdim
676235633Sdim      // The aligned reloads from area DPRCS2 are not inserted here.
677235633Sdim      if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
678235633Sdim        continue;
679235633Sdim
      // Load straight into PC instead of LR where that is permitted.  LdmOpc
      // is a by-value parameter, so the _RET opcode chosen here stays in
      // effect for the remainder of this call.
680263509Sdim      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
681263509Sdim          STI.hasV5TOps()) {
682218885Sdim        Reg = ARM::PC;
683218885Sdim        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
684218885Sdim        // Fold the return instruction into the LDM.
685218885Sdim        DeleteRet = true;
686218885Sdim      }
687218885Sdim
688218885Sdim      // If NoGap is true, pop consecutive registers and then leave the rest
689218885Sdim      // for other instructions. e.g.
690218885Sdim      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
691218885Sdim      if (NoGap && LastReg && LastReg != Reg-1)
692218885Sdim        break;
693218885Sdim
694218885Sdim      LastReg = Reg;
695218885Sdim      Regs.push_back(Reg);
696218885Sdim    }
697218885Sdim
    // An empty group means the scan reached the start of CSI without
    // collecting anything, so the outer loop terminates as well.
698218885Sdim    if (Regs.empty())
699218885Sdim      continue;
700218885Sdim    if (Regs.size() > 1 || LdrOpc == 0) {
      // Load-multiple form: SP is both the written-back result and the base.
701218885Sdim      MachineInstrBuilder MIB =
702218885Sdim        AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
703218885Sdim                       .addReg(ARM::SP));
      // This index shadows the outer scan position `i`; the outer value is
      // untouched.
704218885Sdim      for (unsigned i = 0, e = Regs.size(); i < e; ++i)
705218885Sdim        MIB.addReg(Regs[i], getDefRegState(true));
      // The new instruction takes over the old return's implicit operands
      // before that return is erased.
706226890Sdim      if (DeleteRet) {
707252723Sdim        MIB.copyImplicitOps(&*MI);
708218885Sdim        MI->eraseFromParent();
709226890Sdim      }
      // Later groups are inserted before the instruction just built.
710218885Sdim      MI = MIB;
711218885Sdim    } else if (Regs.size() == 1) {
712218885Sdim      // If we adjusted the reg to PC from LR above, switch it back here. We
713218885Sdim      // only do that for LDM.
714218885Sdim      if (Regs[0] == ARM::PC)
715218885Sdim        Regs[0] = ARM::LR;
716218885Sdim      MachineInstrBuilder MIB =
717218885Sdim        BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
718218885Sdim          .addReg(ARM::SP, RegState::Define)
719218885Sdim          .addReg(ARM::SP);
720218885Sdim      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
721218885Sdim      // that refactoring is complete (eventually).
722226890Sdim      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
723218885Sdim        MIB.addReg(0);
724218885Sdim        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
725218885Sdim      } else
726218885Sdim        MIB.addImm(4);
727218885Sdim      AddDefaultPred(MIB);
728218885Sdim    }
729218885Sdim    Regs.clear();
730218885Sdim  }
731218885Sdim}
732218885Sdim
733235633Sdim/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
734235633Sdim/// starting from d8.  Also insert stack realignment code and leave the stack
735235633Sdim/// pointer pointing to the d8 spill slot.
///
/// All instructions are inserted before MI, which is dereferenced for its
/// debug location and so must point at a real instruction.  CSI is only used
/// to find the frame indices of d8-d15 so that their alignment can be
/// updated; r4 is used as the scratch address register throughout.  The
/// function also marks the frame as needing SP restored from FP.
736235633Sdimstatic void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
737235633Sdim                                    MachineBasicBlock::iterator MI,
738235633Sdim                                    unsigned NumAlignedDPRCS2Regs,
739235633Sdim                                    const std::vector<CalleeSavedInfo> &CSI,
740235633Sdim                                    const TargetRegisterInfo *TRI) {
741235633Sdim  MachineFunction &MF = *MBB.getParent();
742235633Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
743235633Sdim  DebugLoc DL = MI->getDebugLoc();
744235633Sdim  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
745235633Sdim  MachineFrameInfo &MFI = *MF.getFrameInfo();
746235633Sdim
747235633Sdim  // Mark the D-register spill slots as properly aligned.  Since MFI computes
748235633Sdim  // stack slot layout backwards, this can actually mean that the d-reg stack
749235633Sdim  // slot offsets can be wrong. The offset for d8 will always be correct.
750235633Sdim  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    // DNum is unsigned, so registers numbered below D8 wrap to a large value
    // and are rejected by the same test that rejects registers above d15.
751235633Sdim    unsigned DNum = CSI[i].getReg() - ARM::D8;
752235633Sdim    if (DNum >= 8)
753235633Sdim      continue;
754235633Sdim    int FI = CSI[i].getFrameIdx();
755235633Sdim    // The even-numbered registers will be 16-byte aligned, the odd-numbered
756235633Sdim    // registers will be 8-byte aligned.
757235633Sdim    MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
758235633Sdim
759235633Sdim    // The stack slot for D8 needs to be maximally aligned because this is
760235633Sdim    // actually the point where we align the stack pointer.  MachineFrameInfo
761235633Sdim    // computes all offsets relative to the incoming stack pointer which is a
762235633Sdim    // bit weird when realigning the stack.  Any extra padding for this
763235633Sdim    // over-alignment is not realized because the code inserted below adjusts
764235633Sdim    // the stack pointer by numregs * 8 before aligning the stack pointer.
765235633Sdim    if (DNum == 0)
766235633Sdim      MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
767235633Sdim  }
768235633Sdim
769235633Sdim  // Move the stack pointer to the d8 spill slot, and align it at the same
770235633Sdim  // time. Leave the stack slot address in the scratch register r4.
771235633Sdim  //
772235633Sdim  //   sub r4, sp, #numregs * 8
773235633Sdim  //   bic r4, r4, #align - 1
774235633Sdim  //   mov sp, r4
775235633Sdim  //
776235633Sdim  bool isThumb = AFI->isThumbFunction();
777235633Sdim  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
778235633Sdim  AFI->setShouldRestoreSPFromFP(true);
779235633Sdim
780235633Sdim  // sub r4, sp, #numregs * 8
781235633Sdim  // The immediate is <= 64, so it doesn't need any special encoding.
782235633Sdim  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
783235633Sdim  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
784235633Sdim                              .addReg(ARM::SP)
785235633Sdim                              .addImm(8 * NumAlignedDPRCS2Regs)));
786235633Sdim
787235633Sdim  // bic r4, r4, #align-1
788235633Sdim  Opc = isThumb ? ARM::t2BICri : ARM::BICri;
789235633Sdim  unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
790235633Sdim  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
791235633Sdim                              .addReg(ARM::R4, RegState::Kill)
792235633Sdim                              .addImm(MaxAlign - 1)));
793235633Sdim
794235633Sdim  // mov sp, r4
795235633Sdim  // The stack pointer must be adjusted before spilling anything, otherwise
796235633Sdim  // the stack slots could be clobbered by an interrupt handler.
797235633Sdim  // Leave r4 live, it is used below.
798235633Sdim  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
799235633Sdim  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
800235633Sdim                            .addReg(ARM::R4);
801235633Sdim  MIB = AddDefaultPred(MIB);
  // Only the ARM-mode MOVr gets the default CC operand; tMOVr does not.
802235633Sdim  if (!isThumb)
803235633Sdim    AddDefaultCC(MIB);
804235633Sdim
805235633Sdim  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
806235633Sdim  // r4 holds the stack slot address.
807235633Sdim  unsigned NextReg = ARM::D8;
808235633Sdim
809235633Sdim  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
810235633Sdim  // The writeback is only needed when emitting two vst1.64 instructions.
811235633Sdim  if (NumAlignedDPRCS2Regs >= 6) {
812235633Sdim    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
813245431Sdim                                               &ARM::QQPRRegClass);
814235633Sdim    MBB.addLiveIn(SupReg);
815235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
816235633Sdim                           ARM::R4)
817235633Sdim                   .addReg(ARM::R4, RegState::Kill).addImm(16)
818235633Sdim                   .addReg(NextReg)
819235633Sdim                   .addReg(SupReg, RegState::ImplicitKill));
820235633Sdim    NextReg += 4;
821235633Sdim    NumAlignedDPRCS2Regs -= 4;
822235633Sdim  }
823235633Sdim
824235633Sdim  // We won't modify r4 beyond this point.  It currently points to the next
825235633Sdim  // register to be spilled.
826235633Sdim  unsigned R4BaseReg = NextReg;
827235633Sdim
828235633Sdim  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
829235633Sdim  if (NumAlignedDPRCS2Regs >= 4) {
830235633Sdim    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
831245431Sdim                                               &ARM::QQPRRegClass);
832235633Sdim    MBB.addLiveIn(SupReg);
833235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
834235633Sdim                   .addReg(ARM::R4).addImm(16).addReg(NextReg)
835235633Sdim                   .addReg(SupReg, RegState::ImplicitKill));
836235633Sdim    NextReg += 4;
837235633Sdim    NumAlignedDPRCS2Regs -= 4;
838235633Sdim  }
839235633Sdim
840235633Sdim  // 16-byte aligned vst1.64 with 2 d-regs.
841235633Sdim  if (NumAlignedDPRCS2Regs >= 2) {
842235633Sdim    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
843245431Sdim                                               &ARM::QPRRegClass);
844235633Sdim    MBB.addLiveIn(SupReg);
845235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
846235633Sdim                   .addReg(ARM::R4).addImm(16).addReg(SupReg));
847235633Sdim    NextReg += 2;
848235633Sdim    NumAlignedDPRCS2Regs -= 2;
849235633Sdim  }
850235633Sdim
851235633Sdim  // Finally, use a vanilla vstr.64 for the odd last register.
852235633Sdim  if (NumAlignedDPRCS2Regs) {
853235633Sdim    MBB.addLiveIn(NextReg);
854235633Sdim    // vstr.64 uses addrmode5 which has an offset scale of 4.
    // Each d-register occupies 8 bytes, i.e. 2 offset units per register
    // counted from the register r4 currently points at.
855235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
856235633Sdim                   .addReg(NextReg)
857235633Sdim                   .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
858235633Sdim  }
859235633Sdim
860235633Sdim  // The last spill instruction inserted should kill the scratch register r4.
861235633Sdim  llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
862235633Sdim}
863235633Sdim
864235633Sdim/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
865235633Sdim/// iterator to the following instruction.
///
/// MI must point at the first of the three realignment instructions.  The
/// number of store instructions stepped over is derived from
/// NumAlignedDPRCS2Regs and must stay in sync with emitAlignedDPRCS2Spills.
866235633Sdimstatic MachineBasicBlock::iterator
867235633SdimskipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
868235633Sdim                        unsigned NumAlignedDPRCS2Regs) {
869235633Sdim  //   sub r4, sp, #numregs * 8
870235633Sdim  //   bic r4, r4, #align - 1
871235633Sdim  //   mov sp, r4
872235633Sdim  ++MI; ++MI; ++MI;
873235633Sdim  assert(MI->mayStore() && "Expecting spill instruction");
874235633Sdim
875235633Sdim  // Every case in this switch deliberately falls through to the next one.
  // Seven registers are spilled with three stores (4 + 2 + 1); one, two or
  // four registers with a single store; every other count with two stores.
  // On leaving the switch MI has been advanced past the last store.
876235633Sdim  switch(NumAlignedDPRCS2Regs) {
877235633Sdim  case 7:
878235633Sdim    ++MI;
879235633Sdim    assert(MI->mayStore() && "Expecting spill instruction");
880235633Sdim  default:
881235633Sdim    ++MI;
882235633Sdim    assert(MI->mayStore() && "Expecting spill instruction");
883235633Sdim  case 1:
884235633Sdim  case 2:
885235633Sdim  case 4:
886235633Sdim    assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
887235633Sdim    ++MI;
888235633Sdim  }
889235633Sdim  return MI;
890235633Sdim}
891235633Sdim
892235633Sdim/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
893235633Sdim/// starting from d8.  These instructions are assumed to execute while the
894235633Sdim/// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// All instructions are inserted before MI, which is dereferenced for its
/// debug location and so must point at a real instruction.  CSI is only used
/// to look up the frame index of the d8 spill slot; r4 is used as the scratch
/// address register throughout.
895235633Sdimstatic void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
896235633Sdim                                      MachineBasicBlock::iterator MI,
897235633Sdim                                      unsigned NumAlignedDPRCS2Regs,
898235633Sdim                                      const std::vector<CalleeSavedInfo> &CSI,
899235633Sdim                                      const TargetRegisterInfo *TRI) {
900235633Sdim  MachineFunction &MF = *MBB.getParent();
901235633Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
902235633Sdim  DebugLoc DL = MI->getDebugLoc();
903235633Sdim  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
904235633Sdim
905235633Sdim  // Find the frame index assigned to d8.
  // NOTE(review): D8SpillFI stays 0 if CSI has no entry for d8 -- presumably
  // that cannot happen when NumAlignedDPRCS2Regs is non-zero; confirm.
906235633Sdim  int D8SpillFI = 0;
907235633Sdim  for (unsigned i = 0, e = CSI.size(); i != e; ++i)
908235633Sdim    if (CSI[i].getReg() == ARM::D8) {
909235633Sdim      D8SpillFI = CSI[i].getFrameIdx();
910235633Sdim      break;
911235633Sdim    }
912235633Sdim
913235633Sdim  // Materialize the address of the d8 spill slot into the scratch register r4.
914235633Sdim  // This can be fairly complicated if the stack frame is large, so just use
915235633Sdim  // the normal frame index elimination mechanism to do it.  This code runs as
916235633Sdim  // the initial part of the epilog where the stack and base pointers haven't
917235633Sdim  // been changed yet.
918235633Sdim  bool isThumb = AFI->isThumbFunction();
919235633Sdim  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
920235633Sdim
921235633Sdim  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
922235633Sdim  AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
923235633Sdim                              .addFrameIndex(D8SpillFI).addImm(0)));
924235633Sdim
925235633Sdim  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
926235633Sdim  unsigned NextReg = ARM::D8;
927235633Sdim
928235633Sdim  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
929235633Sdim  if (NumAlignedDPRCS2Regs >= 6) {
930235633Sdim    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
931245431Sdim                                               &ARM::QQPRRegClass);
932235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
933235633Sdim                   .addReg(ARM::R4, RegState::Define)
934235633Sdim                   .addReg(ARM::R4, RegState::Kill).addImm(16)
935235633Sdim                   .addReg(SupReg, RegState::ImplicitDefine));
936235633Sdim    NextReg += 4;
937235633Sdim    NumAlignedDPRCS2Regs -= 4;
938235633Sdim  }
939235633Sdim
940235633Sdim  // We won't modify r4 beyond this point.  It currently points to the next
941235633Sdim  // register to be restored.
942235633Sdim  unsigned R4BaseReg = NextReg;
943235633Sdim
944235633Sdim  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
945235633Sdim  if (NumAlignedDPRCS2Regs >= 4) {
946235633Sdim    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
947245431Sdim                                               &ARM::QQPRRegClass);
948235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
949235633Sdim                   .addReg(ARM::R4).addImm(16)
950235633Sdim                   .addReg(SupReg, RegState::ImplicitDefine));
951235633Sdim    NextReg += 4;
952235633Sdim    NumAlignedDPRCS2Regs -= 4;
953235633Sdim  }
954235633Sdim
955235633Sdim  // 16-byte aligned vld1.64 with 2 d-regs.
956235633Sdim  if (NumAlignedDPRCS2Regs >= 2) {
957235633Sdim    unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
958245431Sdim                                               &ARM::QPRRegClass);
959235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
960235633Sdim                   .addReg(ARM::R4).addImm(16));
961235633Sdim    NextReg += 2;
962235633Sdim    NumAlignedDPRCS2Regs -= 2;
963235633Sdim  }
964235633Sdim
965235633Sdim  // Finally, use a vanilla vldr.64 for the remaining odd register.
  // The immediate counts 2 units per d-register from the register r4
  // currently points at, mirroring the vstr.64 used for the spill.
966235633Sdim  if (NumAlignedDPRCS2Regs)
967235633Sdim    AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
968235633Sdim                   .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
969235633Sdim
970235633Sdim  // The last reload instruction inserted should kill the scratch register r4.
971235633Sdim  llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
972235633Sdim}
973235633Sdim
/// spillCalleeSavedRegisters - Insert the callee-saved register spills before
/// MI.  Returns false without emitting anything when CSI is empty, and true
/// otherwise.
///
/// The area 1 and area 2 registers are pushed with STMDB / STR_PRE (or their
/// Thumb2 forms), the area 3 registers with VSTMDDB.  The first
/// NumAlignedDPRCS2Regs D-registers are left out of the area 3 push and are
/// spilled by emitAlignedDPRCS2Spills instead.  Every push is tagged
/// MachineInstr::FrameSetup.
974218885Sdimbool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
975218885Sdim                                        MachineBasicBlock::iterator MI,
976218885Sdim                                        const std::vector<CalleeSavedInfo> &CSI,
977218885Sdim                                        const TargetRegisterInfo *TRI) const {
978218885Sdim  if (CSI.empty())
979218885Sdim    return false;
980218885Sdim
981218885Sdim  MachineFunction &MF = *MBB.getParent();
982218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
983218885Sdim
984218885Sdim  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
985226890Sdim  unsigned PushOneOpc = AFI->isThumbFunction() ?
986226890Sdim    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
987218885Sdim  unsigned FltOpc = ARM::VSTMDDB_UPD;
988235633Sdim  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  // Areas 1 and 2 allow gaps in the register list; area 3 is pushed with
  // NoGap set and has no single-register opcode.
989235633Sdim  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
990221345Sdim               MachineInstr::FrameSetup);
991235633Sdim  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
992221345Sdim               MachineInstr::FrameSetup);
993221345Sdim  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
994235633Sdim               NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
995218885Sdim
996235633Sdim  // The code above does not insert spill code for the aligned DPRCS2 registers.
997235633Sdim  // The stack realignment code will be inserted between the push instructions
998235633Sdim  // and these spills.
999235633Sdim  if (NumAlignedDPRCS2Regs)
1000235633Sdim    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1001235633Sdim
1002218885Sdim  return true;
1003218885Sdim}
1004218885Sdim
/// restoreCalleeSavedRegisters - Insert the callee-saved register reloads
/// before MI.  Returns false without emitting anything when CSI is empty, and
/// true otherwise.
///
/// The aligned DPRCS2 reloads are emitted first, followed by the pops for
/// area 3, area 2 and area 1 -- the reverse of the order used by
/// spillCalleeSavedRegisters.
1005218885Sdimbool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
1006218885Sdim                                        MachineBasicBlock::iterator MI,
1007218885Sdim                                        const std::vector<CalleeSavedInfo> &CSI,
1008218885Sdim                                        const TargetRegisterInfo *TRI) const {
1009218885Sdim  if (CSI.empty())
1010218885Sdim    return false;
1011218885Sdim
1012218885Sdim  MachineFunction &MF = *MBB.getParent();
1013218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // The function is treated as vararg here whenever it reserved space for
  // saving argument registers.
1014252723Sdim  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
1015235633Sdim  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1016218885Sdim
1017235633Sdim  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
1018235633Sdim  // registers. Do that here instead.
1019235633Sdim  if (NumAlignedDPRCS2Regs)
1020235633Sdim    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
1021235633Sdim
1022218885Sdim  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
1023226890Sdim  unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
1024218885Sdim  unsigned FltOpc = ARM::VLDMDIA_UPD;
  // Area 3 is popped with NoGap set and has no single-register opcode; areas
  // 2 and 1 allow gaps in the register list.
1025235633Sdim  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
1026235633Sdim              NumAlignedDPRCS2Regs);
1027218885Sdim  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1028235633Sdim              &isARMArea2Register, 0);
1029218885Sdim  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
1030235633Sdim              &isARMArea1Register, 0);
1031218885Sdim
1032218885Sdim  return true;
1033218885Sdim}
1034218885Sdim
/// GetFunctionSizeInBytes - Return the sum of TII.GetInstSizeInBytes over
/// every instruction in every basic block of MF.
1035218885Sdim// FIXME: Make generic?
1036218885Sdimstatic unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
1037218885Sdim                                       const ARMBaseInstrInfo &TII) {
1038218885Sdim  unsigned FnSize = 0;
1039218885Sdim  for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
1040218885Sdim       MBBI != E; ++MBBI) {
1041218885Sdim    const MachineBasicBlock &MBB = *MBBI;
    // This inner E shadows the block-list end iterator declared above.
1042218885Sdim    for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
1043218885Sdim         I != E; ++I)
1044218885Sdim      FnSize += TII.GetInstSizeInBytes(I);
1045218885Sdim  }
1046218885Sdim  return FnSize;
1047218885Sdim}
1048218885Sdim
1049218885Sdim/// estimateRSStackSizeLimit - Look at each instruction that references stack
1050218885Sdim/// frames and return the stack size limit beyond which some of these
1051218885Sdim/// instructions will require a scratch register during their expansion later.
///
/// The limit starts at 4095 and is lowered to the smallest range allowed by
/// the addressing mode of any instruction that has a frame-index operand.
/// It is 0 as soon as one such instruction uses addressing mode 4 or 6.
1052218885Sdim// FIXME: Move to TII?
1053218885Sdimstatic unsigned estimateRSStackSizeLimit(MachineFunction &MF,
1054218885Sdim                                         const TargetFrameLowering *TFI) {
1055218885Sdim  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1056218885Sdim  unsigned Limit = (1 << 12) - 1;
1057218885Sdim  for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
1058218885Sdim    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
1059218885Sdim         I != E; ++I) {
1060218885Sdim      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
        // Only frame-index operands constrain the limit.
1061218885Sdim        if (!I->getOperand(i).isFI()) continue;
1062218885Sdim
1063218885Sdim        // When using ADDri to get the address of a stack object, 255 is the
1064218885Sdim        // largest offset guaranteed to fit in the immediate offset.
1065218885Sdim        if (I->getOpcode() == ARM::ADDri) {
1066218885Sdim          Limit = std::min(Limit, (1U << 8) - 1);
1067218885Sdim          break;
1068218885Sdim        }
1069218885Sdim
1070218885Sdim        // Otherwise check the addressing mode.
1071218885Sdim        switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
1072218885Sdim        case ARMII::AddrMode3:
1073218885Sdim        case ARMII::AddrModeT2_i8:
1074218885Sdim          Limit = std::min(Limit, (1U << 8) - 1);
1075218885Sdim          break;
1076218885Sdim        case ARMII::AddrMode5:
1077218885Sdim        case ARMII::AddrModeT2_i8s4:
1078218885Sdim          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
1079218885Sdim          break;
1080218885Sdim        case ARMII::AddrModeT2_i12:
1081218885Sdim          // i12 supports only positive offset so these will be converted to
1082218885Sdim          // i8 opcodes. See llvm::rewriteT2FrameIndex.
1083218885Sdim          if (TFI->hasFP(MF) && AFI->hasStackFrame())
1084218885Sdim            Limit = std::min(Limit, (1U << 8) - 1);
1085218885Sdim          break;
1086218885Sdim        case ARMII::AddrMode4:
1087218885Sdim        case ARMII::AddrMode6:
1088218885Sdim          // Addressing modes 4 & 6 (load/store) instructions can't encode an
1089218885Sdim          // immediate offset for stack references.
1090218885Sdim          return 0;
1091218885Sdim        default:
1092218885Sdim          break;
1093218885Sdim        }
1094218885Sdim        break; // At most one FI per instruction
1095218885Sdim      }
1096218885Sdim    }
1097218885Sdim  }
1098218885Sdim
1099218885Sdim  return Limit;
1100218885Sdim}
1101218885Sdim
1102235633Sdim// In functions that realign the stack, it can be an advantage to spill the
1103235633Sdim// callee-saved vector registers after realigning the stack. The vst1 and vld1
1104235633Sdim// instructions take alignment hints that can improve performance.
1105235633Sdim//
// This function decides how many D-registers starting from d8 get that
// treatment and records the count in ARMFunctionInfo.  The count is reset to
// 0 first and stays 0 when the option is disabled, the function is naked,
// NEON is unavailable, the default stack alignment is already 8 or more, the
// stack cannot be realigned, or fewer than two contiguous registers are used.
// When a non-zero count is recorded, r4 is also marked as used.
1106235633Sdimstatic void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
1107235633Sdim  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
1108235633Sdim  if (!SpillAlignedNEONRegs)
1109235633Sdim    return;
1110235633Sdim
1111235633Sdim  // Naked functions don't spill callee-saved registers.
1112252723Sdim  if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
1113252723Sdim                                                     Attribute::Naked))
1114235633Sdim    return;
1115235633Sdim
1116235633Sdim  // We are planning to use NEON instructions vst1 / vld1.
1117235633Sdim  if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
1118235633Sdim    return;
1119235633Sdim
1120235633Sdim  // Don't bother if the default stack alignment is sufficiently high.
1121235633Sdim  if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
1122235633Sdim    return;
1123235633Sdim
1124235633Sdim  // Aligned spills require stack realignment.
1125235633Sdim  const ARMBaseRegisterInfo *RegInfo =
1126235633Sdim    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
1127235633Sdim  if (!RegInfo->canRealignStack(MF))
1128235633Sdim    return;
1129235633Sdim
1130235633Sdim  // We always spill contiguous d-registers starting from d8. Count how many
1131235633Sdim  // need spilling.  The register allocator will almost always use the
1132235633Sdim  // callee-saved registers in order, but it can happen that there are holes in
1133235633Sdim  // the range.  Registers above the hole will be spilled to the standard DPRCS
1134235633Sdim  // area.
1135235633Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1136235633Sdim  unsigned NumSpills = 0;
1137235633Sdim  for (; NumSpills < 8; ++NumSpills)
1138245431Sdim    if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
1139235633Sdim      break;
1140235633Sdim
1141235633Sdim  // Don't do this for just one d-register. It's not worth it.
1142235633Sdim  if (NumSpills < 2)
1143235633Sdim    return;
1144235633Sdim
1145235633Sdim  // Spill the first NumSpills D-registers after realigning the stack.
1146235633Sdim  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
1147235633Sdim
1148235633Sdim  // A scratch register is required for the vst1 / vld1 instructions.
1149235633Sdim  MF.getRegInfo().setPhysRegUsed(ARM::R4);
1150235633Sdim}
1151235633Sdim
1152218885Sdimvoid
1153218885SdimARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
1154218885Sdim                                                       RegScavenger *RS) const {
1155218885Sdim  // This tells PEI to spill the FP as if it is any other callee-save register
1156218885Sdim  // to take advantage the eliminateFrameIndex machinery. This also ensures it
1157218885Sdim  // is spilled in the order specified by getCalleeSavedRegs() to make it easier
1158218885Sdim  // to combine multiple loads / stores.
1159218885Sdim  bool CanEliminateFrame = true;
1160218885Sdim  bool CS1Spilled = false;
1161218885Sdim  bool LRSpilled = false;
1162218885Sdim  unsigned NumGPRSpills = 0;
1163218885Sdim  SmallVector<unsigned, 4> UnspilledCS1GPRs;
1164218885Sdim  SmallVector<unsigned, 4> UnspilledCS2GPRs;
1165218885Sdim  const ARMBaseRegisterInfo *RegInfo =
1166218885Sdim    static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
1167218885Sdim  const ARMBaseInstrInfo &TII =
1168218885Sdim    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
1169218885Sdim  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1170218885Sdim  MachineFrameInfo *MFI = MF.getFrameInfo();
1171245431Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1172218885Sdim  unsigned FramePtr = RegInfo->getFrameRegister(MF);
1173218885Sdim
1174218885Sdim  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
1175218885Sdim  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
1176218885Sdim  // since it's not always possible to restore sp from fp in a single
1177218885Sdim  // instruction.
1178218885Sdim  // FIXME: It will be better just to find spare register here.
1179218885Sdim  if (AFI->isThumb2Function() &&
1180218885Sdim      (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
1181245431Sdim    MRI.setPhysRegUsed(ARM::R4);
1182218885Sdim
1183218885Sdim  if (AFI->isThumb1OnlyFunction()) {
1184218885Sdim    // Spill LR if Thumb1 function uses variable length argument lists.
1185252723Sdim    if (AFI->getArgRegsSaveSize() > 0)
1186245431Sdim      MRI.setPhysRegUsed(ARM::LR);
1187218885Sdim
1188224145Sdim    // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
1189224145Sdim    // for sure what the stack size will be, but for this, an estimate is good
1190224145Sdim    // enough. If there anything changes it, it'll be a spill, which implies
1191224145Sdim    // we've used all the registers and so R4 is already used, so not marking
1192224145Sdim    // it here will be OK.
1193218885Sdim    // FIXME: It will be better just to find spare register here.
1194252723Sdim    unsigned StackSize = MFI->estimateStackSize(MF);
1195224145Sdim    if (MFI->hasVarSizedObjects() || StackSize > 508)
1196245431Sdim      MRI.setPhysRegUsed(ARM::R4);
1197218885Sdim  }
1198218885Sdim
1199235633Sdim  // See if we can spill vector registers to aligned stack.
1200235633Sdim  checkNumAlignedDPRCS2Regs(MF);
1201235633Sdim
1202218885Sdim  // Spill the BasePtr if it's used.
1203218885Sdim  if (RegInfo->hasBasePointer(MF))
1204245431Sdim    MRI.setPhysRegUsed(RegInfo->getBaseRegister());
1205218885Sdim
1206218885Sdim  // Don't spill FP if the frame can be eliminated. This is determined
1207218885Sdim  // by scanning the callee-save registers to see if any is used.
1208263509Sdim  const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1209218885Sdim  for (unsigned i = 0; CSRegs[i]; ++i) {
1210218885Sdim    unsigned Reg = CSRegs[i];
1211218885Sdim    bool Spilled = false;
1212245431Sdim    if (MRI.isPhysRegUsed(Reg)) {
1213218885Sdim      Spilled = true;
1214218885Sdim      CanEliminateFrame = false;
1215218885Sdim    }
1216218885Sdim
1217245431Sdim    if (!ARM::GPRRegClass.contains(Reg))
1218218885Sdim      continue;
1219218885Sdim
1220218885Sdim    if (Spilled) {
1221218885Sdim      NumGPRSpills++;
1222218885Sdim
1223235633Sdim      if (!STI.isTargetIOS()) {
1224218885Sdim        if (Reg == ARM::LR)
1225218885Sdim          LRSpilled = true;
1226218885Sdim        CS1Spilled = true;
1227218885Sdim        continue;
1228218885Sdim      }
1229218885Sdim
1230218885Sdim      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
1231218885Sdim      switch (Reg) {
1232218885Sdim      case ARM::LR:
1233218885Sdim        LRSpilled = true;
1234218885Sdim        // Fallthrough
1235263509Sdim      case ARM::R0: case ARM::R1:
1236263509Sdim      case ARM::R2: case ARM::R3:
1237218885Sdim      case ARM::R4: case ARM::R5:
1238218885Sdim      case ARM::R6: case ARM::R7:
1239218885Sdim        CS1Spilled = true;
1240218885Sdim        break;
1241218885Sdim      default:
1242218885Sdim        break;
1243218885Sdim      }
1244218885Sdim    } else {
1245235633Sdim      if (!STI.isTargetIOS()) {
1246218885Sdim        UnspilledCS1GPRs.push_back(Reg);
1247218885Sdim        continue;
1248218885Sdim      }
1249218885Sdim
1250218885Sdim      switch (Reg) {
1251263509Sdim      case ARM::R0: case ARM::R1:
1252263509Sdim      case ARM::R2: case ARM::R3:
1253218885Sdim      case ARM::R4: case ARM::R5:
1254218885Sdim      case ARM::R6: case ARM::R7:
1255218885Sdim      case ARM::LR:
1256218885Sdim        UnspilledCS1GPRs.push_back(Reg);
1257218885Sdim        break;
1258218885Sdim      default:
1259218885Sdim        UnspilledCS2GPRs.push_back(Reg);
1260218885Sdim        break;
1261218885Sdim      }
1262218885Sdim    }
1263218885Sdim  }
1264218885Sdim
1265218885Sdim  bool ForceLRSpill = false;
1266218885Sdim  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
1267218885Sdim    unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
1268218885Sdim    // Force LR to be spilled if the Thumb function size is > 2048. This enables
1269218885Sdim    // use of BL to implement far jump. If it turns out that it's not needed
1270218885Sdim    // then the branch fix up path will undo it.
1271218885Sdim    if (FnSize >= (1 << 11)) {
1272218885Sdim      CanEliminateFrame = false;
1273218885Sdim      ForceLRSpill = true;
1274218885Sdim    }
1275218885Sdim  }
1276218885Sdim
1277218885Sdim  // If any of the stack slot references may be out of range of an immediate
1278218885Sdim  // offset, make sure a register (or a spill slot) is available for the
1279218885Sdim  // register scavenger. Note that if we're indexing off the frame pointer, the
1280218885Sdim  // effective stack size is 4 bytes larger since the FP points to the stack
1281218885Sdim  // slot of the previous FP. Also, if we have variable sized objects in the
1282218885Sdim  // function, stack slot references will often be negative, and some of
1283218885Sdim  // our instructions are positive-offset only, so conservatively consider
1284218885Sdim  // that case to want a spill slot (or register) as well. Similarly, if
1285218885Sdim  // the function adjusts the stack pointer during execution and the
1286218885Sdim  // adjustments aren't already part of our stack size estimate, our offset
1287218885Sdim  // calculations may be off, so be conservative.
1288218885Sdim  // FIXME: We could add logic to be more precise about negative offsets
1289218885Sdim  //        and which instructions will need a scratch register for them. Is it
1290218885Sdim  //        worth the effort and added fragility?
1291218885Sdim  bool BigStack =
1292218885Sdim    (RS &&
1293252723Sdim     (MFI->estimateStackSize(MF) +
1294252723Sdim      ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
1295218885Sdim      estimateRSStackSizeLimit(MF, this)))
1296218885Sdim    || MFI->hasVarSizedObjects()
1297218885Sdim    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
1298218885Sdim
1299218885Sdim  bool ExtraCSSpill = false;
1300218885Sdim  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
1301218885Sdim    AFI->setHasStackFrame(true);
1302218885Sdim
1303218885Sdim    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
1304218885Sdim    // Spill LR as well so we can fold BX_RET to the registers restore (LDM).
1305218885Sdim    if (!LRSpilled && CS1Spilled) {
1306245431Sdim      MRI.setPhysRegUsed(ARM::LR);
1307218885Sdim      NumGPRSpills++;
1308263509Sdim      SmallVectorImpl<unsigned>::iterator LRPos;
1309263509Sdim      LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
1310263509Sdim                        (unsigned)ARM::LR);
1311263509Sdim      if (LRPos != UnspilledCS1GPRs.end())
1312263509Sdim        UnspilledCS1GPRs.erase(LRPos);
1313263509Sdim
1314218885Sdim      ForceLRSpill = false;
1315218885Sdim      ExtraCSSpill = true;
1316218885Sdim    }
1317218885Sdim
1318218885Sdim    if (hasFP(MF)) {
1319245431Sdim      MRI.setPhysRegUsed(FramePtr);
1320218885Sdim      NumGPRSpills++;
1321218885Sdim    }
1322218885Sdim
1323218885Sdim    // If stack and double are 8-byte aligned and we are spilling an odd number
1324218885Sdim    // of GPRs, spill one extra callee save GPR so we won't have to pad between
1325218885Sdim    // the integer and double callee save areas.
1326218885Sdim    unsigned TargetAlign = getStackAlignment();
1327218885Sdim    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
1328218885Sdim      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
1329218885Sdim        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
1330218885Sdim          unsigned Reg = UnspilledCS1GPRs[i];
1331218885Sdim          // Don't spill high register if the function is thumb1
1332218885Sdim          if (!AFI->isThumb1OnlyFunction() ||
1333218885Sdim              isARMLowRegister(Reg) || Reg == ARM::LR) {
1334245431Sdim            MRI.setPhysRegUsed(Reg);
1335245431Sdim            if (!MRI.isReserved(Reg))
1336218885Sdim              ExtraCSSpill = true;
1337218885Sdim            break;
1338218885Sdim          }
1339218885Sdim        }
1340218885Sdim      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
1341218885Sdim        unsigned Reg = UnspilledCS2GPRs.front();
1342245431Sdim        MRI.setPhysRegUsed(Reg);
1343245431Sdim        if (!MRI.isReserved(Reg))
1344218885Sdim          ExtraCSSpill = true;
1345218885Sdim      }
1346218885Sdim    }
1347218885Sdim
1348218885Sdim    // Estimate if we might need to scavenge a register at some point in order
1349218885Sdim    // to materialize a stack offset. If so, either spill one additional
1350218885Sdim    // callee-saved register or reserve a special spill slot to facilitate
1351218885Sdim    // register scavenging. Thumb1 needs a spill slot for stack pointer
1352218885Sdim    // adjustments also, even when the frame itself is small.
1353218885Sdim    if (BigStack && !ExtraCSSpill) {
1354218885Sdim      // If any non-reserved CS register isn't spilled, just spill one or two
1355218885Sdim      // extra. That should take care of it!
1356218885Sdim      unsigned NumExtras = TargetAlign / 4;
1357218885Sdim      SmallVector<unsigned, 2> Extras;
1358218885Sdim      while (NumExtras && !UnspilledCS1GPRs.empty()) {
1359218885Sdim        unsigned Reg = UnspilledCS1GPRs.back();
1360218885Sdim        UnspilledCS1GPRs.pop_back();
1361245431Sdim        if (!MRI.isReserved(Reg) &&
1362218885Sdim            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
1363218885Sdim             Reg == ARM::LR)) {
1364218885Sdim          Extras.push_back(Reg);
1365218885Sdim          NumExtras--;
1366218885Sdim        }
1367218885Sdim      }
1368218885Sdim      // For non-Thumb1 functions, also check for hi-reg CS registers
1369218885Sdim      if (!AFI->isThumb1OnlyFunction()) {
1370218885Sdim        while (NumExtras && !UnspilledCS2GPRs.empty()) {
1371218885Sdim          unsigned Reg = UnspilledCS2GPRs.back();
1372218885Sdim          UnspilledCS2GPRs.pop_back();
1373245431Sdim          if (!MRI.isReserved(Reg)) {
1374218885Sdim            Extras.push_back(Reg);
1375218885Sdim            NumExtras--;
1376218885Sdim          }
1377218885Sdim        }
1378218885Sdim      }
1379218885Sdim      if (Extras.size() && NumExtras == 0) {
1380218885Sdim        for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
1381245431Sdim          MRI.setPhysRegUsed(Extras[i]);
1382218885Sdim        }
1383218885Sdim      } else if (!AFI->isThumb1OnlyFunction()) {
1384218885Sdim        // note: Thumb1 functions spill to R12, not the stack.  Reserve a slot
1385218885Sdim        // closest to SP or frame pointer.
1386245431Sdim        const TargetRegisterClass *RC = &ARM::GPRRegClass;
1387252723Sdim        RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
1388218885Sdim                                                           RC->getAlignment(),
1389218885Sdim                                                           false));
1390218885Sdim      }
1391218885Sdim    }
1392218885Sdim  }
1393218885Sdim
1394218885Sdim  if (ForceLRSpill) {
1395245431Sdim    MRI.setPhysRegUsed(ARM::LR);
1396218885Sdim    AFI->setLRIsSpilledForFarJump(true);
1397218885Sdim  }
1398218885Sdim}
1399252723Sdim
1400252723Sdim
1401252723Sdimvoid ARMFrameLowering::
1402252723SdimeliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
1403252723Sdim                              MachineBasicBlock::iterator I) const {
1404252723Sdim  const ARMBaseInstrInfo &TII =
1405252723Sdim    *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
1406252723Sdim  if (!hasReservedCallFrame(MF)) {
1407252723Sdim    // If we have alloca, convert as follows:
1408252723Sdim    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
1409252723Sdim    // ADJCALLSTACKUP   -> add, sp, sp, amount
1410252723Sdim    MachineInstr *Old = I;
1411252723Sdim    DebugLoc dl = Old->getDebugLoc();
1412252723Sdim    unsigned Amount = Old->getOperand(0).getImm();
1413252723Sdim    if (Amount != 0) {
1414252723Sdim      // We need to keep the stack aligned properly.  To do this, we round the
1415252723Sdim      // amount of space needed for the outgoing arguments up to the next
1416252723Sdim      // alignment boundary.
1417252723Sdim      unsigned Align = getStackAlignment();
1418252723Sdim      Amount = (Amount+Align-1)/Align*Align;
1419252723Sdim
1420252723Sdim      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1421252723Sdim      assert(!AFI->isThumb1OnlyFunction() &&
1422252723Sdim             "This eliminateCallFramePseudoInstr does not support Thumb1!");
1423252723Sdim      bool isARM = !AFI->isThumbFunction();
1424252723Sdim
1425252723Sdim      // Replace the pseudo instruction with a new instruction...
1426252723Sdim      unsigned Opc = Old->getOpcode();
1427252723Sdim      int PIdx = Old->findFirstPredOperandIdx();
1428252723Sdim      ARMCC::CondCodes Pred = (PIdx == -1)
1429252723Sdim        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
1430252723Sdim      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
1431252723Sdim        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
1432252723Sdim        unsigned PredReg = Old->getOperand(2).getReg();
1433252723Sdim        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
1434252723Sdim                     Pred, PredReg);
1435252723Sdim      } else {
1436252723Sdim        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
1437252723Sdim        unsigned PredReg = Old->getOperand(3).getReg();
1438252723Sdim        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
1439252723Sdim        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
1440252723Sdim                     Pred, PredReg);
1441252723Sdim      }
1442252723Sdim    }
1443252723Sdim  }
1444252723Sdim  MBB.erase(I);
1445252723Sdim}
1446252723Sdim
1447