1235633Sdim//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// 2218885Sdim// 3218885Sdim// The LLVM Compiler Infrastructure 4218885Sdim// 5218885Sdim// This file is distributed under the University of Illinois Open Source 6218885Sdim// License. See LICENSE.TXT for details. 7218885Sdim// 8218885Sdim//===----------------------------------------------------------------------===// 9218885Sdim// 10218885Sdim// This file contains the ARM implementation of TargetFrameLowering class. 11218885Sdim// 12218885Sdim//===----------------------------------------------------------------------===// 13218885Sdim 14218885Sdim#include "ARMFrameLowering.h" 15218885Sdim#include "ARMBaseInstrInfo.h" 16218885Sdim#include "ARMBaseRegisterInfo.h" 17218885Sdim#include "ARMMachineFunctionInfo.h" 18235633Sdim#include "MCTargetDesc/ARMAddressingModes.h" 19218885Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 20218885Sdim#include "llvm/CodeGen/MachineFunction.h" 21218885Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 22218885Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 23218885Sdim#include "llvm/CodeGen/RegisterScavenging.h" 24252723Sdim#include "llvm/IR/CallingConv.h" 25252723Sdim#include "llvm/IR/Function.h" 26252723Sdim#include "llvm/Support/CommandLine.h" 27218885Sdim#include "llvm/Target/TargetOptions.h" 28218885Sdim 29218885Sdimusing namespace llvm; 30218885Sdim 31235633Sdimstatic cl::opt<bool> 32235633SdimSpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), 33235633Sdim cl::desc("Align ARM NEON spills in prolog and epilog")); 34235633Sdim 35235633Sdimstatic MachineBasicBlock::iterator 36235633SdimskipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, 37235633Sdim unsigned NumAlignedDPRCS2Regs); 38235633Sdim 39218885Sdim/// hasFP - Return true if the specified function should have a dedicated frame 40218885Sdim/// pointer register. This is true if the function has variable sized allocas 41218885Sdim/// or if frame pointer elimination is disabled. 42218885Sdimbool ARMFrameLowering::hasFP(const MachineFunction &MF) const { 43218885Sdim const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); 44218885Sdim 45235633Sdim // iOS requires FP not to be clobbered for backtracing purpose. 46235633Sdim if (STI.isTargetIOS()) 47218885Sdim return true; 48218885Sdim 49218885Sdim const MachineFrameInfo *MFI = MF.getFrameInfo(); 50218885Sdim // Always eliminate non-leaf frame pointers. 51235633Sdim return ((MF.getTarget().Options.DisableFramePointerElim(MF) && 52235633Sdim MFI->hasCalls()) || 53218885Sdim RegInfo->needsStackRealignment(MF) || 54218885Sdim MFI->hasVarSizedObjects() || 55218885Sdim MFI->isFrameAddressTaken()); 56218885Sdim} 57218885Sdim 58218885Sdim/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is 59218885Sdim/// not required, we reserve argument space for call sites in the function 60218885Sdim/// immediately on entry to the current function. This eliminates the need for 61218885Sdim/// add/sub sp brackets around call sites. Returns true if the call frame is 62218885Sdim/// included as part of the stack frame. 63218885Sdimbool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 64218885Sdim const MachineFrameInfo *FFI = MF.getFrameInfo(); 65218885Sdim unsigned CFSize = FFI->getMaxCallFrameSize(); 66218885Sdim // It's not always a good idea to include the call frame as part of the 67218885Sdim // stack frame. ARM (especially Thumb) has small immediate offset to 68218885Sdim // address the stack frame. So a large call frame can cause poor codegen 69218885Sdim // and may even makes it impossible to scavenge a register. 70218885Sdim if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 71218885Sdim return false; 72218885Sdim 73218885Sdim return !MF.getFrameInfo()->hasVarSizedObjects(); 74218885Sdim} 75218885Sdim 76218885Sdim/// canSimplifyCallFramePseudos - If there is a reserved call frame, the 77218885Sdim/// call frame pseudos can be simplified. Unlike most targets, having a FP 78218885Sdim/// is not sufficient here since we still may reference some objects via SP 79218885Sdim/// even when FP is available in Thumb2 mode. 80218885Sdimbool 81218885SdimARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { 82218885Sdim return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); 83218885Sdim} 84218885Sdim 85218885Sdimstatic bool isCSRestore(MachineInstr *MI, 86218885Sdim const ARMBaseInstrInfo &TII, 87235633Sdim const uint16_t *CSRegs) { 88218885Sdim // Integer spill area is handled with "pop". 89263509Sdim if (isPopOpcode(MI->getOpcode())) { 90218885Sdim // The first two operands are predicates. The last two are 91218885Sdim // imp-def and imp-use of SP. Check everything in between. 92218885Sdim for (int i = 5, e = MI->getNumOperands(); i != e; ++i) 93218885Sdim if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) 94218885Sdim return false; 95218885Sdim return true; 96218885Sdim } 97226890Sdim if ((MI->getOpcode() == ARM::LDR_POST_IMM || 98226890Sdim MI->getOpcode() == ARM::LDR_POST_REG || 99218885Sdim MI->getOpcode() == ARM::t2LDR_POST) && 100218885Sdim isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && 101218885Sdim MI->getOperand(1).getReg() == ARM::SP) 102218885Sdim return true; 103218885Sdim 104218885Sdim return false; 105218885Sdim} 106218885Sdim 107263509Sdimstatic void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, 108263509Sdim MachineBasicBlock::iterator &MBBI, DebugLoc dl, 109263509Sdim const ARMBaseInstrInfo &TII, unsigned DestReg, 110263509Sdim unsigned SrcReg, int NumBytes, 111263509Sdim unsigned MIFlags = MachineInstr::NoFlags, 112263509Sdim ARMCC::CondCodes Pred = ARMCC::AL, 113263509Sdim unsigned PredReg = 0) { 114218885Sdim if (isARM) 115263509Sdim emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, 116252723Sdim Pred, PredReg, TII, MIFlags); 117218885Sdim else 118263509Sdim emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, 119252723Sdim Pred, PredReg, TII, MIFlags); 120218885Sdim} 121218885Sdim 122263509Sdimstatic void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, 123263509Sdim MachineBasicBlock::iterator &MBBI, DebugLoc dl, 124263509Sdim const ARMBaseInstrInfo &TII, int NumBytes, 125263509Sdim unsigned MIFlags = MachineInstr::NoFlags, 126263509Sdim ARMCC::CondCodes Pred = ARMCC::AL, 127263509Sdim unsigned PredReg = 0) { 128263509Sdim emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes, 129263509Sdim MIFlags, Pred, PredReg); 130263509Sdim} 131263509Sdim 132218885Sdimvoid ARMFrameLowering::emitPrologue(MachineFunction &MF) const { 133218885Sdim MachineBasicBlock &MBB = MF.front(); 134218885Sdim MachineBasicBlock::iterator MBBI = MBB.begin(); 135218885Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 136218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 137218885Sdim const ARMBaseRegisterInfo *RegInfo = 138218885Sdim static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); 139218885Sdim const ARMBaseInstrInfo &TII = 140218885Sdim *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); 141218885Sdim assert(!AFI->isThumb1OnlyFunction() && 142218885Sdim "This emitPrologue does not support Thumb1!"); 143218885Sdim bool isARM = !AFI->isThumbFunction(); 144263509Sdim unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 145263509Sdim unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); 146218885Sdim unsigned NumBytes = MFI->getStackSize(); 147218885Sdim const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 148218885Sdim DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); 149218885Sdim unsigned FramePtr = RegInfo->getFrameRegister(MF); 150218885Sdim 151218885Sdim // Determine the sizes of each callee-save spill areas and record which frame 152218885Sdim // belongs to which callee-save spill areas. 153218885Sdim unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; 154218885Sdim int FramePtrSpillFI = 0; 155235633Sdim int D8SpillFI = 0; 156218885Sdim 157245431Sdim // All calls are tail calls in GHC calling conv, and functions have no 158245431Sdim // prologue/epilogue. 159245431Sdim if (MF.getFunction()->getCallingConv() == CallingConv::GHC) 160245431Sdim return; 161245431Sdim 162218885Sdim // Allocate the vararg register save area. This is not counted in NumBytes. 163252723Sdim if (ArgRegsSaveSize) 164252723Sdim emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, 165221345Sdim MachineInstr::FrameSetup); 166218885Sdim 167218885Sdim if (!AFI->hasStackFrame()) { 168218885Sdim if (NumBytes != 0) 169221345Sdim emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, 170221345Sdim MachineInstr::FrameSetup); 171218885Sdim return; 172218885Sdim } 173218885Sdim 174218885Sdim for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 175218885Sdim unsigned Reg = CSI[i].getReg(); 176218885Sdim int FI = CSI[i].getFrameIdx(); 177218885Sdim switch (Reg) { 178263509Sdim case ARM::R0: 179263509Sdim case ARM::R1: 180263509Sdim case ARM::R2: 181263509Sdim case ARM::R3: 182218885Sdim case ARM::R4: 183218885Sdim case ARM::R5: 184218885Sdim case ARM::R6: 185218885Sdim case ARM::R7: 186218885Sdim case ARM::LR: 187218885Sdim if (Reg == FramePtr) 188218885Sdim FramePtrSpillFI = FI; 189218885Sdim GPRCS1Size += 4; 190218885Sdim break; 191218885Sdim case ARM::R8: 192218885Sdim case ARM::R9: 193218885Sdim case ARM::R10: 194218885Sdim case ARM::R11: 195263509Sdim case ARM::R12: 196218885Sdim if (Reg == FramePtr) 197218885Sdim FramePtrSpillFI = FI; 198263509Sdim if (STI.isTargetIOS()) 199218885Sdim GPRCS2Size += 4; 200263509Sdim else 201218885Sdim GPRCS1Size += 4; 202218885Sdim break; 203218885Sdim default: 204235633Sdim // This is a DPR. Exclude the aligned DPRCS2 spills. 205235633Sdim if (Reg == ARM::D8) 206235633Sdim D8SpillFI = FI; 207263509Sdim if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) 208235633Sdim DPRCSSize += 8; 209218885Sdim } 210218885Sdim } 211218885Sdim 212218885Sdim // Move past area 1. 213263509Sdim MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush; 214263509Sdim if (GPRCS1Size > 0) 215263509Sdim FramePtrPush = LastPush = MBBI++; 216218885Sdim 217263509Sdim // Determine starting offsets of spill areas. 218218885Sdim bool HasFP = hasFP(MF); 219218885Sdim unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); 220218885Sdim unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; 221218885Sdim unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; 222263509Sdim int FramePtrOffsetInPush = 0; 223263509Sdim if (HasFP) { 224263509Sdim FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; 225218885Sdim AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + 226218885Sdim NumBytes); 227263509Sdim } 228218885Sdim AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); 229218885Sdim AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); 230218885Sdim AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); 231218885Sdim 232263509Sdim // Move past area 2. 233263509Sdim if (GPRCS2Size > 0) { 234263509Sdim LastPush = MBBI++; 235263509Sdim } 236263509Sdim 237218885Sdim // Move past area 3. 238219077Sdim if (DPRCSSize > 0) { 239263509Sdim LastPush = MBBI++; 240219077Sdim // Since vpush register list cannot have gaps, there may be multiple vpush 241219077Sdim // instructions in the prologue. 242219077Sdim while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) 243263509Sdim LastPush = MBBI++; 244219077Sdim } 245218885Sdim 246235633Sdim // Move past the aligned DPRCS2 area. 247235633Sdim if (AFI->getNumAlignedDPRCS2Regs() > 0) { 248235633Sdim MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); 249235633Sdim // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and 250235633Sdim // leaves the stack pointer pointing to the DPRCS2 area. 251235633Sdim // 252235633Sdim // Adjust NumBytes to represent the stack slots below the DPRCS2 area. 253235633Sdim NumBytes += MFI->getObjectOffset(D8SpillFI); 254235633Sdim } else 255235633Sdim NumBytes = DPRCSOffset; 256235633Sdim 257218885Sdim if (NumBytes) { 258218885Sdim // Adjust SP after all the callee-save spills. 259263509Sdim if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) { 260263509Sdim if (LastPush == FramePtrPush) 261263509Sdim FramePtrOffsetInPush += NumBytes; 262263509Sdim } else 263263509Sdim emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, 264263509Sdim MachineInstr::FrameSetup); 265263509Sdim 266218885Sdim if (HasFP && isARM) 267218885Sdim // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 268218885Sdim // Note it's not safe to do this in Thumb2 mode because it would have 269218885Sdim // taken two instructions: 270218885Sdim // mov sp, r7 271218885Sdim // sub sp, #24 272218885Sdim // If an interrupt is taken between the two instructions, then sp is in 273218885Sdim // an inconsistent state (pointing to the middle of callee-saved area). 274218885Sdim // The interrupt handler can end up clobbering the registers. 275218885Sdim AFI->setShouldRestoreSPFromFP(true); 276218885Sdim } 277218885Sdim 278263509Sdim // Set FP to point to the stack slot that contains the previous FP. 279263509Sdim // For iOS, FP is R7, which has now been stored in spill area 1. 280263509Sdim // Otherwise, if this is not iOS, all the callee-saved registers go 281263509Sdim // into spill area 1, including the FP in R11. In either case, it 282263509Sdim // is in area one and the adjustment needs to take place just after 283263509Sdim // that push. 284263509Sdim if (HasFP) 285263509Sdim emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII, 286263509Sdim FramePtr, ARM::SP, FramePtrOffsetInPush, 287263509Sdim MachineInstr::FrameSetup); 288263509Sdim 289263509Sdim 290218885Sdim if (STI.isTargetELF() && hasFP(MF)) 291218885Sdim MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - 292218885Sdim AFI->getFramePtrSpillOffset()); 293218885Sdim 294218885Sdim AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); 295218885Sdim AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); 296218885Sdim AFI->setDPRCalleeSavedAreaSize(DPRCSSize); 297218885Sdim 298218885Sdim // If we need dynamic stack realignment, do it here. Be paranoid and make 299218885Sdim // sure if we also have VLAs, we have a base pointer for frame access. 300235633Sdim // If aligned NEON registers were spilled, the stack has already been 301235633Sdim // realigned. 302235633Sdim if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { 303218885Sdim unsigned MaxAlign = MFI->getMaxAlignment(); 304218885Sdim assert (!AFI->isThumb1OnlyFunction()); 305218885Sdim if (!AFI->isThumbFunction()) { 306218885Sdim // Emit bic sp, sp, MaxAlign 307218885Sdim AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, 308218885Sdim TII.get(ARM::BICri), ARM::SP) 309218885Sdim .addReg(ARM::SP, RegState::Kill) 310218885Sdim .addImm(MaxAlign-1))); 311218885Sdim } else { 312218885Sdim // We cannot use sp as source/dest register here, thus we're emitting the 313218885Sdim // following sequence: 314218885Sdim // mov r4, sp 315218885Sdim // bic r4, r4, MaxAlign 316218885Sdim // mov sp, r4 317218885Sdim // FIXME: It will be better just to find spare register here. 318224145Sdim AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) 319224145Sdim .addReg(ARM::SP, RegState::Kill)); 320218885Sdim AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, 321218885Sdim TII.get(ARM::t2BICri), ARM::R4) 322218885Sdim .addReg(ARM::R4, RegState::Kill) 323218885Sdim .addImm(MaxAlign-1))); 324224145Sdim AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 325224145Sdim .addReg(ARM::R4, RegState::Kill)); 326218885Sdim } 327218885Sdim 328218885Sdim AFI->setShouldRestoreSPFromFP(true); 329218885Sdim } 330218885Sdim 331218885Sdim // If we need a base pointer, set it up here. It's whatever the value 332218885Sdim // of the stack pointer is at this point. Any variable size objects 333218885Sdim // will be allocated after this, so we can still use the base pointer 334218885Sdim // to reference locals. 335221345Sdim // FIXME: Clarify FrameSetup flags here. 336218885Sdim if (RegInfo->hasBasePointer(MF)) { 337218885Sdim if (isARM) 338218885Sdim BuildMI(MBB, MBBI, dl, 339218885Sdim TII.get(ARM::MOVr), RegInfo->getBaseRegister()) 340218885Sdim .addReg(ARM::SP) 341218885Sdim .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 342218885Sdim else 343224145Sdim AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), 344224145Sdim RegInfo->getBaseRegister()) 345224145Sdim .addReg(ARM::SP)); 346218885Sdim } 347218885Sdim 348218885Sdim // If the frame has variable sized objects then the epilogue must restore 349218885Sdim // the sp from fp. We can assume there's an FP here since hasFP already 350218885Sdim // checks for hasVarSizedObjects. 351218885Sdim if (MFI->hasVarSizedObjects()) 352218885Sdim AFI->setShouldRestoreSPFromFP(true); 353218885Sdim} 354218885Sdim 355218885Sdimvoid ARMFrameLowering::emitEpilogue(MachineFunction &MF, 356218885Sdim MachineBasicBlock &MBB) const { 357218885Sdim MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 358235633Sdim assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); 359218885Sdim unsigned RetOpcode = MBBI->getOpcode(); 360218885Sdim DebugLoc dl = MBBI->getDebugLoc(); 361218885Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 362218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 363218885Sdim const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); 364218885Sdim const ARMBaseInstrInfo &TII = 365218885Sdim *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); 366218885Sdim assert(!AFI->isThumb1OnlyFunction() && 367218885Sdim "This emitEpilogue does not support Thumb1!"); 368218885Sdim bool isARM = !AFI->isThumbFunction(); 369218885Sdim 370263509Sdim unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 371263509Sdim unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); 372218885Sdim int NumBytes = (int)MFI->getStackSize(); 373218885Sdim unsigned FramePtr = RegInfo->getFrameRegister(MF); 374218885Sdim 375245431Sdim // All calls are tail calls in GHC calling conv, and functions have no 376245431Sdim // prologue/epilogue. 377245431Sdim if (MF.getFunction()->getCallingConv() == CallingConv::GHC) 378245431Sdim return; 379245431Sdim 380218885Sdim if (!AFI->hasStackFrame()) { 381218885Sdim if (NumBytes != 0) 382218885Sdim emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); 383218885Sdim } else { 384218885Sdim // Unwind MBBI to point to first LDR / VLDRD. 385263509Sdim const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 386218885Sdim if (MBBI != MBB.begin()) { 387263509Sdim do { 388218885Sdim --MBBI; 389263509Sdim } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); 390218885Sdim if (!isCSRestore(MBBI, TII, CSRegs)) 391218885Sdim ++MBBI; 392218885Sdim } 393218885Sdim 394218885Sdim // Move SP to start of FP callee save spill area. 395218885Sdim NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + 396218885Sdim AFI->getGPRCalleeSavedArea2Size() + 397218885Sdim AFI->getDPRCalleeSavedAreaSize()); 398218885Sdim 399218885Sdim // Reset SP based on frame pointer only if the stack frame extends beyond 400218885Sdim // frame pointer stack slot or target is ELF and the function has FP. 401218885Sdim if (AFI->shouldRestoreSPFromFP()) { 402218885Sdim NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; 403218885Sdim if (NumBytes) { 404218885Sdim if (isARM) 405218885Sdim emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, 406218885Sdim ARMCC::AL, 0, TII); 407218885Sdim else { 408218885Sdim // It's not possible to restore SP from FP in a single instruction. 409235633Sdim // For iOS, this looks like: 410218885Sdim // mov sp, r7 411218885Sdim // sub sp, #24 412218885Sdim // This is bad, if an interrupt is taken after the mov, sp is in an 413218885Sdim // inconsistent state. 414218885Sdim // Use the first callee-saved register as a scratch register. 415218885Sdim assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && 416218885Sdim "No scratch register to restore SP from FP!"); 417218885Sdim emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, 418218885Sdim ARMCC::AL, 0, TII); 419224145Sdim AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), 420224145Sdim ARM::SP) 421224145Sdim .addReg(ARM::R4)); 422218885Sdim } 423218885Sdim } else { 424218885Sdim // Thumb2 or ARM. 425218885Sdim if (isARM) 426218885Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) 427218885Sdim .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 428218885Sdim else 429224145Sdim AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), 430224145Sdim ARM::SP) 431224145Sdim .addReg(FramePtr)); 432218885Sdim } 433263509Sdim } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes)) 434263509Sdim emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); 435218885Sdim 436218885Sdim // Increment past our save areas. 437219077Sdim if (AFI->getDPRCalleeSavedAreaSize()) { 438219077Sdim MBBI++; 439219077Sdim // Since vpop register list cannot have gaps, there may be multiple vpop 440219077Sdim // instructions in the epilogue. 441219077Sdim while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) 442219077Sdim MBBI++; 443219077Sdim } 444218885Sdim if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; 445218885Sdim if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; 446218885Sdim } 447218885Sdim 448235633Sdim if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) { 449218885Sdim // Tail call return: adjust the stack pointer and jump to callee. 450218885Sdim MBBI = MBB.getLastNonDebugInstr(); 451218885Sdim MachineOperand &JumpTarget = MBBI->getOperand(0); 452218885Sdim 453218885Sdim // Jump to label or value in register. 454235633Sdim if (RetOpcode == ARM::TCRETURNdi) { 455235633Sdim unsigned TCOpcode = STI.isThumb() ? 456235633Sdim (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : 457235633Sdim ARM::TAILJMPd; 458218885Sdim MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); 459218885Sdim if (JumpTarget.isGlobal()) 460218885Sdim MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 461218885Sdim JumpTarget.getTargetFlags()); 462218885Sdim else { 463218885Sdim assert(JumpTarget.isSymbol()); 464218885Sdim MIB.addExternalSymbol(JumpTarget.getSymbolName(), 465218885Sdim JumpTarget.getTargetFlags()); 466218885Sdim } 467226890Sdim 468226890Sdim // Add the default predicate in Thumb mode. 469226890Sdim if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0); 470218885Sdim } else if (RetOpcode == ARM::TCRETURNri) { 471221345Sdim BuildMI(MBB, MBBI, dl, 472221345Sdim TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). 473218885Sdim addReg(JumpTarget.getReg(), RegState::Kill); 474218885Sdim } 475218885Sdim 476218885Sdim MachineInstr *NewMI = prior(MBBI); 477218885Sdim for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) 478218885Sdim NewMI->addOperand(MBBI->getOperand(i)); 479218885Sdim 480218885Sdim // Delete the pseudo instruction TCRETURN. 481218885Sdim MBB.erase(MBBI); 482224145Sdim MBBI = NewMI; 483218885Sdim } 484218885Sdim 485252723Sdim if (ArgRegsSaveSize) 486252723Sdim emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize); 487218885Sdim} 488218885Sdim 489218885Sdim/// getFrameIndexReference - Provide a base+offset reference to an FI slot for 490218885Sdim/// debug info. It's the same as what we use for resolving the code-gen 491218885Sdim/// references for now. FIXME: This can go wrong when references are 492218885Sdim/// SP-relative and simple call frames aren't used. 493218885Sdimint 494218885SdimARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, 495218885Sdim unsigned &FrameReg) const { 496218885Sdim return ResolveFrameIndexReference(MF, FI, FrameReg, 0); 497218885Sdim} 498218885Sdim 499218885Sdimint 500218885SdimARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, 501221345Sdim int FI, unsigned &FrameReg, 502218885Sdim int SPAdj) const { 503218885Sdim const MachineFrameInfo *MFI = MF.getFrameInfo(); 504218885Sdim const ARMBaseRegisterInfo *RegInfo = 505218885Sdim static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); 506218885Sdim const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 507218885Sdim int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); 508218885Sdim int FPOffset = Offset - AFI->getFramePtrSpillOffset(); 509218885Sdim bool isFixed = MFI->isFixedObjectIndex(FI); 510218885Sdim 511218885Sdim FrameReg = ARM::SP; 512218885Sdim Offset += SPAdj; 513218885Sdim 514235633Sdim // SP can move around if there are allocas. We may also lose track of SP 515235633Sdim // when emergency spilling inside a non-reserved call frame setup. 516235633Sdim bool hasMovingSP = !hasReservedCallFrame(MF); 517235633Sdim 518218885Sdim // When dynamically realigning the stack, use the frame pointer for 519218885Sdim // parameters, and the stack/base pointer for locals. 520218885Sdim if (RegInfo->needsStackRealignment(MF)) { 521218885Sdim assert (hasFP(MF) && "dynamic stack realignment without a FP!"); 522218885Sdim if (isFixed) { 523218885Sdim FrameReg = RegInfo->getFrameRegister(MF); 524218885Sdim Offset = FPOffset; 525235633Sdim } else if (hasMovingSP) { 526218885Sdim assert(RegInfo->hasBasePointer(MF) && 527218885Sdim "VLAs and dynamic stack alignment, but missing base pointer!"); 528218885Sdim FrameReg = RegInfo->getBaseRegister(); 529218885Sdim } 530218885Sdim return Offset; 531218885Sdim } 532218885Sdim 533218885Sdim // If there is a frame pointer, use it when we can. 534218885Sdim if (hasFP(MF) && AFI->hasStackFrame()) { 535218885Sdim // Use frame pointer to reference fixed objects. Use it for locals if 536218885Sdim // there are VLAs (and thus the SP isn't reliable as a base). 537235633Sdim if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) { 538218885Sdim FrameReg = RegInfo->getFrameRegister(MF); 539218885Sdim return FPOffset; 540235633Sdim } else if (hasMovingSP) { 541218885Sdim assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); 542218885Sdim if (AFI->isThumb2Function()) { 543221345Sdim // Try to use the frame pointer if we can, else use the base pointer 544221345Sdim // since it's available. This is handy for the emergency spill slot, in 545221345Sdim // particular. 546218885Sdim if (FPOffset >= -255 && FPOffset < 0) { 547218885Sdim FrameReg = RegInfo->getFrameRegister(MF); 548218885Sdim return FPOffset; 549218885Sdim } 550221345Sdim } 551218885Sdim } else if (AFI->isThumb2Function()) { 552226890Sdim // Use add <rd>, sp, #<imm8> 553221345Sdim // ldr <rd>, [sp, #<imm8>] 554221345Sdim // if at all possible to save space. 555221345Sdim if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020) 556221345Sdim return Offset; 557218885Sdim // In Thumb2 mode, the negative offset is very limited. Try to avoid 558221345Sdim // out of range references. ldr <rt>,[<rn>, #-<imm8>] 559218885Sdim if (FPOffset >= -255 && FPOffset < 0) { 560218885Sdim FrameReg = RegInfo->getFrameRegister(MF); 561218885Sdim return FPOffset; 562218885Sdim } 563218885Sdim } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { 564218885Sdim // Otherwise, use SP or FP, whichever is closer to the stack slot. 565218885Sdim FrameReg = RegInfo->getFrameRegister(MF); 566218885Sdim return FPOffset; 567218885Sdim } 568218885Sdim } 569218885Sdim // Use the base pointer if we have one. 570218885Sdim if (RegInfo->hasBasePointer(MF)) 571218885Sdim FrameReg = RegInfo->getBaseRegister(); 572218885Sdim return Offset; 573218885Sdim} 574218885Sdim 575218885Sdimint ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, 576218885Sdim int FI) const { 577218885Sdim unsigned FrameReg; 578218885Sdim return getFrameIndexReference(MF, FI, FrameReg); 579218885Sdim} 580218885Sdim 581218885Sdimvoid ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, 582218885Sdim MachineBasicBlock::iterator MI, 583218885Sdim const std::vector<CalleeSavedInfo> &CSI, 584218885Sdim unsigned StmOpc, unsigned StrOpc, 585218885Sdim bool NoGap, 586221345Sdim bool(*Func)(unsigned, bool), 587235633Sdim unsigned NumAlignedDPRCS2Regs, 588221345Sdim unsigned MIFlags) const { 589218885Sdim MachineFunction &MF = *MBB.getParent(); 590218885Sdim const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 591218885Sdim 592218885Sdim DebugLoc DL; 593218885Sdim if (MI != MBB.end()) DL = MI->getDebugLoc(); 594218885Sdim 595218885Sdim SmallVector<std::pair<unsigned,bool>, 4> Regs; 596218885Sdim unsigned i = CSI.size(); 597218885Sdim while (i != 0) { 598218885Sdim unsigned LastReg = 0; 599218885Sdim for (; i != 0; --i) { 600218885Sdim unsigned Reg = CSI[i-1].getReg(); 601235633Sdim if (!(Func)(Reg, STI.isTargetIOS())) continue; 602218885Sdim 603235633Sdim // D-registers in the aligned area DPRCS2 are NOT spilled here. 604235633Sdim if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) 605235633Sdim continue; 606235633Sdim 607218885Sdim // Add the callee-saved register as live-in unless it's LR and 608218885Sdim // @llvm.returnaddress is called. If LR is returned for 609218885Sdim // @llvm.returnaddress then it's already added to the function and 610218885Sdim // entry block live-in sets. 611218885Sdim bool isKill = true; 612218885Sdim if (Reg == ARM::LR) { 613218885Sdim if (MF.getFrameInfo()->isReturnAddressTaken() && 614218885Sdim MF.getRegInfo().isLiveIn(Reg)) 615218885Sdim isKill = false; 616218885Sdim } 617218885Sdim 618218885Sdim if (isKill) 619218885Sdim MBB.addLiveIn(Reg); 620218885Sdim 621218885Sdim // If NoGap is true, push consecutive registers and then leave the rest 622218885Sdim // for other instructions. e.g. 623218885Sdim // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} 624218885Sdim if (NoGap && LastReg && LastReg != Reg-1) 625218885Sdim break; 626218885Sdim LastReg = Reg; 627218885Sdim Regs.push_back(std::make_pair(Reg, isKill)); 628218885Sdim } 629218885Sdim 630218885Sdim if (Regs.empty()) 631218885Sdim continue; 632218885Sdim if (Regs.size() > 1 || StrOpc== 0) { 633218885Sdim MachineInstrBuilder MIB = 634218885Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) 635221345Sdim .addReg(ARM::SP).setMIFlags(MIFlags)); 636218885Sdim for (unsigned i = 0, e = Regs.size(); i < e; ++i) 637218885Sdim MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); 638218885Sdim } else if (Regs.size() == 1) { 639218885Sdim MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), 640218885Sdim ARM::SP) 641218885Sdim .addReg(Regs[0].first, getKillRegState(Regs[0].second)) 642226890Sdim .addReg(ARM::SP).setMIFlags(MIFlags) 643226890Sdim .addImm(-4); 644218885Sdim AddDefaultPred(MIB); 645218885Sdim } 646218885Sdim Regs.clear(); 647218885Sdim } 648218885Sdim} 649218885Sdim 650218885Sdimvoid ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, 651218885Sdim MachineBasicBlock::iterator MI, 652218885Sdim const std::vector<CalleeSavedInfo> &CSI, 653218885Sdim unsigned LdmOpc, unsigned LdrOpc, 654218885Sdim bool isVarArg, bool NoGap, 655235633Sdim bool(*Func)(unsigned, bool), 656235633Sdim unsigned NumAlignedDPRCS2Regs) const { 657218885Sdim MachineFunction &MF = *MBB.getParent(); 658218885Sdim const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 659218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 660218885Sdim DebugLoc DL = MI->getDebugLoc(); 661218885Sdim unsigned RetOpcode = MI->getOpcode(); 662218885Sdim bool isTailCall = (RetOpcode == ARM::TCRETURNdi || 663235633Sdim RetOpcode == ARM::TCRETURNri); 664263509Sdim bool isInterrupt = 665263509Sdim RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; 666218885Sdim 667218885Sdim SmallVector<unsigned, 4> Regs; 668218885Sdim unsigned i = CSI.size(); 669218885Sdim while (i != 0) { 670218885Sdim unsigned LastReg = 0; 671218885Sdim bool DeleteRet = false; 672218885Sdim for (; i != 0; --i) { 673218885Sdim unsigned Reg = CSI[i-1].getReg(); 674235633Sdim if (!(Func)(Reg, STI.isTargetIOS())) continue; 675218885Sdim 676235633Sdim // The aligned reloads from area DPRCS2 are not inserted here. 677235633Sdim if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) 678235633Sdim continue; 679235633Sdim 680263509Sdim if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && 681263509Sdim STI.hasV5TOps()) { 682218885Sdim Reg = ARM::PC; 683218885Sdim LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; 684218885Sdim // Fold the return instruction into the LDM. 685218885Sdim DeleteRet = true; 686218885Sdim } 687218885Sdim 688218885Sdim // If NoGap is true, pop consecutive registers and then leave the rest 689218885Sdim // for other instructions. e.g. 690218885Sdim // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} 691218885Sdim if (NoGap && LastReg && LastReg != Reg-1) 692218885Sdim break; 693218885Sdim 694218885Sdim LastReg = Reg; 695218885Sdim Regs.push_back(Reg); 696218885Sdim } 697218885Sdim 698218885Sdim if (Regs.empty()) 699218885Sdim continue; 700218885Sdim if (Regs.size() > 1 || LdrOpc == 0) { 701218885Sdim MachineInstrBuilder MIB = 702218885Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) 703218885Sdim .addReg(ARM::SP)); 704218885Sdim for (unsigned i = 0, e = Regs.size(); i < e; ++i) 705218885Sdim MIB.addReg(Regs[i], getDefRegState(true)); 706226890Sdim if (DeleteRet) { 707252723Sdim MIB.copyImplicitOps(&*MI); 708218885Sdim MI->eraseFromParent(); 709226890Sdim } 710218885Sdim MI = MIB; 711218885Sdim } else if (Regs.size() == 1) { 712218885Sdim // If we adjusted the reg to PC from LR above, switch it back here. We 713218885Sdim // only do that for LDM. 714218885Sdim if (Regs[0] == ARM::PC) 715218885Sdim Regs[0] = ARM::LR; 716218885Sdim MachineInstrBuilder MIB = 717218885Sdim BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) 718218885Sdim .addReg(ARM::SP, RegState::Define) 719218885Sdim .addReg(ARM::SP); 720218885Sdim // ARM mode needs an extra reg0 here due to addrmode2. Will go away once 721218885Sdim // that refactoring is complete (eventually). 722226890Sdim if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) { 723218885Sdim MIB.addReg(0); 724218885Sdim MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); 725218885Sdim } else 726218885Sdim MIB.addImm(4); 727218885Sdim AddDefaultPred(MIB); 728218885Sdim } 729218885Sdim Regs.clear(); 730218885Sdim } 731218885Sdim} 732218885Sdim 733235633Sdim/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers 734235633Sdim/// starting from d8. Also insert stack realignment code and leave the stack 735235633Sdim/// pointer pointing to the d8 spill slot. 736235633Sdimstatic void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, 737235633Sdim MachineBasicBlock::iterator MI, 738235633Sdim unsigned NumAlignedDPRCS2Regs, 739235633Sdim const std::vector<CalleeSavedInfo> &CSI, 740235633Sdim const TargetRegisterInfo *TRI) { 741235633Sdim MachineFunction &MF = *MBB.getParent(); 742235633Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 743235633Sdim DebugLoc DL = MI->getDebugLoc(); 744235633Sdim const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 745235633Sdim MachineFrameInfo &MFI = *MF.getFrameInfo(); 746235633Sdim 747235633Sdim // Mark the D-register spill slots as properly aligned. Since MFI computes 748235633Sdim // stack slot layout backwards, this can actually mean that the d-reg stack 749235633Sdim // slot offsets can be wrong. The offset for d8 will always be correct. 750235633Sdim for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 751235633Sdim unsigned DNum = CSI[i].getReg() - ARM::D8; 752235633Sdim if (DNum >= 8) 753235633Sdim continue; 754235633Sdim int FI = CSI[i].getFrameIdx(); 755235633Sdim // The even-numbered registers will be 16-byte aligned, the odd-numbered 756235633Sdim // registers will be 8-byte aligned. 757235633Sdim MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16); 758235633Sdim 759235633Sdim // The stack slot for D8 needs to be maximally aligned because this is 760235633Sdim // actually the point where we align the stack pointer. MachineFrameInfo 761235633Sdim // computes all offsets relative to the incoming stack pointer which is a 762235633Sdim // bit weird when realigning the stack. Any extra padding for this 763235633Sdim // over-alignment is not realized because the code inserted below adjusts 764235633Sdim // the stack pointer by numregs * 8 before aligning the stack pointer. 765235633Sdim if (DNum == 0) 766235633Sdim MFI.setObjectAlignment(FI, MFI.getMaxAlignment()); 767235633Sdim } 768235633Sdim 769235633Sdim // Move the stack pointer to the d8 spill slot, and align it at the same 770235633Sdim // time. Leave the stack slot address in the scratch register r4. 771235633Sdim // 772235633Sdim // sub r4, sp, #numregs * 8 773235633Sdim // bic r4, r4, #align - 1 774235633Sdim // mov sp, r4 775235633Sdim // 776235633Sdim bool isThumb = AFI->isThumbFunction(); 777235633Sdim assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); 778235633Sdim AFI->setShouldRestoreSPFromFP(true); 779235633Sdim 780235633Sdim // sub r4, sp, #numregs * 8 781235633Sdim // The immediate is <= 64, so it doesn't need any special encoding. 782235633Sdim unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; 783235633Sdim AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) 784235633Sdim .addReg(ARM::SP) 785235633Sdim .addImm(8 * NumAlignedDPRCS2Regs))); 786235633Sdim 787235633Sdim // bic r4, r4, #align-1 788235633Sdim Opc = isThumb ? ARM::t2BICri : ARM::BICri; 789235633Sdim unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); 790235633Sdim AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) 791235633Sdim .addReg(ARM::R4, RegState::Kill) 792235633Sdim .addImm(MaxAlign - 1))); 793235633Sdim 794235633Sdim // mov sp, r4 795235633Sdim // The stack pointer must be adjusted before spilling anything, otherwise 796235633Sdim // the stack slots could be clobbered by an interrupt handler. 797235633Sdim // Leave r4 live, it is used below. 798235633Sdim Opc = isThumb ? ARM::tMOVr : ARM::MOVr; 799235633Sdim MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP) 800235633Sdim .addReg(ARM::R4); 801235633Sdim MIB = AddDefaultPred(MIB); 802235633Sdim if (!isThumb) 803235633Sdim AddDefaultCC(MIB); 804235633Sdim 805235633Sdim // Now spill NumAlignedDPRCS2Regs registers starting from d8. 806235633Sdim // r4 holds the stack slot address. 807235633Sdim unsigned NextReg = ARM::D8; 808235633Sdim 809235633Sdim // 16-byte aligned vst1.64 with 4 d-regs and address writeback. 810235633Sdim // The writeback is only needed when emitting two vst1.64 instructions. 811235633Sdim if (NumAlignedDPRCS2Regs >= 6) { 812235633Sdim unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, 813245431Sdim &ARM::QQPRRegClass); 814235633Sdim MBB.addLiveIn(SupReg); 815235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), 816235633Sdim ARM::R4) 817235633Sdim .addReg(ARM::R4, RegState::Kill).addImm(16) 818235633Sdim .addReg(NextReg) 819235633Sdim .addReg(SupReg, RegState::ImplicitKill)); 820235633Sdim NextReg += 4; 821235633Sdim NumAlignedDPRCS2Regs -= 4; 822235633Sdim } 823235633Sdim 824235633Sdim // We won't modify r4 beyond this point. It currently points to the next 825235633Sdim // register to be spilled. 826235633Sdim unsigned R4BaseReg = NextReg; 827235633Sdim 828235633Sdim // 16-byte aligned vst1.64 with 4 d-regs, no writeback. 829235633Sdim if (NumAlignedDPRCS2Regs >= 4) { 830235633Sdim unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, 831245431Sdim &ARM::QQPRRegClass); 832235633Sdim MBB.addLiveIn(SupReg); 833235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q)) 834235633Sdim .addReg(ARM::R4).addImm(16).addReg(NextReg) 835235633Sdim .addReg(SupReg, RegState::ImplicitKill)); 836235633Sdim NextReg += 4; 837235633Sdim NumAlignedDPRCS2Regs -= 4; 838235633Sdim } 839235633Sdim 840235633Sdim // 16-byte aligned vst1.64 with 2 d-regs. 841235633Sdim if (NumAlignedDPRCS2Regs >= 2) { 842235633Sdim unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, 843245431Sdim &ARM::QPRRegClass); 844235633Sdim MBB.addLiveIn(SupReg); 845235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64)) 846235633Sdim .addReg(ARM::R4).addImm(16).addReg(SupReg)); 847235633Sdim NextReg += 2; 848235633Sdim NumAlignedDPRCS2Regs -= 2; 849235633Sdim } 850235633Sdim 851235633Sdim // Finally, use a vanilla vstr.64 for the odd last register. 852235633Sdim if (NumAlignedDPRCS2Regs) { 853235633Sdim MBB.addLiveIn(NextReg); 854235633Sdim // vstr.64 uses addrmode5 which has an offset scale of 4. 855235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD)) 856235633Sdim .addReg(NextReg) 857235633Sdim .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2)); 858235633Sdim } 859235633Sdim 860235633Sdim // The last spill instruction inserted should kill the scratch register r4. 861235633Sdim llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); 862235633Sdim} 863235633Sdim 864235633Sdim/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an 865235633Sdim/// iterator to the following instruction. 866235633Sdimstatic MachineBasicBlock::iterator 867235633SdimskipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, 868235633Sdim unsigned NumAlignedDPRCS2Regs) { 869235633Sdim // sub r4, sp, #numregs * 8 870235633Sdim // bic r4, r4, #align - 1 871235633Sdim // mov sp, r4 872235633Sdim ++MI; ++MI; ++MI; 873235633Sdim assert(MI->mayStore() && "Expecting spill instruction"); 874235633Sdim 875235633Sdim // These switches all fall through. 876235633Sdim switch(NumAlignedDPRCS2Regs) { 877235633Sdim case 7: 878235633Sdim ++MI; 879235633Sdim assert(MI->mayStore() && "Expecting spill instruction"); 880235633Sdim default: 881235633Sdim ++MI; 882235633Sdim assert(MI->mayStore() && "Expecting spill instruction"); 883235633Sdim case 1: 884235633Sdim case 2: 885235633Sdim case 4: 886235633Sdim assert(MI->killsRegister(ARM::R4) && "Missed kill flag"); 887235633Sdim ++MI; 888235633Sdim } 889235633Sdim return MI; 890235633Sdim} 891235633Sdim 892235633Sdim/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers 893235633Sdim/// starting from d8. These instructions are assumed to execute while the 894235633Sdim/// stack is still aligned, unlike the code inserted by emitPopInst. 895235633Sdimstatic void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, 896235633Sdim MachineBasicBlock::iterator MI, 897235633Sdim unsigned NumAlignedDPRCS2Regs, 898235633Sdim const std::vector<CalleeSavedInfo> &CSI, 899235633Sdim const TargetRegisterInfo *TRI) { 900235633Sdim MachineFunction &MF = *MBB.getParent(); 901235633Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 902235633Sdim DebugLoc DL = MI->getDebugLoc(); 903235633Sdim const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 904235633Sdim 905235633Sdim // Find the frame index assigned to d8. 906235633Sdim int D8SpillFI = 0; 907235633Sdim for (unsigned i = 0, e = CSI.size(); i != e; ++i) 908235633Sdim if (CSI[i].getReg() == ARM::D8) { 909235633Sdim D8SpillFI = CSI[i].getFrameIdx(); 910235633Sdim break; 911235633Sdim } 912235633Sdim 913235633Sdim // Materialize the address of the d8 spill slot into the scratch register r4. 914235633Sdim // This can be fairly complicated if the stack frame is large, so just use 915235633Sdim // the normal frame index elimination mechanism to do it. This code runs as 916235633Sdim // the initial part of the epilog where the stack and base pointers haven't 917235633Sdim // been changed yet. 918235633Sdim bool isThumb = AFI->isThumbFunction(); 919235633Sdim assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1"); 920235633Sdim 921235633Sdim unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; 922235633Sdim AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) 923235633Sdim .addFrameIndex(D8SpillFI).addImm(0))); 924235633Sdim 925235633Sdim // Now restore NumAlignedDPRCS2Regs registers starting from d8. 926235633Sdim unsigned NextReg = ARM::D8; 927235633Sdim 928235633Sdim // 16-byte aligned vld1.64 with 4 d-regs and writeback. 929235633Sdim if (NumAlignedDPRCS2Regs >= 6) { 930235633Sdim unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, 931245431Sdim &ARM::QQPRRegClass); 932235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg) 933235633Sdim .addReg(ARM::R4, RegState::Define) 934235633Sdim .addReg(ARM::R4, RegState::Kill).addImm(16) 935235633Sdim .addReg(SupReg, RegState::ImplicitDefine)); 936235633Sdim NextReg += 4; 937235633Sdim NumAlignedDPRCS2Regs -= 4; 938235633Sdim } 939235633Sdim 940235633Sdim // We won't modify r4 beyond this point. It currently points to the next 941235633Sdim // register to be spilled. 942235633Sdim unsigned R4BaseReg = NextReg; 943235633Sdim 944235633Sdim // 16-byte aligned vld1.64 with 4 d-regs, no writeback. 945235633Sdim if (NumAlignedDPRCS2Regs >= 4) { 946235633Sdim unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, 947245431Sdim &ARM::QQPRRegClass); 948235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg) 949235633Sdim .addReg(ARM::R4).addImm(16) 950235633Sdim .addReg(SupReg, RegState::ImplicitDefine)); 951235633Sdim NextReg += 4; 952235633Sdim NumAlignedDPRCS2Regs -= 4; 953235633Sdim } 954235633Sdim 955235633Sdim // 16-byte aligned vld1.64 with 2 d-regs. 956235633Sdim if (NumAlignedDPRCS2Regs >= 2) { 957235633Sdim unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, 958245431Sdim &ARM::QPRRegClass); 959235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg) 960235633Sdim .addReg(ARM::R4).addImm(16)); 961235633Sdim NextReg += 2; 962235633Sdim NumAlignedDPRCS2Regs -= 2; 963235633Sdim } 964235633Sdim 965235633Sdim // Finally, use a vanilla vldr.64 for the remaining odd register. 966235633Sdim if (NumAlignedDPRCS2Regs) 967235633Sdim AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg) 968235633Sdim .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg))); 969235633Sdim 970235633Sdim // Last store kills r4. 971235633Sdim llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI); 972235633Sdim} 973235633Sdim 974218885Sdimbool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 975218885Sdim MachineBasicBlock::iterator MI, 976218885Sdim const std::vector<CalleeSavedInfo> &CSI, 977218885Sdim const TargetRegisterInfo *TRI) const { 978218885Sdim if (CSI.empty()) 979218885Sdim return false; 980218885Sdim 981218885Sdim MachineFunction &MF = *MBB.getParent(); 982218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 983218885Sdim 984218885Sdim unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; 985226890Sdim unsigned PushOneOpc = AFI->isThumbFunction() ? 986226890Sdim ARM::t2STR_PRE : ARM::STR_PRE_IMM; 987218885Sdim unsigned FltOpc = ARM::VSTMDDB_UPD; 988235633Sdim unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); 989235633Sdim emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, 990221345Sdim MachineInstr::FrameSetup); 991235633Sdim emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, 992221345Sdim MachineInstr::FrameSetup); 993221345Sdim emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, 994235633Sdim NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); 995218885Sdim 996235633Sdim // The code above does not insert spill code for the aligned DPRCS2 registers. 997235633Sdim // The stack realignment code will be inserted between the push instructions 998235633Sdim // and these spills. 999235633Sdim if (NumAlignedDPRCS2Regs) 1000235633Sdim emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); 1001235633Sdim 1002218885Sdim return true; 1003218885Sdim} 1004218885Sdim 1005218885Sdimbool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 1006218885Sdim MachineBasicBlock::iterator MI, 1007218885Sdim const std::vector<CalleeSavedInfo> &CSI, 1008218885Sdim const TargetRegisterInfo *TRI) const { 1009218885Sdim if (CSI.empty()) 1010218885Sdim return false; 1011218885Sdim 1012218885Sdim MachineFunction &MF = *MBB.getParent(); 1013218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1014252723Sdim bool isVarArg = AFI->getArgRegsSaveSize() > 0; 1015235633Sdim unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); 1016218885Sdim 1017235633Sdim // The emitPopInst calls below do not insert reloads for the aligned DPRCS2 1018235633Sdim // registers. Do that here instead. 1019235633Sdim if (NumAlignedDPRCS2Regs) 1020235633Sdim emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); 1021235633Sdim 1022218885Sdim unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; 1023226890Sdim unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; 1024218885Sdim unsigned FltOpc = ARM::VLDMDIA_UPD; 1025235633Sdim emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, 1026235633Sdim NumAlignedDPRCS2Regs); 1027218885Sdim emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, 1028235633Sdim &isARMArea2Register, 0); 1029218885Sdim emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, 1030235633Sdim &isARMArea1Register, 0); 1031218885Sdim 1032218885Sdim return true; 1033218885Sdim} 1034218885Sdim 1035218885Sdim// FIXME: Make generic? 1036218885Sdimstatic unsigned GetFunctionSizeInBytes(const MachineFunction &MF, 1037218885Sdim const ARMBaseInstrInfo &TII) { 1038218885Sdim unsigned FnSize = 0; 1039218885Sdim for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); 1040218885Sdim MBBI != E; ++MBBI) { 1041218885Sdim const MachineBasicBlock &MBB = *MBBI; 1042218885Sdim for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); 1043218885Sdim I != E; ++I) 1044218885Sdim FnSize += TII.GetInstSizeInBytes(I); 1045218885Sdim } 1046218885Sdim return FnSize; 1047218885Sdim} 1048218885Sdim 1049218885Sdim/// estimateRSStackSizeLimit - Look at each instruction that references stack 1050218885Sdim/// frames and return the stack size limit beyond which some of these 1051218885Sdim/// instructions will require a scratch register during their expansion later. 1052218885Sdim// FIXME: Move to TII? 1053218885Sdimstatic unsigned estimateRSStackSizeLimit(MachineFunction &MF, 1054218885Sdim const TargetFrameLowering *TFI) { 1055218885Sdim const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1056218885Sdim unsigned Limit = (1 << 12) - 1; 1057218885Sdim for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { 1058218885Sdim for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); 1059218885Sdim I != E; ++I) { 1060218885Sdim for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 1061218885Sdim if (!I->getOperand(i).isFI()) continue; 1062218885Sdim 1063218885Sdim // When using ADDri to get the address of a stack object, 255 is the 1064218885Sdim // largest offset guaranteed to fit in the immediate offset. 1065218885Sdim if (I->getOpcode() == ARM::ADDri) { 1066218885Sdim Limit = std::min(Limit, (1U << 8) - 1); 1067218885Sdim break; 1068218885Sdim } 1069218885Sdim 1070218885Sdim // Otherwise check the addressing mode. 1071218885Sdim switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { 1072218885Sdim case ARMII::AddrMode3: 1073218885Sdim case ARMII::AddrModeT2_i8: 1074218885Sdim Limit = std::min(Limit, (1U << 8) - 1); 1075218885Sdim break; 1076218885Sdim case ARMII::AddrMode5: 1077218885Sdim case ARMII::AddrModeT2_i8s4: 1078218885Sdim Limit = std::min(Limit, ((1U << 8) - 1) * 4); 1079218885Sdim break; 1080218885Sdim case ARMII::AddrModeT2_i12: 1081218885Sdim // i12 supports only positive offset so these will be converted to 1082218885Sdim // i8 opcodes. See llvm::rewriteT2FrameIndex. 1083218885Sdim if (TFI->hasFP(MF) && AFI->hasStackFrame()) 1084218885Sdim Limit = std::min(Limit, (1U << 8) - 1); 1085218885Sdim break; 1086218885Sdim case ARMII::AddrMode4: 1087218885Sdim case ARMII::AddrMode6: 1088218885Sdim // Addressing modes 4 & 6 (load/store) instructions can't encode an 1089218885Sdim // immediate offset for stack references. 1090218885Sdim return 0; 1091218885Sdim default: 1092218885Sdim break; 1093218885Sdim } 1094218885Sdim break; // At most one FI per instruction 1095218885Sdim } 1096218885Sdim } 1097218885Sdim } 1098218885Sdim 1099218885Sdim return Limit; 1100218885Sdim} 1101218885Sdim 1102235633Sdim// In functions that realign the stack, it can be an advantage to spill the 1103235633Sdim// callee-saved vector registers after realigning the stack. The vst1 and vld1 1104235633Sdim// instructions take alignment hints that can improve performance. 1105235633Sdim// 1106235633Sdimstatic void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { 1107235633Sdim MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); 1108235633Sdim if (!SpillAlignedNEONRegs) 1109235633Sdim return; 1110235633Sdim 1111235633Sdim // Naked functions don't spill callee-saved registers. 1112252723Sdim if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, 1113252723Sdim Attribute::Naked)) 1114235633Sdim return; 1115235633Sdim 1116235633Sdim // We are planning to use NEON instructions vst1 / vld1. 1117235633Sdim if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON()) 1118235633Sdim return; 1119235633Sdim 1120235633Sdim // Don't bother if the default stack alignment is sufficiently high. 1121235633Sdim if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8) 1122235633Sdim return; 1123235633Sdim 1124235633Sdim // Aligned spills require stack realignment. 1125235633Sdim const ARMBaseRegisterInfo *RegInfo = 1126235633Sdim static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); 1127235633Sdim if (!RegInfo->canRealignStack(MF)) 1128235633Sdim return; 1129235633Sdim 1130235633Sdim // We always spill contiguous d-registers starting from d8. Count how many 1131235633Sdim // needs spilling. The register allocator will almost always use the 1132235633Sdim // callee-saved registers in order, but it can happen that there are holes in 1133235633Sdim // the range. Registers above the hole will be spilled to the standard DPRCS 1134235633Sdim // area. 1135235633Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1136235633Sdim unsigned NumSpills = 0; 1137235633Sdim for (; NumSpills < 8; ++NumSpills) 1138245431Sdim if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) 1139235633Sdim break; 1140235633Sdim 1141235633Sdim // Don't do this for just one d-register. It's not worth it. 1142235633Sdim if (NumSpills < 2) 1143235633Sdim return; 1144235633Sdim 1145235633Sdim // Spill the first NumSpills D-registers after realigning the stack. 1146235633Sdim MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); 1147235633Sdim 1148235633Sdim // A scratch register is required for the vst1 / vld1 instructions. 1149235633Sdim MF.getRegInfo().setPhysRegUsed(ARM::R4); 1150235633Sdim} 1151235633Sdim 1152218885Sdimvoid 1153218885SdimARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, 1154218885Sdim RegScavenger *RS) const { 1155218885Sdim // This tells PEI to spill the FP as if it is any other callee-save register 1156218885Sdim // to take advantage the eliminateFrameIndex machinery. This also ensures it 1157218885Sdim // is spilled in the order specified by getCalleeSavedRegs() to make it easier 1158218885Sdim // to combine multiple loads / stores. 1159218885Sdim bool CanEliminateFrame = true; 1160218885Sdim bool CS1Spilled = false; 1161218885Sdim bool LRSpilled = false; 1162218885Sdim unsigned NumGPRSpills = 0; 1163218885Sdim SmallVector<unsigned, 4> UnspilledCS1GPRs; 1164218885Sdim SmallVector<unsigned, 4> UnspilledCS2GPRs; 1165218885Sdim const ARMBaseRegisterInfo *RegInfo = 1166218885Sdim static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); 1167218885Sdim const ARMBaseInstrInfo &TII = 1168218885Sdim *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); 1169218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1170218885Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 1171245431Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1172218885Sdim unsigned FramePtr = RegInfo->getFrameRegister(MF); 1173218885Sdim 1174218885Sdim // Spill R4 if Thumb2 function requires stack realignment - it will be used as 1175218885Sdim // scratch register. Also spill R4 if Thumb2 function has varsized objects, 1176218885Sdim // since it's not always possible to restore sp from fp in a single 1177218885Sdim // instruction. 1178218885Sdim // FIXME: It will be better just to find spare register here. 1179218885Sdim if (AFI->isThumb2Function() && 1180218885Sdim (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) 1181245431Sdim MRI.setPhysRegUsed(ARM::R4); 1182218885Sdim 1183218885Sdim if (AFI->isThumb1OnlyFunction()) { 1184218885Sdim // Spill LR if Thumb1 function uses variable length argument lists. 1185252723Sdim if (AFI->getArgRegsSaveSize() > 0) 1186245431Sdim MRI.setPhysRegUsed(ARM::LR); 1187218885Sdim 1188224145Sdim // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know 1189224145Sdim // for sure what the stack size will be, but for this, an estimate is good 1190224145Sdim // enough. If there anything changes it, it'll be a spill, which implies 1191224145Sdim // we've used all the registers and so R4 is already used, so not marking 1192224145Sdim // it here will be OK. 1193218885Sdim // FIXME: It will be better just to find spare register here. 1194252723Sdim unsigned StackSize = MFI->estimateStackSize(MF); 1195224145Sdim if (MFI->hasVarSizedObjects() || StackSize > 508) 1196245431Sdim MRI.setPhysRegUsed(ARM::R4); 1197218885Sdim } 1198218885Sdim 1199235633Sdim // See if we can spill vector registers to aligned stack. 1200235633Sdim checkNumAlignedDPRCS2Regs(MF); 1201235633Sdim 1202218885Sdim // Spill the BasePtr if it's used. 1203218885Sdim if (RegInfo->hasBasePointer(MF)) 1204245431Sdim MRI.setPhysRegUsed(RegInfo->getBaseRegister()); 1205218885Sdim 1206218885Sdim // Don't spill FP if the frame can be eliminated. This is determined 1207218885Sdim // by scanning the callee-save registers to see if any is used. 1208263509Sdim const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 1209218885Sdim for (unsigned i = 0; CSRegs[i]; ++i) { 1210218885Sdim unsigned Reg = CSRegs[i]; 1211218885Sdim bool Spilled = false; 1212245431Sdim if (MRI.isPhysRegUsed(Reg)) { 1213218885Sdim Spilled = true; 1214218885Sdim CanEliminateFrame = false; 1215218885Sdim } 1216218885Sdim 1217245431Sdim if (!ARM::GPRRegClass.contains(Reg)) 1218218885Sdim continue; 1219218885Sdim 1220218885Sdim if (Spilled) { 1221218885Sdim NumGPRSpills++; 1222218885Sdim 1223235633Sdim if (!STI.isTargetIOS()) { 1224218885Sdim if (Reg == ARM::LR) 1225218885Sdim LRSpilled = true; 1226218885Sdim CS1Spilled = true; 1227218885Sdim continue; 1228218885Sdim } 1229218885Sdim 1230218885Sdim // Keep track if LR and any of R4, R5, R6, and R7 is spilled. 1231218885Sdim switch (Reg) { 1232218885Sdim case ARM::LR: 1233218885Sdim LRSpilled = true; 1234218885Sdim // Fallthrough 1235263509Sdim case ARM::R0: case ARM::R1: 1236263509Sdim case ARM::R2: case ARM::R3: 1237218885Sdim case ARM::R4: case ARM::R5: 1238218885Sdim case ARM::R6: case ARM::R7: 1239218885Sdim CS1Spilled = true; 1240218885Sdim break; 1241218885Sdim default: 1242218885Sdim break; 1243218885Sdim } 1244218885Sdim } else { 1245235633Sdim if (!STI.isTargetIOS()) { 1246218885Sdim UnspilledCS1GPRs.push_back(Reg); 1247218885Sdim continue; 1248218885Sdim } 1249218885Sdim 1250218885Sdim switch (Reg) { 1251263509Sdim case ARM::R0: case ARM::R1: 1252263509Sdim case ARM::R2: case ARM::R3: 1253218885Sdim case ARM::R4: case ARM::R5: 1254218885Sdim case ARM::R6: case ARM::R7: 1255218885Sdim case ARM::LR: 1256218885Sdim UnspilledCS1GPRs.push_back(Reg); 1257218885Sdim break; 1258218885Sdim default: 1259218885Sdim UnspilledCS2GPRs.push_back(Reg); 1260218885Sdim break; 1261218885Sdim } 1262218885Sdim } 1263218885Sdim } 1264218885Sdim 1265218885Sdim bool ForceLRSpill = false; 1266218885Sdim if (!LRSpilled && AFI->isThumb1OnlyFunction()) { 1267218885Sdim unsigned FnSize = GetFunctionSizeInBytes(MF, TII); 1268218885Sdim // Force LR to be spilled if the Thumb function size is > 2048. This enables 1269218885Sdim // use of BL to implement far jump. If it turns out that it's not needed 1270218885Sdim // then the branch fix up path will undo it. 1271218885Sdim if (FnSize >= (1 << 11)) { 1272218885Sdim CanEliminateFrame = false; 1273218885Sdim ForceLRSpill = true; 1274218885Sdim } 1275218885Sdim } 1276218885Sdim 1277218885Sdim // If any of the stack slot references may be out of range of an immediate 1278218885Sdim // offset, make sure a register (or a spill slot) is available for the 1279218885Sdim // register scavenger. Note that if we're indexing off the frame pointer, the 1280218885Sdim // effective stack size is 4 bytes larger since the FP points to the stack 1281218885Sdim // slot of the previous FP. Also, if we have variable sized objects in the 1282218885Sdim // function, stack slot references will often be negative, and some of 1283218885Sdim // our instructions are positive-offset only, so conservatively consider 1284218885Sdim // that case to want a spill slot (or register) as well. Similarly, if 1285218885Sdim // the function adjusts the stack pointer during execution and the 1286218885Sdim // adjustments aren't already part of our stack size estimate, our offset 1287218885Sdim // calculations may be off, so be conservative. 1288218885Sdim // FIXME: We could add logic to be more precise about negative offsets 1289218885Sdim // and which instructions will need a scratch register for them. Is it 1290218885Sdim // worth the effort and added fragility? 1291218885Sdim bool BigStack = 1292218885Sdim (RS && 1293252723Sdim (MFI->estimateStackSize(MF) + 1294252723Sdim ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= 1295218885Sdim estimateRSStackSizeLimit(MF, this))) 1296218885Sdim || MFI->hasVarSizedObjects() 1297218885Sdim || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); 1298218885Sdim 1299218885Sdim bool ExtraCSSpill = false; 1300218885Sdim if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { 1301218885Sdim AFI->setHasStackFrame(true); 1302218885Sdim 1303218885Sdim // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. 1304218885Sdim // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 1305218885Sdim if (!LRSpilled && CS1Spilled) { 1306245431Sdim MRI.setPhysRegUsed(ARM::LR); 1307218885Sdim NumGPRSpills++; 1308263509Sdim SmallVectorImpl<unsigned>::iterator LRPos; 1309263509Sdim LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), 1310263509Sdim (unsigned)ARM::LR); 1311263509Sdim if (LRPos != UnspilledCS1GPRs.end()) 1312263509Sdim UnspilledCS1GPRs.erase(LRPos); 1313263509Sdim 1314218885Sdim ForceLRSpill = false; 1315218885Sdim ExtraCSSpill = true; 1316218885Sdim } 1317218885Sdim 1318218885Sdim if (hasFP(MF)) { 1319245431Sdim MRI.setPhysRegUsed(FramePtr); 1320218885Sdim NumGPRSpills++; 1321218885Sdim } 1322218885Sdim 1323218885Sdim // If stack and double are 8-byte aligned and we are spilling an odd number 1324218885Sdim // of GPRs, spill one extra callee save GPR so we won't have to pad between 1325218885Sdim // the integer and double callee save areas. 1326218885Sdim unsigned TargetAlign = getStackAlignment(); 1327218885Sdim if (TargetAlign == 8 && (NumGPRSpills & 1)) { 1328218885Sdim if (CS1Spilled && !UnspilledCS1GPRs.empty()) { 1329218885Sdim for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { 1330218885Sdim unsigned Reg = UnspilledCS1GPRs[i]; 1331218885Sdim // Don't spill high register if the function is thumb1 1332218885Sdim if (!AFI->isThumb1OnlyFunction() || 1333218885Sdim isARMLowRegister(Reg) || Reg == ARM::LR) { 1334245431Sdim MRI.setPhysRegUsed(Reg); 1335245431Sdim if (!MRI.isReserved(Reg)) 1336218885Sdim ExtraCSSpill = true; 1337218885Sdim break; 1338218885Sdim } 1339218885Sdim } 1340218885Sdim } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { 1341218885Sdim unsigned Reg = UnspilledCS2GPRs.front(); 1342245431Sdim MRI.setPhysRegUsed(Reg); 1343245431Sdim if (!MRI.isReserved(Reg)) 1344218885Sdim ExtraCSSpill = true; 1345218885Sdim } 1346218885Sdim } 1347218885Sdim 1348218885Sdim // Estimate if we might need to scavenge a register at some point in order 1349218885Sdim // to materialize a stack offset. If so, either spill one additional 1350218885Sdim // callee-saved register or reserve a special spill slot to facilitate 1351218885Sdim // register scavenging. Thumb1 needs a spill slot for stack pointer 1352218885Sdim // adjustments also, even when the frame itself is small. 1353218885Sdim if (BigStack && !ExtraCSSpill) { 1354218885Sdim // If any non-reserved CS register isn't spilled, just spill one or two 1355218885Sdim // extra. That should take care of it! 1356218885Sdim unsigned NumExtras = TargetAlign / 4; 1357218885Sdim SmallVector<unsigned, 2> Extras; 1358218885Sdim while (NumExtras && !UnspilledCS1GPRs.empty()) { 1359218885Sdim unsigned Reg = UnspilledCS1GPRs.back(); 1360218885Sdim UnspilledCS1GPRs.pop_back(); 1361245431Sdim if (!MRI.isReserved(Reg) && 1362218885Sdim (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || 1363218885Sdim Reg == ARM::LR)) { 1364218885Sdim Extras.push_back(Reg); 1365218885Sdim NumExtras--; 1366218885Sdim } 1367218885Sdim } 1368218885Sdim // For non-Thumb1 functions, also check for hi-reg CS registers 1369218885Sdim if (!AFI->isThumb1OnlyFunction()) { 1370218885Sdim while (NumExtras && !UnspilledCS2GPRs.empty()) { 1371218885Sdim unsigned Reg = UnspilledCS2GPRs.back(); 1372218885Sdim UnspilledCS2GPRs.pop_back(); 1373245431Sdim if (!MRI.isReserved(Reg)) { 1374218885Sdim Extras.push_back(Reg); 1375218885Sdim NumExtras--; 1376218885Sdim } 1377218885Sdim } 1378218885Sdim } 1379218885Sdim if (Extras.size() && NumExtras == 0) { 1380218885Sdim for (unsigned i = 0, e = Extras.size(); i != e; ++i) { 1381245431Sdim MRI.setPhysRegUsed(Extras[i]); 1382218885Sdim } 1383218885Sdim } else if (!AFI->isThumb1OnlyFunction()) { 1384218885Sdim // note: Thumb1 functions spill to R12, not the stack. Reserve a slot 1385218885Sdim // closest to SP or frame pointer. 1386245431Sdim const TargetRegisterClass *RC = &ARM::GPRRegClass; 1387252723Sdim RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), 1388218885Sdim RC->getAlignment(), 1389218885Sdim false)); 1390218885Sdim } 1391218885Sdim } 1392218885Sdim } 1393218885Sdim 1394218885Sdim if (ForceLRSpill) { 1395245431Sdim MRI.setPhysRegUsed(ARM::LR); 1396218885Sdim AFI->setLRIsSpilledForFarJump(true); 1397218885Sdim } 1398218885Sdim} 1399252723Sdim 1400252723Sdim 1401252723Sdimvoid ARMFrameLowering:: 1402252723SdimeliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 1403252723Sdim MachineBasicBlock::iterator I) const { 1404252723Sdim const ARMBaseInstrInfo &TII = 1405252723Sdim *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); 1406252723Sdim if (!hasReservedCallFrame(MF)) { 1407252723Sdim // If we have alloca, convert as follows: 1408252723Sdim // ADJCALLSTACKDOWN -> sub, sp, sp, amount 1409252723Sdim // ADJCALLSTACKUP -> add, sp, sp, amount 1410252723Sdim MachineInstr *Old = I; 1411252723Sdim DebugLoc dl = Old->getDebugLoc(); 1412252723Sdim unsigned Amount = Old->getOperand(0).getImm(); 1413252723Sdim if (Amount != 0) { 1414252723Sdim // We need to keep the stack aligned properly. To do this, we round the 1415252723Sdim // amount of space needed for the outgoing arguments up to the next 1416252723Sdim // alignment boundary. 1417252723Sdim unsigned Align = getStackAlignment(); 1418252723Sdim Amount = (Amount+Align-1)/Align*Align; 1419252723Sdim 1420252723Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1421252723Sdim assert(!AFI->isThumb1OnlyFunction() && 1422252723Sdim "This eliminateCallFramePseudoInstr does not support Thumb1!"); 1423252723Sdim bool isARM = !AFI->isThumbFunction(); 1424252723Sdim 1425252723Sdim // Replace the pseudo instruction with a new instruction... 1426252723Sdim unsigned Opc = Old->getOpcode(); 1427252723Sdim int PIdx = Old->findFirstPredOperandIdx(); 1428252723Sdim ARMCC::CondCodes Pred = (PIdx == -1) 1429252723Sdim ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); 1430252723Sdim if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { 1431252723Sdim // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. 1432252723Sdim unsigned PredReg = Old->getOperand(2).getReg(); 1433252723Sdim emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags, 1434252723Sdim Pred, PredReg); 1435252723Sdim } else { 1436252723Sdim // Note: PredReg is operand 3 for ADJCALLSTACKUP. 1437252723Sdim unsigned PredReg = Old->getOperand(3).getReg(); 1438252723Sdim assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); 1439252723Sdim emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags, 1440252723Sdim Pred, PredReg); 1441252723Sdim } 1442252723Sdim } 1443252723Sdim } 1444252723Sdim MBB.erase(I); 1445252723Sdim} 1446252723Sdim 1447