1327952Sdim//===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// 2218885Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6218885Sdim// 7218885Sdim//===----------------------------------------------------------------------===// 8218885Sdim// 9218885Sdim// This file contains the Thumb1 implementation of TargetFrameLowering class. 10218885Sdim// 11218885Sdim//===----------------------------------------------------------------------===// 12218885Sdim 13218885Sdim#include "Thumb1FrameLowering.h" 14321369Sdim#include "ARMBaseInstrInfo.h" 15321369Sdim#include "ARMBaseRegisterInfo.h" 16218885Sdim#include "ARMMachineFunctionInfo.h" 17321369Sdim#include "ARMSubtarget.h" 18321369Sdim#include "Thumb1InstrInfo.h" 19321369Sdim#include "ThumbRegisterInfo.h" 20327952Sdim#include "Utils/ARMBaseInfo.h" 21321369Sdim#include "llvm/ADT/BitVector.h" 22321369Sdim#include "llvm/ADT/STLExtras.h" 23321369Sdim#include "llvm/ADT/SmallVector.h" 24296417Sdim#include "llvm/CodeGen/LivePhysRegs.h" 25321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 26218885Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 27218885Sdim#include "llvm/CodeGen/MachineFunction.h" 28321369Sdim#include "llvm/CodeGen/MachineInstr.h" 29218885Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 30276479Sdim#include "llvm/CodeGen/MachineModuleInfo.h" 31321369Sdim#include "llvm/CodeGen/MachineOperand.h" 32218885Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 33327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 34327952Sdim#include "llvm/CodeGen/TargetOpcodes.h" 35327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 36321369Sdim#include "llvm/IR/DebugLoc.h" 37327952Sdim#include "llvm/MC/MCContext.h" 38321369Sdim#include "llvm/MC/MCDwarf.h" 39327952Sdim#include "llvm/MC/MCRegisterInfo.h" 40321369Sdim#include 
"llvm/Support/Compiler.h" 41321369Sdim#include "llvm/Support/ErrorHandling.h" 42327952Sdim#include "llvm/Support/MathExtras.h" 43327952Sdim#include <bitset> 44321369Sdim#include <cassert> 45321369Sdim#include <iterator> 46321369Sdim#include <vector> 47218885Sdim 48218885Sdimusing namespace llvm; 49218885Sdim 50276479SdimThumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) 51276479Sdim : ARMFrameLowering(sti) {} 52276479Sdim 53226633Sdimbool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ 54314564Sdim const MachineFrameInfo &MFI = MF.getFrameInfo(); 55314564Sdim unsigned CFSize = MFI.getMaxCallFrameSize(); 56218885Sdim // It's not always a good idea to include the call frame as part of the 57218885Sdim // stack frame. ARM (especially Thumb) has small immediate offset to 58218885Sdim // address the stack frame. So a large call frame can cause poor codegen 59218885Sdim // and may even makes it impossible to scavenge a register. 60218885Sdim if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 61218885Sdim return false; 62218885Sdim 63314564Sdim return !MFI.hasVarSizedObjects(); 64218885Sdim} 65218885Sdim 66353358Sdimstatic void 67353358SdimemitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, 68353358Sdim MachineBasicBlock::iterator &MBBI, 69353358Sdim const TargetInstrInfo &TII, const DebugLoc &dl, 70353358Sdim const ThumbRegisterInfo &MRI, int NumBytes, 71353358Sdim unsigned ScratchReg, unsigned MIFlags) { 72353358Sdim // If it would take more than three instructions to adjust the stack pointer 73353358Sdim // using tADDspi/tSUBspi, load an immediate instead. 74353358Sdim if (std::abs(NumBytes) > 508 * 3) { 75353358Sdim // We use a different codepath here from the normal 76353358Sdim // emitThumbRegPlusImmediate so we don't have to deal with register 77353358Sdim // scavenging. (Scavenging could try to use the emergency spill slot 78353358Sdim // before we've actually finished setting up the stack.) 
79353358Sdim if (ScratchReg == ARM::NoRegister) 80353358Sdim report_fatal_error("Failed to emit Thumb1 stack adjustment"); 81353358Sdim MachineFunction &MF = *MBB.getParent(); 82353358Sdim const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); 83353358Sdim if (ST.genExecuteOnly()) { 84353358Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg) 85353358Sdim .addImm(NumBytes).setMIFlags(MIFlags); 86353358Sdim } else { 87353358Sdim MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL, 88353358Sdim 0, MIFlags); 89353358Sdim } 90353358Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) 91353358Sdim .addReg(ARM::SP).addReg(ScratchReg, RegState::Kill) 92353358Sdim .add(predOps(ARMCC::AL)); 93353358Sdim return; 94353358Sdim } 95353358Sdim // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate 96353358Sdim // won't change. 97221345Sdim emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 98221345Sdim MRI, MIFlags); 99353358Sdim 100218885Sdim} 101218885Sdim 102353358Sdimstatic void emitCallSPUpdate(MachineBasicBlock &MBB, 103353358Sdim MachineBasicBlock::iterator &MBBI, 104353358Sdim const TargetInstrInfo &TII, const DebugLoc &dl, 105353358Sdim const ThumbRegisterInfo &MRI, int NumBytes, 106353358Sdim unsigned MIFlags = MachineInstr::NoFlags) { 107353358Sdim emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 108353358Sdim MRI, MIFlags); 109353358Sdim} 110353358Sdim 111353358Sdim 112309124SdimMachineBasicBlock::iterator Thumb1FrameLowering:: 113249423SdimeliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 114249423Sdim MachineBasicBlock::iterator I) const { 115249423Sdim const Thumb1InstrInfo &TII = 116288943Sdim *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 117288943Sdim const ThumbRegisterInfo *RegInfo = 118288943Sdim static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 119249423Sdim if (!hasReservedCallFrame(MF)) { 
120249423Sdim // If we have alloca, convert as follows: 121249423Sdim // ADJCALLSTACKDOWN -> sub, sp, sp, amount 122249423Sdim // ADJCALLSTACKUP -> add, sp, sp, amount 123309124Sdim MachineInstr &Old = *I; 124309124Sdim DebugLoc dl = Old.getDebugLoc(); 125321369Sdim unsigned Amount = TII.getFrameSize(Old); 126249423Sdim if (Amount != 0) { 127249423Sdim // We need to keep the stack aligned properly. To do this, we round the 128249423Sdim // amount of space needed for the outgoing arguments up to the next 129249423Sdim // alignment boundary. 130321369Sdim Amount = alignTo(Amount, getStackAlignment()); 131249423Sdim 132249423Sdim // Replace the pseudo instruction with a new instruction... 133309124Sdim unsigned Opc = Old.getOpcode(); 134249423Sdim if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { 135353358Sdim emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); 136249423Sdim } else { 137249423Sdim assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); 138353358Sdim emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); 139249423Sdim } 140249423Sdim } 141249423Sdim } 142309124Sdim return MBB.erase(I); 143249423Sdim} 144249423Sdim 145288943Sdimvoid Thumb1FrameLowering::emitPrologue(MachineFunction &MF, 146288943Sdim MachineBasicBlock &MBB) const { 147218885Sdim MachineBasicBlock::iterator MBBI = MBB.begin(); 148314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 149218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 150276479Sdim MachineModuleInfo &MMI = MF.getMMI(); 151276479Sdim const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 152288943Sdim const ThumbRegisterInfo *RegInfo = 153288943Sdim static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 154218885Sdim const Thumb1InstrInfo &TII = 155288943Sdim *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 156218885Sdim 157288943Sdim unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 158314564Sdim unsigned NumBytes = MFI.getStackSize(); 
159276479Sdim assert(NumBytes >= ArgRegsSaveSize && 160276479Sdim "ArgRegsSaveSize is included in NumBytes"); 161314564Sdim const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 162296417Sdim 163296417Sdim // Debug location must be unknown since the first debug location is used 164296417Sdim // to determine the end of the prologue. 165296417Sdim DebugLoc dl; 166341825Sdim 167360784Sdim Register FramePtr = RegInfo->getFrameRegister(MF); 168218885Sdim unsigned BasePtr = RegInfo->getBaseRegister(); 169276479Sdim int CFAOffset = 0; 170218885Sdim 171218885Sdim // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. 172218885Sdim NumBytes = (NumBytes + 3) & ~3; 173314564Sdim MFI.setStackSize(NumBytes); 174218885Sdim 175218885Sdim // Determine the sizes of each callee-save spill areas and record which frame 176218885Sdim // belongs to which callee-save spill areas. 177218885Sdim unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; 178218885Sdim int FramePtrSpillFI = 0; 179218885Sdim 180276479Sdim if (ArgRegsSaveSize) { 181353358Sdim emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, 182353358Sdim ARM::NoRegister, MachineInstr::FrameSetup); 183276479Sdim CFAOffset -= ArgRegsSaveSize; 184314564Sdim unsigned CFIIndex = MF.addFrameInst( 185276479Sdim MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); 186276479Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 187280031Sdim .addCFIIndex(CFIIndex) 188280031Sdim .setMIFlags(MachineInstr::FrameSetup); 189276479Sdim } 190218885Sdim 191218885Sdim if (!AFI->hasStackFrame()) { 192276479Sdim if (NumBytes - ArgRegsSaveSize != 0) { 193353358Sdim emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 194353358Sdim -(NumBytes - ArgRegsSaveSize), 195353358Sdim ARM::NoRegister, MachineInstr::FrameSetup); 196276479Sdim CFAOffset -= NumBytes - ArgRegsSaveSize; 197314564Sdim unsigned CFIIndex = MF.addFrameInst( 198276479Sdim 
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); 199276479Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 200280031Sdim .addCFIIndex(CFIIndex) 201280031Sdim .setMIFlags(MachineInstr::FrameSetup); 202276479Sdim } 203218885Sdim return; 204218885Sdim } 205218885Sdim 206218885Sdim for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 207218885Sdim unsigned Reg = CSI[i].getReg(); 208218885Sdim int FI = CSI[i].getFrameIdx(); 209218885Sdim switch (Reg) { 210276479Sdim case ARM::R8: 211276479Sdim case ARM::R9: 212276479Sdim case ARM::R10: 213276479Sdim case ARM::R11: 214314564Sdim if (STI.splitFramePushPop(MF)) { 215276479Sdim GPRCS2Size += 4; 216276479Sdim break; 217276479Sdim } 218314564Sdim LLVM_FALLTHROUGH; 219218885Sdim case ARM::R4: 220218885Sdim case ARM::R5: 221218885Sdim case ARM::R6: 222218885Sdim case ARM::R7: 223218885Sdim case ARM::LR: 224218885Sdim if (Reg == FramePtr) 225218885Sdim FramePtrSpillFI = FI; 226218885Sdim GPRCS1Size += 4; 227218885Sdim break; 228218885Sdim default: 229218885Sdim DPRCSSize += 8; 230218885Sdim } 231218885Sdim } 232218885Sdim 233218885Sdim if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { 234218885Sdim ++MBBI; 235218885Sdim } 236218885Sdim 237218885Sdim // Determine starting offsets of spill areas. 
238276479Sdim unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - (GPRCS1Size + GPRCS2Size + DPRCSSize); 239218885Sdim unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; 240218885Sdim unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; 241249423Sdim bool HasFP = hasFP(MF); 242249423Sdim if (HasFP) 243314564Sdim AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + 244249423Sdim NumBytes); 245218885Sdim AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); 246218885Sdim AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); 247218885Sdim AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); 248218885Sdim NumBytes = DPRCSOffset; 249218885Sdim 250261991Sdim int FramePtrOffsetInBlock = 0; 251276479Sdim unsigned adjustedGPRCS1Size = GPRCS1Size; 252314564Sdim if (GPRCS1Size > 0 && GPRCS2Size == 0 && 253314564Sdim tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) { 254261991Sdim FramePtrOffsetInBlock = NumBytes; 255276479Sdim adjustedGPRCS1Size += NumBytes; 256261991Sdim NumBytes = 0; 257261991Sdim } 258261991Sdim 259276479Sdim if (adjustedGPRCS1Size) { 260276479Sdim CFAOffset -= adjustedGPRCS1Size; 261314564Sdim unsigned CFIIndex = MF.addFrameInst( 262276479Sdim MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); 263276479Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 264280031Sdim .addCFIIndex(CFIIndex) 265280031Sdim .setMIFlags(MachineInstr::FrameSetup); 266276479Sdim } 267276479Sdim for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), 268276479Sdim E = CSI.end(); I != E; ++I) { 269276479Sdim unsigned Reg = I->getReg(); 270276479Sdim int FI = I->getFrameIdx(); 271276479Sdim switch (Reg) { 272276479Sdim case ARM::R8: 273276479Sdim case ARM::R9: 274276479Sdim case ARM::R10: 275276479Sdim case ARM::R11: 276276479Sdim case ARM::R12: 277314564Sdim if (STI.splitFramePushPop(MF)) 278276479Sdim break; 279321369Sdim LLVM_FALLTHROUGH; 280276479Sdim case ARM::R0: 281276479Sdim case ARM::R1: 282276479Sdim case ARM::R2: 
283276479Sdim case ARM::R3: 284276479Sdim case ARM::R4: 285276479Sdim case ARM::R5: 286276479Sdim case ARM::R6: 287276479Sdim case ARM::R7: 288276479Sdim case ARM::LR: 289314564Sdim unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 290314564Sdim nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); 291276479Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 292280031Sdim .addCFIIndex(CFIIndex) 293280031Sdim .setMIFlags(MachineInstr::FrameSetup); 294276479Sdim break; 295276479Sdim } 296276479Sdim } 297276479Sdim 298218885Sdim // Adjust FP so it point to the stack slot that contains the previous FP. 299249423Sdim if (HasFP) { 300296417Sdim FramePtrOffsetInBlock += 301314564Sdim MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; 302321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) 303321369Sdim .addReg(ARM::SP) 304321369Sdim .addImm(FramePtrOffsetInBlock / 4) 305321369Sdim .setMIFlags(MachineInstr::FrameSetup) 306321369Sdim .add(predOps(ARMCC::AL)); 307276479Sdim if(FramePtrOffsetInBlock) { 308276479Sdim CFAOffset += FramePtrOffsetInBlock; 309314564Sdim unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa( 310276479Sdim nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); 311276479Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 312280031Sdim .addCFIIndex(CFIIndex) 313280031Sdim .setMIFlags(MachineInstr::FrameSetup); 314276479Sdim } else { 315276479Sdim unsigned CFIIndex = 316314564Sdim MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( 317276479Sdim nullptr, MRI->getDwarfRegNum(FramePtr, true))); 318276479Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 319280031Sdim .addCFIIndex(CFIIndex) 320280031Sdim .setMIFlags(MachineInstr::FrameSetup); 321276479Sdim } 322224145Sdim if (NumBytes > 508) 323224145Sdim // If offset is > 508 then sp cannot be adjusted in a single instruction, 324218885Sdim // try restoring from fp 
instead. 325218885Sdim AFI->setShouldRestoreSPFromFP(true); 326218885Sdim } 327218885Sdim 328314564Sdim // Skip past the spilling of r8-r11, which could consist of multiple tPUSH 329314564Sdim // and tMOVr instructions. We don't need to add any call frame information 330314564Sdim // in-between these instructions, because they do not modify the high 331314564Sdim // registers. 332314564Sdim while (true) { 333314564Sdim MachineBasicBlock::iterator OldMBBI = MBBI; 334314564Sdim // Skip a run of tMOVr instructions 335314564Sdim while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr) 336314564Sdim MBBI++; 337314564Sdim if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { 338314564Sdim MBBI++; 339314564Sdim } else { 340314564Sdim // We have reached an instruction which is not a push, so the previous 341314564Sdim // run of tMOVr instructions (which may have been empty) was not part of 342314564Sdim // the prologue. Reset MBBI back to the last PUSH of the prologue. 343314564Sdim MBBI = OldMBBI; 344314564Sdim break; 345314564Sdim } 346314564Sdim } 347314564Sdim 348314564Sdim // Emit call frame information for the callee-saved high registers. 
349314564Sdim for (auto &I : CSI) { 350314564Sdim unsigned Reg = I.getReg(); 351314564Sdim int FI = I.getFrameIdx(); 352314564Sdim switch (Reg) { 353314564Sdim case ARM::R8: 354314564Sdim case ARM::R9: 355314564Sdim case ARM::R10: 356314564Sdim case ARM::R11: 357314564Sdim case ARM::R12: { 358314564Sdim unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 359314564Sdim nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); 360314564Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 361314564Sdim .addCFIIndex(CFIIndex) 362314564Sdim .setMIFlags(MachineInstr::FrameSetup); 363314564Sdim break; 364314564Sdim } 365314564Sdim default: 366314564Sdim break; 367314564Sdim } 368314564Sdim } 369314564Sdim 370276479Sdim if (NumBytes) { 371218885Sdim // Insert it after all the callee-save spills. 372353358Sdim // 373353358Sdim // For a large stack frame, we might need a scratch register to store 374353358Sdim // the size of the frame. We know all callee-save registers are free 375353358Sdim // at this point in the prologue, so pick one. 
376353358Sdim unsigned ScratchRegister = ARM::NoRegister; 377353358Sdim for (auto &I : CSI) { 378353358Sdim unsigned Reg = I.getReg(); 379353358Sdim if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { 380353358Sdim ScratchRegister = Reg; 381353358Sdim break; 382353358Sdim } 383353358Sdim } 384353358Sdim emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, 385353358Sdim ScratchRegister, MachineInstr::FrameSetup); 386276479Sdim if (!HasFP) { 387276479Sdim CFAOffset -= NumBytes; 388314564Sdim unsigned CFIIndex = MF.addFrameInst( 389276479Sdim MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); 390276479Sdim BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 391280031Sdim .addCFIIndex(CFIIndex) 392280031Sdim .setMIFlags(MachineInstr::FrameSetup); 393276479Sdim } 394276479Sdim } 395218885Sdim 396249423Sdim if (STI.isTargetELF() && HasFP) 397314564Sdim MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - 398314564Sdim AFI->getFramePtrSpillOffset()); 399218885Sdim 400218885Sdim AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); 401218885Sdim AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); 402218885Sdim AFI->setDPRCalleeSavedAreaSize(DPRCSSize); 403218885Sdim 404327952Sdim if (RegInfo->needsStackRealignment(MF)) { 405327952Sdim const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment()); 406327952Sdim // Emit the following sequence, using R4 as a temporary, since we cannot use 407327952Sdim // SP as a source or destination register for the shifts: 408327952Sdim // mov r4, sp 409327952Sdim // lsrs r4, r4, #NrBitsToZero 410327952Sdim // lsls r4, r4, #NrBitsToZero 411327952Sdim // mov sp, r4 412327952Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) 413327952Sdim .addReg(ARM::SP, RegState::Kill) 414327952Sdim .add(predOps(ARMCC::AL)); 415226633Sdim 416327952Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) 417327952Sdim .addDef(ARM::CPSR) 418327952Sdim .addReg(ARM::R4, RegState::Kill) 419327952Sdim 
.addImm(NrBitsToZero) 420327952Sdim .add(predOps(ARMCC::AL)); 421327952Sdim 422327952Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) 423327952Sdim .addDef(ARM::CPSR) 424327952Sdim .addReg(ARM::R4, RegState::Kill) 425327952Sdim .addImm(NrBitsToZero) 426327952Sdim .add(predOps(ARMCC::AL)); 427327952Sdim 428327952Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 429327952Sdim .addReg(ARM::R4, RegState::Kill) 430327952Sdim .add(predOps(ARMCC::AL)); 431327952Sdim 432327952Sdim AFI->setShouldRestoreSPFromFP(true); 433327952Sdim } 434327952Sdim 435218885Sdim // If we need a base pointer, set it up here. It's whatever the value 436218885Sdim // of the stack pointer is at this point. Any variable size objects 437218885Sdim // will be allocated after this, so we can still use the base pointer 438218885Sdim // to reference locals. 439218885Sdim if (RegInfo->hasBasePointer(MF)) 440321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) 441321369Sdim .addReg(ARM::SP) 442321369Sdim .add(predOps(ARMCC::AL)); 443221345Sdim 444218885Sdim // If the frame has variable sized objects then the epilogue must restore 445218885Sdim // the sp from fp. We can assume there's an FP here since hasFP already 446218885Sdim // checks for hasVarSizedObjects. 447314564Sdim if (MFI.hasVarSizedObjects()) 448218885Sdim AFI->setShouldRestoreSPFromFP(true); 449321369Sdim 450321369Sdim // In some cases, virtual registers have been introduced, e.g. by uses of 451321369Sdim // emitThumbRegPlusImmInReg. 
452321369Sdim MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs); 453218885Sdim} 454218885Sdim 455309124Sdimstatic bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) { 456309124Sdim if (MI.getOpcode() == ARM::tLDRspi && MI.getOperand(1).isFI() && 457309124Sdim isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs)) 458218885Sdim return true; 459309124Sdim else if (MI.getOpcode() == ARM::tPOP) { 460218885Sdim return true; 461314564Sdim } else if (MI.getOpcode() == ARM::tMOVr) { 462360784Sdim Register Dst = MI.getOperand(0).getReg(); 463360784Sdim Register Src = MI.getOperand(1).getReg(); 464314564Sdim return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) && 465314564Sdim ARM::hGPRRegClass.contains(Dst)); 466218885Sdim } 467218885Sdim return false; 468218885Sdim} 469218885Sdim 470218885Sdimvoid Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, 471218885Sdim MachineBasicBlock &MBB) const { 472296417Sdim MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 473296417Sdim DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); 474314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 475218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 476288943Sdim const ThumbRegisterInfo *RegInfo = 477288943Sdim static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 478218885Sdim const Thumb1InstrInfo &TII = 479288943Sdim *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 480218885Sdim 481288943Sdim unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 482314564Sdim int NumBytes = (int)MFI.getStackSize(); 483276479Sdim assert((unsigned)NumBytes >= ArgRegsSaveSize && 484276479Sdim "ArgRegsSaveSize is included in NumBytes"); 485288943Sdim const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 486360784Sdim Register FramePtr = RegInfo->getFrameRegister(MF); 487218885Sdim 488218885Sdim if (!AFI->hasStackFrame()) { 489276479Sdim if (NumBytes - ArgRegsSaveSize != 0) 490353358Sdim emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 491353358Sdim NumBytes - ArgRegsSaveSize, ARM::NoRegister, 492353358Sdim MachineInstr::NoFlags); 493218885Sdim } else { 494218885Sdim // Unwind MBBI to point to first LDR / VLDRD. 495218885Sdim if (MBBI != MBB.begin()) { 496218885Sdim do 497218885Sdim --MBBI; 498309124Sdim while (MBBI != MBB.begin() && isCSRestore(*MBBI, CSRegs)); 499309124Sdim if (!isCSRestore(*MBBI, CSRegs)) 500218885Sdim ++MBBI; 501218885Sdim } 502218885Sdim 503218885Sdim // Move SP to start of FP callee save spill area. 
504218885Sdim NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + 505218885Sdim AFI->getGPRCalleeSavedArea2Size() + 506276479Sdim AFI->getDPRCalleeSavedAreaSize() + 507276479Sdim ArgRegsSaveSize); 508218885Sdim 509218885Sdim if (AFI->shouldRestoreSPFromFP()) { 510218885Sdim NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; 511218885Sdim // Reset SP based on frame pointer only if the stack frame extends beyond 512218885Sdim // frame pointer stack slot, the target is ELF and the function has FP, or 513218885Sdim // the target uses var sized objects. 514218885Sdim if (NumBytes) { 515314564Sdim assert(!MFI.getPristineRegs(MF).test(ARM::R4) && 516218885Sdim "No scratch register to restore SP from FP!"); 517221345Sdim emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, 518221345Sdim TII, *RegInfo); 519321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 520321369Sdim .addReg(ARM::R4) 521321369Sdim .add(predOps(ARMCC::AL)); 522218885Sdim } else 523321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 524321369Sdim .addReg(FramePtr) 525321369Sdim .add(predOps(ARMCC::AL)); 526218885Sdim } else { 527353358Sdim // For a large stack frame, we might need a scratch register to store 528353358Sdim // the size of the frame. We know all callee-save registers are free 529353358Sdim // at this point in the epilogue, so pick one. 
530353358Sdim unsigned ScratchRegister = ARM::NoRegister; 531353358Sdim bool HasFP = hasFP(MF); 532353358Sdim for (auto &I : MFI.getCalleeSavedInfo()) { 533353358Sdim unsigned Reg = I.getReg(); 534353358Sdim if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { 535353358Sdim ScratchRegister = Reg; 536353358Sdim break; 537353358Sdim } 538353358Sdim } 539296417Sdim if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && 540309124Sdim &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { 541276479Sdim MachineBasicBlock::iterator PMBBI = std::prev(MBBI); 542309124Sdim if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) 543353358Sdim emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes, 544353358Sdim ScratchRegister, MachineInstr::NoFlags); 545309124Sdim } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) 546353358Sdim emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes, 547353358Sdim ScratchRegister, MachineInstr::NoFlags); 548218885Sdim } 549218885Sdim } 550218885Sdim 551296417Sdim if (needPopSpecialFixUp(MF)) { 552296417Sdim bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); 553296417Sdim (void)Done; 554296417Sdim assert(Done && "Emission of the special fixup failed!?"); 555296417Sdim } 556296417Sdim} 557296417Sdim 558296417Sdimbool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 559296417Sdim if (!needPopSpecialFixUp(*MBB.getParent())) 560296417Sdim return true; 561296417Sdim 562296417Sdim MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 563296417Sdim return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); 564296417Sdim} 565296417Sdim 566296417Sdimbool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { 567296417Sdim ARMFunctionInfo *AFI = 568296417Sdim const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); 569296417Sdim if (AFI->getArgRegsSaveSize()) 570296417Sdim return true; 571296417Sdim 
572296417Sdim // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. 573314564Sdim for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) 574280031Sdim if (CSI.getReg() == ARM::LR) 575296417Sdim return true; 576218885Sdim 577296417Sdim return false; 578296417Sdim} 579218885Sdim 580327952Sdimstatic void findTemporariesForLR(const BitVector &GPRsNoLRSP, 581327952Sdim const BitVector &PopFriendly, 582327952Sdim const LivePhysRegs &UsedRegs, unsigned &PopReg, 583327952Sdim unsigned &TmpReg) { 584327952Sdim PopReg = TmpReg = 0; 585327952Sdim for (auto Reg : GPRsNoLRSP.set_bits()) { 586327952Sdim if (!UsedRegs.contains(Reg)) { 587327952Sdim // Remember the first pop-friendly register and exit. 588327952Sdim if (PopFriendly.test(Reg)) { 589327952Sdim PopReg = Reg; 590327952Sdim TmpReg = 0; 591327952Sdim break; 592327952Sdim } 593327952Sdim // Otherwise, remember that the register will be available to 594327952Sdim // save a pop-friendly register. 595327952Sdim TmpReg = Reg; 596327952Sdim } 597327952Sdim } 598327952Sdim} 599327952Sdim 600296417Sdimbool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, 601296417Sdim bool DoIt) const { 602296417Sdim MachineFunction &MF = *MBB.getParent(); 603296417Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 604296417Sdim unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 605296417Sdim const TargetInstrInfo &TII = *STI.getInstrInfo(); 606296417Sdim const ThumbRegisterInfo *RegInfo = 607296417Sdim static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 608218885Sdim 609296417Sdim // If MBBI is a return instruction, or is a tPOP followed by a return 610296417Sdim // instruction in the successor BB, we may be able to directly restore 611296417Sdim // LR in the PC. 612296417Sdim // This is only possible with v5T ops (v4T can't change the Thumb bit via 613296417Sdim // a POP PC instruction), and only if we do not need to emit any SP update. 
614296417Sdim // Otherwise, we need a temporary register to pop the value 615296417Sdim // and copy that value into LR. 616296417Sdim auto MBBI = MBB.getFirstTerminator(); 617296417Sdim bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; 618296417Sdim if (CanRestoreDirectly) { 619296417Sdim if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) 620296417Sdim CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || 621296417Sdim MBBI->getOpcode() == ARM::tPOP_RET); 622296417Sdim else { 623296417Sdim auto MBBI_prev = MBBI; 624296417Sdim MBBI_prev--; 625296417Sdim assert(MBBI_prev->getOpcode() == ARM::tPOP); 626296417Sdim assert(MBB.succ_size() == 1); 627296417Sdim if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) 628296417Sdim MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. 629296417Sdim else 630296417Sdim CanRestoreDirectly = false; 631296417Sdim } 632296417Sdim } 633280031Sdim 634296417Sdim if (CanRestoreDirectly) { 635296417Sdim if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) 636296417Sdim return true; 637296417Sdim MachineInstrBuilder MIB = 638321369Sdim BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) 639321369Sdim .add(predOps(ARMCC::AL)); 640296417Sdim // Copy implicit ops and popped registers, if any. 641296417Sdim for (auto MO: MBBI->operands()) 642296417Sdim if (MO.isReg() && (MO.isImplicit() || MO.isDef())) 643321369Sdim MIB.add(MO); 644296417Sdim MIB.addReg(ARM::PC, RegState::Define); 645296417Sdim // Erase the old instruction (tBX_RET or tPOP). 646296417Sdim MBB.erase(MBBI); 647296417Sdim return true; 648296417Sdim } 649280031Sdim 650296417Sdim // Look for a temporary register to use. 651296417Sdim // First, compute the liveness information. 
652321369Sdim const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 653321369Sdim LivePhysRegs UsedRegs(TRI); 654309124Sdim UsedRegs.addLiveOuts(MBB); 655296417Sdim // The semantic of pristines changed recently and now, 656296417Sdim // the callee-saved registers that are touched in the function 657296417Sdim // are not part of the pristines set anymore. 658296417Sdim // Add those callee-saved now. 659321369Sdim const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); 660296417Sdim for (unsigned i = 0; CSRegs[i]; ++i) 661296417Sdim UsedRegs.addReg(CSRegs[i]); 662280031Sdim 663296417Sdim DebugLoc dl = DebugLoc(); 664296417Sdim if (MBBI != MBB.end()) { 665296417Sdim dl = MBBI->getDebugLoc(); 666296417Sdim auto InstUpToMBBI = MBB.end(); 667296417Sdim while (InstUpToMBBI != MBBI) 668296417Sdim // The pre-decrement is on purpose here. 669296417Sdim // We want to have the liveness right before MBBI. 670296417Sdim UsedRegs.stepBackward(*--InstUpToMBBI); 671296417Sdim } 672280031Sdim 673296417Sdim // Look for a register that can be directly use in the POP. 674296417Sdim unsigned PopReg = 0; 675296417Sdim // And some temporary register, just in case. 676296417Sdim unsigned TemporaryReg = 0; 677296417Sdim BitVector PopFriendly = 678321369Sdim TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); 679341825Sdim // R7 may be used as a frame pointer, hence marked as not generally 680341825Sdim // allocatable, however there's no reason to not use it as a temporary for 681341825Sdim // restoring LR. 682341825Sdim if (STI.useR7AsFramePointer()) 683341825Sdim PopFriendly.set(ARM::R7); 684341825Sdim 685296417Sdim assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); 686296417Sdim // Rebuild the GPRs from the high registers because they are removed 687296417Sdim // form the GPR reg class for thumb1. 
688296417Sdim BitVector GPRsNoLRSP = 689321369Sdim TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); 690296417Sdim GPRsNoLRSP |= PopFriendly; 691296417Sdim GPRsNoLRSP.reset(ARM::LR); 692296417Sdim GPRsNoLRSP.reset(ARM::SP); 693296417Sdim GPRsNoLRSP.reset(ARM::PC); 694327952Sdim findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); 695327952Sdim 696341825Sdim // If we couldn't find a pop-friendly register, try restoring LR before 697341825Sdim // popping the other callee-saved registers, so we could use one of them as a 698341825Sdim // temporary. 699327952Sdim bool UseLDRSP = false; 700327952Sdim if (!PopReg && MBBI != MBB.begin()) { 701327952Sdim auto PrevMBBI = MBBI; 702327952Sdim PrevMBBI--; 703327952Sdim if (PrevMBBI->getOpcode() == ARM::tPOP) { 704341825Sdim UsedRegs.stepBackward(*PrevMBBI); 705327952Sdim findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); 706341825Sdim if (PopReg) { 707341825Sdim MBBI = PrevMBBI; 708341825Sdim UseLDRSP = true; 709341825Sdim } 710280031Sdim } 711218885Sdim } 712296417Sdim 713296417Sdim if (!DoIt && !PopReg && !TemporaryReg) 714296417Sdim return false; 715296417Sdim 716296417Sdim assert((PopReg || TemporaryReg) && "Cannot get LR"); 717296417Sdim 718327952Sdim if (UseLDRSP) { 719327952Sdim assert(PopReg && "Do not know how to get LR"); 720327952Sdim // Load the LR via LDR tmp, [SP, #off] 721327952Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) 722327952Sdim .addReg(PopReg, RegState::Define) 723327952Sdim .addReg(ARM::SP) 724327952Sdim .addImm(MBBI->getNumExplicitOperands() - 2) 725327952Sdim .add(predOps(ARMCC::AL)); 726327952Sdim // Move from the temporary register to the LR. 727327952Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 728327952Sdim .addReg(ARM::LR, RegState::Define) 729327952Sdim .addReg(PopReg, RegState::Kill) 730327952Sdim .add(predOps(ARMCC::AL)); 731327952Sdim // Advance past the pop instruction. 
732327952Sdim MBBI++; 733327952Sdim // Increment the SP. 734353358Sdim emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 735353358Sdim ArgRegsSaveSize + 4, ARM::NoRegister, 736353358Sdim MachineInstr::NoFlags); 737327952Sdim return true; 738327952Sdim } 739327952Sdim 740296417Sdim if (TemporaryReg) { 741296417Sdim assert(!PopReg && "Unnecessary MOV is about to be inserted"); 742296417Sdim PopReg = PopFriendly.find_first(); 743321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 744321369Sdim .addReg(TemporaryReg, RegState::Define) 745321369Sdim .addReg(PopReg, RegState::Kill) 746321369Sdim .add(predOps(ARMCC::AL)); 747296417Sdim } 748296417Sdim 749296417Sdim if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { 750296417Sdim // We couldn't use the direct restoration above, so 751296417Sdim // perform the opposite conversion: tPOP_RET to tPOP. 752296417Sdim MachineInstrBuilder MIB = 753321369Sdim BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) 754321369Sdim .add(predOps(ARMCC::AL)); 755296417Sdim bool Popped = false; 756296417Sdim for (auto MO: MBBI->operands()) 757296417Sdim if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && 758296417Sdim MO.getReg() != ARM::PC) { 759321369Sdim MIB.add(MO); 760296417Sdim if (!MO.isImplicit()) 761296417Sdim Popped = true; 762296417Sdim } 763296417Sdim // Is there anything left to pop? 764296417Sdim if (!Popped) 765296417Sdim MBB.erase(MIB.getInstr()); 766296417Sdim // Erase the old instruction. 
767296417Sdim MBB.erase(MBBI); 768321369Sdim MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) 769321369Sdim .add(predOps(ARMCC::AL)); 770296417Sdim } 771296417Sdim 772296417Sdim assert(PopReg && "Do not know how to get LR"); 773321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) 774321369Sdim .add(predOps(ARMCC::AL)) 775296417Sdim .addReg(PopReg, RegState::Define); 776296417Sdim 777353358Sdim emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, 778353358Sdim ARM::NoRegister, MachineInstr::NoFlags); 779296417Sdim 780321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 781321369Sdim .addReg(ARM::LR, RegState::Define) 782321369Sdim .addReg(PopReg, RegState::Kill) 783321369Sdim .add(predOps(ARMCC::AL)); 784296417Sdim 785296417Sdim if (TemporaryReg) 786321369Sdim BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 787321369Sdim .addReg(PopReg, RegState::Define) 788321369Sdim .addReg(TemporaryReg, RegState::Kill) 789321369Sdim .add(predOps(ARMCC::AL)); 790296417Sdim 791296417Sdim return true; 792218885Sdim} 793218885Sdim 794327952Sdimusing ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>; 795327952Sdim 796314564Sdim// Return the first iterator after CurrentReg which is present in EnabledRegs, 797314564Sdim// or OrderEnd if no further registers are in that set. This does not advance 798314564Sdim// the iterator first, so returns CurrentReg if it is in EnabledRegs. 
799327952Sdimstatic const unsigned *findNextOrderedReg(const unsigned *CurrentReg, 800327952Sdim const ARMRegSet &EnabledRegs, 801327952Sdim const unsigned *OrderEnd) { 802327952Sdim while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]) 803314564Sdim ++CurrentReg; 804314564Sdim return CurrentReg; 805314564Sdim} 806314564Sdim 807218885Sdimbool Thumb1FrameLowering:: 808218885SdimspillCalleeSavedRegisters(MachineBasicBlock &MBB, 809218885Sdim MachineBasicBlock::iterator MI, 810218885Sdim const std::vector<CalleeSavedInfo> &CSI, 811218885Sdim const TargetRegisterInfo *TRI) const { 812218885Sdim if (CSI.empty()) 813218885Sdim return false; 814218885Sdim 815218885Sdim DebugLoc DL; 816288943Sdim const TargetInstrInfo &TII = *STI.getInstrInfo(); 817314564Sdim MachineFunction &MF = *MBB.getParent(); 818314564Sdim const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 819314564Sdim MF.getSubtarget().getRegisterInfo()); 820218885Sdim 821327952Sdim ARMRegSet LoRegsToSave; // r0-r7, lr 822327952Sdim ARMRegSet HiRegsToSave; // r8-r11 823327952Sdim ARMRegSet CopyRegs; // Registers which can be used after pushing 824327952Sdim // LoRegs for saving HiRegs. 
825314564Sdim 826218885Sdim for (unsigned i = CSI.size(); i != 0; --i) { 827218885Sdim unsigned Reg = CSI[i-1].getReg(); 828218885Sdim 829314564Sdim if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { 830327952Sdim LoRegsToSave[Reg] = true; 831314564Sdim } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { 832327952Sdim HiRegsToSave[Reg] = true; 833314564Sdim } else { 834314564Sdim llvm_unreachable("callee-saved register of unexpected class"); 835218885Sdim } 836218885Sdim 837314564Sdim if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && 838314564Sdim !MF.getRegInfo().isLiveIn(Reg) && 839314564Sdim !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) 840327952Sdim CopyRegs[Reg] = true; 841314564Sdim } 842218885Sdim 843314564Sdim // Unused argument registers can be used for the high register saving. 844314564Sdim for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) 845314564Sdim if (!MF.getRegInfo().isLiveIn(ArgReg)) 846327952Sdim CopyRegs[ArgReg] = true; 847314564Sdim 848314564Sdim // Push the low registers and lr 849321369Sdim const MachineRegisterInfo &MRI = MF.getRegInfo(); 850327952Sdim if (!LoRegsToSave.none()) { 851321369Sdim MachineInstrBuilder MIB = 852321369Sdim BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 853314564Sdim for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { 854327952Sdim if (LoRegsToSave[Reg]) { 855321369Sdim bool isKill = !MRI.isLiveIn(Reg); 856321369Sdim if (isKill && !MRI.isReserved(Reg)) 857314564Sdim MBB.addLiveIn(Reg); 858314564Sdim 859314564Sdim MIB.addReg(Reg, getKillRegState(isKill)); 860314564Sdim } 861314564Sdim } 862314564Sdim MIB.setMIFlags(MachineInstr::FrameSetup); 863218885Sdim } 864314564Sdim 865314564Sdim // Push the high registers. There are no store instructions that can access 866314564Sdim // these registers directly, so we have to move them to low registers, and 867314564Sdim // push them. 
This might take multiple pushes, as it is possible for there to 868314564Sdim // be fewer low registers available than high registers which need saving. 869314564Sdim 870314564Sdim // These are in reverse order so that in the case where we need to use 871314564Sdim // multiple PUSH instructions, the order of the registers on the stack still 872314564Sdim // matches the unwind info. They need to be swicthed back to ascending order 873314564Sdim // before adding to the PUSH instruction. 874314564Sdim static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6, 875314564Sdim ARM::R5, ARM::R4, ARM::R3, 876314564Sdim ARM::R2, ARM::R1, ARM::R0}; 877314564Sdim static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8}; 878314564Sdim 879314564Sdim const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); 880314564Sdim const unsigned *AllHighRegsEnd = std::end(AllHighRegs); 881314564Sdim 882314564Sdim // Find the first register to save. 883314564Sdim const unsigned *HiRegToSave = findNextOrderedReg( 884314564Sdim std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd); 885314564Sdim 886314564Sdim while (HiRegToSave != AllHighRegsEnd) { 887314564Sdim // Find the first low register to use. 888314564Sdim const unsigned *CopyReg = 889314564Sdim findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); 890314564Sdim 891314564Sdim // Create the PUSH, but don't insert it yet (the MOVs need to come first). 
892353358Sdim MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) 893353358Sdim .add(predOps(ARMCC::AL)) 894353358Sdim .setMIFlags(MachineInstr::FrameSetup); 895314564Sdim 896314564Sdim SmallVector<unsigned, 4> RegsToPush; 897314564Sdim while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { 898327952Sdim if (HiRegsToSave[*HiRegToSave]) { 899321369Sdim bool isKill = !MRI.isLiveIn(*HiRegToSave); 900321369Sdim if (isKill && !MRI.isReserved(*HiRegToSave)) 901314564Sdim MBB.addLiveIn(*HiRegToSave); 902314564Sdim 903314564Sdim // Emit a MOV from the high reg to the low reg. 904321369Sdim BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 905321369Sdim .addReg(*CopyReg, RegState::Define) 906321369Sdim .addReg(*HiRegToSave, getKillRegState(isKill)) 907353358Sdim .add(predOps(ARMCC::AL)) 908353358Sdim .setMIFlags(MachineInstr::FrameSetup); 909314564Sdim 910314564Sdim // Record the register that must be added to the PUSH. 911314564Sdim RegsToPush.push_back(*CopyReg); 912314564Sdim 913314564Sdim CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); 914314564Sdim HiRegToSave = 915314564Sdim findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd); 916314564Sdim } 917314564Sdim } 918314564Sdim 919314564Sdim // Add the low registers to the PUSH, in ascending order. 920321369Sdim for (unsigned Reg : llvm::reverse(RegsToPush)) 921314564Sdim PushMIB.addReg(Reg, RegState::Kill); 922314564Sdim 923314564Sdim // Insert the PUSH instruction after the MOVs. 
924314564Sdim MBB.insert(MI, PushMIB); 925314564Sdim } 926314564Sdim 927218885Sdim return true; 928218885Sdim} 929218885Sdim 930218885Sdimbool Thumb1FrameLowering:: 931218885SdimrestoreCalleeSavedRegisters(MachineBasicBlock &MBB, 932218885Sdim MachineBasicBlock::iterator MI, 933327952Sdim std::vector<CalleeSavedInfo> &CSI, 934218885Sdim const TargetRegisterInfo *TRI) const { 935218885Sdim if (CSI.empty()) 936218885Sdim return false; 937218885Sdim 938218885Sdim MachineFunction &MF = *MBB.getParent(); 939218885Sdim ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 940288943Sdim const TargetInstrInfo &TII = *STI.getInstrInfo(); 941314564Sdim const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 942314564Sdim MF.getSubtarget().getRegisterInfo()); 943218885Sdim 944251662Sdim bool isVarArg = AFI->getArgRegsSaveSize() > 0; 945296417Sdim DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); 946314564Sdim 947327952Sdim ARMRegSet LoRegsToRestore; 948327952Sdim ARMRegSet HiRegsToRestore; 949314564Sdim // Low registers (r0-r7) which can be used to restore the high registers. 950327952Sdim ARMRegSet CopyRegs; 951314564Sdim 952314564Sdim for (CalleeSavedInfo I : CSI) { 953314564Sdim unsigned Reg = I.getReg(); 954314564Sdim 955314564Sdim if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { 956327952Sdim LoRegsToRestore[Reg] = true; 957314564Sdim } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { 958327952Sdim HiRegsToRestore[Reg] = true; 959314564Sdim } else { 960314564Sdim llvm_unreachable("callee-saved register of unexpected class"); 961314564Sdim } 962314564Sdim 963314564Sdim // If this is a low register not used as the frame pointer, we may want to 964314564Sdim // use it for restoring the high registers. 
965314564Sdim if ((ARM::tGPRRegClass.contains(Reg)) && 966314564Sdim !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) 967327952Sdim CopyRegs[Reg] = true; 968314564Sdim } 969314564Sdim 970314564Sdim // If this is a return block, we may be able to use some unused return value 971314564Sdim // registers for restoring the high regs. 972314564Sdim auto Terminator = MBB.getFirstTerminator(); 973314564Sdim if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { 974327952Sdim CopyRegs[ARM::R0] = true; 975327952Sdim CopyRegs[ARM::R1] = true; 976327952Sdim CopyRegs[ARM::R2] = true; 977327952Sdim CopyRegs[ARM::R3] = true; 978314564Sdim for (auto Op : Terminator->implicit_operands()) { 979314564Sdim if (Op.isReg()) 980327952Sdim CopyRegs[Op.getReg()] = false; 981314564Sdim } 982314564Sdim } 983314564Sdim 984314564Sdim static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3, 985314564Sdim ARM::R4, ARM::R5, ARM::R6, ARM::R7}; 986314564Sdim static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11}; 987314564Sdim 988314564Sdim const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs); 989314564Sdim const unsigned *AllHighRegsEnd = std::end(AllHighRegs); 990314564Sdim 991314564Sdim // Find the first register to restore. 992314564Sdim auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs), 993314564Sdim HiRegsToRestore, AllHighRegsEnd); 994314564Sdim 995314564Sdim while (HiRegToRestore != AllHighRegsEnd) { 996327952Sdim assert(!CopyRegs.none()); 997314564Sdim // Find the first low register to use. 998314564Sdim auto CopyReg = 999314564Sdim findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); 1000314564Sdim 1001314564Sdim // Create the POP instruction. 
1002321369Sdim MachineInstrBuilder PopMIB = 1003321369Sdim BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); 1004314564Sdim 1005314564Sdim while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { 1006314564Sdim // Add the low register to the POP. 1007314564Sdim PopMIB.addReg(*CopyReg, RegState::Define); 1008314564Sdim 1009314564Sdim // Create the MOV from low to high register. 1010321369Sdim BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1011321369Sdim .addReg(*HiRegToRestore, RegState::Define) 1012321369Sdim .addReg(*CopyReg, RegState::Kill) 1013321369Sdim .add(predOps(ARMCC::AL)); 1014314564Sdim 1015314564Sdim CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd); 1016314564Sdim HiRegToRestore = 1017314564Sdim findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd); 1018314564Sdim } 1019314564Sdim } 1020314564Sdim 1021321369Sdim MachineInstrBuilder MIB = 1022321369Sdim BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL)); 1023314564Sdim 1024296417Sdim bool NeedsPop = false; 1025218885Sdim for (unsigned i = CSI.size(); i != 0; --i) { 1026327952Sdim CalleeSavedInfo &Info = CSI[i-1]; 1027327952Sdim unsigned Reg = Info.getReg(); 1028314564Sdim 1029314564Sdim // High registers (excluding lr) have already been dealt with 1030314564Sdim if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) 1031314564Sdim continue; 1032314564Sdim 1033218885Sdim if (Reg == ARM::LR) { 1034327952Sdim Info.setRestored(false); 1035327952Sdim if (!MBB.succ_empty() || 1036327952Sdim MI->getOpcode() == ARM::TCRETURNdi || 1037327952Sdim MI->getOpcode() == ARM::TCRETURNri) 1038296417Sdim // LR may only be popped into PC, as part of return sequence. 1039296417Sdim // If this isn't the return sequence, we'll need emitPopSpecialFixUp 1040296417Sdim // to restore LR the hard way. 
1041327952Sdim // FIXME: if we don't pass any stack arguments it would be actually 1042327952Sdim // advantageous *and* correct to do the conversion to an ordinary call 1043327952Sdim // instruction here. 1044218885Sdim continue; 1045327952Sdim // Special epilogue for vararg functions. See emitEpilogue 1046327952Sdim if (isVarArg) 1047327952Sdim continue; 1048327952Sdim // ARMv4T requires BX, see emitEpilogue 1049327952Sdim if (!STI.hasV5TOps()) 1050327952Sdim continue; 1051327952Sdim 1052327952Sdim // Pop LR into PC. 1053327952Sdim Reg = ARM::PC; 1054327952Sdim (*MIB).setDesc(TII.get(ARM::tPOP_RET)); 1055327952Sdim if (MI != MBB.end()) 1056327952Sdim MIB.copyImplicitOps(*MI); 1057327952Sdim MI = MBB.erase(MI); 1058218885Sdim } 1059218885Sdim MIB.addReg(Reg, getDefRegState(true)); 1060296417Sdim NeedsPop = true; 1061218885Sdim } 1062218885Sdim 1063218885Sdim // It's illegal to emit pop instruction without operands. 1064296417Sdim if (NeedsPop) 1065218885Sdim MBB.insert(MI, &*MIB); 1066218885Sdim else 1067218885Sdim MF.DeleteMachineInstr(MIB); 1068218885Sdim 1069218885Sdim return true; 1070218885Sdim} 1071