1234285Sdim//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===// 2234285Sdim// 3234285Sdim// The LLVM Compiler Infrastructure 4234285Sdim// 5234285Sdim// This file is distributed under the University of Illinois Open Source 6234285Sdim// License. See LICENSE.TXT for details. 7234285Sdim// 8234285Sdim// 9234285Sdim//===----------------------------------------------------------------------===// 10234285Sdim 11288943Sdim#define DEBUG_TYPE "hexagon-pei" 12288943Sdim 13234285Sdim#include "HexagonFrameLowering.h" 14234285Sdim#include "Hexagon.h" 15234285Sdim#include "HexagonInstrInfo.h" 16249423Sdim#include "HexagonMachineFunctionInfo.h" 17234285Sdim#include "HexagonRegisterInfo.h" 18234285Sdim#include "HexagonSubtarget.h" 19234285Sdim#include "HexagonTargetMachine.h" 20234285Sdim#include "llvm/ADT/BitVector.h" 21288943Sdim#include "llvm/ADT/PostOrderIterator.h" 22234285Sdim#include "llvm/ADT/STLExtras.h" 23288943Sdim#include "llvm/CodeGen/MachineDominators.h" 24288943Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 25234285Sdim#include "llvm/CodeGen/MachineFunction.h" 26234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 27249423Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 28234285Sdim#include "llvm/CodeGen/MachineModuleInfo.h" 29288943Sdim#include "llvm/CodeGen/MachinePostDominators.h" 30234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 31234285Sdim#include "llvm/CodeGen/RegisterScavenging.h" 32249423Sdim#include "llvm/IR/Function.h" 33249423Sdim#include "llvm/IR/Type.h" 34249423Sdim#include "llvm/Support/CommandLine.h" 35288943Sdim#include "llvm/Support/Debug.h" 36288943Sdim#include "llvm/Support/raw_ostream.h" 37234285Sdim#include "llvm/Target/TargetInstrInfo.h" 38234285Sdim#include "llvm/Target/TargetMachine.h" 39234285Sdim#include "llvm/Target/TargetOptions.h" 40234285Sdim 41288943Sdim// Hexagon stack frame layout as defined by the ABI: 42288943Sdim// 43288943Sdim// Incoming arguments 44288943Sdim// passed via stack 
//                                                                      |
//                                                                      |
//        SP during function's                 FP during function's     |
//    +-- runtime (top of stack)               runtime (bottom) --+     |
//    |                                                           |     |
// --++---------------------+------------------+-----------------++-+-------
//   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
//   |   called functions   |  local objects   |  local objects  |FP|
// --+----------------------+------------------+-----------------+--+-------
//    <-    size known   -> <- size unknown -> <- size known  ->
//
// Low address                                                  High address
//
// <--- stack growth
//
//
// - In all circumstances, the outgoing function arguments are always accessi-
//   ble using the SP, and the incoming arguments are accessible using the FP.
// - If the local objects are not aligned, they can always be accessed using
//   the FP.
// - If there are no variable-sized objects, the local objects can always be
//   accessed using the SP, regardless of whether they are aligned or not.
//   (The alignment padding will be at the bottom of the stack (highest
//   address), and so the offset with respect to the SP will be known at
//   compile time.)
//
// The only complication occurs if there are both local aligned objects and
// dynamically allocated (variable-sized) objects. The alignment pad will be
// placed between the FP and the local objects, thus preventing the use of the
// FP to access the local objects. At the same time, the variable-sized objects
// will be between the SP and the local objects, thus introducing an unknown
// distance from the SP to the locals.
//
// To avoid this problem, a new register is created that holds the aligned
// address of the bottom of the stack, referred to in the sources as AP
// (aligned pointer). The AP will be equal to "FP-p", where "p" is the
// smallest pad that aligns AP to the required boundary (a maximum of the
// alignments of all stack objects, fixed- and variable-sized). All local
// objects[1] will then use AP as the base pointer.
// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
// their name from being allocated at fixed locations on the stack, relative
// to the FP. In the presence of dynamic allocation and local alignment, such
// objects can only be accessed through the FP.
//
// Illustration of the AP:
//                                                                FP --+
//                                                                     |
// ---------------+---------------------+-----+-----------------------++-+--
//   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
//   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
// ---------------+---------------------+-----+-----------------+-----+--+--
//                                      |<-- Multiple of the -->|
//                                           stack alignment    +-- AP
//
// The AP is set up at the beginning of the function. Since it is not a dedi-
// cated (reserved) register, it needs to be kept live throughout the function
// to be available as the base register for local object accesses.
// Normally, the address of a stack object is obtained by a pseudo-instruction
// TFR_FI. To access local objects with the AP register present, a different
// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra
// argument compared to TFR_FI: the first input register is the AP register.
106288943Sdim// This keeps the register live between its definition and its uses. 107288943Sdim 108288943Sdim// The AP register is originally set up using pseudo-instruction ALIGNA: 109288943Sdim// AP = ALIGNA A 110288943Sdim// where 111288943Sdim// A - required stack alignment 112288943Sdim// The alignment value must be the maximum of all alignments required by 113288943Sdim// any stack object. 114288943Sdim 115288943Sdim// The dynamic allocation uses a pseudo-instruction ALLOCA: 116288943Sdim// Rd = ALLOCA Rs, A 117288943Sdim// where 118288943Sdim// Rd - address of the allocated space 119288943Sdim// Rs - minimum size (the actual allocated can be larger to accommodate 120288943Sdim// alignment) 121288943Sdim// A - required alignment 122288943Sdim 123288943Sdim 124234285Sdimusing namespace llvm; 125234285Sdim 126288943Sdimstatic cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret", 127288943Sdim cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target")); 128234285Sdim 129234285Sdim 130288943Sdimstatic cl::opt<int> NumberScavengerSlots("number-scavenger-slots", 131288943Sdim cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2), 132288943Sdim cl::ZeroOrMore); 133234285Sdim 134288943Sdimstatic cl::opt<int> SpillFuncThreshold("spill-func-threshold", 135288943Sdim cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"), 136288943Sdim cl::init(6), cl::ZeroOrMore); 137234285Sdim 138288943Sdimstatic cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os", 139288943Sdim cl::Hidden, cl::desc("Specify Os spill func threshold"), 140288943Sdim cl::init(1), cl::ZeroOrMore); 141234285Sdim 142288943Sdimstatic cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame", 143288943Sdim cl::init(true), cl::Hidden, cl::ZeroOrMore, 144288943Sdim cl::desc("Enable stack frame shrink wrapping")); 145234285Sdim 146288943Sdimstatic cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), 147288943Sdim cl::Hidden, cl::ZeroOrMore, 
cl::desc("Max count of stack frame " 148288943Sdim "shrink-wraps")); 149234285Sdim 150296417Sdimstatic cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true), 151296417Sdim cl::Hidden, cl::desc("Use allocframe more conservatively")); 152296417Sdim 153296417Sdim 154296417Sdimnamespace llvm { 155296417Sdim void initializeHexagonCallFrameInformationPass(PassRegistry&); 156296417Sdim FunctionPass *createHexagonCallFrameInformation(); 157296417Sdim} 158296417Sdim 159288943Sdimnamespace { 160296417Sdim class HexagonCallFrameInformation : public MachineFunctionPass { 161296417Sdim public: 162296417Sdim static char ID; 163296417Sdim HexagonCallFrameInformation() : MachineFunctionPass(ID) { 164296417Sdim PassRegistry &PR = *PassRegistry::getPassRegistry(); 165296417Sdim initializeHexagonCallFrameInformationPass(PR); 166296417Sdim } 167296417Sdim bool runOnMachineFunction(MachineFunction &MF) override; 168296417Sdim }; 169296417Sdim 170296417Sdim char HexagonCallFrameInformation::ID = 0; 171296417Sdim} 172296417Sdim 173296417Sdimbool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { 174296417Sdim auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering(); 175296417Sdim bool NeedCFI = MF.getMMI().hasDebugInfo() || 176296417Sdim MF.getFunction()->needsUnwindTableEntry(); 177296417Sdim 178296417Sdim if (!NeedCFI) 179296417Sdim return false; 180296417Sdim HFI.insertCFIInstructions(MF); 181296417Sdim return true; 182296417Sdim} 183296417Sdim 184296417SdimINITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi", 185296417Sdim "Hexagon call frame information", false, false) 186296417Sdim 187296417SdimFunctionPass *llvm::createHexagonCallFrameInformation() { 188296417Sdim return new HexagonCallFrameInformation(); 189296417Sdim} 190296417Sdim 191296417Sdim 192296417Sdimnamespace { 193288943Sdim /// Map a register pair Reg to the subregister that has the greater "number", 194288943Sdim /// i.e. D3 (aka R7:6) will be mapped to R7, etc. 
195288943Sdim unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI, 196288943Sdim bool hireg = true) { 197288943Sdim if (Reg < Hexagon::D0 || Reg > Hexagon::D15) 198288943Sdim return Reg; 199234285Sdim 200288943Sdim unsigned RegNo = 0; 201288943Sdim for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) { 202288943Sdim if (hireg) { 203288943Sdim if (*SubRegs > RegNo) 204288943Sdim RegNo = *SubRegs; 205288943Sdim } else { 206288943Sdim if (!RegNo || *SubRegs < RegNo) 207288943Sdim RegNo = *SubRegs; 208288943Sdim } 209288943Sdim } 210288943Sdim return RegNo; 211288943Sdim } 212288943Sdim 213288943Sdim /// Returns the callee saved register with the largest id in the vector. 214288943Sdim unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI, 215288943Sdim const TargetRegisterInfo &TRI) { 216288943Sdim assert(Hexagon::R1 > 0 && 217288943Sdim "Assume physical registers are encoded as positive integers"); 218288943Sdim if (CSI.empty()) 219288943Sdim return 0; 220288943Sdim 221288943Sdim unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI); 222288943Sdim for (unsigned I = 1, E = CSI.size(); I < E; ++I) { 223288943Sdim unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI); 224288943Sdim if (Reg > Max) 225288943Sdim Max = Reg; 226288943Sdim } 227288943Sdim return Max; 228288943Sdim } 229288943Sdim 230288943Sdim /// Checks if the basic block contains any instruction that needs a stack 231288943Sdim /// frame to be already in place. 
232288943Sdim bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR) { 233288943Sdim for (auto &I : MBB) { 234288943Sdim const MachineInstr *MI = &I; 235288943Sdim if (MI->isCall()) 236288943Sdim return true; 237288943Sdim unsigned Opc = MI->getOpcode(); 238288943Sdim switch (Opc) { 239288943Sdim case Hexagon::ALLOCA: 240288943Sdim case Hexagon::ALIGNA: 241288943Sdim return true; 242288943Sdim default: 243288943Sdim break; 244288943Sdim } 245288943Sdim // Check individual operands. 246288943Sdim for (const MachineOperand &MO : MI->operands()) { 247288943Sdim // While the presence of a frame index does not prove that a stack 248288943Sdim // frame will be required, all frame indexes should be within alloc- 249288943Sdim // frame/deallocframe. Otherwise, the code that translates a frame 250288943Sdim // index into an offset would have to be aware of the placement of 251288943Sdim // the frame creation/destruction instructions. 252288943Sdim if (MO.isFI()) 253288943Sdim return true; 254288943Sdim if (!MO.isReg()) 255288943Sdim continue; 256288943Sdim unsigned R = MO.getReg(); 257288943Sdim // Virtual registers will need scavenging, which then may require 258288943Sdim // a stack slot. 259288943Sdim if (TargetRegisterInfo::isVirtualRegister(R)) 260288943Sdim return true; 261288943Sdim if (CSR[R]) 262288943Sdim return true; 263288943Sdim } 264288943Sdim } 265288943Sdim return false; 266288943Sdim } 267288943Sdim 268288943Sdim /// Returns true if MBB has a machine instructions that indicates a tail call 269288943Sdim /// in the block. 270288943Sdim bool hasTailCall(const MachineBasicBlock &MBB) { 271288943Sdim MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr(); 272288943Sdim unsigned RetOpc = I->getOpcode(); 273288943Sdim return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr; 274288943Sdim } 275288943Sdim 276288943Sdim /// Returns true if MBB contains an instruction that returns. 
277288943Sdim bool hasReturn(const MachineBasicBlock &MBB) { 278288943Sdim for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I) 279288943Sdim if (I->isReturn()) 280288943Sdim return true; 281288943Sdim return false; 282288943Sdim } 283234285Sdim} 284234285Sdim 285234285Sdim 286288943Sdim/// Implements shrink-wrapping of the stack frame. By default, stack frame 287288943Sdim/// is created in the function entry block, and is cleaned up in every block 288288943Sdim/// that returns. This function finds alternate blocks: one for the frame 289288943Sdim/// setup (prolog) and one for the cleanup (epilog). 290288943Sdimvoid HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, 291288943Sdim MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const { 292288943Sdim static unsigned ShrinkCounter = 0; 293288943Sdim 294288943Sdim if (ShrinkLimit.getPosition()) { 295288943Sdim if (ShrinkCounter >= ShrinkLimit) 296288943Sdim return; 297288943Sdim ShrinkCounter++; 298288943Sdim } 299288943Sdim 300288943Sdim auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); 301288943Sdim auto &HRI = *HST.getRegisterInfo(); 302288943Sdim 303288943Sdim MachineDominatorTree MDT; 304288943Sdim MDT.runOnMachineFunction(MF); 305288943Sdim MachinePostDominatorTree MPT; 306288943Sdim MPT.runOnMachineFunction(MF); 307288943Sdim 308288943Sdim typedef DenseMap<unsigned,unsigned> UnsignedMap; 309288943Sdim UnsignedMap RPO; 310288943Sdim typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; 311288943Sdim RPOTType RPOT(&MF); 312288943Sdim unsigned RPON = 0; 313288943Sdim for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) 314288943Sdim RPO[(*I)->getNumber()] = RPON++; 315288943Sdim 316288943Sdim // Don't process functions that have loops, at least for now. Placement 317288943Sdim // of prolog and epilog must take loop structure into account. For simpli- 318288943Sdim // city don't do it right now. 
319288943Sdim for (auto &I : MF) { 320288943Sdim unsigned BN = RPO[I.getNumber()]; 321288943Sdim for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) { 322288943Sdim // If found a back-edge, return. 323288943Sdim if (RPO[(*SI)->getNumber()] <= BN) 324288943Sdim return; 325288943Sdim } 326288943Sdim } 327288943Sdim 328288943Sdim // Collect the set of blocks that need a stack frame to execute. Scan 329288943Sdim // each block for uses/defs of callee-saved registers, calls, etc. 330288943Sdim SmallVector<MachineBasicBlock*,16> SFBlocks; 331288943Sdim BitVector CSR(Hexagon::NUM_TARGET_REGS); 332288943Sdim for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P) 333288943Sdim CSR[*P] = true; 334288943Sdim 335288943Sdim for (auto &I : MF) 336288943Sdim if (needsStackFrame(I, CSR)) 337288943Sdim SFBlocks.push_back(&I); 338288943Sdim 339288943Sdim DEBUG({ 340288943Sdim dbgs() << "Blocks needing SF: {"; 341288943Sdim for (auto &B : SFBlocks) 342288943Sdim dbgs() << " BB#" << B->getNumber(); 343288943Sdim dbgs() << " }\n"; 344288943Sdim }); 345288943Sdim // No frame needed? 346288943Sdim if (SFBlocks.empty()) 347288943Sdim return; 348288943Sdim 349288943Sdim // Pick a common dominator and a common post-dominator. 
350288943Sdim MachineBasicBlock *DomB = SFBlocks[0]; 351288943Sdim for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { 352288943Sdim DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]); 353288943Sdim if (!DomB) 354288943Sdim break; 355288943Sdim } 356288943Sdim MachineBasicBlock *PDomB = SFBlocks[0]; 357288943Sdim for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { 358288943Sdim PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]); 359288943Sdim if (!PDomB) 360288943Sdim break; 361288943Sdim } 362288943Sdim DEBUG({ 363288943Sdim dbgs() << "Computed dom block: BB#"; 364288943Sdim if (DomB) dbgs() << DomB->getNumber(); 365288943Sdim else dbgs() << "<null>"; 366288943Sdim dbgs() << ", computed pdom block: BB#"; 367288943Sdim if (PDomB) dbgs() << PDomB->getNumber(); 368288943Sdim else dbgs() << "<null>"; 369288943Sdim dbgs() << "\n"; 370288943Sdim }); 371288943Sdim if (!DomB || !PDomB) 372288943Sdim return; 373288943Sdim 374288943Sdim // Make sure that DomB dominates PDomB and PDomB post-dominates DomB. 375288943Sdim if (!MDT.dominates(DomB, PDomB)) { 376288943Sdim DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); 377288943Sdim return; 378288943Sdim } 379288943Sdim if (!MPT.dominates(PDomB, DomB)) { 380288943Sdim DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); 381288943Sdim return; 382288943Sdim } 383288943Sdim 384288943Sdim // Finally, everything seems right. 385288943Sdim PrologB = DomB; 386288943Sdim EpilogB = PDomB; 387288943Sdim} 388288943Sdim 389288943Sdim/// Perform most of the PEI work here: 390288943Sdim/// - saving/restoring of the callee-saved registers, 391288943Sdim/// - stack frame creation and destruction. 392288943Sdim/// Normally, this work is distributed among various functions, but doing it 393288943Sdim/// in one place allows shrink-wrapping of the stack frame. 
394288943Sdimvoid HexagonFrameLowering::emitPrologue(MachineFunction &MF, 395288943Sdim MachineBasicBlock &MBB) const { 396288943Sdim auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); 397288943Sdim auto &HRI = *HST.getRegisterInfo(); 398288943Sdim 399288943Sdim assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); 400234285Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 401288943Sdim const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 402288943Sdim 403288943Sdim MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr; 404288943Sdim if (EnableShrinkWrapping) 405288943Sdim findShrunkPrologEpilog(MF, PrologB, EpilogB); 406288943Sdim 407288943Sdim insertCSRSpillsInBlock(*PrologB, CSI, HRI); 408288943Sdim insertPrologueInBlock(*PrologB); 409288943Sdim 410288943Sdim if (EpilogB) { 411288943Sdim insertCSRRestoresInBlock(*EpilogB, CSI, HRI); 412288943Sdim insertEpilogueInBlock(*EpilogB); 413288943Sdim } else { 414288943Sdim for (auto &B : MF) 415296417Sdim if (B.isReturnBlock()) 416288943Sdim insertCSRRestoresInBlock(B, CSI, HRI); 417288943Sdim 418288943Sdim for (auto &B : MF) 419296417Sdim if (B.isReturnBlock()) 420288943Sdim insertEpilogueInBlock(B); 421288943Sdim } 422288943Sdim} 423288943Sdim 424288943Sdim 425288943Sdimvoid HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { 426288943Sdim MachineFunction &MF = *MBB.getParent(); 427288943Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 428296417Sdim auto &HST = MF.getSubtarget<HexagonSubtarget>(); 429288943Sdim auto &HII = *HST.getInstrInfo(); 430288943Sdim auto &HRI = *HST.getRegisterInfo(); 431288943Sdim DebugLoc dl; 432234285Sdim 433288943Sdim unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment()); 434288943Sdim 435288943Sdim // Calculate the total stack frame size. 436234285Sdim // Get the number of bytes to allocate from the FrameInfo. 
437288943Sdim unsigned FrameSize = MFI->getStackSize(); 438288943Sdim // Round up the max call frame size to the max alignment on the stack. 439288943Sdim unsigned MaxCFA = RoundUpToAlignment(MFI->getMaxCallFrameSize(), MaxAlign); 440288943Sdim MFI->setMaxCallFrameSize(MaxCFA); 441234285Sdim 442288943Sdim FrameSize = MaxCFA + RoundUpToAlignment(FrameSize, MaxAlign); 443288943Sdim MFI->setStackSize(FrameSize); 444288943Sdim 445288943Sdim bool AlignStack = (MaxAlign > getStackAlignment()); 446288943Sdim 447288943Sdim // Get the number of bytes to allocate from the FrameInfo. 448288943Sdim unsigned NumBytes = MFI->getStackSize(); 449288943Sdim unsigned SP = HRI.getStackRegister(); 450288943Sdim unsigned MaxCF = MFI->getMaxCallFrameSize(); 451234285Sdim MachineBasicBlock::iterator InsertPt = MBB.begin(); 452234285Sdim 453288943Sdim auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>(); 454288943Sdim auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts(); 455234285Sdim 456288943Sdim for (auto MI : AdjustRegs) { 457288943Sdim assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca"); 458288943Sdim expandAlloca(MI, HII, SP, MaxCF); 459288943Sdim MI->eraseFromParent(); 460234285Sdim } 461234285Sdim 462296417Sdim if (!hasFP(MF)) 463288943Sdim return; 464234285Sdim 465288943Sdim // Check for overflow. 466288943Sdim // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used? 467288943Sdim const unsigned int ALLOCFRAME_MAX = 16384; 468234285Sdim 469288943Sdim // Create a dummy memory operand to avoid allocframe from being treated as 470288943Sdim // a volatile memory reference. 471288943Sdim MachineMemOperand *MMO = 472288943Sdim MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore, 473288943Sdim 4, 4); 474288943Sdim 475288943Sdim if (NumBytes >= ALLOCFRAME_MAX) { 476288943Sdim // Emit allocframe(#0). 
477288943Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) 478288943Sdim .addImm(0) 479288943Sdim .addMemOperand(MMO); 480288943Sdim 481288943Sdim // Subtract offset from frame pointer. 482288943Sdim // We use a caller-saved non-parameter register for that. 483288943Sdim unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg(); 484288943Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real), 485288943Sdim CallerSavedReg).addImm(NumBytes); 486288943Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP) 487288943Sdim .addReg(SP) 488288943Sdim .addReg(CallerSavedReg); 489288943Sdim } else { 490288943Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) 491288943Sdim .addImm(NumBytes) 492288943Sdim .addMemOperand(MMO); 493234285Sdim } 494234285Sdim 495288943Sdim if (AlignStack) { 496288943Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) 497288943Sdim .addReg(SP) 498288943Sdim .addImm(-int64_t(MaxAlign)); 499288943Sdim } 500249423Sdim} 501234285Sdim 502288943Sdimvoid HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { 503288943Sdim MachineFunction &MF = *MBB.getParent(); 504296417Sdim if (!hasFP(MF)) 505288943Sdim return; 506234285Sdim 507288943Sdim auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); 508288943Sdim auto &HII = *HST.getInstrInfo(); 509288943Sdim auto &HRI = *HST.getRegisterInfo(); 510288943Sdim unsigned SP = HRI.getStackRegister(); 511261991Sdim 512288943Sdim MachineInstr *RetI = nullptr; 513288943Sdim for (auto &I : MBB) { 514288943Sdim if (!I.isReturn()) 515288943Sdim continue; 516288943Sdim RetI = &I; 517288943Sdim break; 518288943Sdim } 519288943Sdim unsigned RetOpc = RetI ? 
RetI->getOpcode() : 0; 520261991Sdim 521288943Sdim MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator(); 522288943Sdim DebugLoc DL; 523288943Sdim if (InsertPt != MBB.end()) 524288943Sdim DL = InsertPt->getDebugLoc(); 525288943Sdim else if (!MBB.empty()) 526288943Sdim DL = std::prev(MBB.end())->getDebugLoc(); 527288943Sdim 528288943Sdim // Handle EH_RETURN. 529288943Sdim if (RetOpc == Hexagon::EH_RETURN_JMPR) { 530288943Sdim BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); 531288943Sdim BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP) 532288943Sdim .addReg(SP) 533288943Sdim .addReg(Hexagon::R28); 534288943Sdim return; 535288943Sdim } 536288943Sdim 537288943Sdim // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc- 538288943Sdim // frame instruction if we encounter it. 539288943Sdim if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4) { 540288943Sdim MachineBasicBlock::iterator It = RetI; 541288943Sdim ++It; 542288943Sdim // Delete all instructions after the RESTORE (except labels). 543288943Sdim while (It != MBB.end()) { 544288943Sdim if (!It->isLabel()) 545288943Sdim It = MBB.erase(It); 546288943Sdim else 547288943Sdim ++It; 548234285Sdim } 549288943Sdim return; 550234285Sdim } 551288943Sdim 552288943Sdim // It is possible that the restoring code is a call to a library function. 553288943Sdim // All of the restore* functions include "deallocframe", so we need to make 554288943Sdim // sure that we don't add an extra one. 
555288943Sdim bool NeedsDeallocframe = true; 556288943Sdim if (!MBB.empty() && InsertPt != MBB.begin()) { 557288943Sdim MachineBasicBlock::iterator PrevIt = std::prev(InsertPt); 558288943Sdim unsigned COpc = PrevIt->getOpcode(); 559288943Sdim if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4) 560288943Sdim NeedsDeallocframe = false; 561288943Sdim } 562288943Sdim 563288943Sdim if (!NeedsDeallocframe) 564288943Sdim return; 565288943Sdim // If the returning instruction is JMPret, replace it with dealloc_return, 566288943Sdim // otherwise just add deallocframe. The function could be returning via a 567288943Sdim // tail call. 568288943Sdim if (RetOpc != Hexagon::JMPret || DisableDeallocRet) { 569288943Sdim BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe)); 570288943Sdim return; 571288943Sdim } 572288943Sdim unsigned NewOpc = Hexagon::L4_return; 573288943Sdim MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc)); 574288943Sdim // Transfer the function live-out registers. 
575288943Sdim NewI->copyImplicitOps(MF, RetI); 576288943Sdim MBB.erase(RetI); 577234285Sdim} 578234285Sdim 579288943Sdim 580296417Sdimnamespace { 581296417Sdim bool IsAllocFrame(MachineBasicBlock::const_iterator It) { 582296417Sdim if (!It->isBundle()) 583296417Sdim return It->getOpcode() == Hexagon::S2_allocframe; 584296417Sdim auto End = It->getParent()->instr_end(); 585296417Sdim MachineBasicBlock::const_instr_iterator I = It.getInstrIterator(); 586296417Sdim while (++I != End && I->isBundled()) 587296417Sdim if (I->getOpcode() == Hexagon::S2_allocframe) 588296417Sdim return true; 589296417Sdim return false; 590296417Sdim } 591296417Sdim 592296417Sdim MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) { 593296417Sdim for (auto &I : B) 594296417Sdim if (IsAllocFrame(I)) 595296417Sdim return I; 596296417Sdim return B.end(); 597296417Sdim } 598296417Sdim} 599296417Sdim 600296417Sdim 601296417Sdimvoid HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { 602296417Sdim for (auto &B : MF) { 603296417Sdim auto AF = FindAllocFrame(B); 604296417Sdim if (AF == B.end()) 605296417Sdim continue; 606296417Sdim insertCFIInstructionsAt(B, ++AF); 607296417Sdim } 608296417Sdim} 609296417Sdim 610296417Sdim 611296417Sdimvoid HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, 612296417Sdim MachineBasicBlock::iterator At) const { 613296417Sdim MachineFunction &MF = *MBB.getParent(); 614296417Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 615296417Sdim MachineModuleInfo &MMI = MF.getMMI(); 616296417Sdim auto &HST = MF.getSubtarget<HexagonSubtarget>(); 617296417Sdim auto &HII = *HST.getInstrInfo(); 618296417Sdim auto &HRI = *HST.getRegisterInfo(); 619296417Sdim 620296417Sdim // If CFI instructions have debug information attached, something goes 621296417Sdim // wrong with the final assembly generation: the prolog_end is placed 622296417Sdim // in a wrong location. 
623296417Sdim DebugLoc DL; 624296417Sdim const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); 625296417Sdim 626296417Sdim MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); 627296417Sdim 628296417Sdim if (hasFP(MF)) { 629296417Sdim unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); 630296417Sdim unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); 631296417Sdim 632296417Sdim // Define CFA via an offset from the value of FP. 633296417Sdim // 634296417Sdim // -8 -4 0 (SP) 635296417Sdim // --+----+----+--------------------- 636296417Sdim // | FP | LR | increasing addresses --> 637296417Sdim // --+----+----+--------------------- 638296417Sdim // | +-- Old SP (before allocframe) 639296417Sdim // +-- New FP (after allocframe) 640296417Sdim // 641296417Sdim // MCCFIInstruction::createDefCfa subtracts the offset from the register. 642296417Sdim // MCCFIInstruction::createOffset takes the offset without sign change. 643296417Sdim auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); 644296417Sdim BuildMI(MBB, At, DL, CFID) 645296417Sdim .addCFIIndex(MMI.addFrameInst(DefCfa)); 646296417Sdim // R31 (return addr) = CFA - 4 647296417Sdim auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); 648296417Sdim BuildMI(MBB, At, DL, CFID) 649296417Sdim .addCFIIndex(MMI.addFrameInst(OffR31)); 650296417Sdim // R30 (frame ptr) = CFA - 8 651296417Sdim auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); 652296417Sdim BuildMI(MBB, At, DL, CFID) 653296417Sdim .addCFIIndex(MMI.addFrameInst(OffR30)); 654296417Sdim } 655296417Sdim 656296417Sdim static unsigned int RegsToMove[] = { 657296417Sdim Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, 658296417Sdim Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, 659296417Sdim Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, 660296417Sdim Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, 661296417Sdim Hexagon::D0, Hexagon::D1, 
Hexagon::D8, Hexagon::D9, 662296417Sdim Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, 663296417Sdim Hexagon::NoRegister 664296417Sdim }; 665296417Sdim 666296417Sdim const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 667296417Sdim 668296417Sdim for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) { 669296417Sdim unsigned Reg = RegsToMove[i]; 670296417Sdim auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { 671296417Sdim return C.getReg() == Reg; 672296417Sdim }; 673296417Sdim auto F = std::find_if(CSI.begin(), CSI.end(), IfR); 674296417Sdim if (F == CSI.end()) 675296417Sdim continue; 676296417Sdim 677296417Sdim // Subtract 8 to make room for R30 and R31, which are added above. 678296417Sdim unsigned FrameReg; 679296417Sdim int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8; 680296417Sdim 681296417Sdim if (Reg < Hexagon::D0 || Reg > Hexagon::D15) { 682296417Sdim unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true); 683296417Sdim auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg, 684296417Sdim Offset); 685296417Sdim BuildMI(MBB, At, DL, CFID) 686296417Sdim .addCFIIndex(MMI.addFrameInst(OffReg)); 687296417Sdim } else { 688296417Sdim // Split the double regs into subregs, and generate appropriate 689296417Sdim // cfi_offsets. 690296417Sdim // The only reason, we are split double regs is, llvm-mc does not 691296417Sdim // understand paired registers for cfi_offset. 
692296417Sdim // Eg .cfi_offset r1:0, -64 693296417Sdim 694296417Sdim unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg); 695296417Sdim unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg); 696296417Sdim unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); 697296417Sdim unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); 698296417Sdim auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, 699296417Sdim Offset+4); 700296417Sdim BuildMI(MBB, At, DL, CFID) 701296417Sdim .addCFIIndex(MMI.addFrameInst(OffHi)); 702296417Sdim auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg, 703296417Sdim Offset); 704296417Sdim BuildMI(MBB, At, DL, CFID) 705296417Sdim .addCFIIndex(MMI.addFrameInst(OffLo)); 706296417Sdim } 707296417Sdim } 708296417Sdim} 709296417Sdim 710296417Sdim 711234285Sdimbool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { 712296417Sdim auto &MFI = *MF.getFrameInfo(); 713296417Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 714296417Sdim 715296417Sdim bool HasFixed = MFI.getNumFixedObjects(); 716296417Sdim bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI) 717296417Sdim .getLocalFrameObjectCount(); 718296417Sdim bool HasExtraAlign = HRI.needsStackRealignment(MF); 719296417Sdim bool HasAlloca = MFI.hasVarSizedObjects(); 720296417Sdim 721296417Sdim // Insert ALLOCFRAME if we need to or at -O0 for the debugger. Think 722296417Sdim // that this shouldn't be required, but doing so now because gcc does and 723296417Sdim // gdb can't break at the start of the function without it. Will remove if 724296417Sdim // this turns out to be a gdb bug. 725296417Sdim // 726296417Sdim if (MF.getTarget().getOptLevel() == CodeGenOpt::None) 727296417Sdim return true; 728296417Sdim 729296417Sdim // By default we want to use SP (since it's always there). FP requires 730296417Sdim // some setup (i.e. ALLOCFRAME). 
731296417Sdim // Fixed and preallocated objects need FP if the distance from them to 732296417Sdim // the SP is unknown (as is with alloca or aligna). 733296417Sdim if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) 734296417Sdim return true; 735296417Sdim 736296417Sdim if (MFI.getStackSize() > 0) { 737296417Sdim if (UseAllocframe) 738296417Sdim return true; 739296417Sdim } 740296417Sdim 741296417Sdim if (MFI.hasCalls() || 742296417Sdim MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR()) 743296417Sdim return true; 744296417Sdim 745296417Sdim return false; 746234285Sdim} 747234285Sdim 748288943Sdim 749288943Sdimenum SpillKind { 750288943Sdim SK_ToMem, 751288943Sdim SK_FromMem, 752288943Sdim SK_FromMemTailcall 753288943Sdim}; 754288943Sdim 755288943Sdimstatic const char * 756288943SdimgetSpillFunctionFor(unsigned MaxReg, SpillKind SpillType) { 757288943Sdim const char * V4SpillToMemoryFunctions[] = { 758288943Sdim "__save_r16_through_r17", 759288943Sdim "__save_r16_through_r19", 760288943Sdim "__save_r16_through_r21", 761288943Sdim "__save_r16_through_r23", 762288943Sdim "__save_r16_through_r25", 763288943Sdim "__save_r16_through_r27" }; 764288943Sdim 765288943Sdim const char * V4SpillFromMemoryFunctions[] = { 766288943Sdim "__restore_r16_through_r17_and_deallocframe", 767288943Sdim "__restore_r16_through_r19_and_deallocframe", 768288943Sdim "__restore_r16_through_r21_and_deallocframe", 769288943Sdim "__restore_r16_through_r23_and_deallocframe", 770288943Sdim "__restore_r16_through_r25_and_deallocframe", 771288943Sdim "__restore_r16_through_r27_and_deallocframe" }; 772288943Sdim 773288943Sdim const char * V4SpillFromMemoryTailcallFunctions[] = { 774288943Sdim "__restore_r16_through_r17_and_deallocframe_before_tailcall", 775288943Sdim "__restore_r16_through_r19_and_deallocframe_before_tailcall", 776288943Sdim "__restore_r16_through_r21_and_deallocframe_before_tailcall", 777288943Sdim "__restore_r16_through_r23_and_deallocframe_before_tailcall", 
778288943Sdim "__restore_r16_through_r25_and_deallocframe_before_tailcall", 779288943Sdim "__restore_r16_through_r27_and_deallocframe_before_tailcall" 780288943Sdim }; 781288943Sdim 782288943Sdim const char **SpillFunc = nullptr; 783288943Sdim 784288943Sdim switch(SpillType) { 785288943Sdim case SK_ToMem: 786288943Sdim SpillFunc = V4SpillToMemoryFunctions; 787288943Sdim break; 788288943Sdim case SK_FromMem: 789288943Sdim SpillFunc = V4SpillFromMemoryFunctions; 790288943Sdim break; 791288943Sdim case SK_FromMemTailcall: 792288943Sdim SpillFunc = V4SpillFromMemoryTailcallFunctions; 793288943Sdim break; 794288943Sdim } 795288943Sdim assert(SpillFunc && "Unknown spill kind"); 796288943Sdim 797288943Sdim // Spill all callee-saved registers up to the highest register used. 798288943Sdim switch (MaxReg) { 799288943Sdim case Hexagon::R17: 800288943Sdim return SpillFunc[0]; 801288943Sdim case Hexagon::R19: 802288943Sdim return SpillFunc[1]; 803288943Sdim case Hexagon::R21: 804288943Sdim return SpillFunc[2]; 805288943Sdim case Hexagon::R23: 806288943Sdim return SpillFunc[3]; 807288943Sdim case Hexagon::R25: 808288943Sdim return SpillFunc[4]; 809288943Sdim case Hexagon::R27: 810288943Sdim return SpillFunc[5]; 811288943Sdim default: 812288943Sdim llvm_unreachable("Unhandled maximum callee save register"); 813288943Sdim } 814288943Sdim return 0; 815239462Sdim} 816239462Sdim 817288943Sdim/// Adds all callee-saved registers up to MaxReg to the instruction. 818288943Sdimstatic void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, 819288943Sdim unsigned MaxReg, bool IsDef) { 820288943Sdim // Add the callee-saved registers as implicit uses. 
821288943Sdim for (unsigned R = Hexagon::R16; R <= MaxReg; ++R) { 822288943Sdim MachineOperand ImpUse = MachineOperand::CreateReg(R, IsDef, true); 823288943Sdim Inst->addOperand(ImpUse); 824288943Sdim } 825288943Sdim} 826234285Sdim 827288943Sdim 828296417Sdimint HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, 829296417Sdim int FI, unsigned &FrameReg) const { 830296417Sdim auto &MFI = *MF.getFrameInfo(); 831296417Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 832296417Sdim 833296417Sdim // Large parts of this code are shared with HRI::eliminateFrameIndex. 834296417Sdim int Offset = MFI.getObjectOffset(FI); 835296417Sdim bool HasAlloca = MFI.hasVarSizedObjects(); 836296417Sdim bool HasExtraAlign = HRI.needsStackRealignment(MF); 837296417Sdim bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; 838296417Sdim 839296417Sdim unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); 840296417Sdim unsigned AP = 0; 841296417Sdim if (const MachineInstr *AI = getAlignaInstr(MF)) 842296417Sdim AP = AI->getOperand(0).getReg(); 843296417Sdim unsigned FrameSize = MFI.getStackSize(); 844296417Sdim 845296417Sdim bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). 846296417Sdim // Use FP at -O0, except when there are objects with extra alignment. 847296417Sdim // That additional alignment requirement may cause a pad to be inserted, 848296417Sdim // which will make it impossible to use FP to access objects located 849296417Sdim // past the pad. 850296417Sdim if (NoOpt && !HasExtraAlign) 851296417Sdim UseFP = true; 852296417Sdim if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { 853296417Sdim // Fixed and preallocated objects will be located before any padding 854296417Sdim // so FP must be used to access them. 
855296417Sdim UseFP |= (HasAlloca || HasExtraAlign); 856296417Sdim } else { 857296417Sdim if (HasAlloca) { 858296417Sdim if (HasExtraAlign) 859296417Sdim UseAP = true; 860296417Sdim else 861296417Sdim UseFP = true; 862296417Sdim } 863296417Sdim } 864296417Sdim 865296417Sdim // If FP was picked, then there had better be FP. 866296417Sdim bool HasFP = hasFP(MF); 867296417Sdim assert((HasFP || !UseFP) && "This function must have frame pointer"); 868296417Sdim 869296417Sdim // Having FP implies allocframe. Allocframe will store extra 8 bytes: 870296417Sdim // FP/LR. If the base register is used to access an object across these 871296417Sdim // 8 bytes, then the offset will need to be adjusted by 8. 872296417Sdim // 873296417Sdim // After allocframe: 874296417Sdim // HexagonISelLowering adds 8 to ---+ 875296417Sdim // the offsets of all stack-based | 876296417Sdim // arguments (*) | 877296417Sdim // | 878296417Sdim // getObjectOffset < 0 0 8 getObjectOffset >= 8 879296417Sdim // ------------------------+-----+------------------------> increasing 880296417Sdim // <local objects> |FP/LR| <input arguments> addresses 881296417Sdim // -----------------+------+-----+------------------------> 882296417Sdim // | | 883296417Sdim // SP/AP point --+ +-- FP points here (**) 884296417Sdim // somewhere on 885296417Sdim // this side of FP/LR 886296417Sdim // 887296417Sdim // (*) See LowerFormalArguments. The FP/LR is assumed to be present. 888296417Sdim // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR. 889296417Sdim 890296417Sdim // The lowering assumes that FP/LR is present, and so the offsets of 891296417Sdim // the formal arguments start at 8. If FP/LR is not there we need to 892296417Sdim // reduce the offset by 8. 
893296417Sdim if (Offset > 0 && !HasFP) 894296417Sdim Offset -= 8; 895296417Sdim 896296417Sdim if (UseFP) 897296417Sdim FrameReg = FP; 898296417Sdim else if (UseAP) 899296417Sdim FrameReg = AP; 900296417Sdim else 901296417Sdim FrameReg = SP; 902296417Sdim 903296417Sdim // Calculate the actual offset in the instruction. If there is no FP 904296417Sdim // (in other words, no allocframe), then SP will not be adjusted (i.e. 905296417Sdim // there will be no SP -= FrameSize), so the frame size should not be 906296417Sdim // added to the calculated offset. 907296417Sdim int RealOffset = Offset; 908296417Sdim if (!UseFP && !UseAP && HasFP) 909296417Sdim RealOffset = FrameSize+Offset; 910296417Sdim return RealOffset; 911288943Sdim} 912288943Sdim 913288943Sdim 914288943Sdimbool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, 915288943Sdim const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { 916288943Sdim if (CSI.empty()) 917288943Sdim return true; 918288943Sdim 919288943Sdim MachineBasicBlock::iterator MI = MBB.begin(); 920288943Sdim MachineFunction &MF = *MBB.getParent(); 921296417Sdim auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 922288943Sdim 923288943Sdim if (useSpillFunction(MF, CSI)) { 924288943Sdim unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); 925288943Sdim const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem); 926288943Sdim // Call spill function. 927288943Sdim DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); 928288943Sdim MachineInstr *SaveRegsCall = 929296417Sdim BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) 930288943Sdim .addExternalSymbol(SpillFun); 931288943Sdim // Add callee-saved registers as use. 932288943Sdim addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); 933288943Sdim // Add live in registers. 
934288943Sdim for (unsigned I = 0; I < CSI.size(); ++I) 935288943Sdim MBB.addLiveIn(CSI[I].getReg()); 936288943Sdim return true; 937234285Sdim } 938234285Sdim 939288943Sdim for (unsigned i = 0, n = CSI.size(); i < n; ++i) { 940234285Sdim unsigned Reg = CSI[i].getReg(); 941288943Sdim // Add live in registers. We treat eh_return callee saved register r0 - r3 942288943Sdim // specially. They are not really callee saved registers as they are not 943288943Sdim // supposed to be killed. 944288943Sdim bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); 945288943Sdim int FI = CSI[i].getFrameIdx(); 946288943Sdim const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); 947296417Sdim HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); 948288943Sdim if (IsKill) 949288943Sdim MBB.addLiveIn(Reg); 950288943Sdim } 951288943Sdim return true; 952288943Sdim} 953234285Sdim 954234285Sdim 955288943Sdimbool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, 956288943Sdim const CSIVect &CSI, const HexagonRegisterInfo &HRI) const { 957288943Sdim if (CSI.empty()) 958288943Sdim return false; 959234285Sdim 960288943Sdim MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); 961288943Sdim MachineFunction &MF = *MBB.getParent(); 962296417Sdim auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 963234285Sdim 964288943Sdim if (useRestoreFunction(MF, CSI)) { 965288943Sdim bool HasTC = hasTailCall(MBB) || !hasReturn(MBB); 966288943Sdim unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI); 967288943Sdim SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem; 968288943Sdim const char *RestoreFn = getSpillFunctionFor(MaxR, Kind); 969288943Sdim 970288943Sdim // Call spill function. 971288943Sdim DebugLoc DL = MI != MBB.end() ? 
MI->getDebugLoc() 972288943Sdim : MBB.getLastNonDebugInstr()->getDebugLoc(); 973288943Sdim MachineInstr *DeallocCall = nullptr; 974288943Sdim 975288943Sdim if (HasTC) { 976288943Sdim unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; 977296417Sdim DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc)) 978288943Sdim .addExternalSymbol(RestoreFn); 979234285Sdim } else { 980288943Sdim // The block has a return. 981288943Sdim MachineBasicBlock::iterator It = MBB.getFirstTerminator(); 982288943Sdim assert(It->isReturn() && std::next(It) == MBB.end()); 983288943Sdim unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4; 984296417Sdim DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc)) 985288943Sdim .addExternalSymbol(RestoreFn); 986288943Sdim // Transfer the function live-out registers. 987288943Sdim DeallocCall->copyImplicitOps(MF, It); 988234285Sdim } 989288943Sdim addCalleeSaveRegistersAsImpOperand(DeallocCall, MaxR, true); 990288943Sdim return true; 991234285Sdim } 992288943Sdim 993288943Sdim for (unsigned i = 0; i < CSI.size(); ++i) { 994288943Sdim unsigned Reg = CSI[i].getReg(); 995288943Sdim const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); 996288943Sdim int FI = CSI[i].getFrameIdx(); 997296417Sdim HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); 998288943Sdim } 999234285Sdim return true; 1000234285Sdim} 1001234285Sdim 1002234285Sdim 1003288943Sdimvoid HexagonFrameLowering::eliminateCallFramePseudoInstr(MachineFunction &MF, 1004288943Sdim MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { 1005288943Sdim MachineInstr &MI = *I; 1006288943Sdim unsigned Opc = MI.getOpcode(); 1007288943Sdim (void)Opc; // Silence compiler warning. 
1008288943Sdim assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) && 1009288943Sdim "Cannot handle this call frame pseudo instruction"); 1010288943Sdim MBB.erase(I); 1011288943Sdim} 1012234285Sdim 1013234285Sdim 1014288943Sdimvoid HexagonFrameLowering::processFunctionBeforeFrameFinalized( 1015288943Sdim MachineFunction &MF, RegScavenger *RS) const { 1016288943Sdim // If this function has uses aligned stack and also has variable sized stack 1017288943Sdim // objects, then we need to map all spill slots to fixed positions, so that 1018288943Sdim // they can be accessed through FP. Otherwise they would have to be accessed 1019288943Sdim // via AP, which may not be available at the particular place in the program. 1020288943Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 1021288943Sdim bool HasAlloca = MFI->hasVarSizedObjects(); 1022296417Sdim bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment()); 1023288943Sdim 1024296417Sdim if (!HasAlloca || !NeedsAlign) 1025288943Sdim return; 1026288943Sdim 1027288943Sdim unsigned LFS = MFI->getLocalFrameSize(); 1028288943Sdim int Offset = -LFS; 1029288943Sdim for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { 1030288943Sdim if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i)) 1031288943Sdim continue; 1032288943Sdim int S = MFI->getObjectSize(i); 1033288943Sdim LFS += S; 1034288943Sdim Offset -= S; 1035288943Sdim MFI->mapLocalFrameObject(i, Offset); 1036288943Sdim } 1037288943Sdim 1038288943Sdim MFI->setLocalFrameSize(LFS); 1039288943Sdim unsigned A = MFI->getLocalFrameMaxAlign(); 1040288943Sdim assert(A <= 8 && "Unexpected local frame alignment"); 1041288943Sdim if (A == 0) 1042288943Sdim MFI->setLocalFrameMaxAlign(8); 1043288943Sdim MFI->setUseLocalStackAllocationBlock(true); 1044288943Sdim} 1045288943Sdim 1046288943Sdim/// Returns true if there is no caller saved registers available. 
1047288943Sdimstatic bool needToReserveScavengingSpillSlots(MachineFunction &MF, 1048288943Sdim const HexagonRegisterInfo &HRI) { 1049288943Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1050288943Sdim const MCPhysReg *CallerSavedRegs = HRI.getCallerSavedRegs(&MF); 1051288943Sdim // Check for an unused caller-saved register. 1052288943Sdim for ( ; *CallerSavedRegs; ++CallerSavedRegs) { 1053288943Sdim MCPhysReg FreeReg = *CallerSavedRegs; 1054296417Sdim if (!MRI.reg_nodbg_empty(FreeReg)) 1055288943Sdim continue; 1056288943Sdim 1057288943Sdim // Check aliased register usage. 1058288943Sdim bool IsCurrentRegUsed = false; 1059288943Sdim for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) 1060296417Sdim if (!MRI.reg_nodbg_empty(*AI)) { 1061288943Sdim IsCurrentRegUsed = true; 1062288943Sdim break; 1063288943Sdim } 1064288943Sdim if (IsCurrentRegUsed) 1065288943Sdim continue; 1066288943Sdim 1067288943Sdim // Neither directly used nor used through an aliased register. 1068234285Sdim return false; 1069234285Sdim } 1070288943Sdim // All caller-saved registers are used. 1071288943Sdim return true; 1072288943Sdim} 1073234285Sdim 1074234285Sdim 1075288943Sdim/// Replaces the predicate spill code pseudo instructions by valid instructions. 1076288943Sdimbool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) 1077288943Sdim const { 1078288943Sdim auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); 1079288943Sdim auto &HII = *HST.getInstrInfo(); 1080288943Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1081288943Sdim bool HasReplacedPseudoInst = false; 1082288943Sdim // Replace predicate spill pseudo instructions by real code. 1083288943Sdim // Loop over all of the basic blocks. 1084288943Sdim for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); 1085288943Sdim MBBb != MBBe; ++MBBb) { 1086296417Sdim MachineBasicBlock *MBB = &*MBBb; 1087288943Sdim // Traverse the basic block. 
1088288943Sdim MachineBasicBlock::iterator NextII; 1089288943Sdim for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); 1090288943Sdim MII = NextII) { 1091288943Sdim MachineInstr *MI = MII; 1092288943Sdim NextII = std::next(MII); 1093288943Sdim int Opc = MI->getOpcode(); 1094288943Sdim if (Opc == Hexagon::STriw_pred) { 1095288943Sdim HasReplacedPseudoInst = true; 1096288943Sdim // STriw_pred FI, 0, SrcReg; 1097288943Sdim unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 1098288943Sdim unsigned SrcReg = MI->getOperand(2).getReg(); 1099288943Sdim bool IsOrigSrcRegKilled = MI->getOperand(2).isKill(); 1100234285Sdim 1101288943Sdim assert(MI->getOperand(0).isFI() && "Expect a frame index"); 1102288943Sdim assert(Hexagon::PredRegsRegClass.contains(SrcReg) && 1103288943Sdim "Not a predicate register"); 1104288943Sdim 1105288943Sdim // Insert transfer to general purpose register. 1106288943Sdim // VirtReg = C2_tfrpr SrcPredReg 1107288943Sdim BuildMI(*MBB, MII, MI->getDebugLoc(), HII.get(Hexagon::C2_tfrpr), 1108288943Sdim VirtReg).addReg(SrcReg, getKillRegState(IsOrigSrcRegKilled)); 1109288943Sdim 1110288943Sdim // Change instruction to S2_storeri_io. 
1111288943Sdim // S2_storeri_io FI, 0, VirtReg 1112288943Sdim MI->setDesc(HII.get(Hexagon::S2_storeri_io)); 1113288943Sdim MI->getOperand(2).setReg(VirtReg); 1114288943Sdim MI->getOperand(2).setIsKill(); 1115288943Sdim 1116288943Sdim } else if (Opc == Hexagon::LDriw_pred) { 1117288943Sdim // DstReg = LDriw_pred FI, 0 1118288943Sdim MachineOperand &M0 = MI->getOperand(0); 1119288943Sdim if (M0.isDead()) { 1120288943Sdim MBB->erase(MII); 1121288943Sdim continue; 1122288943Sdim } 1123288943Sdim 1124288943Sdim unsigned VirtReg = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 1125288943Sdim unsigned DestReg = MI->getOperand(0).getReg(); 1126288943Sdim 1127288943Sdim assert(MI->getOperand(1).isFI() && "Expect a frame index"); 1128288943Sdim assert(Hexagon::PredRegsRegClass.contains(DestReg) && 1129288943Sdim "Not a predicate register"); 1130288943Sdim 1131288943Sdim // Change instruction to L2_loadri_io. 1132288943Sdim // VirtReg = L2_loadri_io FI, 0 1133288943Sdim MI->setDesc(HII.get(Hexagon::L2_loadri_io)); 1134288943Sdim MI->getOperand(0).setReg(VirtReg); 1135288943Sdim 1136288943Sdim // Insert transfer to general purpose register. 
1137288943Sdim // DestReg = C2_tfrrp VirtReg 1138288943Sdim const MCInstrDesc &D = HII.get(Hexagon::C2_tfrrp); 1139288943Sdim BuildMI(*MBB, std::next(MII), MI->getDebugLoc(), D, DestReg) 1140288943Sdim .addReg(VirtReg, getKillRegState(true)); 1141288943Sdim HasReplacedPseudoInst = true; 1142288943Sdim } 1143234285Sdim } 1144288943Sdim } 1145288943Sdim return HasReplacedPseudoInst; 1146288943Sdim} 1147234285Sdim 1148234285Sdim 1149288943Sdimvoid HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, 1150288943Sdim BitVector &SavedRegs, 1151288943Sdim RegScavenger *RS) const { 1152288943Sdim TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1153288943Sdim 1154288943Sdim auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); 1155288943Sdim auto &HRI = *HST.getRegisterInfo(); 1156288943Sdim 1157288943Sdim bool HasEHReturn = MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn(); 1158288943Sdim 1159288943Sdim // If we have a function containing __builtin_eh_return we want to spill and 1160288943Sdim // restore all callee saved registers. Pretend that they are used. 1161288943Sdim if (HasEHReturn) { 1162288943Sdim for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs; 1163288943Sdim ++CSRegs) 1164288943Sdim SavedRegs.set(*CSRegs); 1165288943Sdim } 1166288943Sdim 1167288943Sdim const TargetRegisterClass &RC = Hexagon::IntRegsRegClass; 1168288943Sdim 1169288943Sdim // Replace predicate register pseudo spill code. 1170288943Sdim bool HasReplacedPseudoInst = replacePredRegPseudoSpillCode(MF); 1171288943Sdim 1172288943Sdim // We need to reserve a a spill slot if scavenging could potentially require 1173288943Sdim // spilling a scavenged register. 
1174288943Sdim if (HasReplacedPseudoInst && needToReserveScavengingSpillSlots(MF, HRI)) { 1175288943Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 1176288943Sdim for (int i=0; i < NumberScavengerSlots; i++) 1177288943Sdim RS->addScavengingFrameIndex( 1178288943Sdim MFI->CreateSpillStackObject(RC.getSize(), RC.getAlignment())); 1179288943Sdim } 1180288943Sdim} 1181288943Sdim 1182288943Sdim 1183288943Sdim#ifndef NDEBUG 1184288943Sdimstatic void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { 1185288943Sdim dbgs() << '{'; 1186288943Sdim for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) { 1187288943Sdim unsigned R = x; 1188288943Sdim dbgs() << ' ' << PrintReg(R, &TRI); 1189288943Sdim } 1190288943Sdim dbgs() << " }"; 1191288943Sdim} 1192288943Sdim#endif 1193288943Sdim 1194288943Sdim 1195288943Sdimbool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, 1196288943Sdim const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { 1197288943Sdim DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on " 1198288943Sdim << MF.getFunction()->getName() << '\n'); 1199288943Sdim MachineFrameInfo *MFI = MF.getFrameInfo(); 1200288943Sdim BitVector SRegs(Hexagon::NUM_TARGET_REGS); 1201288943Sdim 1202288943Sdim // Generate a set of unique, callee-saved registers (SRegs), where each 1203288943Sdim // register in the set is maximal in terms of sub-/super-register relation, 1204288943Sdim // i.e. for each R in SRegs, no proper super-register of R is also in SRegs. 1205288943Sdim 1206288943Sdim // (1) For each callee-saved register, add that register and all of its 1207288943Sdim // sub-registers to SRegs. 
1208288943Sdim DEBUG(dbgs() << "Initial CS registers: {"); 1209288943Sdim for (unsigned i = 0, n = CSI.size(); i < n; ++i) { 1210288943Sdim unsigned R = CSI[i].getReg(); 1211288943Sdim DEBUG(dbgs() << ' ' << PrintReg(R, TRI)); 1212288943Sdim for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) 1213288943Sdim SRegs[*SR] = true; 1214288943Sdim } 1215288943Sdim DEBUG(dbgs() << " }\n"); 1216288943Sdim DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); 1217288943Sdim 1218288943Sdim // (2) For each reserved register, remove that register and all of its 1219288943Sdim // sub- and super-registers from SRegs. 1220288943Sdim BitVector Reserved = TRI->getReservedRegs(MF); 1221288943Sdim for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) { 1222288943Sdim unsigned R = x; 1223288943Sdim for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR) 1224288943Sdim SRegs[*SR] = false; 1225288943Sdim } 1226288943Sdim DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n"); 1227288943Sdim DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); 1228288943Sdim 1229288943Sdim // (3) Collect all registers that have at least one sub-register in SRegs, 1230288943Sdim // and also have no sub-registers that are reserved. These will be the can- 1231288943Sdim // didates for saving as a whole instead of their individual sub-registers. 1232288943Sdim // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.) 
1233288943Sdim BitVector TmpSup(Hexagon::NUM_TARGET_REGS); 1234288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1235288943Sdim unsigned R = x; 1236288943Sdim for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) 1237288943Sdim TmpSup[*SR] = true; 1238288943Sdim } 1239288943Sdim for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) { 1240288943Sdim unsigned R = x; 1241288943Sdim for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) { 1242288943Sdim if (!Reserved[*SR]) 1243288943Sdim continue; 1244288943Sdim TmpSup[R] = false; 1245288943Sdim break; 1246234285Sdim } 1247234285Sdim } 1248288943Sdim DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n"); 1249288943Sdim 1250288943Sdim // (4) Include all super-registers found in (3) into SRegs. 1251288943Sdim SRegs |= TmpSup; 1252288943Sdim DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); 1253288943Sdim 1254288943Sdim // (5) For each register R in SRegs, if any super-register of R is in SRegs, 1255288943Sdim // remove R from SRegs. 1256288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1257288943Sdim unsigned R = x; 1258288943Sdim for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) { 1259288943Sdim if (!SRegs[*SR]) 1260288943Sdim continue; 1261288943Sdim SRegs[R] = false; 1262288943Sdim break; 1263288943Sdim } 1264288943Sdim } 1265288943Sdim DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); 1266288943Sdim 1267288943Sdim // Now, for each register that has a fixed stack slot, create the stack 1268288943Sdim // object for it. 1269288943Sdim CSI.clear(); 1270288943Sdim 1271288943Sdim typedef TargetFrameLowering::SpillSlot SpillSlot; 1272288943Sdim unsigned NumFixed; 1273288943Sdim int MinOffset = 0; // CS offsets are negative. 
1274288943Sdim const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed); 1275288943Sdim for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) { 1276288943Sdim if (!SRegs[S->Reg]) 1277288943Sdim continue; 1278288943Sdim const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); 1279288943Sdim int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset); 1280288943Sdim MinOffset = std::min(MinOffset, S->Offset); 1281288943Sdim CSI.push_back(CalleeSavedInfo(S->Reg, FI)); 1282288943Sdim SRegs[S->Reg] = false; 1283288943Sdim } 1284288943Sdim 1285288943Sdim // There can be some registers that don't have fixed slots. For example, 1286288943Sdim // we need to store R0-R3 in functions with exception handling. For each 1287288943Sdim // such register, create a non-fixed stack object. 1288288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1289288943Sdim unsigned R = x; 1290288943Sdim const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); 1291288943Sdim int Off = MinOffset - RC->getSize(); 1292288943Sdim unsigned Align = std::min(RC->getAlignment(), getStackAlignment()); 1293288943Sdim assert(isPowerOf2_32(Align)); 1294288943Sdim Off &= -Align; 1295288943Sdim int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off); 1296288943Sdim MinOffset = std::min(MinOffset, Off); 1297288943Sdim CSI.push_back(CalleeSavedInfo(R, FI)); 1298288943Sdim SRegs[R] = false; 1299288943Sdim } 1300288943Sdim 1301288943Sdim DEBUG({ 1302288943Sdim dbgs() << "CS information: {"; 1303288943Sdim for (unsigned i = 0, n = CSI.size(); i < n; ++i) { 1304288943Sdim int FI = CSI[i].getFrameIdx(); 1305288943Sdim int Off = MFI->getObjectOffset(FI); 1306288943Sdim dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; 1307288943Sdim if (Off >= 0) 1308288943Sdim dbgs() << '+'; 1309288943Sdim dbgs() << Off; 1310288943Sdim } 1311288943Sdim dbgs() << " }\n"; 1312288943Sdim }); 1313288943Sdim 1314288943Sdim#ifndef NDEBUG 
1315288943Sdim // Verify that all registers were handled. 1316288943Sdim bool MissedReg = false; 1317288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1318288943Sdim unsigned R = x; 1319288943Sdim dbgs() << PrintReg(R, TRI) << ' '; 1320288943Sdim MissedReg = true; 1321288943Sdim } 1322288943Sdim if (MissedReg) 1323288943Sdim llvm_unreachable("...there are unhandled callee-saved registers!"); 1324288943Sdim#endif 1325288943Sdim 1326234285Sdim return true; 1327234285Sdim} 1328234285Sdim 1329249423Sdim 1330288943Sdimvoid HexagonFrameLowering::expandAlloca(MachineInstr *AI, 1331288943Sdim const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { 1332288943Sdim MachineBasicBlock &MB = *AI->getParent(); 1333288943Sdim DebugLoc DL = AI->getDebugLoc(); 1334288943Sdim unsigned A = AI->getOperand(2).getImm(); 1335288943Sdim 1336288943Sdim // Have 1337288943Sdim // Rd = alloca Rs, #A 1338288943Sdim // 1339288943Sdim // If Rs and Rd are different registers, use this sequence: 1340288943Sdim // Rd = sub(r29, Rs) 1341288943Sdim // r29 = sub(r29, Rs) 1342288943Sdim // Rd = and(Rd, #-A) ; if necessary 1343288943Sdim // r29 = and(r29, #-A) ; if necessary 1344288943Sdim // Rd = add(Rd, #CF) ; CF size aligned to at most A 1345288943Sdim // otherwise, do 1346288943Sdim // Rd = sub(r29, Rs) 1347288943Sdim // Rd = and(Rd, #-A) ; if necessary 1348288943Sdim // r29 = Rd 1349288943Sdim // Rd = add(Rd, #CF) ; CF size aligned to at most A 1350288943Sdim 1351288943Sdim MachineOperand &RdOp = AI->getOperand(0); 1352288943Sdim MachineOperand &RsOp = AI->getOperand(1); 1353288943Sdim unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg(); 1354288943Sdim 1355288943Sdim // Rd = sub(r29, Rs) 1356288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd) 1357288943Sdim .addReg(SP) 1358288943Sdim .addReg(Rs); 1359288943Sdim if (Rs != Rd) { 1360288943Sdim // r29 = sub(r29, Rs) 1361288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP) 1362288943Sdim .addReg(SP) 
1363288943Sdim .addReg(Rs); 1364249423Sdim } 1365288943Sdim if (A > 8) { 1366288943Sdim // Rd = and(Rd, #-A) 1367288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd) 1368288943Sdim .addReg(Rd) 1369288943Sdim .addImm(-int64_t(A)); 1370288943Sdim if (Rs != Rd) 1371288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP) 1372288943Sdim .addReg(SP) 1373288943Sdim .addImm(-int64_t(A)); 1374288943Sdim } 1375288943Sdim if (Rs == Rd) { 1376288943Sdim // r29 = Rd 1377288943Sdim BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP) 1378288943Sdim .addReg(Rd); 1379288943Sdim } 1380288943Sdim if (CF > 0) { 1381288943Sdim // Rd = add(Rd, #CF) 1382288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd) 1383288943Sdim .addReg(Rd) 1384288943Sdim .addImm(CF); 1385288943Sdim } 1386249423Sdim} 1387249423Sdim 1388288943Sdim 1389288943Sdimbool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { 1390288943Sdim const MachineFrameInfo *MFI = MF.getFrameInfo(); 1391288943Sdim if (!MFI->hasVarSizedObjects()) 1392288943Sdim return false; 1393288943Sdim unsigned MaxA = MFI->getMaxAlignment(); 1394288943Sdim if (MaxA <= getStackAlignment()) 1395288943Sdim return false; 1396288943Sdim return true; 1397234285Sdim} 1398288943Sdim 1399288943Sdim 1400296417Sdimconst MachineInstr *HexagonFrameLowering::getAlignaInstr( 1401296417Sdim const MachineFunction &MF) const { 1402288943Sdim for (auto &B : MF) 1403288943Sdim for (auto &I : B) 1404288943Sdim if (I.getOpcode() == Hexagon::ALIGNA) 1405288943Sdim return &I; 1406288943Sdim return nullptr; 1407288943Sdim} 1408288943Sdim 1409288943Sdim 1410296417Sdim// FIXME: Use Function::optForSize(). 
1411288943Sdiminline static bool isOptSize(const MachineFunction &MF) { 1412288943Sdim AttributeSet AF = MF.getFunction()->getAttributes(); 1413288943Sdim return AF.hasAttribute(AttributeSet::FunctionIndex, 1414288943Sdim Attribute::OptimizeForSize); 1415288943Sdim} 1416288943Sdim 1417288943Sdiminline static bool isMinSize(const MachineFunction &MF) { 1418296417Sdim return MF.getFunction()->optForMinSize(); 1419288943Sdim} 1420288943Sdim 1421288943Sdim 1422288943Sdim/// Determine whether the callee-saved register saves and restores should 1423288943Sdim/// be generated via inline code. If this function returns "true", inline 1424288943Sdim/// code will be generated. If this function returns "false", additional 1425288943Sdim/// checks are performed, which may still lead to the inline code. 1426288943Sdimbool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF, 1427288943Sdim const CSIVect &CSI) const { 1428288943Sdim if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) 1429288943Sdim return true; 1430288943Sdim if (!isOptSize(MF) && !isMinSize(MF)) 1431288943Sdim if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) 1432288943Sdim return true; 1433288943Sdim 1434288943Sdim // Check if CSI only has double registers, and if the registers form 1435288943Sdim // a contiguous block starting from D8. 
1436288943Sdim BitVector Regs(Hexagon::NUM_TARGET_REGS); 1437288943Sdim for (unsigned i = 0, n = CSI.size(); i < n; ++i) { 1438288943Sdim unsigned R = CSI[i].getReg(); 1439288943Sdim if (!Hexagon::DoubleRegsRegClass.contains(R)) 1440288943Sdim return true; 1441288943Sdim Regs[R] = true; 1442288943Sdim } 1443288943Sdim int F = Regs.find_first(); 1444288943Sdim if (F != Hexagon::D8) 1445288943Sdim return true; 1446288943Sdim while (F >= 0) { 1447288943Sdim int N = Regs.find_next(F); 1448288943Sdim if (N >= 0 && N != F+1) 1449288943Sdim return true; 1450288943Sdim F = N; 1451288943Sdim } 1452288943Sdim 1453288943Sdim return false; 1454288943Sdim} 1455288943Sdim 1456288943Sdim 1457288943Sdimbool HexagonFrameLowering::useSpillFunction(MachineFunction &MF, 1458288943Sdim const CSIVect &CSI) const { 1459288943Sdim if (shouldInlineCSR(MF, CSI)) 1460288943Sdim return false; 1461288943Sdim unsigned NumCSI = CSI.size(); 1462288943Sdim if (NumCSI <= 1) 1463288943Sdim return false; 1464288943Sdim 1465288943Sdim unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs 1466288943Sdim : SpillFuncThreshold; 1467288943Sdim return Threshold < NumCSI; 1468288943Sdim} 1469288943Sdim 1470288943Sdim 1471288943Sdimbool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, 1472288943Sdim const CSIVect &CSI) const { 1473288943Sdim if (shouldInlineCSR(MF, CSI)) 1474288943Sdim return false; 1475288943Sdim unsigned NumCSI = CSI.size(); 1476288943Sdim unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1 1477288943Sdim : SpillFuncThreshold; 1478288943Sdim return Threshold < NumCSI; 1479288943Sdim} 1480