1327952Sdim//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===// 2234285Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6234285Sdim// 7234285Sdim// 8234285Sdim//===----------------------------------------------------------------------===// 9234285Sdim 10321369Sdim#include "HexagonFrameLowering.h" 11309124Sdim#include "HexagonBlockRanges.h" 12234285Sdim#include "HexagonInstrInfo.h" 13249423Sdim#include "HexagonMachineFunctionInfo.h" 14234285Sdim#include "HexagonRegisterInfo.h" 15234285Sdim#include "HexagonSubtarget.h" 16234285Sdim#include "HexagonTargetMachine.h" 17314564Sdim#include "MCTargetDesc/HexagonBaseInfo.h" 18234285Sdim#include "llvm/ADT/BitVector.h" 19314564Sdim#include "llvm/ADT/DenseMap.h" 20314564Sdim#include "llvm/ADT/None.h" 21314564Sdim#include "llvm/ADT/Optional.h" 22288943Sdim#include "llvm/ADT/PostOrderIterator.h" 23314564Sdim#include "llvm/ADT/SetVector.h" 24314564Sdim#include "llvm/ADT/SmallSet.h" 25314564Sdim#include "llvm/ADT/SmallVector.h" 26314564Sdim#include "llvm/CodeGen/LivePhysRegs.h" 27314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 28288943Sdim#include "llvm/CodeGen/MachineDominators.h" 29314564Sdim#include "llvm/CodeGen/MachineFrameInfo.h" 30234285Sdim#include "llvm/CodeGen/MachineFunction.h" 31234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 32314564Sdim#include "llvm/CodeGen/MachineInstr.h" 33249423Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 34314564Sdim#include "llvm/CodeGen/MachineMemOperand.h" 35234285Sdim#include "llvm/CodeGen/MachineModuleInfo.h" 36314564Sdim#include "llvm/CodeGen/MachineOperand.h" 37288943Sdim#include "llvm/CodeGen/MachinePostDominators.h" 38234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 39360784Sdim#include "llvm/CodeGen/PseudoSourceValue.h" 40234285Sdim#include 
"llvm/CodeGen/RegisterScavenging.h" 41327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 42327952Sdim#include "llvm/IR/Attributes.h" 43314564Sdim#include "llvm/IR/DebugLoc.h" 44249423Sdim#include "llvm/IR/Function.h" 45314564Sdim#include "llvm/MC/MCDwarf.h" 46314564Sdim#include "llvm/MC/MCRegisterInfo.h" 47314564Sdim#include "llvm/Pass.h" 48314564Sdim#include "llvm/Support/CodeGen.h" 49249423Sdim#include "llvm/Support/CommandLine.h" 50327952Sdim#include "llvm/Support/Compiler.h" 51288943Sdim#include "llvm/Support/Debug.h" 52314564Sdim#include "llvm/Support/ErrorHandling.h" 53314564Sdim#include "llvm/Support/MathExtras.h" 54288943Sdim#include "llvm/Support/raw_ostream.h" 55234285Sdim#include "llvm/Target/TargetMachine.h" 56327952Sdim#include "llvm/Target/TargetOptions.h" 57314564Sdim#include <algorithm> 58314564Sdim#include <cassert> 59314564Sdim#include <cstdint> 60314564Sdim#include <iterator> 61314564Sdim#include <limits> 62314564Sdim#include <map> 63314564Sdim#include <utility> 64314564Sdim#include <vector> 65234285Sdim 66321369Sdim#define DEBUG_TYPE "hexagon-pei" 67321369Sdim 68288943Sdim// Hexagon stack frame layout as defined by the ABI: 69288943Sdim// 70288943Sdim// Incoming arguments 71288943Sdim// passed via stack 72288943Sdim// | 73288943Sdim// | 74288943Sdim// SP during function's FP during function's | 75288943Sdim// +-- runtime (top of stack) runtime (bottom) --+ | 76288943Sdim// | | | 77288943Sdim// --++---------------------+------------------+-----------------++-+------- 78288943Sdim// | parameter area for | variable-size | fixed-size |LR| arg 79288943Sdim// | called functions | local objects | local objects |FP| 80288943Sdim// --+----------------------+------------------+-----------------+--+------- 81288943Sdim// <- size known -> <- size unknown -> <- size known -> 82288943Sdim// 83288943Sdim// Low address High address 84288943Sdim// 85288943Sdim// <--- stack growth 86288943Sdim// 87288943Sdim// 88288943Sdim// - In any circumstances, the 
outgoing function arguments are always accessi- 89288943Sdim// ble using the SP, and the incoming arguments are accessible using the FP. 90288943Sdim// - If the local objects are not aligned, they can always be accessed using 91288943Sdim// the FP. 92288943Sdim// - If there are no variable-sized objects, the local objects can always be 93288943Sdim// accessed using the SP, regardless whether they are aligned or not. (The 94288943Sdim// alignment padding will be at the bottom of the stack (highest address), 95288943Sdim// and so the offset with respect to the SP will be known at the compile- 96288943Sdim// -time.) 97288943Sdim// 98288943Sdim// The only complication occurs if there are both, local aligned objects, and 99288943Sdim// dynamically allocated (variable-sized) objects. The alignment pad will be 100288943Sdim// placed between the FP and the local objects, thus preventing the use of the 101288943Sdim// FP to access the local objects. At the same time, the variable-sized objects 102288943Sdim// will be between the SP and the local objects, thus introducing an unknown 103288943Sdim// distance from the SP to the locals. 104288943Sdim// 105288943Sdim// To avoid this problem, a new register is created that holds the aligned 106288943Sdim// address of the bottom of the stack, referred in the sources as AP (aligned 107288943Sdim// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad 108288943Sdim// that aligns AP to the required boundary (a maximum of the alignments of 109288943Sdim// all stack objects, fixed- and variable-sized). All local objects[1] will 110288943Sdim// then use AP as the base pointer. 111288943Sdim// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get 112288943Sdim// their name from being allocated at fixed locations on the stack, relative 113288943Sdim// to the FP. In the presence of dynamic allocation and local alignment, such 114288943Sdim// objects can only be accessed through the FP. 
115288943Sdim// 116288943Sdim// Illustration of the AP: 117288943Sdim// FP --+ 118288943Sdim// | 119288943Sdim// ---------------+---------------------+-----+-----------------------++-+-- 120288943Sdim// Rest of the | Local stack objects | Pad | Fixed stack objects |LR| 121288943Sdim// stack frame | (aligned) | | (CSR, spills, etc.) |FP| 122288943Sdim// ---------------+---------------------+-----+-----------------+-----+--+-- 123288943Sdim// |<-- Multiple of the -->| 124288943Sdim// stack alignment +-- AP 125288943Sdim// 126288943Sdim// The AP is set up at the beginning of the function. Since it is not a dedi- 127288943Sdim// cated (reserved) register, it needs to be kept live throughout the function 128288943Sdim// to be available as the base register for local object accesses. 129288943Sdim// Normally, an address of a stack objects is obtained by a pseudo-instruction 130314564Sdim// PS_fi. To access local objects with the AP register present, a different 131314564Sdim// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra 132314564Sdim// argument compared to PS_fi: the first input register is the AP register. 133288943Sdim// This keeps the register live between its definition and its uses. 134288943Sdim 135314564Sdim// The AP register is originally set up using pseudo-instruction PS_aligna: 136314564Sdim// AP = PS_aligna A 137288943Sdim// where 138288943Sdim// A - required stack alignment 139288943Sdim// The alignment value must be the maximum of all alignments required by 140288943Sdim// any stack object. 
// The dynamic allocation uses a pseudo-instruction PS_alloca:
//         Rd  = PS_alloca Rs, A
// where
//         Rd  - address of the allocated space
//         Rs  - minimum size (the actual allocated can be larger to accommodate
//               alignment)
//         A   - required alignment

using namespace llvm;

// Command-line knobs for tuning Hexagon frame lowering. The string in each
// option is the user-visible flag name and must not change.
static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
    cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));

static cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots",
    cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
    cl::ZeroOrMore);

static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
    cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
    cl::init(6), cl::ZeroOrMore);

static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
    cl::Hidden, cl::desc("Specify Os spill func threshold"),
    cl::init(1), cl::ZeroOrMore);

static cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer",
    cl::Hidden, cl::desc("Enable runtime checks for stack overflow."),
    cl::init(false), cl::ZeroOrMore);

static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
    cl::init(true), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Enable stack frame shrink wrapping"));

static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit",
    cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Max count of stack frame shrink-wraps"));

static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
    cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
    cl::init(false), cl::ZeroOrMore);

static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
    cl::Hidden, cl::desc("Refrain from using FP whenever possible"));

static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
    cl::init(true), cl::desc("Optimize spill slots"));

#ifndef NDEBUG
// Debug-only throttle for the spill-slot optimization (bisection aid).
static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,
    cl::init(std::numeric_limits<unsigned>::max()));
static unsigned SpillOptCount = 0;
#endif

namespace llvm {

  void initializeHexagonCallFrameInformationPass(PassRegistry&);
  FunctionPass *createHexagonCallFrameInformation();

} // end namespace llvm

namespace {

  /// Machine function pass that emits CFI instructions for functions that
  /// need frame moves (debug info / EH). Runs after register allocation
  /// (requires the NoVRegs property).
  class HexagonCallFrameInformation : public MachineFunctionPass {
  public:
    static char ID;

    HexagonCallFrameInformation() : MachineFunctionPass(ID) {
      PassRegistry &PR = *PassRegistry::getPassRegistry();
      initializeHexagonCallFrameInformationPass(PR);
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties().set(
          MachineFunctionProperties::Property::NoVRegs);
    }
  };

  char HexagonCallFrameInformation::ID = 0;

} // end anonymous namespace

bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
  // Delegate the actual CFI emission to the frame lowering object.
  auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
  bool NeedCFI = MF.needsFrameMoves();

  if (!NeedCFI)
    return false;
  HFI.insertCFIInstructions(MF);
  return true;
}

INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
                "Hexagon call frame information", false, false)

FunctionPass *llvm::createHexagonCallFrameInformation() {
  return new HexagonCallFrameInformation();
}

/// Map a register pair Reg to the subregister that has the greater "number",
/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
static unsigned getMax32BitSubRegister(unsigned Reg,
                                       const TargetRegisterInfo &TRI,
                                       bool hireg = true) {
  // Registers outside the D0..D15 pair range are returned unchanged.
  if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
    return Reg;

  unsigned RegNo = 0;
  for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
    if (hireg) {
      // Keep the largest subregister seen so far.
      if (*SubRegs > RegNo)
        RegNo = *SubRegs;
    } else {
      // Keep the smallest subregister seen so far (0 means "none yet").
      if (!RegNo || *SubRegs < RegNo)
        RegNo = *SubRegs;
    }
  }
  return RegNo;
}

/// Returns the callee saved register with the largest id in the vector.
static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
                                     const TargetRegisterInfo &TRI) {
  static_assert(Hexagon::R1 > 0,
                "Assume physical registers are encoded as positive integers");
  if (CSI.empty())
    return 0;

  // Compare registers by their 32-bit "high" subregister so that a pair
  // like D3 counts as R7 (see getMax32BitSubRegister).
  unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
  for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
    unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
    if (Reg > Max)
      Max = Reg;
  }
  return Max;
}

/// Checks if the basic block contains any instruction that needs a stack
/// frame to be already in place.
static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
                            const HexagonRegisterInfo &HRI) {
  for (auto &I : MBB) {
    const MachineInstr *MI = &I;
    // Any call needs the frame (outgoing arguments, return address).
    if (MI->isCall())
      return true;
    unsigned Opc = MI->getOpcode();
    switch (Opc) {
      // Dynamic allocation and the aligned-pointer setup both require a
      // frame to exist.
      case Hexagon::PS_alloca:
      case Hexagon::PS_aligna:
        return true;
      default:
        break;
    }
    // Check individual operands.
    for (const MachineOperand &MO : MI->operands()) {
      // While the presence of a frame index does not prove that a stack
      // frame will be required, all frame indexes should be within alloc-
      // frame/deallocframe. Otherwise, the code that translates a frame
      // index into an offset would have to be aware of the placement of
      // the frame creation/destruction instructions.
      if (MO.isFI())
        return true;
      if (MO.isReg()) {
        Register R = MO.getReg();
        // Virtual registers will need scavenging, which then may require
        // a stack slot.
        if (Register::isVirtualRegister(R))
          return true;
        // Any use/def that overlaps a callee-saved register implies a
        // frame (the CSR must be spilled/restored around it).
        for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
          if (CSR[*S])
            return true;
        continue;
      }
      if (MO.isRegMask()) {
        // A regmask would normally have all callee-saved registers marked
        // as preserved, so this check would not be needed, but in case of
        // ever having other regmasks (for other calling conventions),
        // make sure they would be processed correctly.
        const uint32_t *BM = MO.getRegMask();
        for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
          unsigned R = x;
          // If this regmask does not preserve a CSR, a frame will be needed.
          // Regmask bit R is stored in word R/32, bit R%32 (set = preserved).
          if (!(BM[R/32] & (1u << (R%32))))
            return true;
        }
      }
    }
  }
  return false;
}

/// Returns true if MBB ends in a machine instruction that indicates a tail
/// call in the block.
static bool hasTailCall(const MachineBasicBlock &MBB) {
  MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
  // An empty (or debug-only) block cannot end in a tail call.
  if (I == MBB.end())
    return false;
  unsigned RetOpc = I->getOpcode();
  return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;
}

/// Returns true if MBB contains an instruction that returns.
345314564Sdimstatic bool hasReturn(const MachineBasicBlock &MBB) { 346288943Sdim for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I) 347288943Sdim if (I->isReturn()) 348288943Sdim return true; 349288943Sdim return false; 350314564Sdim} 351309124Sdim 352314564Sdim/// Returns the "return" instruction from this block, or nullptr if there 353314564Sdim/// isn't any. 354314564Sdimstatic MachineInstr *getReturn(MachineBasicBlock &MBB) { 355309124Sdim for (auto &I : MBB) 356309124Sdim if (I.isReturn()) 357309124Sdim return &I; 358309124Sdim return nullptr; 359314564Sdim} 360309124Sdim 361314564Sdimstatic bool isRestoreCall(unsigned Opc) { 362309124Sdim switch (Opc) { 363309124Sdim case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: 364309124Sdim case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: 365314564Sdim case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT: 366314564Sdim case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC: 367314564Sdim case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT: 368314564Sdim case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC: 369309124Sdim case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4: 370309124Sdim case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC: 371309124Sdim return true; 372309124Sdim } 373309124Sdim return false; 374314564Sdim} 375309124Sdim 376314564Sdimstatic inline bool isOptNone(const MachineFunction &MF) { 377353358Sdim return MF.getFunction().hasOptNone() || 378309124Sdim MF.getTarget().getOptLevel() == CodeGenOpt::None; 379314564Sdim} 380309124Sdim 381314564Sdimstatic inline bool isOptSize(const MachineFunction &MF) { 382327952Sdim const Function &F = MF.getFunction(); 383353358Sdim return F.hasOptSize() && !F.hasMinSize(); 384314564Sdim} 385309124Sdim 386314564Sdimstatic inline bool isMinSize(const MachineFunction &MF) { 387353358Sdim return MF.getFunction().hasMinSize(); 388234285Sdim} 389234285Sdim 390288943Sdim/// Implements shrink-wrapping of the stack frame. 
By default, stack frame 391288943Sdim/// is created in the function entry block, and is cleaned up in every block 392288943Sdim/// that returns. This function finds alternate blocks: one for the frame 393288943Sdim/// setup (prolog) and one for the cleanup (epilog). 394288943Sdimvoid HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, 395288943Sdim MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const { 396288943Sdim static unsigned ShrinkCounter = 0; 397288943Sdim 398288943Sdim if (ShrinkLimit.getPosition()) { 399288943Sdim if (ShrinkCounter >= ShrinkLimit) 400288943Sdim return; 401288943Sdim ShrinkCounter++; 402288943Sdim } 403288943Sdim 404327952Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 405288943Sdim 406288943Sdim MachineDominatorTree MDT; 407288943Sdim MDT.runOnMachineFunction(MF); 408288943Sdim MachinePostDominatorTree MPT; 409288943Sdim MPT.runOnMachineFunction(MF); 410288943Sdim 411327952Sdim using UnsignedMap = DenseMap<unsigned, unsigned>; 412327952Sdim using RPOTType = ReversePostOrderTraversal<const MachineFunction *>; 413327952Sdim 414288943Sdim UnsignedMap RPO; 415288943Sdim RPOTType RPOT(&MF); 416288943Sdim unsigned RPON = 0; 417288943Sdim for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) 418288943Sdim RPO[(*I)->getNumber()] = RPON++; 419288943Sdim 420288943Sdim // Don't process functions that have loops, at least for now. Placement 421288943Sdim // of prolog and epilog must take loop structure into account. For simpli- 422288943Sdim // city don't do it right now. 423288943Sdim for (auto &I : MF) { 424288943Sdim unsigned BN = RPO[I.getNumber()]; 425288943Sdim for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) { 426288943Sdim // If found a back-edge, return. 427288943Sdim if (RPO[(*SI)->getNumber()] <= BN) 428288943Sdim return; 429288943Sdim } 430288943Sdim } 431288943Sdim 432288943Sdim // Collect the set of blocks that need a stack frame to execute. 
Scan 433288943Sdim // each block for uses/defs of callee-saved registers, calls, etc. 434288943Sdim SmallVector<MachineBasicBlock*,16> SFBlocks; 435288943Sdim BitVector CSR(Hexagon::NUM_TARGET_REGS); 436288943Sdim for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P) 437309124Sdim for (MCSubRegIterator S(*P, &HRI, true); S.isValid(); ++S) 438309124Sdim CSR[*S] = true; 439288943Sdim 440288943Sdim for (auto &I : MF) 441309124Sdim if (needsStackFrame(I, CSR, HRI)) 442288943Sdim SFBlocks.push_back(&I); 443288943Sdim 444341825Sdim LLVM_DEBUG({ 445288943Sdim dbgs() << "Blocks needing SF: {"; 446288943Sdim for (auto &B : SFBlocks) 447327952Sdim dbgs() << " " << printMBBReference(*B); 448288943Sdim dbgs() << " }\n"; 449288943Sdim }); 450288943Sdim // No frame needed? 451288943Sdim if (SFBlocks.empty()) 452288943Sdim return; 453288943Sdim 454288943Sdim // Pick a common dominator and a common post-dominator. 455288943Sdim MachineBasicBlock *DomB = SFBlocks[0]; 456288943Sdim for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { 457288943Sdim DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]); 458288943Sdim if (!DomB) 459288943Sdim break; 460288943Sdim } 461288943Sdim MachineBasicBlock *PDomB = SFBlocks[0]; 462288943Sdim for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) { 463288943Sdim PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]); 464288943Sdim if (!PDomB) 465288943Sdim break; 466288943Sdim } 467341825Sdim LLVM_DEBUG({ 468327952Sdim dbgs() << "Computed dom block: "; 469327952Sdim if (DomB) 470327952Sdim dbgs() << printMBBReference(*DomB); 471327952Sdim else 472327952Sdim dbgs() << "<null>"; 473327952Sdim dbgs() << ", computed pdom block: "; 474327952Sdim if (PDomB) 475327952Sdim dbgs() << printMBBReference(*PDomB); 476327952Sdim else 477327952Sdim dbgs() << "<null>"; 478288943Sdim dbgs() << "\n"; 479288943Sdim }); 480288943Sdim if (!DomB || !PDomB) 481288943Sdim return; 482288943Sdim 483288943Sdim // Make sure that DomB dominates PDomB and 
PDomB post-dominates DomB. 484288943Sdim if (!MDT.dominates(DomB, PDomB)) { 485341825Sdim LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); 486288943Sdim return; 487288943Sdim } 488288943Sdim if (!MPT.dominates(PDomB, DomB)) { 489341825Sdim LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); 490288943Sdim return; 491288943Sdim } 492288943Sdim 493288943Sdim // Finally, everything seems right. 494288943Sdim PrologB = DomB; 495288943Sdim EpilogB = PDomB; 496288943Sdim} 497288943Sdim 498288943Sdim/// Perform most of the PEI work here: 499288943Sdim/// - saving/restoring of the callee-saved registers, 500288943Sdim/// - stack frame creation and destruction. 501288943Sdim/// Normally, this work is distributed among various functions, but doing it 502288943Sdim/// in one place allows shrink-wrapping of the stack frame. 503288943Sdimvoid HexagonFrameLowering::emitPrologue(MachineFunction &MF, 504288943Sdim MachineBasicBlock &MBB) const { 505327952Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 506288943Sdim 507314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 508314564Sdim const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 509288943Sdim 510288943Sdim MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr; 511288943Sdim if (EnableShrinkWrapping) 512288943Sdim findShrunkPrologEpilog(MF, PrologB, EpilogB); 513288943Sdim 514309124Sdim bool PrologueStubs = false; 515309124Sdim insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs); 516309124Sdim insertPrologueInBlock(*PrologB, PrologueStubs); 517314564Sdim updateEntryPaths(MF, *PrologB); 518288943Sdim 519288943Sdim if (EpilogB) { 520288943Sdim insertCSRRestoresInBlock(*EpilogB, CSI, HRI); 521288943Sdim insertEpilogueInBlock(*EpilogB); 522288943Sdim } else { 523288943Sdim for (auto &B : MF) 524296417Sdim if (B.isReturnBlock()) 525288943Sdim insertCSRRestoresInBlock(B, CSI, HRI); 526288943Sdim 527288943Sdim for (auto &B : MF) 528296417Sdim 
if (B.isReturnBlock()) 529288943Sdim insertEpilogueInBlock(B); 530309124Sdim 531309124Sdim for (auto &B : MF) { 532309124Sdim if (B.empty()) 533309124Sdim continue; 534309124Sdim MachineInstr *RetI = getReturn(B); 535309124Sdim if (!RetI || isRestoreCall(RetI->getOpcode())) 536309124Sdim continue; 537309124Sdim for (auto &R : CSI) 538309124Sdim RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true)); 539309124Sdim } 540288943Sdim } 541309124Sdim 542309124Sdim if (EpilogB) { 543309124Sdim // If there is an epilog block, it may not have a return instruction. 544309124Sdim // In such case, we need to add the callee-saved registers as live-ins 545309124Sdim // in all blocks on all paths from the epilog to any return block. 546314564Sdim unsigned MaxBN = MF.getNumBlockIDs(); 547309124Sdim BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1); 548314564Sdim updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path); 549309124Sdim } 550288943Sdim} 551288943Sdim 552344779Sdim/// Returns true if the target can safely skip saving callee-saved registers 553344779Sdim/// for noreturn nounwind functions. 554344779Sdimbool HexagonFrameLowering::enableCalleeSaveSkip( 555344779Sdim const MachineFunction &MF) const { 556344779Sdim const auto &F = MF.getFunction(); 557344779Sdim assert(F.hasFnAttribute(Attribute::NoReturn) && 558344779Sdim F.getFunction().hasFnAttribute(Attribute::NoUnwind) && 559344779Sdim !F.getFunction().hasFnAttribute(Attribute::UWTable)); 560344779Sdim (void)F; 561344779Sdim 562344779Sdim // No need to save callee saved registers if the function does not return. 563344779Sdim return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim(); 564344779Sdim} 565344779Sdim 566344779Sdim// Helper function used to determine when to eliminate the stack frame for 567344779Sdim// functions marked as noreturn and when the noreturn-stack-elim options are 568344779Sdim// specified. 
When both these conditions are true, then a FP may not be needed 569344779Sdim// if the function makes a call. It is very similar to enableCalleeSaveSkip, 570344779Sdim// but it used to check if the allocframe can be eliminated as well. 571344779Sdimstatic bool enableAllocFrameElim(const MachineFunction &MF) { 572344779Sdim const auto &F = MF.getFunction(); 573344779Sdim const auto &MFI = MF.getFrameInfo(); 574344779Sdim const auto &HST = MF.getSubtarget<HexagonSubtarget>(); 575344779Sdim assert(!MFI.hasVarSizedObjects() && 576344779Sdim !HST.getRegisterInfo()->needsStackRealignment(MF)); 577344779Sdim return F.hasFnAttribute(Attribute::NoReturn) && 578344779Sdim F.hasFnAttribute(Attribute::NoUnwind) && 579344779Sdim !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() && 580344779Sdim MFI.getStackSize() == 0; 581344779Sdim} 582344779Sdim 583309124Sdimvoid HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB, 584309124Sdim bool PrologueStubs) const { 585288943Sdim MachineFunction &MF = *MBB.getParent(); 586314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 587296417Sdim auto &HST = MF.getSubtarget<HexagonSubtarget>(); 588288943Sdim auto &HII = *HST.getInstrInfo(); 589288943Sdim auto &HRI = *HST.getRegisterInfo(); 590234285Sdim 591314564Sdim unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment()); 592288943Sdim 593288943Sdim // Calculate the total stack frame size. 594234285Sdim // Get the number of bytes to allocate from the FrameInfo. 595314564Sdim unsigned FrameSize = MFI.getStackSize(); 596288943Sdim // Round up the max call frame size to the max alignment on the stack. 
597314564Sdim unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign); 598314564Sdim MFI.setMaxCallFrameSize(MaxCFA); 599234285Sdim 600309124Sdim FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign); 601314564Sdim MFI.setStackSize(FrameSize); 602288943Sdim 603288943Sdim bool AlignStack = (MaxAlign > getStackAlignment()); 604288943Sdim 605288943Sdim // Get the number of bytes to allocate from the FrameInfo. 606314564Sdim unsigned NumBytes = MFI.getStackSize(); 607288943Sdim unsigned SP = HRI.getStackRegister(); 608314564Sdim unsigned MaxCF = MFI.getMaxCallFrameSize(); 609234285Sdim MachineBasicBlock::iterator InsertPt = MBB.begin(); 610234285Sdim 611314564Sdim SmallVector<MachineInstr *, 4> AdjustRegs; 612314564Sdim for (auto &MBB : MF) 613314564Sdim for (auto &MI : MBB) 614314564Sdim if (MI.getOpcode() == Hexagon::PS_alloca) 615314564Sdim AdjustRegs.push_back(&MI); 616234285Sdim 617288943Sdim for (auto MI : AdjustRegs) { 618314564Sdim assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca"); 619288943Sdim expandAlloca(MI, HII, SP, MaxCF); 620288943Sdim MI->eraseFromParent(); 621234285Sdim } 622234285Sdim 623321369Sdim DebugLoc dl = MBB.findDebugLoc(InsertPt); 624234285Sdim 625321369Sdim if (hasFP(MF)) { 626321369Sdim insertAllocframe(MBB, InsertPt, NumBytes); 627321369Sdim if (AlignStack) { 628321369Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP) 629321369Sdim .addReg(SP) 630321369Sdim .addImm(-int64_t(MaxAlign)); 631321369Sdim } 632321369Sdim // If the stack-checking is enabled, and we spilled the callee-saved 633321369Sdim // registers inline (i.e. did not use a spill function), then call 634321369Sdim // the stack checker directly. 
/// Emit the epilogue for \p MBB: either a plain SP adjustment (no frame
/// pointer), or a deallocframe / dealloc_return, taking care not to emit a
/// second deallocframe when the restore code (library call or
/// RESTORE_DEALLOC_RET* pseudo) already performs one.
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
  MachineFunction &MF = *MBB.getParent();
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HII = *HST.getInstrInfo();
  auto &HRI = *HST.getRegisterInfo();
  unsigned SP = HRI.getStackRegister();

  MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
  DebugLoc dl = MBB.findDebugLoc(InsertPt);

  if (!hasFP(MF)) {
    // No allocframe was emitted, so the prologue only subtracted the frame
    // size from SP; undo that by adding it back.
    MachineFrameInfo &MFI = MF.getFrameInfo();
    if (unsigned NumBytes = MFI.getStackSize()) {
      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
        .addReg(SP)
        .addImm(NumBytes);
    }
    return;
  }

  MachineInstr *RetI = getReturn(MBB);
  unsigned RetOpc = RetI ? RetI->getOpcode() : 0;

  // Handle EH_RETURN.
  if (RetOpc == Hexagon::EH_RETURN_JMPR) {
    // Deallocate the frame, then add the (runtime-computed) adjustment in
    // R28 to SP for the exception-handling return.
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
        .addDef(Hexagon::D15)
        .addReg(Hexagon::R30);
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
      .addReg(SP)
      .addReg(Hexagon::R28);
    return;
  }

  // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
  // frame instruction if we encounter it.
  if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||
      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||
      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) {
    MachineBasicBlock::iterator It = RetI;
    ++It;
    // Delete all instructions after the RESTORE (except labels).
    while (It != MBB.end()) {
      if (!It->isLabel())
        It = MBB.erase(It);
      else
        ++It;
    }
    return;
  }

  // It is possible that the restoring code is a call to a library function.
  // All of the restore* functions include "deallocframe", so we need to make
  // sure that we don't add an extra one.
  bool NeedsDeallocframe = true;
  if (!MBB.empty() && InsertPt != MBB.begin()) {
    MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
    unsigned COpc = PrevIt->getOpcode();
    if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||
        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
        COpc == Hexagon::PS_call_nr || COpc == Hexagon::PS_callr_nr)
      NeedsDeallocframe = false;
  }

  if (!NeedsDeallocframe)
    return;
  // If the returning instruction is PS_jmpret, replace it with dealloc_return,
  // otherwise just add deallocframe. The function could be returning via a
  // tail call.
  if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
        .addDef(Hexagon::D15)
        .addReg(Hexagon::R30);
    return;
  }
  // Fuse deallocframe with the jump: L4_return both deallocates and returns.
  unsigned NewOpc = Hexagon::L4_return;
  MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
                           .addDef(Hexagon::D15)
                           .addReg(Hexagon::R30);
  // Transfer the function live-out registers.
  NewI->copyImplicitOps(MF, *RetI);
  MBB.erase(RetI);
}
761321369Sdim unsigned SP = HRI.getStackRegister(); 762321369Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP) 763321369Sdim .addReg(SP) 764321369Sdim .addImm(-int(NumBytes)); 765321369Sdim } else { 766321369Sdim BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe)) 767327952Sdim .addDef(SP) 768327952Sdim .addReg(SP) 769321369Sdim .addImm(NumBytes) 770321369Sdim .addMemOperand(MMO); 771321369Sdim } 772321369Sdim} 773321369Sdim 774314564Sdimvoid HexagonFrameLowering::updateEntryPaths(MachineFunction &MF, 775314564Sdim MachineBasicBlock &SaveB) const { 776314564Sdim SetVector<unsigned> Worklist; 777288943Sdim 778314564Sdim MachineBasicBlock &EntryB = MF.front(); 779314564Sdim Worklist.insert(EntryB.getNumber()); 780314564Sdim 781314564Sdim unsigned SaveN = SaveB.getNumber(); 782314564Sdim auto &CSI = MF.getFrameInfo().getCalleeSavedInfo(); 783314564Sdim 784314564Sdim for (unsigned i = 0; i < Worklist.size(); ++i) { 785314564Sdim unsigned BN = Worklist[i]; 786314564Sdim MachineBasicBlock &MBB = *MF.getBlockNumbered(BN); 787314564Sdim for (auto &R : CSI) 788314564Sdim if (!MBB.isLiveIn(R.getReg())) 789314564Sdim MBB.addLiveIn(R.getReg()); 790314564Sdim if (BN != SaveN) 791314564Sdim for (auto &SB : MBB.successors()) 792314564Sdim Worklist.insert(SB->getNumber()); 793314564Sdim } 794314564Sdim} 795314564Sdim 796309124Sdimbool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB, 797314564Sdim MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF, 798309124Sdim BitVector &Path) const { 799309124Sdim assert(MBB.getNumber() >= 0); 800309124Sdim unsigned BN = MBB.getNumber(); 801309124Sdim if (Path[BN] || DoneF[BN]) 802309124Sdim return false; 803309124Sdim if (DoneT[BN]) 804309124Sdim return true; 805309124Sdim 806314564Sdim auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo(); 807309124Sdim 808309124Sdim Path[BN] = true; 809309124Sdim bool ReachedExit = false; 810309124Sdim for (auto &SB : MBB.successors()) 811309124Sdim 
ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path); 812309124Sdim 813309124Sdim if (!MBB.empty() && MBB.back().isReturn()) { 814309124Sdim // Add implicit uses of all callee-saved registers to the reached 815309124Sdim // return instructions. This is to prevent the anti-dependency breaker 816309124Sdim // from renaming these registers. 817309124Sdim MachineInstr &RetI = MBB.back(); 818309124Sdim if (!isRestoreCall(RetI.getOpcode())) 819309124Sdim for (auto &R : CSI) 820309124Sdim RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true)); 821309124Sdim ReachedExit = true; 822309124Sdim } 823309124Sdim 824309124Sdim // We don't want to add unnecessary live-ins to the restore block: since 825309124Sdim // the callee-saved registers are being defined in it, the entry of the 826309124Sdim // restore block cannot be on the path from the definitions to any exit. 827314564Sdim if (ReachedExit && &MBB != &RestoreB) { 828309124Sdim for (auto &R : CSI) 829309124Sdim if (!MBB.isLiveIn(R.getReg())) 830309124Sdim MBB.addLiveIn(R.getReg()); 831309124Sdim DoneT[BN] = true; 832309124Sdim } 833309124Sdim if (!ReachedExit) 834309124Sdim DoneF[BN] = true; 835309124Sdim 836309124Sdim Path[BN] = false; 837309124Sdim return ReachedExit; 838309124Sdim} 839309124Sdim 840314564Sdimstatic Optional<MachineBasicBlock::iterator> 841314564SdimfindCFILocation(MachineBasicBlock &B) { 842314564Sdim // The CFI instructions need to be inserted right after allocframe. 843314564Sdim // An exception to this is a situation where allocframe is bundled 844314564Sdim // with a call: then the CFI instructions need to be inserted before 845314564Sdim // the packet with the allocframe+call (in case the call throws an 846314564Sdim // exception). 
847314564Sdim auto End = B.instr_end(); 848309124Sdim 849314564Sdim for (MachineInstr &I : B) { 850314564Sdim MachineBasicBlock::iterator It = I.getIterator(); 851314564Sdim if (!I.isBundle()) { 852314564Sdim if (I.getOpcode() == Hexagon::S2_allocframe) 853314564Sdim return std::next(It); 854314564Sdim continue; 855314564Sdim } 856314564Sdim // I is a bundle. 857314564Sdim bool HasCall = false, HasAllocFrame = false; 858314564Sdim auto T = It.getInstrIterator(); 859314564Sdim while (++T != End && T->isBundled()) { 860314564Sdim if (T->getOpcode() == Hexagon::S2_allocframe) 861314564Sdim HasAllocFrame = true; 862314564Sdim else if (T->isCall()) 863314564Sdim HasCall = true; 864314564Sdim } 865314564Sdim if (HasAllocFrame) 866314564Sdim return HasCall ? It : std::next(It); 867314564Sdim } 868314564Sdim return None; 869296417Sdim} 870296417Sdim 871296417Sdimvoid HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { 872296417Sdim for (auto &B : MF) { 873314564Sdim auto At = findCFILocation(B); 874314564Sdim if (At.hasValue()) 875314564Sdim insertCFIInstructionsAt(B, At.getValue()); 876296417Sdim } 877296417Sdim} 878296417Sdim 879296417Sdimvoid HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, 880296417Sdim MachineBasicBlock::iterator At) const { 881296417Sdim MachineFunction &MF = *MBB.getParent(); 882314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 883296417Sdim MachineModuleInfo &MMI = MF.getMMI(); 884296417Sdim auto &HST = MF.getSubtarget<HexagonSubtarget>(); 885296417Sdim auto &HII = *HST.getInstrInfo(); 886296417Sdim auto &HRI = *HST.getRegisterInfo(); 887296417Sdim 888296417Sdim // If CFI instructions have debug information attached, something goes 889296417Sdim // wrong with the final assembly generation: the prolog_end is placed 890296417Sdim // in a wrong location. 
/// Emit the CFI instructions describing this function's frame at \p At:
/// the CFA definition (relative to FP when an allocframe is present) and
/// .cfi_offset records for LR, FP, and every spilled callee-saved register.
void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator At) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HII = *HST.getInstrInfo();
  auto &HRI = *HST.getRegisterInfo();

  // If CFI instructions have debug information attached, something goes
  // wrong with the final assembly generation: the prolog_end is placed
  // in a wrong location. Hence the intentionally empty DebugLoc.
  DebugLoc DL;
  const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);

  MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
  bool HasFP = hasFP(MF);

  if (HasFP) {
    unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
    unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);

    // Define CFA via an offset from the value of FP.
    //
    //  -8   -4    0 (SP)
    // --+----+----+---------------------
    //   | FP | LR |          increasing addresses -->
    // --+----+----+---------------------
    //   |         +-- Old SP (before allocframe)
    //   +-- New FP (after allocframe)
    //
    // MCCFIInstruction::createDefCfa subtracts the offset from the register.
    // MCCFIInstruction::createOffset takes the offset without sign change.

    // CFA = FP + 8 (the old SP, before allocframe pushed FP/LR).
    auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
    BuildMI(MBB, At, DL, CFID)
        .addCFIIndex(MF.addFrameInst(DefCfa));
    // R31 (return addr) = CFA - 4
    auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
    BuildMI(MBB, At, DL, CFID)
        .addCFIIndex(MF.addFrameInst(OffR31));
    // R30 (frame ptr) = CFA - 8
    auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
    BuildMI(MBB, At, DL, CFID)
        .addCFIIndex(MF.addFrameInst(OffR30));
  }

  // Registers (and register pairs) whose save slots get .cfi_offset records,
  // in emission order. Terminated by NoRegister.
  static unsigned int RegsToMove[] = {
    Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
    Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
    Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
    Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
    Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
    Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
    Hexagon::NoRegister
  };

  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
    unsigned Reg = RegsToMove[i];
    // Only emit a record if this register was actually spilled.
    auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
      return C.getReg() == Reg;
    };
    auto F = find_if(CSI, IfR);
    if (F == CSI.end())
      continue;

    int64_t Offset;
    if (HasFP) {
      // If the function has a frame pointer (i.e. has an allocframe),
      // then the CFA has been defined in terms of FP. Any offsets in
      // the following CFI instructions have to be defined relative
      // to FP, which points to the bottom of the stack frame.
      // The function getFrameIndexReference can still choose to use SP
      // for the offset calculation, so we cannot simply call it here.
      // Instead, get the offset (relative to the FP) directly.
      Offset = MFI.getObjectOffset(F->getFrameIdx());
    } else {
      unsigned FrameReg;
      Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
    }
    // Subtract 8 to make room for R30 and R31, which are added above.
    Offset -= 8;

    if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
      // Single (32-bit) register: one record.
      unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
      auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
                                                   Offset);
      BuildMI(MBB, At, DL, CFID)
          .addCFIIndex(MF.addFrameInst(OffReg));
    } else {
      // Split the double regs into subregs, and generate appropriate
      // cfi_offsets.
      // The only reason, we are split double regs is, llvm-mc does not
      // understand paired registers for cfi_offset.
      // Eg .cfi_offset r1:0, -64
      Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
      Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
      unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
      unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
      auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
                                                  Offset+4);
      BuildMI(MBB, At, DL, CFID)
          .addCFIIndex(MF.addFrameInst(OffHi));
      auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
                                                  Offset);
      BuildMI(MBB, At, DL, CFID)
          .addCFIIndex(MF.addFrameInst(OffLo));
    }
  }
}
1011321369Sdim // Both, alloca and stack alignment modify the stack pointer by an 1012321369Sdim // undetermined value, so we need to save it at the entry to the function 1013321369Sdim // (i.e. use allocframe). 1014321369Sdim if (HasAlloca || HasExtraAlign) 1015296417Sdim return true; 1016296417Sdim 1017296417Sdim if (MFI.getStackSize() > 0) { 1018321369Sdim // If FP-elimination is disabled, we have to use FP at this point. 1019321369Sdim const TargetMachine &TM = MF.getTarget(); 1020321369Sdim if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer) 1021296417Sdim return true; 1022321369Sdim if (EnableStackOVFSanitizer) 1023321369Sdim return true; 1024296417Sdim } 1025296417Sdim 1026321369Sdim const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); 1027344779Sdim if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR()) 1028296417Sdim return true; 1029296417Sdim 1030296417Sdim return false; 1031234285Sdim} 1032234285Sdim 1033288943Sdimenum SpillKind { 1034288943Sdim SK_ToMem, 1035288943Sdim SK_FromMem, 1036288943Sdim SK_FromMemTailcall 1037288943Sdim}; 1038288943Sdim 1039309124Sdimstatic const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType, 1040309124Sdim bool Stkchk = false) { 1041288943Sdim const char * V4SpillToMemoryFunctions[] = { 1042288943Sdim "__save_r16_through_r17", 1043288943Sdim "__save_r16_through_r19", 1044288943Sdim "__save_r16_through_r21", 1045288943Sdim "__save_r16_through_r23", 1046288943Sdim "__save_r16_through_r25", 1047288943Sdim "__save_r16_through_r27" }; 1048288943Sdim 1049309124Sdim const char * V4SpillToMemoryStkchkFunctions[] = { 1050309124Sdim "__save_r16_through_r17_stkchk", 1051309124Sdim "__save_r16_through_r19_stkchk", 1052309124Sdim "__save_r16_through_r21_stkchk", 1053309124Sdim "__save_r16_through_r23_stkchk", 1054309124Sdim "__save_r16_through_r25_stkchk", 1055309124Sdim "__save_r16_through_r27_stkchk" }; 1056309124Sdim 1057288943Sdim const char * V4SpillFromMemoryFunctions[] 
= { 1058288943Sdim "__restore_r16_through_r17_and_deallocframe", 1059288943Sdim "__restore_r16_through_r19_and_deallocframe", 1060288943Sdim "__restore_r16_through_r21_and_deallocframe", 1061288943Sdim "__restore_r16_through_r23_and_deallocframe", 1062288943Sdim "__restore_r16_through_r25_and_deallocframe", 1063288943Sdim "__restore_r16_through_r27_and_deallocframe" }; 1064288943Sdim 1065288943Sdim const char * V4SpillFromMemoryTailcallFunctions[] = { 1066288943Sdim "__restore_r16_through_r17_and_deallocframe_before_tailcall", 1067288943Sdim "__restore_r16_through_r19_and_deallocframe_before_tailcall", 1068288943Sdim "__restore_r16_through_r21_and_deallocframe_before_tailcall", 1069288943Sdim "__restore_r16_through_r23_and_deallocframe_before_tailcall", 1070288943Sdim "__restore_r16_through_r25_and_deallocframe_before_tailcall", 1071288943Sdim "__restore_r16_through_r27_and_deallocframe_before_tailcall" 1072288943Sdim }; 1073288943Sdim 1074288943Sdim const char **SpillFunc = nullptr; 1075288943Sdim 1076288943Sdim switch(SpillType) { 1077288943Sdim case SK_ToMem: 1078309124Sdim SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions 1079309124Sdim : V4SpillToMemoryFunctions; 1080288943Sdim break; 1081288943Sdim case SK_FromMem: 1082288943Sdim SpillFunc = V4SpillFromMemoryFunctions; 1083288943Sdim break; 1084288943Sdim case SK_FromMemTailcall: 1085288943Sdim SpillFunc = V4SpillFromMemoryTailcallFunctions; 1086288943Sdim break; 1087288943Sdim } 1088288943Sdim assert(SpillFunc && "Unknown spill kind"); 1089288943Sdim 1090288943Sdim // Spill all callee-saved registers up to the highest register used. 
1091288943Sdim switch (MaxReg) { 1092288943Sdim case Hexagon::R17: 1093288943Sdim return SpillFunc[0]; 1094288943Sdim case Hexagon::R19: 1095288943Sdim return SpillFunc[1]; 1096288943Sdim case Hexagon::R21: 1097288943Sdim return SpillFunc[2]; 1098288943Sdim case Hexagon::R23: 1099288943Sdim return SpillFunc[3]; 1100288943Sdim case Hexagon::R25: 1101288943Sdim return SpillFunc[4]; 1102288943Sdim case Hexagon::R27: 1103288943Sdim return SpillFunc[5]; 1104288943Sdim default: 1105288943Sdim llvm_unreachable("Unhandled maximum callee save register"); 1106288943Sdim } 1107314564Sdim return nullptr; 1108239462Sdim} 1109239462Sdim 1110296417Sdimint HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, 1111296417Sdim int FI, unsigned &FrameReg) const { 1112314564Sdim auto &MFI = MF.getFrameInfo(); 1113296417Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1114296417Sdim 1115296417Sdim int Offset = MFI.getObjectOffset(FI); 1116296417Sdim bool HasAlloca = MFI.hasVarSizedObjects(); 1117296417Sdim bool HasExtraAlign = HRI.needsStackRealignment(MF); 1118296417Sdim bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; 1119296417Sdim 1120309124Sdim auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); 1121321369Sdim unsigned FrameSize = MFI.getStackSize(); 1122321369Sdim unsigned SP = HRI.getStackRegister(); 1123321369Sdim unsigned FP = HRI.getFrameRegister(); 1124309124Sdim unsigned AP = HMFI.getStackAlignBasePhysReg(); 1125321369Sdim // It may happen that AP will be absent even HasAlloca && HasExtraAlign 1126321369Sdim // is true. HasExtraAlign may be set because of vector spills, without 1127321369Sdim // aligned locals or aligned outgoing function arguments. Since vector 1128321369Sdim // spills will ultimately be "unaligned", it is safe to use FP as the 1129321369Sdim // base register. 
/// Compute the offset of frame object \p FI from the base register chosen
/// for it (SP, FP, or the aligned-stack base AP), returning the offset and
/// setting \p FrameReg to the chosen register.
int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
      int FI, unsigned &FrameReg) const {
  auto &MFI = MF.getFrameInfo();
  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();

  int Offset = MFI.getObjectOffset(FI);
  bool HasAlloca = MFI.hasVarSizedObjects();
  bool HasExtraAlign = HRI.needsStackRealignment(MF);
  bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;

  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  unsigned FrameSize = MFI.getStackSize();
  unsigned SP = HRI.getStackRegister();
  unsigned FP = HRI.getFrameRegister();
  unsigned AP = HMFI.getStackAlignBasePhysReg();
  // It may happen that AP will be absent even HasAlloca && HasExtraAlign
  // is true. HasExtraAlign may be set because of vector spills, without
  // aligned locals or aligned outgoing function arguments. Since vector
  // spills will ultimately be "unaligned", it is safe to use FP as the
  // base register.
  // In fact, in such a scenario the stack is actually not required to be
  // aligned, although it may end up being aligned anyway, since this
  // particular case is not easily detectable. The alignment will be
  // unnecessary, but not incorrect.
  // Unfortunately there is no quick way to verify that the above is
  // indeed the case (and that it's not a result of an error), so just
  // assume that missing AP will be replaced by FP.
  // (A better fix would be to rematerialize AP from FP and always align
  // vector spills.)
  if (AP == 0)
    AP = FP;

  bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
  // Use FP at -O0, except when there are objects with extra alignment.
  // That additional alignment requirement may cause a pad to be inserted,
  // which will make it impossible to use FP to access objects located
  // past the pad.
  if (NoOpt && !HasExtraAlign)
    UseFP = true;
  if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
    // Fixed and preallocated objects will be located before any padding
    // so FP must be used to access them.
    UseFP |= (HasAlloca || HasExtraAlign);
  } else {
    if (HasAlloca) {
      if (HasExtraAlign)
        UseAP = true;
      else
        UseFP = true;
    }
  }

  // If FP was picked, then there had better be FP.
  bool HasFP = hasFP(MF);
  assert((HasFP || !UseFP) && "This function must have frame pointer");

  // Having FP implies allocframe. Allocframe will store extra 8 bytes:
  // FP/LR. If the base register is used to access an object across these
  // 8 bytes, then the offset will need to be adjusted by 8.
  //
  // After allocframe:
  //                    HexagonISelLowering adds 8 to ---+
  //                    the offsets of all stack-based   |
  //                    arguments (*)                    |
  //                                                     |
  //   getObjectOffset < 0   0     8  getObjectOffset >= 8
  // ------------------------+-----+------------------------> increasing
  //     <local objects>     |FP/LR|    <input arguments>     addresses
  // -----------------+------+-----+------------------------>
  //                  |      |
  //                  SP/AP point --+     +-- FP points here (**)
  //                  somewhere on
  //                  this side of FP/LR
  //
  // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
  // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
  //
  // The lowering assumes that FP/LR is present, and so the offsets of
  // the formal arguments start at 8. If FP/LR is not there we need to
  // reduce the offset by 8.
  if (Offset > 0 && !HasFP)
    Offset -= 8;

  if (UseFP)
    FrameReg = FP;
  else if (UseAP)
    FrameReg = AP;
  else
    FrameReg = SP;

  // Calculate the actual offset in the instruction. If there is no FP
  // (in other words, no allocframe), then SP will not be adjusted (i.e.
  // there will be no SP -= FrameSize), so the frame size should not be
  // added to the calculated offset.
  int RealOffset = Offset;
  if (!UseFP && !UseAP)
    RealOffset = FrameSize+Offset;
  return RealOffset;
}
/// Insert the callee-saved-register spills into \p MBB: either a single call
/// to a __save_* runtime routine (when profitable), or individual stores to
/// the registers' spill slots. \p PrologueStubs is set to true when a save
/// routine was used (the prologue then may need a stack-check call instead).
bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
      const CSIVect &CSI, const HexagonRegisterInfo &HRI,
      bool &PrologueStubs) const {
  if (CSI.empty())
    return true;

  MachineBasicBlock::iterator MI = MBB.begin();
  PrologueStubs = false;
  MachineFunction &MF = *MBB.getParent();
  auto &HST = MF.getSubtarget<HexagonSubtarget>();
  auto &HII = *HST.getInstrInfo();

  if (useSpillFunction(MF, CSI)) {
    PrologueStubs = true;
    unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
    bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
    const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
                                               StkOvrFlowEnabled);
    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
    bool IsPIC = HTM.isPositionIndependent();
    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;

    // Call spill function. Pick the pseudo opcode along three axes:
    // stack-check variant, long-call (extended) variant, and PIC variant.
    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
    unsigned SpillOpc;
    if (StkOvrFlowEnabled) {
      if (LongCalls)
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
      else
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4STK;
    } else {
      if (LongCalls)
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
      else
        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
                         : Hexagon::SAVE_REGISTERS_CALL_V4;
    }

    MachineInstr *SaveRegsCall =
        BuildMI(MBB, MI, DL, HII.get(SpillOpc))
          .addExternalSymbol(SpillFun);

    // Add callee-saved registers as use.
    addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
    // Add live in registers.
    for (unsigned I = 0; I < CSI.size(); ++I)
      MBB.addLiveIn(CSI[I].getReg());
    return true;
  }

  // No save routine: store each callee-saved register individually.
  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
    unsigned Reg = CSI[i].getReg();
    // Add live in registers. We treat eh_return callee saved register r0 - r3
    // specially. They are not really callee saved registers as they are not
    // supposed to be killed.
    bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
    int FI = CSI[i].getFrameIdx();
    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
    if (IsKill)
      MBB.addLiveIn(Reg);
  }
  return true;
}
Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC 1309314564Sdim : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; 1310314564Sdim DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc)) 1311288943Sdim .addExternalSymbol(RestoreFn); 1312234285Sdim } else { 1313288943Sdim // The block has a return. 1314288943Sdim MachineBasicBlock::iterator It = MBB.getFirstTerminator(); 1315288943Sdim assert(It->isReturn() && std::next(It) == MBB.end()); 1316314564Sdim unsigned RetOpc; 1317314564Sdim if (LongCalls) 1318314564Sdim RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC 1319314564Sdim : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT; 1320314564Sdim else 1321314564Sdim RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC 1322314564Sdim : Hexagon::RESTORE_DEALLOC_RET_JMP_V4; 1323314564Sdim DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc)) 1324288943Sdim .addExternalSymbol(RestoreFn); 1325288943Sdim // Transfer the function live-out registers. 1326309124Sdim DeallocCall->copyImplicitOps(MF, *It); 1327234285Sdim } 1328309124Sdim addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false); 1329288943Sdim return true; 1330234285Sdim } 1331288943Sdim 1332288943Sdim for (unsigned i = 0; i < CSI.size(); ++i) { 1333288943Sdim unsigned Reg = CSI[i].getReg(); 1334288943Sdim const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); 1335288943Sdim int FI = CSI[i].getFrameIdx(); 1336296417Sdim HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); 1337288943Sdim } 1338309124Sdim 1339234285Sdim return true; 1340234285Sdim} 1341234285Sdim 1342309124SdimMachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr( 1343309124Sdim MachineFunction &MF, MachineBasicBlock &MBB, 1344309124Sdim MachineBasicBlock::iterator I) const { 1345288943Sdim MachineInstr &MI = *I; 1346288943Sdim unsigned Opc = MI.getOpcode(); 1347288943Sdim (void)Opc; // Silence compiler warning. 
1348288943Sdim assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) && 1349288943Sdim "Cannot handle this call frame pseudo instruction"); 1350309124Sdim return MBB.erase(I); 1351288943Sdim} 1352234285Sdim 1353288943Sdimvoid HexagonFrameLowering::processFunctionBeforeFrameFinalized( 1354288943Sdim MachineFunction &MF, RegScavenger *RS) const { 1355288943Sdim // If this function has uses aligned stack and also has variable sized stack 1356288943Sdim // objects, then we need to map all spill slots to fixed positions, so that 1357288943Sdim // they can be accessed through FP. Otherwise they would have to be accessed 1358288943Sdim // via AP, which may not be available at the particular place in the program. 1359314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 1360314564Sdim bool HasAlloca = MFI.hasVarSizedObjects(); 1361314564Sdim bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment()); 1362288943Sdim 1363296417Sdim if (!HasAlloca || !NeedsAlign) 1364288943Sdim return; 1365288943Sdim 1366360784Sdim SmallSet<int, 4> DealignSlots; 1367314564Sdim unsigned LFS = MFI.getLocalFrameSize(); 1368314564Sdim for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) { 1369314564Sdim if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i)) 1370288943Sdim continue; 1371314564Sdim unsigned S = MFI.getObjectSize(i); 1372309124Sdim // Reduce the alignment to at most 8. This will require unaligned vector 1373309124Sdim // stores if they happen here. 
/// Pre-frame-finalization hook: when the function both realigns the stack
/// and uses alloca, remap all spill slots into the local frame block (so
/// they are addressable via FP), dealign them to at most 8 bytes, and
/// record the aligned-stack base register.
void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // If this function uses an aligned stack and also has variable-sized stack
  // objects, then we need to map all spill slots to fixed positions, so that
  // they can be accessed through FP. Otherwise they would have to be accessed
  // via AP, which may not be available at the particular place in the program.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool HasAlloca = MFI.hasVarSizedObjects();
  bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment());

  if (!HasAlloca || !NeedsAlign)
    return;

  // Frame indexes whose alignment was reduced below; their memory operands
  // are patched up in the loop further down.
  SmallSet<int, 4> DealignSlots;
  unsigned LFS = MFI.getLocalFrameSize();
  for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
    if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i))
      continue;
    unsigned S = MFI.getObjectSize(i);
    // Reduce the alignment to at most 8. This will require unaligned vector
    // stores if they happen here.
    unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
    MFI.setObjectAlignment(i, 8);
    // Grow the local frame block and place the object at its (negative) end.
    LFS = alignTo(LFS+S, A);
    MFI.mapLocalFrameObject(i, -static_cast<int64_t>(LFS));
    DealignSlots.insert(i);
  }

  MFI.setLocalFrameSize(LFS);
  Align A = MFI.getLocalFrameMaxAlign();
  assert(A <= 8 && "Unexpected local frame alignment");
  if (A == 1)
    MFI.setLocalFrameMaxAlign(Align(8));
  MFI.setUseLocalStackAllocationBlock(true);

  // Go over all MachineMemOperands in the code, and change the ones that
  // refer to the dealigned stack slots to reflect the new alignment.
  if (!DealignSlots.empty()) {
    for (MachineBasicBlock &BB : MF) {
      for (MachineInstr &MI : BB) {
        bool KeepOld = true;
        ArrayRef<MachineMemOperand*> memops = MI.memoperands();
        SmallVector<MachineMemOperand*,1> new_memops;
        for (MachineMemOperand *MMO : memops) {
          auto *PV = MMO->getPseudoValue();
          if (auto *FS = dyn_cast_or_null<FixedStackPseudoSourceValue>(PV)) {
            int FI = FS->getFrameIndex();
            if (DealignSlots.count(FI)) {
              // Rebuild the operand with the slot's reduced alignment,
              // preserving all other attributes.
              unsigned A = MFI.getObjectAlignment(FI);
              auto *NewMMO = MF.getMachineMemOperand(MMO->getPointerInfo(),
                                MMO->getFlags(), MMO->getSize(), A,
                                MMO->getAAInfo(), MMO->getRanges(),
                                MMO->getSyncScopeID(), MMO->getOrdering(),
                                MMO->getFailureOrdering());
              new_memops.push_back(NewMMO);
              KeepOld = false;
              continue;
            }
          }
          new_memops.push_back(MMO);
        }
        if (!KeepOld)
          MI.setMemRefs(MF, new_memops);
      }
    }
  }

  // Set the physical aligned-stack base address register.
  unsigned AP = 0;
  if (const MachineInstr *AI = getAlignaInstr(MF))
    AP = AI->getOperand(0).getReg();
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  HMFI.setStackAlignBasePhysReg(AP);
}
1447288943Sdim return true; 1448288943Sdim} 1449234285Sdim 1450288943Sdim#ifndef NDEBUG 1451288943Sdimstatic void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { 1452288943Sdim dbgs() << '{'; 1453288943Sdim for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) { 1454288943Sdim unsigned R = x; 1455327952Sdim dbgs() << ' ' << printReg(R, &TRI); 1456288943Sdim } 1457288943Sdim dbgs() << " }"; 1458288943Sdim} 1459288943Sdim#endif 1460288943Sdim 1461288943Sdimbool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, 1462288943Sdim const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { 1463341825Sdim LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n'); 1464314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 1465288943Sdim BitVector SRegs(Hexagon::NUM_TARGET_REGS); 1466288943Sdim 1467288943Sdim // Generate a set of unique, callee-saved registers (SRegs), where each 1468288943Sdim // register in the set is maximal in terms of sub-/super-register relation, 1469288943Sdim // i.e. for each R in SRegs, no proper super-register of R is also in SRegs. 1470288943Sdim 1471288943Sdim // (1) For each callee-saved register, add that register and all of its 1472288943Sdim // sub-registers to SRegs. 1473341825Sdim LLVM_DEBUG(dbgs() << "Initial CS registers: {"); 1474288943Sdim for (unsigned i = 0, n = CSI.size(); i < n; ++i) { 1475288943Sdim unsigned R = CSI[i].getReg(); 1476341825Sdim LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI)); 1477288943Sdim for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) 1478288943Sdim SRegs[*SR] = true; 1479288943Sdim } 1480341825Sdim LLVM_DEBUG(dbgs() << " }\n"); 1481341825Sdim LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); 1482341825Sdim dbgs() << "\n"); 1483288943Sdim 1484288943Sdim // (2) For each reserved register, remove that register and all of its 1485288943Sdim // sub- and super-registers from SRegs. 
1486288943Sdim BitVector Reserved = TRI->getReservedRegs(MF); 1487288943Sdim for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) { 1488288943Sdim unsigned R = x; 1489288943Sdim for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR) 1490288943Sdim SRegs[*SR] = false; 1491288943Sdim } 1492341825Sdim LLVM_DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); 1493341825Sdim dbgs() << "\n"); 1494341825Sdim LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); 1495341825Sdim dbgs() << "\n"); 1496288943Sdim 1497288943Sdim // (3) Collect all registers that have at least one sub-register in SRegs, 1498288943Sdim // and also have no sub-registers that are reserved. These will be the can- 1499288943Sdim // didates for saving as a whole instead of their individual sub-registers. 1500288943Sdim // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.) 1501288943Sdim BitVector TmpSup(Hexagon::NUM_TARGET_REGS); 1502288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1503288943Sdim unsigned R = x; 1504288943Sdim for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) 1505288943Sdim TmpSup[*SR] = true; 1506288943Sdim } 1507288943Sdim for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) { 1508288943Sdim unsigned R = x; 1509288943Sdim for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) { 1510288943Sdim if (!Reserved[*SR]) 1511288943Sdim continue; 1512288943Sdim TmpSup[R] = false; 1513288943Sdim break; 1514234285Sdim } 1515234285Sdim } 1516341825Sdim LLVM_DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); 1517341825Sdim dbgs() << "\n"); 1518288943Sdim 1519288943Sdim // (4) Include all super-registers found in (3) into SRegs. 
1520288943Sdim SRegs |= TmpSup; 1521341825Sdim LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); 1522341825Sdim dbgs() << "\n"); 1523288943Sdim 1524288943Sdim // (5) For each register R in SRegs, if any super-register of R is in SRegs, 1525288943Sdim // remove R from SRegs. 1526288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1527288943Sdim unsigned R = x; 1528288943Sdim for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) { 1529288943Sdim if (!SRegs[*SR]) 1530288943Sdim continue; 1531288943Sdim SRegs[R] = false; 1532288943Sdim break; 1533288943Sdim } 1534288943Sdim } 1535341825Sdim LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); 1536341825Sdim dbgs() << "\n"); 1537288943Sdim 1538288943Sdim // Now, for each register that has a fixed stack slot, create the stack 1539288943Sdim // object for it. 1540288943Sdim CSI.clear(); 1541288943Sdim 1542327952Sdim using SpillSlot = TargetFrameLowering::SpillSlot; 1543327952Sdim 1544288943Sdim unsigned NumFixed; 1545288943Sdim int MinOffset = 0; // CS offsets are negative. 1546288943Sdim const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed); 1547288943Sdim for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) { 1548288943Sdim if (!SRegs[S->Reg]) 1549288943Sdim continue; 1550288943Sdim const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg); 1551321369Sdim int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset); 1552288943Sdim MinOffset = std::min(MinOffset, S->Offset); 1553288943Sdim CSI.push_back(CalleeSavedInfo(S->Reg, FI)); 1554288943Sdim SRegs[S->Reg] = false; 1555288943Sdim } 1556288943Sdim 1557288943Sdim // There can be some registers that don't have fixed slots. For example, 1558288943Sdim // we need to store R0-R3 in functions with exception handling. For each 1559288943Sdim // such register, create a non-fixed stack object. 
1560288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1561288943Sdim unsigned R = x; 1562288943Sdim const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R); 1563321369Sdim unsigned Size = TRI->getSpillSize(*RC); 1564321369Sdim int Off = MinOffset - Size; 1565321369Sdim unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment()); 1566288943Sdim assert(isPowerOf2_32(Align)); 1567288943Sdim Off &= -Align; 1568321369Sdim int FI = MFI.CreateFixedSpillStackObject(Size, Off); 1569288943Sdim MinOffset = std::min(MinOffset, Off); 1570288943Sdim CSI.push_back(CalleeSavedInfo(R, FI)); 1571288943Sdim SRegs[R] = false; 1572288943Sdim } 1573288943Sdim 1574341825Sdim LLVM_DEBUG({ 1575288943Sdim dbgs() << "CS information: {"; 1576288943Sdim for (unsigned i = 0, n = CSI.size(); i < n; ++i) { 1577288943Sdim int FI = CSI[i].getFrameIdx(); 1578314564Sdim int Off = MFI.getObjectOffset(FI); 1579327952Sdim dbgs() << ' ' << printReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp"; 1580288943Sdim if (Off >= 0) 1581288943Sdim dbgs() << '+'; 1582288943Sdim dbgs() << Off; 1583288943Sdim } 1584288943Sdim dbgs() << " }\n"; 1585288943Sdim }); 1586288943Sdim 1587288943Sdim#ifndef NDEBUG 1588288943Sdim // Verify that all registers were handled. 
1589288943Sdim bool MissedReg = false; 1590288943Sdim for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) { 1591288943Sdim unsigned R = x; 1592327952Sdim dbgs() << printReg(R, TRI) << ' '; 1593288943Sdim MissedReg = true; 1594288943Sdim } 1595288943Sdim if (MissedReg) 1596288943Sdim llvm_unreachable("...there are unhandled callee-saved registers!"); 1597288943Sdim#endif 1598288943Sdim 1599234285Sdim return true; 1600234285Sdim} 1601234285Sdim 1602309124Sdimbool HexagonFrameLowering::expandCopy(MachineBasicBlock &B, 1603309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1604309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1605309124Sdim MachineInstr *MI = &*It; 1606309124Sdim DebugLoc DL = MI->getDebugLoc(); 1607360784Sdim Register DstR = MI->getOperand(0).getReg(); 1608360784Sdim Register SrcR = MI->getOperand(1).getReg(); 1609309124Sdim if (!Hexagon::ModRegsRegClass.contains(DstR) || 1610309124Sdim !Hexagon::ModRegsRegClass.contains(SrcR)) 1611309124Sdim return false; 1612309124Sdim 1613360784Sdim Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 1614321369Sdim BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1)); 1615309124Sdim BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR) 1616309124Sdim .addReg(TmpR, RegState::Kill); 1617309124Sdim 1618309124Sdim NewRegs.push_back(TmpR); 1619309124Sdim B.erase(It); 1620309124Sdim return true; 1621309124Sdim} 1622309124Sdim 1623309124Sdimbool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B, 1624309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1625309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1626309124Sdim MachineInstr *MI = &*It; 1627314564Sdim if (!MI->getOperand(0).isFI()) 1628314564Sdim return false; 1629314564Sdim 1630309124Sdim DebugLoc DL = MI->getDebugLoc(); 1631309124Sdim unsigned Opc = MI->getOpcode(); 1632360784Sdim Register SrcR = 
MI->getOperand(2).getReg(); 1633309124Sdim bool IsKill = MI->getOperand(2).isKill(); 1634309124Sdim int FI = MI->getOperand(0).getIndex(); 1635309124Sdim 1636309124Sdim // TmpR = C2_tfrpr SrcR if SrcR is a predicate register 1637309124Sdim // TmpR = A2_tfrcrr SrcR if SrcR is a modifier register 1638360784Sdim Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 1639309124Sdim unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr 1640309124Sdim : Hexagon::A2_tfrcrr; 1641309124Sdim BuildMI(B, It, DL, HII.get(TfrOpc), TmpR) 1642309124Sdim .addReg(SrcR, getKillRegState(IsKill)); 1643309124Sdim 1644309124Sdim // S2_storeri_io FI, 0, TmpR 1645309124Sdim BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io)) 1646344779Sdim .addFrameIndex(FI) 1647344779Sdim .addImm(0) 1648344779Sdim .addReg(TmpR, RegState::Kill) 1649344779Sdim .cloneMemRefs(*MI); 1650309124Sdim 1651309124Sdim NewRegs.push_back(TmpR); 1652309124Sdim B.erase(It); 1653309124Sdim return true; 1654309124Sdim} 1655309124Sdim 1656309124Sdimbool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B, 1657309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1658309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1659309124Sdim MachineInstr *MI = &*It; 1660314564Sdim if (!MI->getOperand(1).isFI()) 1661314564Sdim return false; 1662314564Sdim 1663309124Sdim DebugLoc DL = MI->getDebugLoc(); 1664309124Sdim unsigned Opc = MI->getOpcode(); 1665360784Sdim Register DstR = MI->getOperand(0).getReg(); 1666309124Sdim int FI = MI->getOperand(1).getIndex(); 1667309124Sdim 1668309124Sdim // TmpR = L2_loadri_io FI, 0 1669360784Sdim Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 1670309124Sdim BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR) 1671344779Sdim .addFrameIndex(FI) 1672344779Sdim .addImm(0) 1673344779Sdim .cloneMemRefs(*MI); 1674309124Sdim 1675309124Sdim // DstR = C2_tfrrp TmpR if DstR is a predicate register 
1676309124Sdim // DstR = A2_tfrrcr TmpR if DstR is a modifier register 1677309124Sdim unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp 1678309124Sdim : Hexagon::A2_tfrrcr; 1679309124Sdim BuildMI(B, It, DL, HII.get(TfrOpc), DstR) 1680309124Sdim .addReg(TmpR, RegState::Kill); 1681309124Sdim 1682309124Sdim NewRegs.push_back(TmpR); 1683309124Sdim B.erase(It); 1684309124Sdim return true; 1685309124Sdim} 1686309124Sdim 1687309124Sdimbool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B, 1688309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1689309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1690309124Sdim MachineInstr *MI = &*It; 1691314564Sdim if (!MI->getOperand(0).isFI()) 1692314564Sdim return false; 1693314564Sdim 1694309124Sdim DebugLoc DL = MI->getDebugLoc(); 1695360784Sdim Register SrcR = MI->getOperand(2).getReg(); 1696309124Sdim bool IsKill = MI->getOperand(2).isKill(); 1697309124Sdim int FI = MI->getOperand(0).getIndex(); 1698327952Sdim auto *RC = &Hexagon::HvxVRRegClass; 1699309124Sdim 1700309124Sdim // Insert transfer to general vector register. 
1701309124Sdim // TmpR0 = A2_tfrsi 0x01010101 1702309124Sdim // TmpR1 = V6_vandqrt Qx, TmpR0 1703309124Sdim // store FI, 0, TmpR1 1704360784Sdim Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 1705360784Sdim Register TmpR1 = MRI.createVirtualRegister(RC); 1706309124Sdim 1707309124Sdim BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) 1708309124Sdim .addImm(0x01010101); 1709309124Sdim 1710327952Sdim BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1) 1711309124Sdim .addReg(SrcR, getKillRegState(IsKill)) 1712309124Sdim .addReg(TmpR0, RegState::Kill); 1713309124Sdim 1714309124Sdim auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1715309124Sdim HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI); 1716309124Sdim expandStoreVec(B, std::prev(It), MRI, HII, NewRegs); 1717309124Sdim 1718309124Sdim NewRegs.push_back(TmpR0); 1719309124Sdim NewRegs.push_back(TmpR1); 1720309124Sdim B.erase(It); 1721309124Sdim return true; 1722309124Sdim} 1723309124Sdim 1724309124Sdimbool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B, 1725309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1726309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1727309124Sdim MachineInstr *MI = &*It; 1728314564Sdim if (!MI->getOperand(1).isFI()) 1729314564Sdim return false; 1730314564Sdim 1731309124Sdim DebugLoc DL = MI->getDebugLoc(); 1732360784Sdim Register DstR = MI->getOperand(0).getReg(); 1733309124Sdim int FI = MI->getOperand(1).getIndex(); 1734327952Sdim auto *RC = &Hexagon::HvxVRRegClass; 1735309124Sdim 1736309124Sdim // TmpR0 = A2_tfrsi 0x01010101 1737309124Sdim // TmpR1 = load FI, 0 1738309124Sdim // DstR = V6_vandvrt TmpR1, TmpR0 1739360784Sdim Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); 1740360784Sdim Register TmpR1 = MRI.createVirtualRegister(RC); 1741309124Sdim 1742309124Sdim BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0) 1743309124Sdim 
.addImm(0x01010101); 1744327952Sdim MachineFunction &MF = *B.getParent(); 1745327952Sdim auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1746309124Sdim HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI); 1747309124Sdim expandLoadVec(B, std::prev(It), MRI, HII, NewRegs); 1748309124Sdim 1749327952Sdim BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR) 1750309124Sdim .addReg(TmpR1, RegState::Kill) 1751309124Sdim .addReg(TmpR0, RegState::Kill); 1752309124Sdim 1753309124Sdim NewRegs.push_back(TmpR0); 1754309124Sdim NewRegs.push_back(TmpR1); 1755309124Sdim B.erase(It); 1756309124Sdim return true; 1757309124Sdim} 1758309124Sdim 1759309124Sdimbool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, 1760309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1761309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1762309124Sdim MachineFunction &MF = *B.getParent(); 1763314564Sdim auto &MFI = MF.getFrameInfo(); 1764309124Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1765309124Sdim MachineInstr *MI = &*It; 1766314564Sdim if (!MI->getOperand(0).isFI()) 1767314564Sdim return false; 1768314564Sdim 1769314564Sdim // It is possible that the double vector being stored is only partially 1770314564Sdim // defined. From the point of view of the liveness tracking, it is ok to 1771314564Sdim // store it as a whole, but if we break it up we may end up storing a 1772314564Sdim // register that is entirely undefined. 
1773321369Sdim LivePhysRegs LPR(HRI); 1774314564Sdim LPR.addLiveIns(B); 1775344779Sdim SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers; 1776321369Sdim for (auto R = B.begin(); R != It; ++R) { 1777321369Sdim Clobbers.clear(); 1778314564Sdim LPR.stepForward(*R, Clobbers); 1779321369Sdim } 1780314564Sdim 1781309124Sdim DebugLoc DL = MI->getDebugLoc(); 1782360784Sdim Register SrcR = MI->getOperand(2).getReg(); 1783360784Sdim Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo); 1784360784Sdim Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi); 1785309124Sdim bool IsKill = MI->getOperand(2).isKill(); 1786309124Sdim int FI = MI->getOperand(0).getIndex(); 1787360784Sdim bool NeedsAligna = needsAligna(MF); 1788309124Sdim 1789327952Sdim unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); 1790327952Sdim unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); 1791309124Sdim unsigned HasAlign = MFI.getObjectAlignment(FI); 1792309124Sdim unsigned StoreOpc; 1793309124Sdim 1794360784Sdim auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) { 1795360784Sdim return !NeedsAligna && (NeedAlign <= HasAlign); 1796360784Sdim }; 1797360784Sdim 1798309124Sdim // Store low part. 1799314564Sdim if (LPR.contains(SrcLo)) { 1800360784Sdim StoreOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vS32b_ai 1801360784Sdim : Hexagon::V6_vS32Ub_ai; 1802314564Sdim BuildMI(B, It, DL, HII.get(StoreOpc)) 1803344779Sdim .addFrameIndex(FI) 1804344779Sdim .addImm(0) 1805344779Sdim .addReg(SrcLo, getKillRegState(IsKill)) 1806344779Sdim .cloneMemRefs(*MI); 1807314564Sdim } 1808309124Sdim 1809314564Sdim // Store high part. 1810314564Sdim if (LPR.contains(SrcHi)) { 1811360784Sdim StoreOpc = UseAligned(NeedAlign, HasAlign) ? 
Hexagon::V6_vS32b_ai 1812360784Sdim : Hexagon::V6_vS32Ub_ai; 1813314564Sdim BuildMI(B, It, DL, HII.get(StoreOpc)) 1814344779Sdim .addFrameIndex(FI) 1815344779Sdim .addImm(Size) 1816344779Sdim .addReg(SrcHi, getKillRegState(IsKill)) 1817344779Sdim .cloneMemRefs(*MI); 1818314564Sdim } 1819309124Sdim 1820309124Sdim B.erase(It); 1821309124Sdim return true; 1822309124Sdim} 1823309124Sdim 1824309124Sdimbool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B, 1825309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1826309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1827309124Sdim MachineFunction &MF = *B.getParent(); 1828314564Sdim auto &MFI = MF.getFrameInfo(); 1829309124Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1830309124Sdim MachineInstr *MI = &*It; 1831314564Sdim if (!MI->getOperand(1).isFI()) 1832314564Sdim return false; 1833314564Sdim 1834309124Sdim DebugLoc DL = MI->getDebugLoc(); 1835360784Sdim Register DstR = MI->getOperand(0).getReg(); 1836360784Sdim Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi); 1837360784Sdim Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo); 1838309124Sdim int FI = MI->getOperand(1).getIndex(); 1839360784Sdim bool NeedsAligna = needsAligna(MF); 1840309124Sdim 1841327952Sdim unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass); 1842327952Sdim unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); 1843309124Sdim unsigned HasAlign = MFI.getObjectAlignment(FI); 1844309124Sdim unsigned LoadOpc; 1845309124Sdim 1846360784Sdim auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) { 1847360784Sdim return !NeedsAligna && (NeedAlign <= HasAlign); 1848360784Sdim }; 1849360784Sdim 1850309124Sdim // Load low part. 1851360784Sdim LoadOpc = UseAligned(NeedAlign, HasAlign) ? 
Hexagon::V6_vL32b_ai 1852360784Sdim : Hexagon::V6_vL32Ub_ai; 1853309124Sdim BuildMI(B, It, DL, HII.get(LoadOpc), DstLo) 1854344779Sdim .addFrameIndex(FI) 1855344779Sdim .addImm(0) 1856344779Sdim .cloneMemRefs(*MI); 1857309124Sdim 1858309124Sdim // Load high part. 1859360784Sdim LoadOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vL32b_ai 1860360784Sdim : Hexagon::V6_vL32Ub_ai; 1861309124Sdim BuildMI(B, It, DL, HII.get(LoadOpc), DstHi) 1862344779Sdim .addFrameIndex(FI) 1863344779Sdim .addImm(Size) 1864344779Sdim .cloneMemRefs(*MI); 1865309124Sdim 1866309124Sdim B.erase(It); 1867309124Sdim return true; 1868309124Sdim} 1869309124Sdim 1870309124Sdimbool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B, 1871309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1872309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1873309124Sdim MachineFunction &MF = *B.getParent(); 1874314564Sdim auto &MFI = MF.getFrameInfo(); 1875309124Sdim MachineInstr *MI = &*It; 1876314564Sdim if (!MI->getOperand(0).isFI()) 1877314564Sdim return false; 1878314564Sdim 1879360784Sdim bool NeedsAligna = needsAligna(MF); 1880327952Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1881309124Sdim DebugLoc DL = MI->getDebugLoc(); 1882360784Sdim Register SrcR = MI->getOperand(2).getReg(); 1883309124Sdim bool IsKill = MI->getOperand(2).isKill(); 1884309124Sdim int FI = MI->getOperand(0).getIndex(); 1885309124Sdim 1886327952Sdim unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); 1887309124Sdim unsigned HasAlign = MFI.getObjectAlignment(FI); 1888360784Sdim bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign); 1889360784Sdim unsigned StoreOpc = UseAligned ? 
Hexagon::V6_vS32b_ai 1890360784Sdim : Hexagon::V6_vS32Ub_ai; 1891309124Sdim BuildMI(B, It, DL, HII.get(StoreOpc)) 1892344779Sdim .addFrameIndex(FI) 1893344779Sdim .addImm(0) 1894344779Sdim .addReg(SrcR, getKillRegState(IsKill)) 1895344779Sdim .cloneMemRefs(*MI); 1896309124Sdim 1897309124Sdim B.erase(It); 1898309124Sdim return true; 1899309124Sdim} 1900309124Sdim 1901309124Sdimbool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B, 1902309124Sdim MachineBasicBlock::iterator It, MachineRegisterInfo &MRI, 1903309124Sdim const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const { 1904309124Sdim MachineFunction &MF = *B.getParent(); 1905314564Sdim auto &MFI = MF.getFrameInfo(); 1906309124Sdim MachineInstr *MI = &*It; 1907314564Sdim if (!MI->getOperand(1).isFI()) 1908314564Sdim return false; 1909314564Sdim 1910360784Sdim bool NeedsAligna = needsAligna(MF); 1911327952Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1912309124Sdim DebugLoc DL = MI->getDebugLoc(); 1913360784Sdim Register DstR = MI->getOperand(0).getReg(); 1914309124Sdim int FI = MI->getOperand(1).getIndex(); 1915309124Sdim 1916327952Sdim unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); 1917309124Sdim unsigned HasAlign = MFI.getObjectAlignment(FI); 1918360784Sdim bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign); 1919360784Sdim unsigned LoadOpc = UseAligned ? 
Hexagon::V6_vL32b_ai 1920360784Sdim : Hexagon::V6_vL32Ub_ai; 1921309124Sdim BuildMI(B, It, DL, HII.get(LoadOpc), DstR) 1922344779Sdim .addFrameIndex(FI) 1923344779Sdim .addImm(0) 1924344779Sdim .cloneMemRefs(*MI); 1925309124Sdim 1926309124Sdim B.erase(It); 1927309124Sdim return true; 1928309124Sdim} 1929309124Sdim 1930309124Sdimbool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF, 1931309124Sdim SmallVectorImpl<unsigned> &NewRegs) const { 1932327952Sdim auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 1933309124Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1934309124Sdim bool Changed = false; 1935309124Sdim 1936309124Sdim for (auto &B : MF) { 1937309124Sdim // Traverse the basic block. 1938309124Sdim MachineBasicBlock::iterator NextI; 1939309124Sdim for (auto I = B.begin(), E = B.end(); I != E; I = NextI) { 1940309124Sdim MachineInstr *MI = &*I; 1941309124Sdim NextI = std::next(I); 1942309124Sdim unsigned Opc = MI->getOpcode(); 1943309124Sdim 1944309124Sdim switch (Opc) { 1945309124Sdim case TargetOpcode::COPY: 1946309124Sdim Changed |= expandCopy(B, I, MRI, HII, NewRegs); 1947309124Sdim break; 1948309124Sdim case Hexagon::STriw_pred: 1949341825Sdim case Hexagon::STriw_ctr: 1950309124Sdim Changed |= expandStoreInt(B, I, MRI, HII, NewRegs); 1951309124Sdim break; 1952309124Sdim case Hexagon::LDriw_pred: 1953341825Sdim case Hexagon::LDriw_ctr: 1954309124Sdim Changed |= expandLoadInt(B, I, MRI, HII, NewRegs); 1955309124Sdim break; 1956314564Sdim case Hexagon::PS_vstorerq_ai: 1957309124Sdim Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs); 1958309124Sdim break; 1959314564Sdim case Hexagon::PS_vloadrq_ai: 1960309124Sdim Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs); 1961309124Sdim break; 1962314564Sdim case Hexagon::PS_vloadrw_ai: 1963309124Sdim Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs); 1964309124Sdim break; 1965314564Sdim case Hexagon::PS_vstorerw_ai: 1966309124Sdim Changed |= expandStoreVec2(B, I, MRI, HII, 
NewRegs); 1967309124Sdim break; 1968309124Sdim } 1969309124Sdim } 1970309124Sdim } 1971309124Sdim 1972309124Sdim return Changed; 1973309124Sdim} 1974309124Sdim 1975309124Sdimvoid HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, 1976309124Sdim BitVector &SavedRegs, 1977309124Sdim RegScavenger *RS) const { 1978327952Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 1979309124Sdim 1980309124Sdim SavedRegs.resize(HRI.getNumRegs()); 1981309124Sdim 1982309124Sdim // If we have a function containing __builtin_eh_return we want to spill and 1983309124Sdim // restore all callee saved registers. Pretend that they are used. 1984309124Sdim if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) 1985309124Sdim for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R) 1986309124Sdim SavedRegs.set(*R); 1987309124Sdim 1988309124Sdim // Replace predicate register pseudo spill code. 1989309124Sdim SmallVector<unsigned,8> NewRegs; 1990309124Sdim expandSpillMacros(MF, NewRegs); 1991309124Sdim if (OptimizeSpillSlots && !isOptNone(MF)) 1992309124Sdim optimizeSpillSlots(MF, NewRegs); 1993309124Sdim 1994341825Sdim // We need to reserve a spill slot if scavenging could potentially require 1995309124Sdim // spilling a scavenged register. 1996314564Sdim if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) { 1997314564Sdim MachineFrameInfo &MFI = MF.getFrameInfo(); 1998309124Sdim MachineRegisterInfo &MRI = MF.getRegInfo(); 1999309124Sdim SetVector<const TargetRegisterClass*> SpillRCs; 2000309124Sdim // Reserve an int register in any case, because it could be used to hold 2001309124Sdim // the stack offset in case it does not fit into a spill instruction. 
2002309124Sdim SpillRCs.insert(&Hexagon::IntRegsRegClass); 2003309124Sdim 2004309124Sdim for (unsigned VR : NewRegs) 2005309124Sdim SpillRCs.insert(MRI.getRegClass(VR)); 2006309124Sdim 2007309124Sdim for (auto *RC : SpillRCs) { 2008309124Sdim if (!needToReserveScavengingSpillSlots(MF, HRI, RC)) 2009309124Sdim continue; 2010360784Sdim unsigned Num = 1; 2011360784Sdim switch (RC->getID()) { 2012360784Sdim case Hexagon::IntRegsRegClassID: 2013360784Sdim Num = NumberScavengerSlots; 2014360784Sdim break; 2015360784Sdim case Hexagon::HvxQRRegClassID: 2016360784Sdim Num = 2; // Vector predicate spills also need a vector register. 2017360784Sdim break; 2018360784Sdim } 2019321369Sdim unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC); 2020309124Sdim for (unsigned i = 0; i < Num; i++) { 2021309124Sdim int NewFI = MFI.CreateSpillStackObject(S, A); 2022309124Sdim RS->addScavengingFrameIndex(NewFI); 2023309124Sdim } 2024309124Sdim } 2025309124Sdim } 2026309124Sdim 2027309124Sdim TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 2028309124Sdim} 2029309124Sdim 2030309124Sdimunsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF, 2031309124Sdim HexagonBlockRanges::IndexRange &FIR, 2032309124Sdim HexagonBlockRanges::InstrIndexMap &IndexMap, 2033309124Sdim HexagonBlockRanges::RegToRangeMap &DeadMap, 2034309124Sdim const TargetRegisterClass *RC) const { 2035309124Sdim auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 2036309124Sdim auto &MRI = MF.getRegInfo(); 2037309124Sdim 2038309124Sdim auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool { 2039309124Sdim auto F = DeadMap.find({Reg,0}); 2040309124Sdim if (F == DeadMap.end()) 2041309124Sdim return false; 2042309124Sdim for (auto &DR : F->second) 2043309124Sdim if (DR.contains(FIR)) 2044309124Sdim return true; 2045309124Sdim return false; 2046309124Sdim }; 2047309124Sdim 2048309124Sdim for (unsigned Reg : RC->getRawAllocationOrder(MF)) { 2049309124Sdim bool Dead = true; 
2050309124Sdim for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) { 2051309124Sdim if (isDead(R.Reg)) 2052309124Sdim continue; 2053309124Sdim Dead = false; 2054309124Sdim break; 2055309124Sdim } 2056309124Sdim if (Dead) 2057309124Sdim return Reg; 2058309124Sdim } 2059309124Sdim return 0; 2060309124Sdim} 2061309124Sdim 2062309124Sdimvoid HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, 2063309124Sdim SmallVectorImpl<unsigned> &VRegs) const { 2064309124Sdim auto &HST = MF.getSubtarget<HexagonSubtarget>(); 2065309124Sdim auto &HII = *HST.getInstrInfo(); 2066309124Sdim auto &HRI = *HST.getRegisterInfo(); 2067309124Sdim auto &MRI = MF.getRegInfo(); 2068309124Sdim HexagonBlockRanges HBR(MF); 2069309124Sdim 2070327952Sdim using BlockIndexMap = 2071327952Sdim std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>; 2072327952Sdim using BlockRangeMap = 2073327952Sdim std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>; 2074327952Sdim using IndexType = HexagonBlockRanges::IndexType; 2075309124Sdim 2076309124Sdim struct SlotInfo { 2077309124Sdim BlockRangeMap Map; 2078314564Sdim unsigned Size = 0; 2079314564Sdim const TargetRegisterClass *RC = nullptr; 2080309124Sdim 2081314564Sdim SlotInfo() = default; 2082309124Sdim }; 2083309124Sdim 2084309124Sdim BlockIndexMap BlockIndexes; 2085309124Sdim SmallSet<int,4> BadFIs; 2086309124Sdim std::map<int,SlotInfo> FIRangeMap; 2087309124Sdim 2088309124Sdim // Accumulate register classes: get a common class for a pre-existing 2089309124Sdim // class HaveRC and a new class NewRC. Return nullptr if a common class 2090309124Sdim // cannot be found, otherwise return the resulting class. If HaveRC is 2091309124Sdim // nullptr, assume that it is still unset. 
2092321369Sdim auto getCommonRC = 2093321369Sdim [](const TargetRegisterClass *HaveRC, 2094321369Sdim const TargetRegisterClass *NewRC) -> const TargetRegisterClass * { 2095309124Sdim if (HaveRC == nullptr || HaveRC == NewRC) 2096309124Sdim return NewRC; 2097309124Sdim // Different classes, both non-null. Pick the more general one. 2098309124Sdim if (HaveRC->hasSubClassEq(NewRC)) 2099309124Sdim return HaveRC; 2100309124Sdim if (NewRC->hasSubClassEq(HaveRC)) 2101309124Sdim return NewRC; 2102309124Sdim return nullptr; 2103309124Sdim }; 2104309124Sdim 2105309124Sdim // Scan all blocks in the function. Check all occurrences of frame indexes, 2106309124Sdim // and collect relevant information. 2107309124Sdim for (auto &B : MF) { 2108309124Sdim std::map<int,IndexType> LastStore, LastLoad; 2109309124Sdim // Emplace appears not to be supported in gcc 4.7.2-4. 2110309124Sdim //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B)); 2111309124Sdim auto P = BlockIndexes.insert( 2112309124Sdim std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B))); 2113309124Sdim auto &IndexMap = P.first->second; 2114341825Sdim LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n" 2115341825Sdim << IndexMap << '\n'); 2116309124Sdim 2117309124Sdim for (auto &In : B) { 2118309124Sdim int LFI, SFI; 2119309124Sdim bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In); 2120309124Sdim bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In); 2121309124Sdim if (Load && Store) { 2122309124Sdim // If it's both a load and a store, then we won't handle it. 2123309124Sdim BadFIs.insert(LFI); 2124309124Sdim BadFIs.insert(SFI); 2125309124Sdim continue; 2126309124Sdim } 2127309124Sdim // Check for register classes of the register used as the source for 2128309124Sdim // the store, and the register used as the destination for the load. 2129309124Sdim // Also, only accept base+imm_offset addressing modes. 
Other addressing 2130309124Sdim // modes can have side-effects (post-increments, etc.). For stack 2131309124Sdim // slots they are very unlikely, so there is not much loss due to 2132309124Sdim // this restriction. 2133309124Sdim if (Load || Store) { 2134309124Sdim int TFI = Load ? LFI : SFI; 2135314564Sdim unsigned AM = HII.getAddrMode(In); 2136309124Sdim SlotInfo &SI = FIRangeMap[TFI]; 2137309124Sdim bool Bad = (AM != HexagonII::BaseImmOffset); 2138309124Sdim if (!Bad) { 2139309124Sdim // If the addressing mode is ok, check the register class. 2140314564Sdim unsigned OpNum = Load ? 0 : 2; 2141314564Sdim auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF); 2142309124Sdim RC = getCommonRC(SI.RC, RC); 2143309124Sdim if (RC == nullptr) 2144309124Sdim Bad = true; 2145309124Sdim else 2146309124Sdim SI.RC = RC; 2147309124Sdim } 2148309124Sdim if (!Bad) { 2149309124Sdim // Check sizes. 2150327952Sdim unsigned S = HII.getMemAccessSize(In); 2151309124Sdim if (SI.Size != 0 && SI.Size != S) 2152309124Sdim Bad = true; 2153309124Sdim else 2154309124Sdim SI.Size = S; 2155309124Sdim } 2156314564Sdim if (!Bad) { 2157314564Sdim for (auto *Mo : In.memoperands()) { 2158353358Sdim if (!Mo->isVolatile() && !Mo->isAtomic()) 2159314564Sdim continue; 2160314564Sdim Bad = true; 2161314564Sdim break; 2162314564Sdim } 2163314564Sdim } 2164309124Sdim if (Bad) 2165309124Sdim BadFIs.insert(TFI); 2166309124Sdim } 2167309124Sdim 2168309124Sdim // Locate uses of frame indices. 2169309124Sdim for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) { 2170309124Sdim const MachineOperand &Op = In.getOperand(i); 2171309124Sdim if (!Op.isFI()) 2172309124Sdim continue; 2173309124Sdim int FI = Op.getIndex(); 2174309124Sdim // Make sure that the following operand is an immediate and that 2175309124Sdim // it is 0. This is the offset in the stack object. 
2176309124Sdim if (i+1 >= n || !In.getOperand(i+1).isImm() || 2177309124Sdim In.getOperand(i+1).getImm() != 0) 2178309124Sdim BadFIs.insert(FI); 2179309124Sdim if (BadFIs.count(FI)) 2180309124Sdim continue; 2181309124Sdim 2182309124Sdim IndexType Index = IndexMap.getIndex(&In); 2183309124Sdim if (Load) { 2184309124Sdim if (LastStore[FI] == IndexType::None) 2185309124Sdim LastStore[FI] = IndexType::Entry; 2186309124Sdim LastLoad[FI] = Index; 2187309124Sdim } else if (Store) { 2188309124Sdim HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; 2189309124Sdim if (LastStore[FI] != IndexType::None) 2190309124Sdim RL.add(LastStore[FI], LastLoad[FI], false, false); 2191309124Sdim else if (LastLoad[FI] != IndexType::None) 2192309124Sdim RL.add(IndexType::Entry, LastLoad[FI], false, false); 2193309124Sdim LastLoad[FI] = IndexType::None; 2194309124Sdim LastStore[FI] = Index; 2195309124Sdim } else { 2196309124Sdim BadFIs.insert(FI); 2197309124Sdim } 2198309124Sdim } 2199309124Sdim } 2200309124Sdim 2201309124Sdim for (auto &I : LastLoad) { 2202309124Sdim IndexType LL = I.second; 2203309124Sdim if (LL == IndexType::None) 2204309124Sdim continue; 2205309124Sdim auto &RL = FIRangeMap[I.first].Map[&B]; 2206309124Sdim IndexType &LS = LastStore[I.first]; 2207309124Sdim if (LS != IndexType::None) 2208309124Sdim RL.add(LS, LL, false, false); 2209309124Sdim else 2210309124Sdim RL.add(IndexType::Entry, LL, false, false); 2211309124Sdim LS = IndexType::None; 2212309124Sdim } 2213309124Sdim for (auto &I : LastStore) { 2214309124Sdim IndexType LS = I.second; 2215309124Sdim if (LS == IndexType::None) 2216309124Sdim continue; 2217309124Sdim auto &RL = FIRangeMap[I.first].Map[&B]; 2218309124Sdim RL.add(LS, IndexType::None, false, false); 2219309124Sdim } 2220309124Sdim } 2221309124Sdim 2222341825Sdim LLVM_DEBUG({ 2223309124Sdim for (auto &P : FIRangeMap) { 2224309124Sdim dbgs() << "fi#" << P.first; 2225309124Sdim if (BadFIs.count(P.first)) 2226309124Sdim dbgs() << " (bad)"; 
2227309124Sdim dbgs() << " RC: "; 2228309124Sdim if (P.second.RC != nullptr) 2229309124Sdim dbgs() << HRI.getRegClassName(P.second.RC) << '\n'; 2230309124Sdim else 2231309124Sdim dbgs() << "<null>\n"; 2232309124Sdim for (auto &R : P.second.Map) 2233327952Sdim dbgs() << " " << printMBBReference(*R.first) << " { " << R.second 2234327952Sdim << "}\n"; 2235309124Sdim } 2236309124Sdim }); 2237309124Sdim 2238309124Sdim // When a slot is loaded from in a block without being stored to in the 2239309124Sdim // same block, it is live-on-entry to this block. To avoid CFG analysis, 2240309124Sdim // consider this slot to be live-on-exit from all blocks. 2241309124Sdim SmallSet<int,4> LoxFIs; 2242309124Sdim 2243309124Sdim std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap; 2244309124Sdim 2245309124Sdim for (auto &P : FIRangeMap) { 2246309124Sdim // P = pair(FI, map: BB->RangeList) 2247309124Sdim if (BadFIs.count(P.first)) 2248309124Sdim continue; 2249309124Sdim for (auto &B : MF) { 2250309124Sdim auto F = P.second.Map.find(&B); 2251309124Sdim // F = pair(BB, RangeList) 2252309124Sdim if (F == P.second.Map.end() || F->second.empty()) 2253309124Sdim continue; 2254309124Sdim HexagonBlockRanges::IndexRange &IR = F->second.front(); 2255309124Sdim if (IR.start() == IndexType::Entry) 2256309124Sdim LoxFIs.insert(P.first); 2257309124Sdim BlockFIMap[&B].push_back(P.first); 2258309124Sdim } 2259309124Sdim } 2260309124Sdim 2261341825Sdim LLVM_DEBUG({ 2262309124Sdim dbgs() << "Block-to-FI map (* -- live-on-exit):\n"; 2263309124Sdim for (auto &P : BlockFIMap) { 2264309124Sdim auto &FIs = P.second; 2265309124Sdim if (FIs.empty()) 2266309124Sdim continue; 2267327952Sdim dbgs() << " " << printMBBReference(*P.first) << ": {"; 2268309124Sdim for (auto I : FIs) { 2269309124Sdim dbgs() << " fi#" << I; 2270309124Sdim if (LoxFIs.count(I)) 2271309124Sdim dbgs() << '*'; 2272309124Sdim } 2273309124Sdim dbgs() << " }\n"; 2274309124Sdim } 2275309124Sdim }); 2276309124Sdim 2277314564Sdim#ifndef 
NDEBUG 2278314564Sdim bool HasOptLimit = SpillOptMax.getPosition(); 2279314564Sdim#endif 2280314564Sdim 2281309124Sdim // eliminate loads, when all loads eliminated, eliminate all stores. 2282309124Sdim for (auto &B : MF) { 2283309124Sdim auto F = BlockIndexes.find(&B); 2284309124Sdim assert(F != BlockIndexes.end()); 2285309124Sdim HexagonBlockRanges::InstrIndexMap &IM = F->second; 2286309124Sdim HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM); 2287309124Sdim HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM); 2288341825Sdim LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n" 2289341825Sdim << HexagonBlockRanges::PrintRangeMap(DM, HRI)); 2290309124Sdim 2291309124Sdim for (auto FI : BlockFIMap[&B]) { 2292309124Sdim if (BadFIs.count(FI)) 2293309124Sdim continue; 2294341825Sdim LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n'); 2295309124Sdim HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; 2296309124Sdim for (auto &Range : RL) { 2297341825Sdim LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n'); 2298309124Sdim if (!IndexType::isInstr(Range.start()) || 2299309124Sdim !IndexType::isInstr(Range.end())) 2300309124Sdim continue; 2301314564Sdim MachineInstr &SI = *IM.getInstr(Range.start()); 2302314564Sdim MachineInstr &EI = *IM.getInstr(Range.end()); 2303314564Sdim assert(SI.mayStore() && "Unexpected start instruction"); 2304314564Sdim assert(EI.mayLoad() && "Unexpected end instruction"); 2305314564Sdim MachineOperand &SrcOp = SI.getOperand(2); 2306309124Sdim 2307309124Sdim HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(), 2308309124Sdim SrcOp.getSubReg() }; 2309314564Sdim auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF); 2310309124Sdim // The this-> is needed to unconfuse MSVC. 
2311309124Sdim unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC); 2312341825Sdim LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI) 2313341825Sdim << '\n'); 2314309124Sdim if (FoundR == 0) 2315309124Sdim continue; 2316314564Sdim#ifndef NDEBUG 2317314564Sdim if (HasOptLimit) { 2318314564Sdim if (SpillOptCount >= SpillOptMax) 2319314564Sdim return; 2320314564Sdim SpillOptCount++; 2321314564Sdim } 2322314564Sdim#endif 2323309124Sdim 2324309124Sdim // Generate the copy-in: "FoundR = COPY SrcR" at the store location. 2325314564Sdim MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt; 2326309124Sdim MachineInstr *CopyIn = nullptr; 2327309124Sdim if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) { 2328314564Sdim const DebugLoc &DL = SI.getDebugLoc(); 2329309124Sdim CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR) 2330321369Sdim .add(SrcOp); 2331309124Sdim } 2332309124Sdim 2333309124Sdim ++StartIt; 2334309124Sdim // Check if this is a last store and the FI is live-on-exit. 2335309124Sdim if (LoxFIs.count(FI) && (&Range == &RL.back())) { 2336309124Sdim // Update store's source register. 2337309124Sdim if (unsigned SR = SrcOp.getSubReg()) 2338309124Sdim SrcOp.setReg(HRI.getSubReg(FoundR, SR)); 2339309124Sdim else 2340309124Sdim SrcOp.setReg(FoundR); 2341309124Sdim SrcOp.setSubReg(0); 2342309124Sdim // We are keeping this register live. 
2343309124Sdim SrcOp.setIsKill(false); 2344309124Sdim } else { 2345314564Sdim B.erase(&SI); 2346314564Sdim IM.replaceInstr(&SI, CopyIn); 2347309124Sdim } 2348309124Sdim 2349314564Sdim auto EndIt = std::next(EI.getIterator()); 2350309124Sdim for (auto It = StartIt; It != EndIt; It = NextIt) { 2351314564Sdim MachineInstr &MI = *It; 2352309124Sdim NextIt = std::next(It); 2353309124Sdim int TFI; 2354314564Sdim if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI) 2355309124Sdim continue; 2356360784Sdim Register DstR = MI.getOperand(0).getReg(); 2357314564Sdim assert(MI.getOperand(0).getSubReg() == 0); 2358309124Sdim MachineInstr *CopyOut = nullptr; 2359309124Sdim if (DstR != FoundR) { 2360314564Sdim DebugLoc DL = MI.getDebugLoc(); 2361327952Sdim unsigned MemSize = HII.getMemAccessSize(MI); 2362309124Sdim assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset); 2363309124Sdim unsigned CopyOpc = TargetOpcode::COPY; 2364314564Sdim if (HII.isSignExtendingLoad(MI)) 2365309124Sdim CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth; 2366314564Sdim else if (HII.isZeroExtendingLoad(MI)) 2367309124Sdim CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth; 2368309124Sdim CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR) 2369314564Sdim .addReg(FoundR, getKillRegState(&MI == &EI)); 2370309124Sdim } 2371314564Sdim IM.replaceInstr(&MI, CopyOut); 2372309124Sdim B.erase(It); 2373309124Sdim } 2374309124Sdim 2375309124Sdim // Update the dead map. 
2376309124Sdim HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 }; 2377309124Sdim for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI)) 2378309124Sdim DM[RR].subtract(Range); 2379309124Sdim } // for Range in range list 2380309124Sdim } 2381309124Sdim } 2382309124Sdim} 2383309124Sdim 2384288943Sdimvoid HexagonFrameLowering::expandAlloca(MachineInstr *AI, 2385288943Sdim const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const { 2386288943Sdim MachineBasicBlock &MB = *AI->getParent(); 2387288943Sdim DebugLoc DL = AI->getDebugLoc(); 2388288943Sdim unsigned A = AI->getOperand(2).getImm(); 2389288943Sdim 2390288943Sdim // Have 2391288943Sdim // Rd = alloca Rs, #A 2392288943Sdim // 2393288943Sdim // If Rs and Rd are different registers, use this sequence: 2394288943Sdim // Rd = sub(r29, Rs) 2395288943Sdim // r29 = sub(r29, Rs) 2396288943Sdim // Rd = and(Rd, #-A) ; if necessary 2397288943Sdim // r29 = and(r29, #-A) ; if necessary 2398288943Sdim // Rd = add(Rd, #CF) ; CF size aligned to at most A 2399288943Sdim // otherwise, do 2400288943Sdim // Rd = sub(r29, Rs) 2401288943Sdim // Rd = and(Rd, #-A) ; if necessary 2402288943Sdim // r29 = Rd 2403288943Sdim // Rd = add(Rd, #CF) ; CF size aligned to at most A 2404288943Sdim 2405288943Sdim MachineOperand &RdOp = AI->getOperand(0); 2406288943Sdim MachineOperand &RsOp = AI->getOperand(1); 2407288943Sdim unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg(); 2408288943Sdim 2409288943Sdim // Rd = sub(r29, Rs) 2410288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd) 2411288943Sdim .addReg(SP) 2412288943Sdim .addReg(Rs); 2413288943Sdim if (Rs != Rd) { 2414288943Sdim // r29 = sub(r29, Rs) 2415288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP) 2416288943Sdim .addReg(SP) 2417288943Sdim .addReg(Rs); 2418249423Sdim } 2419288943Sdim if (A > 8) { 2420288943Sdim // Rd = and(Rd, #-A) 2421288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd) 2422288943Sdim .addReg(Rd) 2423288943Sdim 
.addImm(-int64_t(A)); 2424288943Sdim if (Rs != Rd) 2425288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP) 2426288943Sdim .addReg(SP) 2427288943Sdim .addImm(-int64_t(A)); 2428288943Sdim } 2429288943Sdim if (Rs == Rd) { 2430288943Sdim // r29 = Rd 2431288943Sdim BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP) 2432288943Sdim .addReg(Rd); 2433288943Sdim } 2434288943Sdim if (CF > 0) { 2435288943Sdim // Rd = add(Rd, #CF) 2436288943Sdim BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd) 2437288943Sdim .addReg(Rd) 2438288943Sdim .addImm(CF); 2439288943Sdim } 2440249423Sdim} 2441249423Sdim 2442288943Sdimbool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { 2443314564Sdim const MachineFrameInfo &MFI = MF.getFrameInfo(); 2444314564Sdim if (!MFI.hasVarSizedObjects()) 2445288943Sdim return false; 2446360784Sdim // Do not check for max stack object alignment here, because the stack 2447360784Sdim // may not be complete yet. Assume that we will need PS_aligna if there 2448360784Sdim // are variable-sized objects. 2449288943Sdim return true; 2450234285Sdim} 2451288943Sdim 2452296417Sdimconst MachineInstr *HexagonFrameLowering::getAlignaInstr( 2453296417Sdim const MachineFunction &MF) const { 2454288943Sdim for (auto &B : MF) 2455288943Sdim for (auto &I : B) 2456314564Sdim if (I.getOpcode() == Hexagon::PS_aligna) 2457288943Sdim return &I; 2458288943Sdim return nullptr; 2459288943Sdim} 2460288943Sdim 2461309124Sdim/// Adds all callee-saved registers as implicit uses or defs to the 2462309124Sdim/// instruction. 2463309124Sdimvoid HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, 2464309124Sdim const CSIVect &CSI, bool IsDef, bool IsKill) const { 2465309124Sdim // Add the callee-saved registers as implicit uses. 
2466309124Sdim for (auto &R : CSI) 2467309124Sdim MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill)); 2468288943Sdim} 2469288943Sdim 2470288943Sdim/// Determine whether the callee-saved register saves and restores should 2471288943Sdim/// be generated via inline code. If this function returns "true", inline 2472288943Sdim/// code will be generated. If this function returns "false", additional 2473288943Sdim/// checks are performed, which may still lead to the inline code. 2474321369Sdimbool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF, 2475288943Sdim const CSIVect &CSI) const { 2476288943Sdim if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) 2477288943Sdim return true; 2478321369Sdim if (!hasFP(MF)) 2479321369Sdim return true; 2480288943Sdim if (!isOptSize(MF) && !isMinSize(MF)) 2481288943Sdim if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) 2482288943Sdim return true; 2483288943Sdim 2484288943Sdim // Check if CSI only has double registers, and if the registers form 2485288943Sdim // a contiguous block starting from D8. 
2486288943Sdim BitVector Regs(Hexagon::NUM_TARGET_REGS); 2487288943Sdim for (unsigned i = 0, n = CSI.size(); i < n; ++i) { 2488288943Sdim unsigned R = CSI[i].getReg(); 2489288943Sdim if (!Hexagon::DoubleRegsRegClass.contains(R)) 2490288943Sdim return true; 2491288943Sdim Regs[R] = true; 2492288943Sdim } 2493288943Sdim int F = Regs.find_first(); 2494288943Sdim if (F != Hexagon::D8) 2495288943Sdim return true; 2496288943Sdim while (F >= 0) { 2497288943Sdim int N = Regs.find_next(F); 2498288943Sdim if (N >= 0 && N != F+1) 2499288943Sdim return true; 2500288943Sdim F = N; 2501288943Sdim } 2502288943Sdim 2503288943Sdim return false; 2504288943Sdim} 2505288943Sdim 2506321369Sdimbool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF, 2507288943Sdim const CSIVect &CSI) const { 2508288943Sdim if (shouldInlineCSR(MF, CSI)) 2509288943Sdim return false; 2510288943Sdim unsigned NumCSI = CSI.size(); 2511288943Sdim if (NumCSI <= 1) 2512288943Sdim return false; 2513288943Sdim 2514288943Sdim unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs 2515288943Sdim : SpillFuncThreshold; 2516288943Sdim return Threshold < NumCSI; 2517288943Sdim} 2518288943Sdim 2519321369Sdimbool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF, 2520288943Sdim const CSIVect &CSI) const { 2521288943Sdim if (shouldInlineCSR(MF, CSI)) 2522288943Sdim return false; 2523309124Sdim // The restore functions do a bit more than just restoring registers. 2524309124Sdim // The non-returning versions will go back directly to the caller's 2525309124Sdim // caller, others will clean up the stack frame in preparation for 2526309124Sdim // a tail call. Using them can still save code size even if only one 2527309124Sdim // register is getting restores. Make the decision based on -Oz: 2528309124Sdim // using -Os will use inline restore for a single register. 
2529309124Sdim if (isMinSize(MF)) 2530309124Sdim return true; 2531288943Sdim unsigned NumCSI = CSI.size(); 2532309124Sdim if (NumCSI <= 1) 2533309124Sdim return false; 2534309124Sdim 2535288943Sdim unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1 2536288943Sdim : SpillFuncThreshold; 2537288943Sdim return Threshold < NumCSI; 2538288943Sdim} 2539314564Sdim 2540314564Sdimbool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const { 2541314564Sdim unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF); 2542314564Sdim auto &HST = MF.getSubtarget<HexagonSubtarget>(); 2543314564Sdim // A fairly simplistic guess as to whether a potential load/store to a 2544321369Sdim // stack location could require an extra register. 2545321369Sdim if (HST.useHVXOps() && StackSize > 256) 2546321369Sdim return true; 2547321369Sdim 2548321369Sdim // Check if the function has store-immediate instructions that access 2549321369Sdim // the stack. Since the offset field is not extendable, if the stack 2550321369Sdim // size exceeds the offset limit (6 bits, shifted), the stores will 2551321369Sdim // require a new base register. 2552321369Sdim bool HasImmStack = false; 2553321369Sdim unsigned MinLS = ~0u; // Log_2 of the memory access size. 
2554321369Sdim 2555321369Sdim for (const MachineBasicBlock &B : MF) { 2556321369Sdim for (const MachineInstr &MI : B) { 2557321369Sdim unsigned LS = 0; 2558321369Sdim switch (MI.getOpcode()) { 2559321369Sdim case Hexagon::S4_storeirit_io: 2560321369Sdim case Hexagon::S4_storeirif_io: 2561321369Sdim case Hexagon::S4_storeiri_io: 2562321369Sdim ++LS; 2563321369Sdim LLVM_FALLTHROUGH; 2564321369Sdim case Hexagon::S4_storeirht_io: 2565321369Sdim case Hexagon::S4_storeirhf_io: 2566321369Sdim case Hexagon::S4_storeirh_io: 2567321369Sdim ++LS; 2568321369Sdim LLVM_FALLTHROUGH; 2569321369Sdim case Hexagon::S4_storeirbt_io: 2570321369Sdim case Hexagon::S4_storeirbf_io: 2571321369Sdim case Hexagon::S4_storeirb_io: 2572321369Sdim if (MI.getOperand(0).isFI()) 2573321369Sdim HasImmStack = true; 2574321369Sdim MinLS = std::min(MinLS, LS); 2575321369Sdim break; 2576321369Sdim } 2577321369Sdim } 2578321369Sdim } 2579321369Sdim 2580321369Sdim if (HasImmStack) 2581321369Sdim return !isUInt<6>(StackSize >> MinLS); 2582321369Sdim 2583314564Sdim return false; 2584314564Sdim} 2585