1327952Sdim//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
2234285Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6234285Sdim//
7234285Sdim//
8234285Sdim//===----------------------------------------------------------------------===//
9234285Sdim
10321369Sdim#include "HexagonFrameLowering.h"
11309124Sdim#include "HexagonBlockRanges.h"
12234285Sdim#include "HexagonInstrInfo.h"
13249423Sdim#include "HexagonMachineFunctionInfo.h"
14234285Sdim#include "HexagonRegisterInfo.h"
15234285Sdim#include "HexagonSubtarget.h"
16234285Sdim#include "HexagonTargetMachine.h"
17314564Sdim#include "MCTargetDesc/HexagonBaseInfo.h"
18234285Sdim#include "llvm/ADT/BitVector.h"
19314564Sdim#include "llvm/ADT/DenseMap.h"
20314564Sdim#include "llvm/ADT/None.h"
21314564Sdim#include "llvm/ADT/Optional.h"
22288943Sdim#include "llvm/ADT/PostOrderIterator.h"
23314564Sdim#include "llvm/ADT/SetVector.h"
24314564Sdim#include "llvm/ADT/SmallSet.h"
25314564Sdim#include "llvm/ADT/SmallVector.h"
26314564Sdim#include "llvm/CodeGen/LivePhysRegs.h"
27314564Sdim#include "llvm/CodeGen/MachineBasicBlock.h"
28288943Sdim#include "llvm/CodeGen/MachineDominators.h"
29314564Sdim#include "llvm/CodeGen/MachineFrameInfo.h"
30234285Sdim#include "llvm/CodeGen/MachineFunction.h"
31234285Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
32314564Sdim#include "llvm/CodeGen/MachineInstr.h"
33249423Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
34314564Sdim#include "llvm/CodeGen/MachineMemOperand.h"
35234285Sdim#include "llvm/CodeGen/MachineModuleInfo.h"
36314564Sdim#include "llvm/CodeGen/MachineOperand.h"
37288943Sdim#include "llvm/CodeGen/MachinePostDominators.h"
38234285Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
39360784Sdim#include "llvm/CodeGen/PseudoSourceValue.h"
40234285Sdim#include "llvm/CodeGen/RegisterScavenging.h"
41327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h"
42327952Sdim#include "llvm/IR/Attributes.h"
43314564Sdim#include "llvm/IR/DebugLoc.h"
44249423Sdim#include "llvm/IR/Function.h"
45314564Sdim#include "llvm/MC/MCDwarf.h"
46314564Sdim#include "llvm/MC/MCRegisterInfo.h"
47314564Sdim#include "llvm/Pass.h"
48314564Sdim#include "llvm/Support/CodeGen.h"
49249423Sdim#include "llvm/Support/CommandLine.h"
50327952Sdim#include "llvm/Support/Compiler.h"
51288943Sdim#include "llvm/Support/Debug.h"
52314564Sdim#include "llvm/Support/ErrorHandling.h"
53314564Sdim#include "llvm/Support/MathExtras.h"
54288943Sdim#include "llvm/Support/raw_ostream.h"
55234285Sdim#include "llvm/Target/TargetMachine.h"
56327952Sdim#include "llvm/Target/TargetOptions.h"
57314564Sdim#include <algorithm>
58314564Sdim#include <cassert>
59314564Sdim#include <cstdint>
60314564Sdim#include <iterator>
61314564Sdim#include <limits>
62314564Sdim#include <map>
63314564Sdim#include <utility>
64314564Sdim#include <vector>
65234285Sdim
66321369Sdim#define DEBUG_TYPE "hexagon-pei"
67321369Sdim
68288943Sdim// Hexagon stack frame layout as defined by the ABI:
69288943Sdim//
70288943Sdim//                                                       Incoming arguments
71288943Sdim//                                                       passed via stack
72288943Sdim//                                                                      |
73288943Sdim//                                                                      |
74288943Sdim//        SP during function's                 FP during function's     |
75288943Sdim//    +-- runtime (top of stack)               runtime (bottom) --+     |
76288943Sdim//    |                                                           |     |
77288943Sdim// --++---------------------+------------------+-----------------++-+-------
78288943Sdim//   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
79288943Sdim//   |   called functions   |  local objects   |  local objects  |FP|
80288943Sdim// --+----------------------+------------------+-----------------+--+-------
81288943Sdim//    <-    size known    -> <- size unknown -> <- size known  ->
82288943Sdim//
83288943Sdim// Low address                                                 High address
84288943Sdim//
85288943Sdim// <--- stack growth
86288943Sdim//
87288943Sdim//
88288943Sdim// - In any circumstances, the outgoing function arguments are always accessi-
89288943Sdim//   ble using the SP, and the incoming arguments are accessible using the FP.
90288943Sdim// - If the local objects are not aligned, they can always be accessed using
91288943Sdim//   the FP.
// - If there are no variable-sized objects, the local objects can always be
//   accessed using the SP, regardless of whether they are aligned or not.
//   (The alignment padding will be at the bottom of the stack (highest
//   address), and so the offset with respect to the SP will be known at
//   compile time.)
97288943Sdim//
98288943Sdim// The only complication occurs if there are both, local aligned objects, and
99288943Sdim// dynamically allocated (variable-sized) objects. The alignment pad will be
100288943Sdim// placed between the FP and the local objects, thus preventing the use of the
101288943Sdim// FP to access the local objects. At the same time, the variable-sized objects
102288943Sdim// will be between the SP and the local objects, thus introducing an unknown
103288943Sdim// distance from the SP to the locals.
104288943Sdim//
// To avoid this problem, a new register is created that holds the aligned
// address of the bottom of the stack, referred to in the sources as AP
// (aligned pointer). The AP will be equal to "FP-p", where "p" is the
// smallest pad that aligns AP to the required boundary (a maximum of the
// alignments of all stack objects, fixed- and variable-sized). All local
// objects[1] will then use AP as the base pointer.
111288943Sdim// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
112288943Sdim// their name from being allocated at fixed locations on the stack, relative
113288943Sdim// to the FP. In the presence of dynamic allocation and local alignment, such
114288943Sdim// objects can only be accessed through the FP.
115288943Sdim//
116288943Sdim// Illustration of the AP:
117288943Sdim//                                                                FP --+
118288943Sdim//                                                                     |
119288943Sdim// ---------------+---------------------+-----+-----------------------++-+--
120288943Sdim//   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
121288943Sdim//   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
122288943Sdim// ---------------+---------------------+-----+-----------------+-----+--+--
123288943Sdim//                                      |<-- Multiple of the -->|
124288943Sdim//                                           stack alignment    +-- AP
125288943Sdim//
126288943Sdim// The AP is set up at the beginning of the function. Since it is not a dedi-
127288943Sdim// cated (reserved) register, it needs to be kept live throughout the function
128288943Sdim// to be available as the base register for local object accesses.
// Normally, the address of a stack object is obtained by a pseudo-instruction
// PS_fi. To access local objects with the AP register present, a different
// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra
// argument compared to PS_fi: the first input register is the AP register.
// This keeps the register live between its definition and its uses.
134288943Sdim
135314564Sdim// The AP register is originally set up using pseudo-instruction PS_aligna:
136314564Sdim//   AP = PS_aligna A
137288943Sdim// where
138288943Sdim//   A  - required stack alignment
139288943Sdim// The alignment value must be the maximum of all alignments required by
140288943Sdim// any stack object.
141288943Sdim
142314564Sdim// The dynamic allocation uses a pseudo-instruction PS_alloca:
143314564Sdim//   Rd = PS_alloca Rs, A
144288943Sdim// where
145288943Sdim//   Rd - address of the allocated space
146288943Sdim//   Rs - minimum size (the actual allocated can be larger to accommodate
147288943Sdim//        alignment)
148288943Sdim//   A  - required alignment
149288943Sdim
150234285Sdimusing namespace llvm;
151234285Sdim
152288943Sdimstatic cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
153288943Sdim    cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
154234285Sdim
155309124Sdimstatic cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots",
156288943Sdim    cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
157288943Sdim    cl::ZeroOrMore);
158234285Sdim
159288943Sdimstatic cl::opt<int> SpillFuncThreshold("spill-func-threshold",
160288943Sdim    cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
161288943Sdim    cl::init(6), cl::ZeroOrMore);
162234285Sdim
163288943Sdimstatic cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
164288943Sdim    cl::Hidden, cl::desc("Specify Os spill func threshold"),
165288943Sdim    cl::init(1), cl::ZeroOrMore);
166234285Sdim
167309124Sdimstatic cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer",
168309124Sdim    cl::Hidden, cl::desc("Enable runtime checks for stack overflow."),
169309124Sdim    cl::init(false), cl::ZeroOrMore);
170309124Sdim
171288943Sdimstatic cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
172288943Sdim    cl::init(true), cl::Hidden, cl::ZeroOrMore,
173288943Sdim    cl::desc("Enable stack frame shrink wrapping"));
174234285Sdim
175314564Sdimstatic cl::opt<unsigned> ShrinkLimit("shrink-frame-limit",
176314564Sdim    cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore,
177314564Sdim    cl::desc("Max count of stack frame shrink-wraps"));
178234285Sdim
179314564Sdimstatic cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
180314564Sdim    cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
181314564Sdim    cl::init(false), cl::ZeroOrMore);
182314564Sdim
183321369Sdimstatic cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
184321369Sdim    cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
185296417Sdim
186309124Sdimstatic cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
187309124Sdim    cl::init(true), cl::desc("Optimize spill slots"));
188296417Sdim
189314564Sdim#ifndef NDEBUG
190314564Sdimstatic cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,
191314564Sdim    cl::init(std::numeric_limits<unsigned>::max()));
192314564Sdimstatic unsigned SpillOptCount = 0;
193314564Sdim#endif
194309124Sdim
195296417Sdimnamespace llvm {
196314564Sdim
197296417Sdim  void initializeHexagonCallFrameInformationPass(PassRegistry&);
198296417Sdim  FunctionPass *createHexagonCallFrameInformation();
199296417Sdim
200314564Sdim} // end namespace llvm
201314564Sdim
202288943Sdimnamespace {
203314564Sdim
204296417Sdim  class HexagonCallFrameInformation : public MachineFunctionPass {
205296417Sdim  public:
206296417Sdim    static char ID;
207314564Sdim
208296417Sdim    HexagonCallFrameInformation() : MachineFunctionPass(ID) {
209296417Sdim      PassRegistry &PR = *PassRegistry::getPassRegistry();
210296417Sdim      initializeHexagonCallFrameInformationPass(PR);
211296417Sdim    }
212314564Sdim
213296417Sdim    bool runOnMachineFunction(MachineFunction &MF) override;
214314564Sdim
215309124Sdim    MachineFunctionProperties getRequiredProperties() const override {
216309124Sdim      return MachineFunctionProperties().set(
217314564Sdim          MachineFunctionProperties::Property::NoVRegs);
218309124Sdim    }
219296417Sdim  };
220296417Sdim
221296417Sdim  char HexagonCallFrameInformation::ID = 0;
222296417Sdim
223314564Sdim} // end anonymous namespace
224314564Sdim
225296417Sdimbool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
226296417Sdim  auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
227360784Sdim  bool NeedCFI = MF.needsFrameMoves();
228296417Sdim
229296417Sdim  if (!NeedCFI)
230296417Sdim    return false;
231296417Sdim  HFI.insertCFIInstructions(MF);
232296417Sdim  return true;
233296417Sdim}
234296417Sdim
235296417SdimINITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
236296417Sdim                "Hexagon call frame information", false, false)
237296417Sdim
238296417SdimFunctionPass *llvm::createHexagonCallFrameInformation() {
239296417Sdim  return new HexagonCallFrameInformation();
240296417Sdim}
241296417Sdim
242314564Sdim/// Map a register pair Reg to the subregister that has the greater "number",
243314564Sdim/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
244314564Sdimstatic unsigned getMax32BitSubRegister(unsigned Reg,
245314564Sdim                                       const TargetRegisterInfo &TRI,
246314564Sdim                                       bool hireg = true) {
247288943Sdim    if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
248288943Sdim      return Reg;
249234285Sdim
250288943Sdim    unsigned RegNo = 0;
251288943Sdim    for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
252288943Sdim      if (hireg) {
253288943Sdim        if (*SubRegs > RegNo)
254288943Sdim          RegNo = *SubRegs;
255288943Sdim      } else {
256288943Sdim        if (!RegNo || *SubRegs < RegNo)
257288943Sdim          RegNo = *SubRegs;
258288943Sdim      }
259288943Sdim    }
260288943Sdim    return RegNo;
261314564Sdim}
262288943Sdim
263314564Sdim/// Returns the callee saved register with the largest id in the vector.
264314564Sdimstatic unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
265314564Sdim                                     const TargetRegisterInfo &TRI) {
266309124Sdim    static_assert(Hexagon::R1 > 0,
267309124Sdim                  "Assume physical registers are encoded as positive integers");
268288943Sdim    if (CSI.empty())
269288943Sdim      return 0;
270288943Sdim
271288943Sdim    unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
272288943Sdim    for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
273288943Sdim      unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
274288943Sdim      if (Reg > Max)
275288943Sdim        Max = Reg;
276288943Sdim    }
277288943Sdim    return Max;
278314564Sdim}
279288943Sdim
280314564Sdim/// Checks if the basic block contains any instruction that needs a stack
281314564Sdim/// frame to be already in place.
282314564Sdimstatic bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
283314564Sdim                            const HexagonRegisterInfo &HRI) {
284288943Sdim    for (auto &I : MBB) {
285288943Sdim      const MachineInstr *MI = &I;
286288943Sdim      if (MI->isCall())
287288943Sdim        return true;
288288943Sdim      unsigned Opc = MI->getOpcode();
289288943Sdim      switch (Opc) {
290314564Sdim        case Hexagon::PS_alloca:
291314564Sdim        case Hexagon::PS_aligna:
292288943Sdim          return true;
293288943Sdim        default:
294288943Sdim          break;
295288943Sdim      }
296288943Sdim      // Check individual operands.
297288943Sdim      for (const MachineOperand &MO : MI->operands()) {
298288943Sdim        // While the presence of a frame index does not prove that a stack
299288943Sdim        // frame will be required, all frame indexes should be within alloc-
300288943Sdim        // frame/deallocframe. Otherwise, the code that translates a frame
301288943Sdim        // index into an offset would have to be aware of the placement of
302288943Sdim        // the frame creation/destruction instructions.
303288943Sdim        if (MO.isFI())
304288943Sdim          return true;
305321369Sdim        if (MO.isReg()) {
306360784Sdim          Register R = MO.getReg();
307321369Sdim          // Virtual registers will need scavenging, which then may require
308321369Sdim          // a stack slot.
309360784Sdim          if (Register::isVirtualRegister(R))
310321369Sdim            return true;
311321369Sdim          for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
312321369Sdim            if (CSR[*S])
313321369Sdim              return true;
314288943Sdim          continue;
315321369Sdim        }
316321369Sdim        if (MO.isRegMask()) {
317321369Sdim          // A regmask would normally have all callee-saved registers marked
318321369Sdim          // as preserved, so this check would not be needed, but in case of
319321369Sdim          // ever having other regmasks (for other calling conventions),
320321369Sdim          // make sure they would be processed correctly.
321321369Sdim          const uint32_t *BM = MO.getRegMask();
322321369Sdim          for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
323321369Sdim            unsigned R = x;
324321369Sdim            // If this regmask does not preserve a CSR, a frame will be needed.
325321369Sdim            if (!(BM[R/32] & (1u << (R%32))))
326321369Sdim              return true;
327321369Sdim          }
328321369Sdim        }
329288943Sdim      }
330288943Sdim    }
331288943Sdim    return false;
332314564Sdim}
333288943Sdim
334288943Sdim  /// Returns true if MBB has a machine instructions that indicates a tail call
335288943Sdim  /// in the block.
336314564Sdimstatic bool hasTailCall(const MachineBasicBlock &MBB) {
337288943Sdim    MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
338327952Sdim    if (I == MBB.end())
339327952Sdim      return false;
340288943Sdim    unsigned RetOpc = I->getOpcode();
341314564Sdim    return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;
342314564Sdim}
343288943Sdim
344314564Sdim/// Returns true if MBB contains an instruction that returns.
345314564Sdimstatic bool hasReturn(const MachineBasicBlock &MBB) {
346288943Sdim    for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
347288943Sdim      if (I->isReturn())
348288943Sdim        return true;
349288943Sdim    return false;
350314564Sdim}
351309124Sdim
352314564Sdim/// Returns the "return" instruction from this block, or nullptr if there
353314564Sdim/// isn't any.
354314564Sdimstatic MachineInstr *getReturn(MachineBasicBlock &MBB) {
355309124Sdim    for (auto &I : MBB)
356309124Sdim      if (I.isReturn())
357309124Sdim        return &I;
358309124Sdim    return nullptr;
359314564Sdim}
360309124Sdim
361314564Sdimstatic bool isRestoreCall(unsigned Opc) {
362309124Sdim    switch (Opc) {
363309124Sdim      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
364309124Sdim      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
365314564Sdim      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:
366314564Sdim      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:
367314564Sdim      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:
368314564Sdim      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:
369309124Sdim      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
370309124Sdim      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
371309124Sdim        return true;
372309124Sdim    }
373309124Sdim    return false;
374314564Sdim}
375309124Sdim
376314564Sdimstatic inline bool isOptNone(const MachineFunction &MF) {
377353358Sdim    return MF.getFunction().hasOptNone() ||
378309124Sdim           MF.getTarget().getOptLevel() == CodeGenOpt::None;
379314564Sdim}
380309124Sdim
381314564Sdimstatic inline bool isOptSize(const MachineFunction &MF) {
382327952Sdim    const Function &F = MF.getFunction();
383353358Sdim    return F.hasOptSize() && !F.hasMinSize();
384314564Sdim}
385309124Sdim
386314564Sdimstatic inline bool isMinSize(const MachineFunction &MF) {
387353358Sdim    return MF.getFunction().hasMinSize();
388234285Sdim}
389234285Sdim
390288943Sdim/// Implements shrink-wrapping of the stack frame. By default, stack frame
391288943Sdim/// is created in the function entry block, and is cleaned up in every block
392288943Sdim/// that returns. This function finds alternate blocks: one for the frame
393288943Sdim/// setup (prolog) and one for the cleanup (epilog).
394288943Sdimvoid HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
395288943Sdim      MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
396288943Sdim  static unsigned ShrinkCounter = 0;
397288943Sdim
398288943Sdim  if (ShrinkLimit.getPosition()) {
399288943Sdim    if (ShrinkCounter >= ShrinkLimit)
400288943Sdim      return;
401288943Sdim    ShrinkCounter++;
402288943Sdim  }
403288943Sdim
404327952Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
405288943Sdim
406288943Sdim  MachineDominatorTree MDT;
407288943Sdim  MDT.runOnMachineFunction(MF);
408288943Sdim  MachinePostDominatorTree MPT;
409288943Sdim  MPT.runOnMachineFunction(MF);
410288943Sdim
411327952Sdim  using UnsignedMap = DenseMap<unsigned, unsigned>;
412327952Sdim  using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
413327952Sdim
414288943Sdim  UnsignedMap RPO;
415288943Sdim  RPOTType RPOT(&MF);
416288943Sdim  unsigned RPON = 0;
417288943Sdim  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
418288943Sdim    RPO[(*I)->getNumber()] = RPON++;
419288943Sdim
420288943Sdim  // Don't process functions that have loops, at least for now. Placement
421288943Sdim  // of prolog and epilog must take loop structure into account. For simpli-
422288943Sdim  // city don't do it right now.
423288943Sdim  for (auto &I : MF) {
424288943Sdim    unsigned BN = RPO[I.getNumber()];
425288943Sdim    for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
426288943Sdim      // If found a back-edge, return.
427288943Sdim      if (RPO[(*SI)->getNumber()] <= BN)
428288943Sdim        return;
429288943Sdim    }
430288943Sdim  }
431288943Sdim
432288943Sdim  // Collect the set of blocks that need a stack frame to execute. Scan
433288943Sdim  // each block for uses/defs of callee-saved registers, calls, etc.
434288943Sdim  SmallVector<MachineBasicBlock*,16> SFBlocks;
435288943Sdim  BitVector CSR(Hexagon::NUM_TARGET_REGS);
436288943Sdim  for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
437309124Sdim    for (MCSubRegIterator S(*P, &HRI, true); S.isValid(); ++S)
438309124Sdim      CSR[*S] = true;
439288943Sdim
440288943Sdim  for (auto &I : MF)
441309124Sdim    if (needsStackFrame(I, CSR, HRI))
442288943Sdim      SFBlocks.push_back(&I);
443288943Sdim
444341825Sdim  LLVM_DEBUG({
445288943Sdim    dbgs() << "Blocks needing SF: {";
446288943Sdim    for (auto &B : SFBlocks)
447327952Sdim      dbgs() << " " << printMBBReference(*B);
448288943Sdim    dbgs() << " }\n";
449288943Sdim  });
450288943Sdim  // No frame needed?
451288943Sdim  if (SFBlocks.empty())
452288943Sdim    return;
453288943Sdim
454288943Sdim  // Pick a common dominator and a common post-dominator.
455288943Sdim  MachineBasicBlock *DomB = SFBlocks[0];
456288943Sdim  for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
457288943Sdim    DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
458288943Sdim    if (!DomB)
459288943Sdim      break;
460288943Sdim  }
461288943Sdim  MachineBasicBlock *PDomB = SFBlocks[0];
462288943Sdim  for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
463288943Sdim    PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
464288943Sdim    if (!PDomB)
465288943Sdim      break;
466288943Sdim  }
467341825Sdim  LLVM_DEBUG({
468327952Sdim    dbgs() << "Computed dom block: ";
469327952Sdim    if (DomB)
470327952Sdim      dbgs() << printMBBReference(*DomB);
471327952Sdim    else
472327952Sdim      dbgs() << "<null>";
473327952Sdim    dbgs() << ", computed pdom block: ";
474327952Sdim    if (PDomB)
475327952Sdim      dbgs() << printMBBReference(*PDomB);
476327952Sdim    else
477327952Sdim      dbgs() << "<null>";
478288943Sdim    dbgs() << "\n";
479288943Sdim  });
480288943Sdim  if (!DomB || !PDomB)
481288943Sdim    return;
482288943Sdim
483288943Sdim  // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
484288943Sdim  if (!MDT.dominates(DomB, PDomB)) {
485341825Sdim    LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
486288943Sdim    return;
487288943Sdim  }
488288943Sdim  if (!MPT.dominates(PDomB, DomB)) {
489341825Sdim    LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
490288943Sdim    return;
491288943Sdim  }
492288943Sdim
493288943Sdim  // Finally, everything seems right.
494288943Sdim  PrologB = DomB;
495288943Sdim  EpilogB = PDomB;
496288943Sdim}
497288943Sdim
498288943Sdim/// Perform most of the PEI work here:
499288943Sdim/// - saving/restoring of the callee-saved registers,
500288943Sdim/// - stack frame creation and destruction.
501288943Sdim/// Normally, this work is distributed among various functions, but doing it
502288943Sdim/// in one place allows shrink-wrapping of the stack frame.
503288943Sdimvoid HexagonFrameLowering::emitPrologue(MachineFunction &MF,
504288943Sdim                                        MachineBasicBlock &MBB) const {
505327952Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
506288943Sdim
507314564Sdim  MachineFrameInfo &MFI = MF.getFrameInfo();
508314564Sdim  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
509288943Sdim
510288943Sdim  MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
511288943Sdim  if (EnableShrinkWrapping)
512288943Sdim    findShrunkPrologEpilog(MF, PrologB, EpilogB);
513288943Sdim
514309124Sdim  bool PrologueStubs = false;
515309124Sdim  insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
516309124Sdim  insertPrologueInBlock(*PrologB, PrologueStubs);
517314564Sdim  updateEntryPaths(MF, *PrologB);
518288943Sdim
519288943Sdim  if (EpilogB) {
520288943Sdim    insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
521288943Sdim    insertEpilogueInBlock(*EpilogB);
522288943Sdim  } else {
523288943Sdim    for (auto &B : MF)
524296417Sdim      if (B.isReturnBlock())
525288943Sdim        insertCSRRestoresInBlock(B, CSI, HRI);
526288943Sdim
527288943Sdim    for (auto &B : MF)
528296417Sdim      if (B.isReturnBlock())
529288943Sdim        insertEpilogueInBlock(B);
530309124Sdim
531309124Sdim    for (auto &B : MF) {
532309124Sdim      if (B.empty())
533309124Sdim        continue;
534309124Sdim      MachineInstr *RetI = getReturn(B);
535309124Sdim      if (!RetI || isRestoreCall(RetI->getOpcode()))
536309124Sdim        continue;
537309124Sdim      for (auto &R : CSI)
538309124Sdim        RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
539309124Sdim    }
540288943Sdim  }
541309124Sdim
542309124Sdim  if (EpilogB) {
543309124Sdim    // If there is an epilog block, it may not have a return instruction.
544309124Sdim    // In such case, we need to add the callee-saved registers as live-ins
545309124Sdim    // in all blocks on all paths from the epilog to any return block.
546314564Sdim    unsigned MaxBN = MF.getNumBlockIDs();
547309124Sdim    BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
548314564Sdim    updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);
549309124Sdim  }
550288943Sdim}
551288943Sdim
552344779Sdim/// Returns true if the target can safely skip saving callee-saved registers
553344779Sdim/// for noreturn nounwind functions.
554344779Sdimbool HexagonFrameLowering::enableCalleeSaveSkip(
555344779Sdim    const MachineFunction &MF) const {
556344779Sdim  const auto &F = MF.getFunction();
557344779Sdim  assert(F.hasFnAttribute(Attribute::NoReturn) &&
558344779Sdim         F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
559344779Sdim         !F.getFunction().hasFnAttribute(Attribute::UWTable));
560344779Sdim  (void)F;
561344779Sdim
562344779Sdim  // No need to save callee saved registers if the function does not return.
563344779Sdim  return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
564344779Sdim}
565344779Sdim
566344779Sdim// Helper function used to determine when to eliminate the stack frame for
567344779Sdim// functions marked as noreturn and when the noreturn-stack-elim options are
568344779Sdim// specified. When both these conditions are true, then a FP may not be needed
569344779Sdim// if the function makes a call. It is very similar to enableCalleeSaveSkip,
570344779Sdim// but it used to check if the allocframe can be eliminated as well.
571344779Sdimstatic bool enableAllocFrameElim(const MachineFunction &MF) {
572344779Sdim  const auto &F = MF.getFunction();
573344779Sdim  const auto &MFI = MF.getFrameInfo();
574344779Sdim  const auto &HST = MF.getSubtarget<HexagonSubtarget>();
575344779Sdim  assert(!MFI.hasVarSizedObjects() &&
576344779Sdim         !HST.getRegisterInfo()->needsStackRealignment(MF));
577344779Sdim  return F.hasFnAttribute(Attribute::NoReturn) &&
578344779Sdim    F.hasFnAttribute(Attribute::NoUnwind) &&
579344779Sdim    !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&
580344779Sdim    MFI.getStackSize() == 0;
581344779Sdim}
582344779Sdim
583309124Sdimvoid HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
584309124Sdim      bool PrologueStubs) const {
585288943Sdim  MachineFunction &MF = *MBB.getParent();
586314564Sdim  MachineFrameInfo &MFI = MF.getFrameInfo();
587296417Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
588288943Sdim  auto &HII = *HST.getInstrInfo();
589288943Sdim  auto &HRI = *HST.getRegisterInfo();
590234285Sdim
591314564Sdim  unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
592288943Sdim
593288943Sdim  // Calculate the total stack frame size.
594234285Sdim  // Get the number of bytes to allocate from the FrameInfo.
595314564Sdim  unsigned FrameSize = MFI.getStackSize();
596288943Sdim  // Round up the max call frame size to the max alignment on the stack.
597314564Sdim  unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);
598314564Sdim  MFI.setMaxCallFrameSize(MaxCFA);
599234285Sdim
600309124Sdim  FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
601314564Sdim  MFI.setStackSize(FrameSize);
602288943Sdim
603288943Sdim  bool AlignStack = (MaxAlign > getStackAlignment());
604288943Sdim
605288943Sdim  // Get the number of bytes to allocate from the FrameInfo.
606314564Sdim  unsigned NumBytes = MFI.getStackSize();
607288943Sdim  unsigned SP = HRI.getStackRegister();
608314564Sdim  unsigned MaxCF = MFI.getMaxCallFrameSize();
609234285Sdim  MachineBasicBlock::iterator InsertPt = MBB.begin();
610234285Sdim
611314564Sdim  SmallVector<MachineInstr *, 4> AdjustRegs;
612314564Sdim  for (auto &MBB : MF)
613314564Sdim    for (auto &MI : MBB)
614314564Sdim      if (MI.getOpcode() == Hexagon::PS_alloca)
615314564Sdim        AdjustRegs.push_back(&MI);
616234285Sdim
617288943Sdim  for (auto MI : AdjustRegs) {
618314564Sdim    assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");
619288943Sdim    expandAlloca(MI, HII, SP, MaxCF);
620288943Sdim    MI->eraseFromParent();
621234285Sdim  }
622234285Sdim
623321369Sdim  DebugLoc dl = MBB.findDebugLoc(InsertPt);
624234285Sdim
625321369Sdim  if (hasFP(MF)) {
626321369Sdim    insertAllocframe(MBB, InsertPt, NumBytes);
627321369Sdim    if (AlignStack) {
628321369Sdim      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
629321369Sdim          .addReg(SP)
630321369Sdim          .addImm(-int64_t(MaxAlign));
631321369Sdim    }
632321369Sdim    // If the stack-checking is enabled, and we spilled the callee-saved
633321369Sdim    // registers inline (i.e. did not use a spill function), then call
634321369Sdim    // the stack checker directly.
635321369Sdim    if (EnableStackOVFSanitizer && !PrologueStubs)
636321369Sdim      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
637321369Sdim             .addExternalSymbol("__runtime_stack_check");
638321369Sdim  } else if (NumBytes > 0) {
639321369Sdim    assert(alignTo(NumBytes, 8) == NumBytes);
640321369Sdim    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
641288943Sdim      .addReg(SP)
642321369Sdim      .addImm(-int(NumBytes));
643234285Sdim  }
644249423Sdim}
645234285Sdim
646288943Sdimvoid HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
647288943Sdim  MachineFunction &MF = *MBB.getParent();
648314564Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
649288943Sdim  auto &HII = *HST.getInstrInfo();
650288943Sdim  auto &HRI = *HST.getRegisterInfo();
651288943Sdim  unsigned SP = HRI.getStackRegister();
652261991Sdim
653321369Sdim  MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
654321369Sdim  DebugLoc dl = MBB.findDebugLoc(InsertPt);
655321369Sdim
656321369Sdim  if (!hasFP(MF)) {
657321369Sdim    MachineFrameInfo &MFI = MF.getFrameInfo();
658321369Sdim    if (unsigned NumBytes = MFI.getStackSize()) {
659321369Sdim      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
660321369Sdim        .addReg(SP)
661321369Sdim        .addImm(NumBytes);
662321369Sdim    }
663321369Sdim    return;
664321369Sdim  }
665321369Sdim
666309124Sdim  MachineInstr *RetI = getReturn(MBB);
667288943Sdim  unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
668261991Sdim
669288943Sdim  // Handle EH_RETURN.
670288943Sdim  if (RetOpc == Hexagon::EH_RETURN_JMPR) {
671327952Sdim    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
672327952Sdim        .addDef(Hexagon::D15)
673327952Sdim        .addReg(Hexagon::R30);
674321369Sdim    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
675288943Sdim        .addReg(SP)
676288943Sdim        .addReg(Hexagon::R28);
677288943Sdim    return;
678288943Sdim  }
679288943Sdim
680288943Sdim  // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
681288943Sdim  // frame instruction if we encounter it.
682309124Sdim  if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
683314564Sdim      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||
684314564Sdim      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||
685314564Sdim      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) {
686288943Sdim    MachineBasicBlock::iterator It = RetI;
687288943Sdim    ++It;
688288943Sdim    // Delete all instructions after the RESTORE (except labels).
689288943Sdim    while (It != MBB.end()) {
690288943Sdim      if (!It->isLabel())
691288943Sdim        It = MBB.erase(It);
692288943Sdim      else
693288943Sdim        ++It;
694234285Sdim    }
695288943Sdim    return;
696234285Sdim  }
697288943Sdim
698288943Sdim  // It is possible that the restoring code is a call to a library function.
699288943Sdim  // All of the restore* functions include "deallocframe", so we need to make
700288943Sdim  // sure that we don't add an extra one.
701288943Sdim  bool NeedsDeallocframe = true;
702288943Sdim  if (!MBB.empty() && InsertPt != MBB.begin()) {
703288943Sdim    MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
704288943Sdim    unsigned COpc = PrevIt->getOpcode();
705309124Sdim    if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
706314564Sdim        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||
707314564Sdim        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
708314564Sdim        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
709314564Sdim        COpc == Hexagon::PS_call_nr || COpc == Hexagon::PS_callr_nr)
710288943Sdim      NeedsDeallocframe = false;
711288943Sdim  }
712288943Sdim
713288943Sdim  if (!NeedsDeallocframe)
714288943Sdim    return;
715314564Sdim  // If the returning instruction is PS_jmpret, replace it with dealloc_return,
716288943Sdim  // otherwise just add deallocframe. The function could be returning via a
717288943Sdim  // tail call.
718314564Sdim  if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
719327952Sdim    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
720327952Sdim      .addDef(Hexagon::D15)
721327952Sdim      .addReg(Hexagon::R30);
722288943Sdim    return;
723288943Sdim  }
724288943Sdim  unsigned NewOpc = Hexagon::L4_return;
725327952Sdim  MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
726327952Sdim      .addDef(Hexagon::D15)
727327952Sdim      .addReg(Hexagon::R30);
728288943Sdim  // Transfer the function live-out registers.
729309124Sdim  NewI->copyImplicitOps(MF, *RetI);
730288943Sdim  MBB.erase(RetI);
731234285Sdim}
732234285Sdim
733321369Sdimvoid HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
734321369Sdim      MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
735321369Sdim  MachineFunction &MF = *MBB.getParent();
736321369Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
737321369Sdim  auto &HII = *HST.getInstrInfo();
738321369Sdim  auto &HRI = *HST.getRegisterInfo();
739321369Sdim
740321369Sdim  // Check for overflow.
741321369Sdim  // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
742321369Sdim  const unsigned int ALLOCFRAME_MAX = 16384;
743321369Sdim
744321369Sdim  // Create a dummy memory operand to avoid allocframe from being treated as
745321369Sdim  // a volatile memory reference.
746321369Sdim  auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
747321369Sdim                                      MachineMemOperand::MOStore, 4, 4);
748321369Sdim
749321369Sdim  DebugLoc dl = MBB.findDebugLoc(InsertPt);
750327952Sdim  unsigned SP = HRI.getStackRegister();
751321369Sdim
752321369Sdim  if (NumBytes >= ALLOCFRAME_MAX) {
753321369Sdim    // Emit allocframe(#0).
754321369Sdim    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
755327952Sdim      .addDef(SP)
756327952Sdim      .addReg(SP)
757321369Sdim      .addImm(0)
758321369Sdim      .addMemOperand(MMO);
759321369Sdim
760321369Sdim    // Subtract the size from the stack pointer.
761321369Sdim    unsigned SP = HRI.getStackRegister();
762321369Sdim    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
763321369Sdim      .addReg(SP)
764321369Sdim      .addImm(-int(NumBytes));
765321369Sdim  } else {
766321369Sdim    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
767327952Sdim      .addDef(SP)
768327952Sdim      .addReg(SP)
769321369Sdim      .addImm(NumBytes)
770321369Sdim      .addMemOperand(MMO);
771321369Sdim  }
772321369Sdim}
773321369Sdim
774314564Sdimvoid HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
775314564Sdim      MachineBasicBlock &SaveB) const {
776314564Sdim  SetVector<unsigned> Worklist;
777288943Sdim
778314564Sdim  MachineBasicBlock &EntryB = MF.front();
779314564Sdim  Worklist.insert(EntryB.getNumber());
780314564Sdim
781314564Sdim  unsigned SaveN = SaveB.getNumber();
782314564Sdim  auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();
783314564Sdim
784314564Sdim  for (unsigned i = 0; i < Worklist.size(); ++i) {
785314564Sdim    unsigned BN = Worklist[i];
786314564Sdim    MachineBasicBlock &MBB = *MF.getBlockNumbered(BN);
787314564Sdim    for (auto &R : CSI)
788314564Sdim      if (!MBB.isLiveIn(R.getReg()))
789314564Sdim        MBB.addLiveIn(R.getReg());
790314564Sdim    if (BN != SaveN)
791314564Sdim      for (auto &SB : MBB.successors())
792314564Sdim        Worklist.insert(SB->getNumber());
793314564Sdim  }
794314564Sdim}
795314564Sdim
796309124Sdimbool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
797314564Sdim      MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,
798309124Sdim      BitVector &Path) const {
799309124Sdim  assert(MBB.getNumber() >= 0);
800309124Sdim  unsigned BN = MBB.getNumber();
801309124Sdim  if (Path[BN] || DoneF[BN])
802309124Sdim    return false;
803309124Sdim  if (DoneT[BN])
804309124Sdim    return true;
805309124Sdim
806314564Sdim  auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();
807309124Sdim
808309124Sdim  Path[BN] = true;
809309124Sdim  bool ReachedExit = false;
810309124Sdim  for (auto &SB : MBB.successors())
811309124Sdim    ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);
812309124Sdim
813309124Sdim  if (!MBB.empty() && MBB.back().isReturn()) {
814309124Sdim    // Add implicit uses of all callee-saved registers to the reached
815309124Sdim    // return instructions. This is to prevent the anti-dependency breaker
816309124Sdim    // from renaming these registers.
817309124Sdim    MachineInstr &RetI = MBB.back();
818309124Sdim    if (!isRestoreCall(RetI.getOpcode()))
819309124Sdim      for (auto &R : CSI)
820309124Sdim        RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
821309124Sdim    ReachedExit = true;
822309124Sdim  }
823309124Sdim
824309124Sdim  // We don't want to add unnecessary live-ins to the restore block: since
825309124Sdim  // the callee-saved registers are being defined in it, the entry of the
826309124Sdim  // restore block cannot be on the path from the definitions to any exit.
827314564Sdim  if (ReachedExit && &MBB != &RestoreB) {
828309124Sdim    for (auto &R : CSI)
829309124Sdim      if (!MBB.isLiveIn(R.getReg()))
830309124Sdim        MBB.addLiveIn(R.getReg());
831309124Sdim    DoneT[BN] = true;
832309124Sdim  }
833309124Sdim  if (!ReachedExit)
834309124Sdim    DoneF[BN] = true;
835309124Sdim
836309124Sdim  Path[BN] = false;
837309124Sdim  return ReachedExit;
838309124Sdim}
839309124Sdim
840314564Sdimstatic Optional<MachineBasicBlock::iterator>
841314564SdimfindCFILocation(MachineBasicBlock &B) {
842314564Sdim    // The CFI instructions need to be inserted right after allocframe.
843314564Sdim    // An exception to this is a situation where allocframe is bundled
844314564Sdim    // with a call: then the CFI instructions need to be inserted before
845314564Sdim    // the packet with the allocframe+call (in case the call throws an
846314564Sdim    // exception).
847314564Sdim    auto End = B.instr_end();
848309124Sdim
849314564Sdim    for (MachineInstr &I : B) {
850314564Sdim      MachineBasicBlock::iterator It = I.getIterator();
851314564Sdim      if (!I.isBundle()) {
852314564Sdim        if (I.getOpcode() == Hexagon::S2_allocframe)
853314564Sdim          return std::next(It);
854314564Sdim        continue;
855314564Sdim      }
856314564Sdim      // I is a bundle.
857314564Sdim      bool HasCall = false, HasAllocFrame = false;
858314564Sdim      auto T = It.getInstrIterator();
859314564Sdim      while (++T != End && T->isBundled()) {
860314564Sdim        if (T->getOpcode() == Hexagon::S2_allocframe)
861314564Sdim          HasAllocFrame = true;
862314564Sdim        else if (T->isCall())
863314564Sdim          HasCall = true;
864314564Sdim      }
865314564Sdim      if (HasAllocFrame)
866314564Sdim        return HasCall ? It : std::next(It);
867314564Sdim    }
868314564Sdim    return None;
869296417Sdim}
870296417Sdim
871296417Sdimvoid HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
872296417Sdim  for (auto &B : MF) {
873314564Sdim    auto At = findCFILocation(B);
874314564Sdim    if (At.hasValue())
875314564Sdim      insertCFIInstructionsAt(B, At.getValue());
876296417Sdim  }
877296417Sdim}
878296417Sdim
879296417Sdimvoid HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
880296417Sdim      MachineBasicBlock::iterator At) const {
881296417Sdim  MachineFunction &MF = *MBB.getParent();
882314564Sdim  MachineFrameInfo &MFI = MF.getFrameInfo();
883296417Sdim  MachineModuleInfo &MMI = MF.getMMI();
884296417Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
885296417Sdim  auto &HII = *HST.getInstrInfo();
886296417Sdim  auto &HRI = *HST.getRegisterInfo();
887296417Sdim
888296417Sdim  // If CFI instructions have debug information attached, something goes
889296417Sdim  // wrong with the final assembly generation: the prolog_end is placed
890296417Sdim  // in a wrong location.
891296417Sdim  DebugLoc DL;
892296417Sdim  const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
893296417Sdim
894296417Sdim  MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
895309124Sdim  bool HasFP = hasFP(MF);
896296417Sdim
897309124Sdim  if (HasFP) {
898296417Sdim    unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
899296417Sdim    unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
900296417Sdim
901296417Sdim    // Define CFA via an offset from the value of FP.
902296417Sdim    //
903296417Sdim    //  -8   -4    0 (SP)
904296417Sdim    // --+----+----+---------------------
905296417Sdim    //   | FP | LR |          increasing addresses -->
906296417Sdim    // --+----+----+---------------------
907296417Sdim    //   |         +-- Old SP (before allocframe)
908296417Sdim    //   +-- New FP (after allocframe)
909296417Sdim    //
910296417Sdim    // MCCFIInstruction::createDefCfa subtracts the offset from the register.
911296417Sdim    // MCCFIInstruction::createOffset takes the offset without sign change.
912296417Sdim    auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
913296417Sdim    BuildMI(MBB, At, DL, CFID)
914314564Sdim        .addCFIIndex(MF.addFrameInst(DefCfa));
915296417Sdim    // R31 (return addr) = CFA - 4
916296417Sdim    auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
917296417Sdim    BuildMI(MBB, At, DL, CFID)
918314564Sdim        .addCFIIndex(MF.addFrameInst(OffR31));
919296417Sdim    // R30 (frame ptr) = CFA - 8
920296417Sdim    auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
921296417Sdim    BuildMI(MBB, At, DL, CFID)
922314564Sdim        .addCFIIndex(MF.addFrameInst(OffR30));
923296417Sdim  }
924296417Sdim
925296417Sdim  static unsigned int RegsToMove[] = {
926296417Sdim    Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
927296417Sdim    Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
928296417Sdim    Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
929296417Sdim    Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
930296417Sdim    Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
931296417Sdim    Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
932296417Sdim    Hexagon::NoRegister
933296417Sdim  };
934296417Sdim
935309124Sdim  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
936296417Sdim
937296417Sdim  for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
938296417Sdim    unsigned Reg = RegsToMove[i];
939296417Sdim    auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
940296417Sdim      return C.getReg() == Reg;
941296417Sdim    };
942314564Sdim    auto F = find_if(CSI, IfR);
943296417Sdim    if (F == CSI.end())
944296417Sdim      continue;
945296417Sdim
946309124Sdim    int64_t Offset;
947309124Sdim    if (HasFP) {
948309124Sdim      // If the function has a frame pointer (i.e. has an allocframe),
949309124Sdim      // then the CFA has been defined in terms of FP. Any offsets in
950309124Sdim      // the following CFI instructions have to be defined relative
951309124Sdim      // to FP, which points to the bottom of the stack frame.
952309124Sdim      // The function getFrameIndexReference can still choose to use SP
953309124Sdim      // for the offset calculation, so we cannot simply call it here.
954309124Sdim      // Instead, get the offset (relative to the FP) directly.
955309124Sdim      Offset = MFI.getObjectOffset(F->getFrameIdx());
956309124Sdim    } else {
957309124Sdim      unsigned FrameReg;
958309124Sdim      Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
959309124Sdim    }
960296417Sdim    // Subtract 8 to make room for R30 and R31, which are added above.
961309124Sdim    Offset -= 8;
962296417Sdim
963296417Sdim    if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
964296417Sdim      unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
965296417Sdim      auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
966296417Sdim                                                   Offset);
967296417Sdim      BuildMI(MBB, At, DL, CFID)
968314564Sdim          .addCFIIndex(MF.addFrameInst(OffReg));
969296417Sdim    } else {
970296417Sdim      // Split the double regs into subregs, and generate appropriate
971296417Sdim      // cfi_offsets.
972296417Sdim      // The only reason, we are split double regs is, llvm-mc does not
973296417Sdim      // understand paired registers for cfi_offset.
974296417Sdim      // Eg .cfi_offset r1:0, -64
975296417Sdim
976360784Sdim      Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
977360784Sdim      Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
978296417Sdim      unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
979296417Sdim      unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
980296417Sdim      auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
981296417Sdim                                                  Offset+4);
982296417Sdim      BuildMI(MBB, At, DL, CFID)
983314564Sdim          .addCFIIndex(MF.addFrameInst(OffHi));
984296417Sdim      auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
985296417Sdim                                                  Offset);
986296417Sdim      BuildMI(MBB, At, DL, CFID)
987314564Sdim          .addCFIIndex(MF.addFrameInst(OffLo));
988296417Sdim    }
989296417Sdim  }
990296417Sdim}
991296417Sdim
992234285Sdimbool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
993327952Sdim  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
994321369Sdim    return false;
995321369Sdim
996314564Sdim  auto &MFI = MF.getFrameInfo();
997296417Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
998296417Sdim  bool HasExtraAlign = HRI.needsStackRealignment(MF);
999296417Sdim  bool HasAlloca = MFI.hasVarSizedObjects();
1000296417Sdim
1001296417Sdim  // Insert ALLOCFRAME if we need to or at -O0 for the debugger.  Think
1002296417Sdim  // that this shouldn't be required, but doing so now because gcc does and
1003296417Sdim  // gdb can't break at the start of the function without it.  Will remove if
1004296417Sdim  // this turns out to be a gdb bug.
1005296417Sdim  //
1006296417Sdim  if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
1007296417Sdim    return true;
1008296417Sdim
1009296417Sdim  // By default we want to use SP (since it's always there). FP requires
1010296417Sdim  // some setup (i.e. ALLOCFRAME).
1011321369Sdim  // Both, alloca and stack alignment modify the stack pointer by an
1012321369Sdim  // undetermined value, so we need to save it at the entry to the function
1013321369Sdim  // (i.e. use allocframe).
1014321369Sdim  if (HasAlloca || HasExtraAlign)
1015296417Sdim    return true;
1016296417Sdim
1017296417Sdim  if (MFI.getStackSize() > 0) {
1018321369Sdim    // If FP-elimination is disabled, we have to use FP at this point.
1019321369Sdim    const TargetMachine &TM = MF.getTarget();
1020321369Sdim    if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
1021296417Sdim      return true;
1022321369Sdim    if (EnableStackOVFSanitizer)
1023321369Sdim      return true;
1024296417Sdim  }
1025296417Sdim
1026321369Sdim  const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1027344779Sdim  if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
1028296417Sdim    return true;
1029296417Sdim
1030296417Sdim  return false;
1031234285Sdim}
1032234285Sdim
// Kind of callee-saved register save/restore library routine to use.
enum SpillKind {
  // Save registers to memory (__save_*).
  SK_ToMem,
  // Restore registers from memory and deallocate the frame
  // (__restore_*_and_deallocframe).
  SK_FromMem,
  // Same as SK_FromMem, for use before a tail call
  // (__restore_*_and_deallocframe_before_tailcall).
  SK_FromMemTailcall
};
1038288943Sdim
1039309124Sdimstatic const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
1040309124Sdim      bool Stkchk = false) {
1041288943Sdim  const char * V4SpillToMemoryFunctions[] = {
1042288943Sdim    "__save_r16_through_r17",
1043288943Sdim    "__save_r16_through_r19",
1044288943Sdim    "__save_r16_through_r21",
1045288943Sdim    "__save_r16_through_r23",
1046288943Sdim    "__save_r16_through_r25",
1047288943Sdim    "__save_r16_through_r27" };
1048288943Sdim
1049309124Sdim  const char * V4SpillToMemoryStkchkFunctions[] = {
1050309124Sdim    "__save_r16_through_r17_stkchk",
1051309124Sdim    "__save_r16_through_r19_stkchk",
1052309124Sdim    "__save_r16_through_r21_stkchk",
1053309124Sdim    "__save_r16_through_r23_stkchk",
1054309124Sdim    "__save_r16_through_r25_stkchk",
1055309124Sdim    "__save_r16_through_r27_stkchk" };
1056309124Sdim
1057288943Sdim  const char * V4SpillFromMemoryFunctions[] = {
1058288943Sdim    "__restore_r16_through_r17_and_deallocframe",
1059288943Sdim    "__restore_r16_through_r19_and_deallocframe",
1060288943Sdim    "__restore_r16_through_r21_and_deallocframe",
1061288943Sdim    "__restore_r16_through_r23_and_deallocframe",
1062288943Sdim    "__restore_r16_through_r25_and_deallocframe",
1063288943Sdim    "__restore_r16_through_r27_and_deallocframe" };
1064288943Sdim
1065288943Sdim  const char * V4SpillFromMemoryTailcallFunctions[] = {
1066288943Sdim    "__restore_r16_through_r17_and_deallocframe_before_tailcall",
1067288943Sdim    "__restore_r16_through_r19_and_deallocframe_before_tailcall",
1068288943Sdim    "__restore_r16_through_r21_and_deallocframe_before_tailcall",
1069288943Sdim    "__restore_r16_through_r23_and_deallocframe_before_tailcall",
1070288943Sdim    "__restore_r16_through_r25_and_deallocframe_before_tailcall",
1071288943Sdim    "__restore_r16_through_r27_and_deallocframe_before_tailcall"
1072288943Sdim  };
1073288943Sdim
1074288943Sdim  const char **SpillFunc = nullptr;
1075288943Sdim
1076288943Sdim  switch(SpillType) {
1077288943Sdim  case SK_ToMem:
1078309124Sdim    SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions
1079309124Sdim                       : V4SpillToMemoryFunctions;
1080288943Sdim    break;
1081288943Sdim  case SK_FromMem:
1082288943Sdim    SpillFunc = V4SpillFromMemoryFunctions;
1083288943Sdim    break;
1084288943Sdim  case SK_FromMemTailcall:
1085288943Sdim    SpillFunc = V4SpillFromMemoryTailcallFunctions;
1086288943Sdim    break;
1087288943Sdim  }
1088288943Sdim  assert(SpillFunc && "Unknown spill kind");
1089288943Sdim
1090288943Sdim  // Spill all callee-saved registers up to the highest register used.
1091288943Sdim  switch (MaxReg) {
1092288943Sdim  case Hexagon::R17:
1093288943Sdim    return SpillFunc[0];
1094288943Sdim  case Hexagon::R19:
1095288943Sdim    return SpillFunc[1];
1096288943Sdim  case Hexagon::R21:
1097288943Sdim    return SpillFunc[2];
1098288943Sdim  case Hexagon::R23:
1099288943Sdim    return SpillFunc[3];
1100288943Sdim  case Hexagon::R25:
1101288943Sdim    return SpillFunc[4];
1102288943Sdim  case Hexagon::R27:
1103288943Sdim    return SpillFunc[5];
1104288943Sdim  default:
1105288943Sdim    llvm_unreachable("Unhandled maximum callee save register");
1106288943Sdim  }
1107314564Sdim  return nullptr;
1108239462Sdim}
1109239462Sdim
1110296417Sdimint HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1111296417Sdim      int FI, unsigned &FrameReg) const {
1112314564Sdim  auto &MFI = MF.getFrameInfo();
1113296417Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1114296417Sdim
1115296417Sdim  int Offset = MFI.getObjectOffset(FI);
1116296417Sdim  bool HasAlloca = MFI.hasVarSizedObjects();
1117296417Sdim  bool HasExtraAlign = HRI.needsStackRealignment(MF);
1118296417Sdim  bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
1119296417Sdim
1120309124Sdim  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1121321369Sdim  unsigned FrameSize = MFI.getStackSize();
1122321369Sdim  unsigned SP = HRI.getStackRegister();
1123321369Sdim  unsigned FP = HRI.getFrameRegister();
1124309124Sdim  unsigned AP = HMFI.getStackAlignBasePhysReg();
1125321369Sdim  // It may happen that AP will be absent even HasAlloca && HasExtraAlign
1126321369Sdim  // is true. HasExtraAlign may be set because of vector spills, without
1127321369Sdim  // aligned locals or aligned outgoing function arguments. Since vector
1128321369Sdim  // spills will ultimately be "unaligned", it is safe to use FP as the
1129321369Sdim  // base register.
1130321369Sdim  // In fact, in such a scenario the stack is actually not required to be
1131321369Sdim  // aligned, although it may end up being aligned anyway, since this
1132321369Sdim  // particular case is not easily detectable. The alignment will be
1133321369Sdim  // unnecessary, but not incorrect.
1134321369Sdim  // Unfortunately there is no quick way to verify that the above is
1135321369Sdim  // indeed the case (and that it's not a result of an error), so just
1136321369Sdim  // assume that missing AP will be replaced by FP.
1137321369Sdim  // (A better fix would be to rematerialize AP from FP and always align
1138321369Sdim  // vector spills.)
1139321369Sdim  if (AP == 0)
1140321369Sdim    AP = FP;
1141296417Sdim
1142296417Sdim  bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
1143296417Sdim  // Use FP at -O0, except when there are objects with extra alignment.
1144296417Sdim  // That additional alignment requirement may cause a pad to be inserted,
1145296417Sdim  // which will make it impossible to use FP to access objects located
1146296417Sdim  // past the pad.
1147296417Sdim  if (NoOpt && !HasExtraAlign)
1148296417Sdim    UseFP = true;
1149296417Sdim  if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
1150296417Sdim    // Fixed and preallocated objects will be located before any padding
1151296417Sdim    // so FP must be used to access them.
1152296417Sdim    UseFP |= (HasAlloca || HasExtraAlign);
1153296417Sdim  } else {
1154296417Sdim    if (HasAlloca) {
1155296417Sdim      if (HasExtraAlign)
1156296417Sdim        UseAP = true;
1157296417Sdim      else
1158296417Sdim        UseFP = true;
1159296417Sdim    }
1160296417Sdim  }
1161296417Sdim
1162296417Sdim  // If FP was picked, then there had better be FP.
1163296417Sdim  bool HasFP = hasFP(MF);
1164296417Sdim  assert((HasFP || !UseFP) && "This function must have frame pointer");
1165296417Sdim
1166296417Sdim  // Having FP implies allocframe. Allocframe will store extra 8 bytes:
1167296417Sdim  // FP/LR. If the base register is used to access an object across these
1168296417Sdim  // 8 bytes, then the offset will need to be adjusted by 8.
1169296417Sdim  //
1170296417Sdim  // After allocframe:
1171296417Sdim  //                    HexagonISelLowering adds 8 to ---+
1172296417Sdim  //                    the offsets of all stack-based   |
1173296417Sdim  //                    arguments (*)                    |
1174296417Sdim  //                                                     |
1175296417Sdim  //   getObjectOffset < 0   0     8  getObjectOffset >= 8
1176296417Sdim  // ------------------------+-----+------------------------> increasing
1177296417Sdim  //     <local objects>     |FP/LR|    <input arguments>     addresses
1178296417Sdim  // -----------------+------+-----+------------------------>
1179296417Sdim  //                  |      |
1180296417Sdim  //    SP/AP point --+      +-- FP points here (**)
1181296417Sdim  //    somewhere on
1182296417Sdim  //    this side of FP/LR
1183296417Sdim  //
1184296417Sdim  // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
1185296417Sdim  // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
1186296417Sdim
1187296417Sdim  // The lowering assumes that FP/LR is present, and so the offsets of
1188296417Sdim  // the formal arguments start at 8. If FP/LR is not there we need to
1189296417Sdim  // reduce the offset by 8.
1190296417Sdim  if (Offset > 0 && !HasFP)
1191296417Sdim    Offset -= 8;
1192296417Sdim
1193296417Sdim  if (UseFP)
1194296417Sdim    FrameReg = FP;
1195296417Sdim  else if (UseAP)
1196296417Sdim    FrameReg = AP;
1197296417Sdim  else
1198296417Sdim    FrameReg = SP;
1199296417Sdim
1200296417Sdim  // Calculate the actual offset in the instruction. If there is no FP
1201296417Sdim  // (in other words, no allocframe), then SP will not be adjusted (i.e.
1202296417Sdim  // there will be no SP -= FrameSize), so the frame size should not be
1203296417Sdim  // added to the calculated offset.
1204296417Sdim  int RealOffset = Offset;
1205321369Sdim  if (!UseFP && !UseAP)
1206296417Sdim    RealOffset = FrameSize+Offset;
1207296417Sdim  return RealOffset;
1208288943Sdim}
1209288943Sdim
1210288943Sdimbool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
1211309124Sdim      const CSIVect &CSI, const HexagonRegisterInfo &HRI,
1212309124Sdim      bool &PrologueStubs) const {
1213288943Sdim  if (CSI.empty())
1214288943Sdim    return true;
1215288943Sdim
1216288943Sdim  MachineBasicBlock::iterator MI = MBB.begin();
1217309124Sdim  PrologueStubs = false;
1218288943Sdim  MachineFunction &MF = *MBB.getParent();
1219314564Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
1220314564Sdim  auto &HII = *HST.getInstrInfo();
1221288943Sdim
1222288943Sdim  if (useSpillFunction(MF, CSI)) {
1223309124Sdim    PrologueStubs = true;
1224288943Sdim    unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
1225309124Sdim    bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
1226309124Sdim    const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
1227309124Sdim                                               StkOvrFlowEnabled);
1228309124Sdim    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1229309124Sdim    bool IsPIC = HTM.isPositionIndependent();
1230314564Sdim    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1231309124Sdim
1232288943Sdim    // Call spill function.
1233288943Sdim    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1234309124Sdim    unsigned SpillOpc;
1235314564Sdim    if (StkOvrFlowEnabled) {
1236314564Sdim      if (LongCalls)
1237314564Sdim        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC
1238314564Sdim                         : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
1239314564Sdim      else
1240314564Sdim        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
1241314564Sdim                         : Hexagon::SAVE_REGISTERS_CALL_V4STK;
1242314564Sdim    } else {
1243314564Sdim      if (LongCalls)
1244314564Sdim        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC
1245314564Sdim                         : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
1246314564Sdim      else
1247314564Sdim        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
1248314564Sdim                         : Hexagon::SAVE_REGISTERS_CALL_V4;
1249314564Sdim    }
1250309124Sdim
1251288943Sdim    MachineInstr *SaveRegsCall =
1252309124Sdim        BuildMI(MBB, MI, DL, HII.get(SpillOpc))
1253288943Sdim          .addExternalSymbol(SpillFun);
1254314564Sdim
1255288943Sdim    // Add callee-saved registers as use.
1256309124Sdim    addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
1257288943Sdim    // Add live in registers.
1258288943Sdim    for (unsigned I = 0; I < CSI.size(); ++I)
1259288943Sdim      MBB.addLiveIn(CSI[I].getReg());
1260288943Sdim    return true;
1261234285Sdim  }
1262234285Sdim
1263288943Sdim  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1264234285Sdim    unsigned Reg = CSI[i].getReg();
1265288943Sdim    // Add live in registers. We treat eh_return callee saved register r0 - r3
1266288943Sdim    // specially. They are not really callee saved registers as they are not
1267288943Sdim    // supposed to be killed.
1268288943Sdim    bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
1269288943Sdim    int FI = CSI[i].getFrameIdx();
1270288943Sdim    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1271296417Sdim    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
1272288943Sdim    if (IsKill)
1273288943Sdim      MBB.addLiveIn(Reg);
1274288943Sdim  }
1275288943Sdim  return true;
1276288943Sdim}
1277234285Sdim
1278288943Sdimbool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
1279288943Sdim      const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
1280288943Sdim  if (CSI.empty())
1281288943Sdim    return false;
1282234285Sdim
1283288943Sdim  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
1284288943Sdim  MachineFunction &MF = *MBB.getParent();
1285314564Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
1286314564Sdim  auto &HII = *HST.getInstrInfo();
1287234285Sdim
1288288943Sdim  if (useRestoreFunction(MF, CSI)) {
1289288943Sdim    bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
1290288943Sdim    unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
1291288943Sdim    SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
1292288943Sdim    const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
1293309124Sdim    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1294309124Sdim    bool IsPIC = HTM.isPositionIndependent();
1295314564Sdim    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1296288943Sdim
1297288943Sdim    // Call spill function.
1298288943Sdim    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
1299344779Sdim                                  : MBB.findDebugLoc(MBB.end());
1300288943Sdim    MachineInstr *DeallocCall = nullptr;
1301288943Sdim
1302288943Sdim    if (HasTC) {
1303314564Sdim      unsigned RetOpc;
1304314564Sdim      if (LongCalls)
1305314564Sdim        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC
1306314564Sdim                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;
1307314564Sdim      else
1308314564Sdim        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
1309314564Sdim                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
1310314564Sdim      DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))
1311288943Sdim          .addExternalSymbol(RestoreFn);
1312234285Sdim    } else {
1313288943Sdim      // The block has a return.
1314288943Sdim      MachineBasicBlock::iterator It = MBB.getFirstTerminator();
1315288943Sdim      assert(It->isReturn() && std::next(It) == MBB.end());
1316314564Sdim      unsigned RetOpc;
1317314564Sdim      if (LongCalls)
1318314564Sdim        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC
1319314564Sdim                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;
1320314564Sdim      else
1321314564Sdim        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
1322314564Sdim                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
1323314564Sdim      DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))
1324288943Sdim          .addExternalSymbol(RestoreFn);
1325288943Sdim      // Transfer the function live-out registers.
1326309124Sdim      DeallocCall->copyImplicitOps(MF, *It);
1327234285Sdim    }
1328309124Sdim    addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);
1329288943Sdim    return true;
1330234285Sdim  }
1331288943Sdim
1332288943Sdim  for (unsigned i = 0; i < CSI.size(); ++i) {
1333288943Sdim    unsigned Reg = CSI[i].getReg();
1334288943Sdim    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1335288943Sdim    int FI = CSI[i].getFrameIdx();
1336296417Sdim    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
1337288943Sdim  }
1338309124Sdim
1339234285Sdim  return true;
1340234285Sdim}
1341234285Sdim
1342309124SdimMachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(
1343309124Sdim    MachineFunction &MF, MachineBasicBlock &MBB,
1344309124Sdim    MachineBasicBlock::iterator I) const {
1345288943Sdim  MachineInstr &MI = *I;
1346288943Sdim  unsigned Opc = MI.getOpcode();
1347288943Sdim  (void)Opc; // Silence compiler warning.
1348288943Sdim  assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
1349288943Sdim         "Cannot handle this call frame pseudo instruction");
1350309124Sdim  return MBB.erase(I);
1351288943Sdim}
1352234285Sdim
1353288943Sdimvoid HexagonFrameLowering::processFunctionBeforeFrameFinalized(
1354288943Sdim    MachineFunction &MF, RegScavenger *RS) const {
1355288943Sdim  // If this function has uses aligned stack and also has variable sized stack
1356288943Sdim  // objects, then we need to map all spill slots to fixed positions, so that
1357288943Sdim  // they can be accessed through FP. Otherwise they would have to be accessed
1358288943Sdim  // via AP, which may not be available at the particular place in the program.
1359314564Sdim  MachineFrameInfo &MFI = MF.getFrameInfo();
1360314564Sdim  bool HasAlloca = MFI.hasVarSizedObjects();
1361314564Sdim  bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment());
1362288943Sdim
1363296417Sdim  if (!HasAlloca || !NeedsAlign)
1364288943Sdim    return;
1365288943Sdim
1366360784Sdim  SmallSet<int, 4> DealignSlots;
1367314564Sdim  unsigned LFS = MFI.getLocalFrameSize();
1368314564Sdim  for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
1369314564Sdim    if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i))
1370288943Sdim      continue;
1371314564Sdim    unsigned S = MFI.getObjectSize(i);
1372309124Sdim    // Reduce the alignment to at most 8. This will require unaligned vector
1373309124Sdim    // stores if they happen here.
1374314564Sdim    unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
1375314564Sdim    MFI.setObjectAlignment(i, 8);
1376309124Sdim    LFS = alignTo(LFS+S, A);
1377360784Sdim    MFI.mapLocalFrameObject(i, -static_cast<int64_t>(LFS));
1378360784Sdim    DealignSlots.insert(i);
1379288943Sdim  }
1380288943Sdim
1381314564Sdim  MFI.setLocalFrameSize(LFS);
1382360784Sdim  Align A = MFI.getLocalFrameMaxAlign();
1383288943Sdim  assert(A <= 8 && "Unexpected local frame alignment");
1384360784Sdim  if (A == 1)
1385360784Sdim    MFI.setLocalFrameMaxAlign(Align(8));
1386314564Sdim  MFI.setUseLocalStackAllocationBlock(true);
1387309124Sdim
1388360784Sdim  // Go over all MachineMemOperands in the code, and change the ones that
1389360784Sdim  // refer to the dealigned stack slots to reflect the new alignment.
1390360784Sdim  if (!DealignSlots.empty()) {
1391360784Sdim    for (MachineBasicBlock &BB : MF) {
1392360784Sdim      for (MachineInstr &MI : BB) {
1393360784Sdim        bool KeepOld = true;
1394360784Sdim        ArrayRef<MachineMemOperand*> memops = MI.memoperands();
1395360784Sdim        SmallVector<MachineMemOperand*,1> new_memops;
1396360784Sdim        for (MachineMemOperand *MMO : memops) {
1397360784Sdim          auto *PV = MMO->getPseudoValue();
1398360784Sdim          if (auto *FS = dyn_cast_or_null<FixedStackPseudoSourceValue>(PV)) {
1399360784Sdim            int FI = FS->getFrameIndex();
1400360784Sdim            if (DealignSlots.count(FI)) {
1401360784Sdim              unsigned A = MFI.getObjectAlignment(FI);
1402360784Sdim              auto *NewMMO = MF.getMachineMemOperand(MMO->getPointerInfo(),
1403360784Sdim                                MMO->getFlags(), MMO->getSize(), A,
1404360784Sdim                                MMO->getAAInfo(), MMO->getRanges(),
1405360784Sdim                                MMO->getSyncScopeID(), MMO->getOrdering(),
1406360784Sdim                                MMO->getFailureOrdering());
1407360784Sdim              new_memops.push_back(NewMMO);
1408360784Sdim              KeepOld = false;
1409360784Sdim              continue;
1410360784Sdim            }
1411360784Sdim          }
1412360784Sdim          new_memops.push_back(MMO);
1413360784Sdim        }
1414360784Sdim        if (!KeepOld)
1415360784Sdim          MI.setMemRefs(MF, new_memops);
1416360784Sdim      }
1417360784Sdim    }
1418360784Sdim  }
1419360784Sdim
1420309124Sdim  // Set the physical aligned-stack base address register.
1421309124Sdim  unsigned AP = 0;
1422309124Sdim  if (const MachineInstr *AI = getAlignaInstr(MF))
1423309124Sdim    AP = AI->getOperand(0).getReg();
1424309124Sdim  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1425309124Sdim  HMFI.setStackAlignBasePhysReg(AP);
1426288943Sdim}
1427288943Sdim
1428309124Sdim/// Returns true if there are no caller-saved registers available in class RC.
1429288943Sdimstatic bool needToReserveScavengingSpillSlots(MachineFunction &MF,
1430309124Sdim      const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {
1431288943Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1432288943Sdim
1433309124Sdim  auto IsUsed = [&HRI,&MRI] (unsigned Reg) -> bool {
1434309124Sdim    for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); ++AI)
1435309124Sdim      if (MRI.isPhysRegUsed(*AI))
1436309124Sdim        return true;
1437309124Sdim    return false;
1438309124Sdim  };
1439288943Sdim
1440309124Sdim  // Check for an unused caller-saved register. Callee-saved registers
1441309124Sdim  // have become pristine by now.
1442309124Sdim  for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; ++P)
1443309124Sdim    if (!IsUsed(*P))
1444309124Sdim      return false;
1445309124Sdim
1446288943Sdim  // All caller-saved registers are used.
1447288943Sdim  return true;
1448288943Sdim}
1449234285Sdim
1450288943Sdim#ifndef NDEBUG
1451288943Sdimstatic void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
1452288943Sdim  dbgs() << '{';
1453288943Sdim  for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
1454288943Sdim    unsigned R = x;
1455327952Sdim    dbgs() << ' ' << printReg(R, &TRI);
1456288943Sdim  }
1457288943Sdim  dbgs() << " }";
1458288943Sdim}
1459288943Sdim#endif
1460288943Sdim
1461288943Sdimbool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
1462288943Sdim      const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
1463341825Sdim  LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');
1464314564Sdim  MachineFrameInfo &MFI = MF.getFrameInfo();
1465288943Sdim  BitVector SRegs(Hexagon::NUM_TARGET_REGS);
1466288943Sdim
1467288943Sdim  // Generate a set of unique, callee-saved registers (SRegs), where each
1468288943Sdim  // register in the set is maximal in terms of sub-/super-register relation,
1469288943Sdim  // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
1470288943Sdim
1471288943Sdim  // (1) For each callee-saved register, add that register and all of its
1472288943Sdim  // sub-registers to SRegs.
1473341825Sdim  LLVM_DEBUG(dbgs() << "Initial CS registers: {");
1474288943Sdim  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1475288943Sdim    unsigned R = CSI[i].getReg();
1476341825Sdim    LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
1477288943Sdim    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
1478288943Sdim      SRegs[*SR] = true;
1479288943Sdim  }
1480341825Sdim  LLVM_DEBUG(dbgs() << " }\n");
1481341825Sdim  LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI);
1482341825Sdim             dbgs() << "\n");
1483288943Sdim
1484288943Sdim  // (2) For each reserved register, remove that register and all of its
1485288943Sdim  // sub- and super-registers from SRegs.
1486288943Sdim  BitVector Reserved = TRI->getReservedRegs(MF);
1487288943Sdim  for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
1488288943Sdim    unsigned R = x;
1489288943Sdim    for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
1490288943Sdim      SRegs[*SR] = false;
1491288943Sdim  }
1492341825Sdim  LLVM_DEBUG(dbgs() << "Res:     "; dump_registers(Reserved, *TRI);
1493341825Sdim             dbgs() << "\n");
1494341825Sdim  LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI);
1495341825Sdim             dbgs() << "\n");
1496288943Sdim
1497288943Sdim  // (3) Collect all registers that have at least one sub-register in SRegs,
1498288943Sdim  // and also have no sub-registers that are reserved. These will be the can-
1499288943Sdim  // didates for saving as a whole instead of their individual sub-registers.
1500288943Sdim  // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
1501288943Sdim  BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
1502288943Sdim  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1503288943Sdim    unsigned R = x;
1504288943Sdim    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
1505288943Sdim      TmpSup[*SR] = true;
1506288943Sdim  }
1507288943Sdim  for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
1508288943Sdim    unsigned R = x;
1509288943Sdim    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
1510288943Sdim      if (!Reserved[*SR])
1511288943Sdim        continue;
1512288943Sdim      TmpSup[R] = false;
1513288943Sdim      break;
1514234285Sdim    }
1515234285Sdim  }
1516341825Sdim  LLVM_DEBUG(dbgs() << "TmpSup:  "; dump_registers(TmpSup, *TRI);
1517341825Sdim             dbgs() << "\n");
1518288943Sdim
1519288943Sdim  // (4) Include all super-registers found in (3) into SRegs.
1520288943Sdim  SRegs |= TmpSup;
1521341825Sdim  LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI);
1522341825Sdim             dbgs() << "\n");
1523288943Sdim
1524288943Sdim  // (5) For each register R in SRegs, if any super-register of R is in SRegs,
1525288943Sdim  // remove R from SRegs.
1526288943Sdim  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1527288943Sdim    unsigned R = x;
1528288943Sdim    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
1529288943Sdim      if (!SRegs[*SR])
1530288943Sdim        continue;
1531288943Sdim      SRegs[R] = false;
1532288943Sdim      break;
1533288943Sdim    }
1534288943Sdim  }
1535341825Sdim  LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI);
1536341825Sdim             dbgs() << "\n");
1537288943Sdim
1538288943Sdim  // Now, for each register that has a fixed stack slot, create the stack
1539288943Sdim  // object for it.
1540288943Sdim  CSI.clear();
1541288943Sdim
1542327952Sdim  using SpillSlot = TargetFrameLowering::SpillSlot;
1543327952Sdim
1544288943Sdim  unsigned NumFixed;
1545288943Sdim  int MinOffset = 0;  // CS offsets are negative.
1546288943Sdim  const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
1547288943Sdim  for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
1548288943Sdim    if (!SRegs[S->Reg])
1549288943Sdim      continue;
1550288943Sdim    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
1551321369Sdim    int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);
1552288943Sdim    MinOffset = std::min(MinOffset, S->Offset);
1553288943Sdim    CSI.push_back(CalleeSavedInfo(S->Reg, FI));
1554288943Sdim    SRegs[S->Reg] = false;
1555288943Sdim  }
1556288943Sdim
1557288943Sdim  // There can be some registers that don't have fixed slots. For example,
1558288943Sdim  // we need to store R0-R3 in functions with exception handling. For each
1559288943Sdim  // such register, create a non-fixed stack object.
1560288943Sdim  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1561288943Sdim    unsigned R = x;
1562288943Sdim    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
1563321369Sdim    unsigned Size = TRI->getSpillSize(*RC);
1564321369Sdim    int Off = MinOffset - Size;
1565321369Sdim    unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment());
1566288943Sdim    assert(isPowerOf2_32(Align));
1567288943Sdim    Off &= -Align;
1568321369Sdim    int FI = MFI.CreateFixedSpillStackObject(Size, Off);
1569288943Sdim    MinOffset = std::min(MinOffset, Off);
1570288943Sdim    CSI.push_back(CalleeSavedInfo(R, FI));
1571288943Sdim    SRegs[R] = false;
1572288943Sdim  }
1573288943Sdim
1574341825Sdim  LLVM_DEBUG({
1575288943Sdim    dbgs() << "CS information: {";
1576288943Sdim    for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1577288943Sdim      int FI = CSI[i].getFrameIdx();
1578314564Sdim      int Off = MFI.getObjectOffset(FI);
1579327952Sdim      dbgs() << ' ' << printReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
1580288943Sdim      if (Off >= 0)
1581288943Sdim        dbgs() << '+';
1582288943Sdim      dbgs() << Off;
1583288943Sdim    }
1584288943Sdim    dbgs() << " }\n";
1585288943Sdim  });
1586288943Sdim
1587288943Sdim#ifndef NDEBUG
1588288943Sdim  // Verify that all registers were handled.
1589288943Sdim  bool MissedReg = false;
1590288943Sdim  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1591288943Sdim    unsigned R = x;
1592327952Sdim    dbgs() << printReg(R, TRI) << ' ';
1593288943Sdim    MissedReg = true;
1594288943Sdim  }
1595288943Sdim  if (MissedReg)
1596288943Sdim    llvm_unreachable("...there are unhandled callee-saved registers!");
1597288943Sdim#endif
1598288943Sdim
1599234285Sdim  return true;
1600234285Sdim}
1601234285Sdim
1602309124Sdimbool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
1603309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1604309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1605309124Sdim  MachineInstr *MI = &*It;
1606309124Sdim  DebugLoc DL = MI->getDebugLoc();
1607360784Sdim  Register DstR = MI->getOperand(0).getReg();
1608360784Sdim  Register SrcR = MI->getOperand(1).getReg();
1609309124Sdim  if (!Hexagon::ModRegsRegClass.contains(DstR) ||
1610309124Sdim      !Hexagon::ModRegsRegClass.contains(SrcR))
1611309124Sdim    return false;
1612309124Sdim
1613360784Sdim  Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1614321369Sdim  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
1615309124Sdim  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
1616309124Sdim    .addReg(TmpR, RegState::Kill);
1617309124Sdim
1618309124Sdim  NewRegs.push_back(TmpR);
1619309124Sdim  B.erase(It);
1620309124Sdim  return true;
1621309124Sdim}
1622309124Sdim
1623309124Sdimbool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
1624309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1625309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1626309124Sdim  MachineInstr *MI = &*It;
1627314564Sdim  if (!MI->getOperand(0).isFI())
1628314564Sdim    return false;
1629314564Sdim
1630309124Sdim  DebugLoc DL = MI->getDebugLoc();
1631309124Sdim  unsigned Opc = MI->getOpcode();
1632360784Sdim  Register SrcR = MI->getOperand(2).getReg();
1633309124Sdim  bool IsKill = MI->getOperand(2).isKill();
1634309124Sdim  int FI = MI->getOperand(0).getIndex();
1635309124Sdim
1636309124Sdim  // TmpR = C2_tfrpr SrcR   if SrcR is a predicate register
1637309124Sdim  // TmpR = A2_tfrcrr SrcR  if SrcR is a modifier register
1638360784Sdim  Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1639309124Sdim  unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
1640309124Sdim                                                 : Hexagon::A2_tfrcrr;
1641309124Sdim  BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
1642309124Sdim    .addReg(SrcR, getKillRegState(IsKill));
1643309124Sdim
1644309124Sdim  // S2_storeri_io FI, 0, TmpR
1645309124Sdim  BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
1646344779Sdim      .addFrameIndex(FI)
1647344779Sdim      .addImm(0)
1648344779Sdim      .addReg(TmpR, RegState::Kill)
1649344779Sdim      .cloneMemRefs(*MI);
1650309124Sdim
1651309124Sdim  NewRegs.push_back(TmpR);
1652309124Sdim  B.erase(It);
1653309124Sdim  return true;
1654309124Sdim}
1655309124Sdim
1656309124Sdimbool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
1657309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1658309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1659309124Sdim  MachineInstr *MI = &*It;
1660314564Sdim  if (!MI->getOperand(1).isFI())
1661314564Sdim    return false;
1662314564Sdim
1663309124Sdim  DebugLoc DL = MI->getDebugLoc();
1664309124Sdim  unsigned Opc = MI->getOpcode();
1665360784Sdim  Register DstR = MI->getOperand(0).getReg();
1666309124Sdim  int FI = MI->getOperand(1).getIndex();
1667309124Sdim
1668309124Sdim  // TmpR = L2_loadri_io FI, 0
1669360784Sdim  Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1670309124Sdim  BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
1671344779Sdim      .addFrameIndex(FI)
1672344779Sdim      .addImm(0)
1673344779Sdim      .cloneMemRefs(*MI);
1674309124Sdim
1675309124Sdim  // DstR = C2_tfrrp TmpR   if DstR is a predicate register
1676309124Sdim  // DstR = A2_tfrrcr TmpR  if DstR is a modifier register
1677309124Sdim  unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp
1678309124Sdim                                                 : Hexagon::A2_tfrrcr;
1679309124Sdim  BuildMI(B, It, DL, HII.get(TfrOpc), DstR)
1680309124Sdim    .addReg(TmpR, RegState::Kill);
1681309124Sdim
1682309124Sdim  NewRegs.push_back(TmpR);
1683309124Sdim  B.erase(It);
1684309124Sdim  return true;
1685309124Sdim}
1686309124Sdim
1687309124Sdimbool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
1688309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1689309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1690309124Sdim  MachineInstr *MI = &*It;
1691314564Sdim  if (!MI->getOperand(0).isFI())
1692314564Sdim    return false;
1693314564Sdim
1694309124Sdim  DebugLoc DL = MI->getDebugLoc();
1695360784Sdim  Register SrcR = MI->getOperand(2).getReg();
1696309124Sdim  bool IsKill = MI->getOperand(2).isKill();
1697309124Sdim  int FI = MI->getOperand(0).getIndex();
1698327952Sdim  auto *RC = &Hexagon::HvxVRRegClass;
1699309124Sdim
1700309124Sdim  // Insert transfer to general vector register.
1701309124Sdim  //   TmpR0 = A2_tfrsi 0x01010101
1702309124Sdim  //   TmpR1 = V6_vandqrt Qx, TmpR0
1703309124Sdim  //   store FI, 0, TmpR1
1704360784Sdim  Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1705360784Sdim  Register TmpR1 = MRI.createVirtualRegister(RC);
1706309124Sdim
1707309124Sdim  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1708309124Sdim    .addImm(0x01010101);
1709309124Sdim
1710327952Sdim  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1)
1711309124Sdim    .addReg(SrcR, getKillRegState(IsKill))
1712309124Sdim    .addReg(TmpR0, RegState::Kill);
1713309124Sdim
1714309124Sdim  auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
1715309124Sdim  HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI);
1716309124Sdim  expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);
1717309124Sdim
1718309124Sdim  NewRegs.push_back(TmpR0);
1719309124Sdim  NewRegs.push_back(TmpR1);
1720309124Sdim  B.erase(It);
1721309124Sdim  return true;
1722309124Sdim}
1723309124Sdim
1724309124Sdimbool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
1725309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1726309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1727309124Sdim  MachineInstr *MI = &*It;
1728314564Sdim  if (!MI->getOperand(1).isFI())
1729314564Sdim    return false;
1730314564Sdim
1731309124Sdim  DebugLoc DL = MI->getDebugLoc();
1732360784Sdim  Register DstR = MI->getOperand(0).getReg();
1733309124Sdim  int FI = MI->getOperand(1).getIndex();
1734327952Sdim  auto *RC = &Hexagon::HvxVRRegClass;
1735309124Sdim
1736309124Sdim  // TmpR0 = A2_tfrsi 0x01010101
1737309124Sdim  // TmpR1 = load FI, 0
1738309124Sdim  // DstR = V6_vandvrt TmpR1, TmpR0
1739360784Sdim  Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1740360784Sdim  Register TmpR1 = MRI.createVirtualRegister(RC);
1741309124Sdim
1742309124Sdim  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1743309124Sdim    .addImm(0x01010101);
1744327952Sdim  MachineFunction &MF = *B.getParent();
1745327952Sdim  auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1746309124Sdim  HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI);
1747309124Sdim  expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);
1748309124Sdim
1749327952Sdim  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
1750309124Sdim    .addReg(TmpR1, RegState::Kill)
1751309124Sdim    .addReg(TmpR0, RegState::Kill);
1752309124Sdim
1753309124Sdim  NewRegs.push_back(TmpR0);
1754309124Sdim  NewRegs.push_back(TmpR1);
1755309124Sdim  B.erase(It);
1756309124Sdim  return true;
1757309124Sdim}
1758309124Sdim
1759309124Sdimbool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
1760309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1761309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1762309124Sdim  MachineFunction &MF = *B.getParent();
1763314564Sdim  auto &MFI = MF.getFrameInfo();
1764309124Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1765309124Sdim  MachineInstr *MI = &*It;
1766314564Sdim  if (!MI->getOperand(0).isFI())
1767314564Sdim    return false;
1768314564Sdim
1769314564Sdim  // It is possible that the double vector being stored is only partially
1770314564Sdim  // defined. From the point of view of the liveness tracking, it is ok to
1771314564Sdim  // store it as a whole, but if we break it up we may end up storing a
1772314564Sdim  // register that is entirely undefined.
1773321369Sdim  LivePhysRegs LPR(HRI);
1774314564Sdim  LPR.addLiveIns(B);
1775344779Sdim  SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;
1776321369Sdim  for (auto R = B.begin(); R != It; ++R) {
1777321369Sdim    Clobbers.clear();
1778314564Sdim    LPR.stepForward(*R, Clobbers);
1779321369Sdim  }
1780314564Sdim
1781309124Sdim  DebugLoc DL = MI->getDebugLoc();
1782360784Sdim  Register SrcR = MI->getOperand(2).getReg();
1783360784Sdim  Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
1784360784Sdim  Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
1785309124Sdim  bool IsKill = MI->getOperand(2).isKill();
1786309124Sdim  int FI = MI->getOperand(0).getIndex();
1787360784Sdim  bool NeedsAligna = needsAligna(MF);
1788309124Sdim
1789327952Sdim  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1790327952Sdim  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1791309124Sdim  unsigned HasAlign = MFI.getObjectAlignment(FI);
1792309124Sdim  unsigned StoreOpc;
1793309124Sdim
1794360784Sdim  auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) {
1795360784Sdim    return !NeedsAligna && (NeedAlign <= HasAlign);
1796360784Sdim  };
1797360784Sdim
1798309124Sdim  // Store low part.
1799314564Sdim  if (LPR.contains(SrcLo)) {
1800360784Sdim    StoreOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vS32b_ai
1801360784Sdim                                               : Hexagon::V6_vS32Ub_ai;
1802314564Sdim    BuildMI(B, It, DL, HII.get(StoreOpc))
1803344779Sdim        .addFrameIndex(FI)
1804344779Sdim        .addImm(0)
1805344779Sdim        .addReg(SrcLo, getKillRegState(IsKill))
1806344779Sdim        .cloneMemRefs(*MI);
1807314564Sdim  }
1808309124Sdim
1809314564Sdim  // Store high part.
1810314564Sdim  if (LPR.contains(SrcHi)) {
1811360784Sdim    StoreOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vS32b_ai
1812360784Sdim                                               : Hexagon::V6_vS32Ub_ai;
1813314564Sdim    BuildMI(B, It, DL, HII.get(StoreOpc))
1814344779Sdim        .addFrameIndex(FI)
1815344779Sdim        .addImm(Size)
1816344779Sdim        .addReg(SrcHi, getKillRegState(IsKill))
1817344779Sdim        .cloneMemRefs(*MI);
1818314564Sdim  }
1819309124Sdim
1820309124Sdim  B.erase(It);
1821309124Sdim  return true;
1822309124Sdim}
1823309124Sdim
1824309124Sdimbool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
1825309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1826309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1827309124Sdim  MachineFunction &MF = *B.getParent();
1828314564Sdim  auto &MFI = MF.getFrameInfo();
1829309124Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1830309124Sdim  MachineInstr *MI = &*It;
1831314564Sdim  if (!MI->getOperand(1).isFI())
1832314564Sdim    return false;
1833314564Sdim
1834309124Sdim  DebugLoc DL = MI->getDebugLoc();
1835360784Sdim  Register DstR = MI->getOperand(0).getReg();
1836360784Sdim  Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
1837360784Sdim  Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
1838309124Sdim  int FI = MI->getOperand(1).getIndex();
1839360784Sdim  bool NeedsAligna = needsAligna(MF);
1840309124Sdim
1841327952Sdim  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1842327952Sdim  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1843309124Sdim  unsigned HasAlign = MFI.getObjectAlignment(FI);
1844309124Sdim  unsigned LoadOpc;
1845309124Sdim
1846360784Sdim  auto UseAligned = [&] (unsigned NeedAlign, unsigned HasAlign) {
1847360784Sdim    return !NeedsAligna && (NeedAlign <= HasAlign);
1848360784Sdim  };
1849360784Sdim
1850309124Sdim  // Load low part.
1851360784Sdim  LoadOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vL32b_ai
1852360784Sdim                                            : Hexagon::V6_vL32Ub_ai;
1853309124Sdim  BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
1854344779Sdim      .addFrameIndex(FI)
1855344779Sdim      .addImm(0)
1856344779Sdim      .cloneMemRefs(*MI);
1857309124Sdim
1858309124Sdim  // Load high part.
1859360784Sdim  LoadOpc = UseAligned(NeedAlign, HasAlign) ? Hexagon::V6_vL32b_ai
1860360784Sdim                                            : Hexagon::V6_vL32Ub_ai;
1861309124Sdim  BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
1862344779Sdim      .addFrameIndex(FI)
1863344779Sdim      .addImm(Size)
1864344779Sdim      .cloneMemRefs(*MI);
1865309124Sdim
1866309124Sdim  B.erase(It);
1867309124Sdim  return true;
1868309124Sdim}
1869309124Sdim
1870309124Sdimbool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
1871309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1872309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1873309124Sdim  MachineFunction &MF = *B.getParent();
1874314564Sdim  auto &MFI = MF.getFrameInfo();
1875309124Sdim  MachineInstr *MI = &*It;
1876314564Sdim  if (!MI->getOperand(0).isFI())
1877314564Sdim    return false;
1878314564Sdim
1879360784Sdim  bool NeedsAligna = needsAligna(MF);
1880327952Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1881309124Sdim  DebugLoc DL = MI->getDebugLoc();
1882360784Sdim  Register SrcR = MI->getOperand(2).getReg();
1883309124Sdim  bool IsKill = MI->getOperand(2).isKill();
1884309124Sdim  int FI = MI->getOperand(0).getIndex();
1885309124Sdim
1886327952Sdim  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1887309124Sdim  unsigned HasAlign = MFI.getObjectAlignment(FI);
1888360784Sdim  bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
1889360784Sdim  unsigned StoreOpc = UseAligned ? Hexagon::V6_vS32b_ai
1890360784Sdim                                 : Hexagon::V6_vS32Ub_ai;
1891309124Sdim  BuildMI(B, It, DL, HII.get(StoreOpc))
1892344779Sdim      .addFrameIndex(FI)
1893344779Sdim      .addImm(0)
1894344779Sdim      .addReg(SrcR, getKillRegState(IsKill))
1895344779Sdim      .cloneMemRefs(*MI);
1896309124Sdim
1897309124Sdim  B.erase(It);
1898309124Sdim  return true;
1899309124Sdim}
1900309124Sdim
1901309124Sdimbool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
1902309124Sdim      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1903309124Sdim      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1904309124Sdim  MachineFunction &MF = *B.getParent();
1905314564Sdim  auto &MFI = MF.getFrameInfo();
1906309124Sdim  MachineInstr *MI = &*It;
1907314564Sdim  if (!MI->getOperand(1).isFI())
1908314564Sdim    return false;
1909314564Sdim
1910360784Sdim  bool NeedsAligna = needsAligna(MF);
1911327952Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1912309124Sdim  DebugLoc DL = MI->getDebugLoc();
1913360784Sdim  Register DstR = MI->getOperand(0).getReg();
1914309124Sdim  int FI = MI->getOperand(1).getIndex();
1915309124Sdim
1916327952Sdim  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1917309124Sdim  unsigned HasAlign = MFI.getObjectAlignment(FI);
1918360784Sdim  bool UseAligned = !NeedsAligna && (NeedAlign <= HasAlign);
1919360784Sdim  unsigned LoadOpc = UseAligned ? Hexagon::V6_vL32b_ai
1920360784Sdim                                : Hexagon::V6_vL32Ub_ai;
1921309124Sdim  BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
1922344779Sdim      .addFrameIndex(FI)
1923344779Sdim      .addImm(0)
1924344779Sdim      .cloneMemRefs(*MI);
1925309124Sdim
1926309124Sdim  B.erase(It);
1927309124Sdim  return true;
1928309124Sdim}
1929309124Sdim
1930309124Sdimbool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
1931309124Sdim      SmallVectorImpl<unsigned> &NewRegs) const {
1932327952Sdim  auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
1933309124Sdim  MachineRegisterInfo &MRI = MF.getRegInfo();
1934309124Sdim  bool Changed = false;
1935309124Sdim
1936309124Sdim  for (auto &B : MF) {
1937309124Sdim    // Traverse the basic block.
1938309124Sdim    MachineBasicBlock::iterator NextI;
1939309124Sdim    for (auto I = B.begin(), E = B.end(); I != E; I = NextI) {
1940309124Sdim      MachineInstr *MI = &*I;
1941309124Sdim      NextI = std::next(I);
1942309124Sdim      unsigned Opc = MI->getOpcode();
1943309124Sdim
1944309124Sdim      switch (Opc) {
1945309124Sdim        case TargetOpcode::COPY:
1946309124Sdim          Changed |= expandCopy(B, I, MRI, HII, NewRegs);
1947309124Sdim          break;
1948309124Sdim        case Hexagon::STriw_pred:
1949341825Sdim        case Hexagon::STriw_ctr:
1950309124Sdim          Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
1951309124Sdim          break;
1952309124Sdim        case Hexagon::LDriw_pred:
1953341825Sdim        case Hexagon::LDriw_ctr:
1954309124Sdim          Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
1955309124Sdim          break;
1956314564Sdim        case Hexagon::PS_vstorerq_ai:
1957309124Sdim          Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
1958309124Sdim          break;
1959314564Sdim        case Hexagon::PS_vloadrq_ai:
1960309124Sdim          Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
1961309124Sdim          break;
1962314564Sdim        case Hexagon::PS_vloadrw_ai:
1963309124Sdim          Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
1964309124Sdim          break;
1965314564Sdim        case Hexagon::PS_vstorerw_ai:
1966309124Sdim          Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
1967309124Sdim          break;
1968309124Sdim      }
1969309124Sdim    }
1970309124Sdim  }
1971309124Sdim
1972309124Sdim  return Changed;
1973309124Sdim}
1974309124Sdim
1975309124Sdimvoid HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
1976309124Sdim                                                BitVector &SavedRegs,
1977309124Sdim                                                RegScavenger *RS) const {
1978327952Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1979309124Sdim
1980309124Sdim  SavedRegs.resize(HRI.getNumRegs());
1981309124Sdim
1982309124Sdim  // If we have a function containing __builtin_eh_return we want to spill and
1983309124Sdim  // restore all callee saved registers. Pretend that they are used.
1984309124Sdim  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
1985309124Sdim    for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R)
1986309124Sdim      SavedRegs.set(*R);
1987309124Sdim
1988309124Sdim  // Replace predicate register pseudo spill code.
1989309124Sdim  SmallVector<unsigned,8> NewRegs;
1990309124Sdim  expandSpillMacros(MF, NewRegs);
1991309124Sdim  if (OptimizeSpillSlots && !isOptNone(MF))
1992309124Sdim    optimizeSpillSlots(MF, NewRegs);
1993309124Sdim
1994341825Sdim  // We need to reserve a spill slot if scavenging could potentially require
1995309124Sdim  // spilling a scavenged register.
1996314564Sdim  if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {
1997314564Sdim    MachineFrameInfo &MFI = MF.getFrameInfo();
1998309124Sdim    MachineRegisterInfo &MRI = MF.getRegInfo();
1999309124Sdim    SetVector<const TargetRegisterClass*> SpillRCs;
2000309124Sdim    // Reserve an int register in any case, because it could be used to hold
2001309124Sdim    // the stack offset in case it does not fit into a spill instruction.
2002309124Sdim    SpillRCs.insert(&Hexagon::IntRegsRegClass);
2003309124Sdim
2004309124Sdim    for (unsigned VR : NewRegs)
2005309124Sdim      SpillRCs.insert(MRI.getRegClass(VR));
2006309124Sdim
2007309124Sdim    for (auto *RC : SpillRCs) {
2008309124Sdim      if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
2009309124Sdim        continue;
2010360784Sdim      unsigned Num = 1;
2011360784Sdim      switch (RC->getID()) {
2012360784Sdim        case Hexagon::IntRegsRegClassID:
2013360784Sdim          Num = NumberScavengerSlots;
2014360784Sdim          break;
2015360784Sdim        case Hexagon::HvxQRRegClassID:
2016360784Sdim          Num = 2; // Vector predicate spills also need a vector register.
2017360784Sdim          break;
2018360784Sdim      }
2019321369Sdim      unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC);
2020309124Sdim      for (unsigned i = 0; i < Num; i++) {
2021309124Sdim        int NewFI = MFI.CreateSpillStackObject(S, A);
2022309124Sdim        RS->addScavengingFrameIndex(NewFI);
2023309124Sdim      }
2024309124Sdim    }
2025309124Sdim  }
2026309124Sdim
2027309124Sdim  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2028309124Sdim}
2029309124Sdim
2030309124Sdimunsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
2031309124Sdim      HexagonBlockRanges::IndexRange &FIR,
2032309124Sdim      HexagonBlockRanges::InstrIndexMap &IndexMap,
2033309124Sdim      HexagonBlockRanges::RegToRangeMap &DeadMap,
2034309124Sdim      const TargetRegisterClass *RC) const {
2035309124Sdim  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2036309124Sdim  auto &MRI = MF.getRegInfo();
2037309124Sdim
2038309124Sdim  auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
2039309124Sdim    auto F = DeadMap.find({Reg,0});
2040309124Sdim    if (F == DeadMap.end())
2041309124Sdim      return false;
2042309124Sdim    for (auto &DR : F->second)
2043309124Sdim      if (DR.contains(FIR))
2044309124Sdim        return true;
2045309124Sdim    return false;
2046309124Sdim  };
2047309124Sdim
2048309124Sdim  for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
2049309124Sdim    bool Dead = true;
2050309124Sdim    for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
2051309124Sdim      if (isDead(R.Reg))
2052309124Sdim        continue;
2053309124Sdim      Dead = false;
2054309124Sdim      break;
2055309124Sdim    }
2056309124Sdim    if (Dead)
2057309124Sdim      return Reg;
2058309124Sdim  }
2059309124Sdim  return 0;
2060309124Sdim}
2061309124Sdim
2062309124Sdimvoid HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
2063309124Sdim      SmallVectorImpl<unsigned> &VRegs) const {
2064309124Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
2065309124Sdim  auto &HII = *HST.getInstrInfo();
2066309124Sdim  auto &HRI = *HST.getRegisterInfo();
2067309124Sdim  auto &MRI = MF.getRegInfo();
2068309124Sdim  HexagonBlockRanges HBR(MF);
2069309124Sdim
2070327952Sdim  using BlockIndexMap =
2071327952Sdim      std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>;
2072327952Sdim  using BlockRangeMap =
2073327952Sdim      std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>;
2074327952Sdim  using IndexType = HexagonBlockRanges::IndexType;
2075309124Sdim
2076309124Sdim  struct SlotInfo {
2077309124Sdim    BlockRangeMap Map;
2078314564Sdim    unsigned Size = 0;
2079314564Sdim    const TargetRegisterClass *RC = nullptr;
2080309124Sdim
2081314564Sdim    SlotInfo() = default;
2082309124Sdim  };
2083309124Sdim
2084309124Sdim  BlockIndexMap BlockIndexes;
2085309124Sdim  SmallSet<int,4> BadFIs;
2086309124Sdim  std::map<int,SlotInfo> FIRangeMap;
2087309124Sdim
2088309124Sdim  // Accumulate register classes: get a common class for a pre-existing
2089309124Sdim  // class HaveRC and a new class NewRC. Return nullptr if a common class
2090309124Sdim  // cannot be found, otherwise return the resulting class. If HaveRC is
2091309124Sdim  // nullptr, assume that it is still unset.
2092321369Sdim  auto getCommonRC =
2093321369Sdim      [](const TargetRegisterClass *HaveRC,
2094321369Sdim         const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
2095309124Sdim    if (HaveRC == nullptr || HaveRC == NewRC)
2096309124Sdim      return NewRC;
2097309124Sdim    // Different classes, both non-null. Pick the more general one.
2098309124Sdim    if (HaveRC->hasSubClassEq(NewRC))
2099309124Sdim      return HaveRC;
2100309124Sdim    if (NewRC->hasSubClassEq(HaveRC))
2101309124Sdim      return NewRC;
2102309124Sdim    return nullptr;
2103309124Sdim  };
2104309124Sdim
2105309124Sdim  // Scan all blocks in the function. Check all occurrences of frame indexes,
2106309124Sdim  // and collect relevant information.
2107309124Sdim  for (auto &B : MF) {
2108309124Sdim    std::map<int,IndexType> LastStore, LastLoad;
2109309124Sdim    // Emplace appears not to be supported in gcc 4.7.2-4.
2110309124Sdim    //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
2111309124Sdim    auto P = BlockIndexes.insert(
2112309124Sdim                std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));
2113309124Sdim    auto &IndexMap = P.first->second;
2114341825Sdim    LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"
2115341825Sdim                      << IndexMap << '\n');
2116309124Sdim
2117309124Sdim    for (auto &In : B) {
2118309124Sdim      int LFI, SFI;
2119309124Sdim      bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In);
2120309124Sdim      bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In);
2121309124Sdim      if (Load && Store) {
2122309124Sdim        // If it's both a load and a store, then we won't handle it.
2123309124Sdim        BadFIs.insert(LFI);
2124309124Sdim        BadFIs.insert(SFI);
2125309124Sdim        continue;
2126309124Sdim      }
2127309124Sdim      // Check for register classes of the register used as the source for
2128309124Sdim      // the store, and the register used as the destination for the load.
2129309124Sdim      // Also, only accept base+imm_offset addressing modes. Other addressing
2130309124Sdim      // modes can have side-effects (post-increments, etc.). For stack
2131309124Sdim      // slots they are very unlikely, so there is not much loss due to
2132309124Sdim      // this restriction.
2133309124Sdim      if (Load || Store) {
2134309124Sdim        int TFI = Load ? LFI : SFI;
2135314564Sdim        unsigned AM = HII.getAddrMode(In);
2136309124Sdim        SlotInfo &SI = FIRangeMap[TFI];
2137309124Sdim        bool Bad = (AM != HexagonII::BaseImmOffset);
2138309124Sdim        if (!Bad) {
2139309124Sdim          // If the addressing mode is ok, check the register class.
2140314564Sdim          unsigned OpNum = Load ? 0 : 2;
2141314564Sdim          auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);
2142309124Sdim          RC = getCommonRC(SI.RC, RC);
2143309124Sdim          if (RC == nullptr)
2144309124Sdim            Bad = true;
2145309124Sdim          else
2146309124Sdim            SI.RC = RC;
2147309124Sdim        }
2148309124Sdim        if (!Bad) {
2149309124Sdim          // Check sizes.
2150327952Sdim          unsigned S = HII.getMemAccessSize(In);
2151309124Sdim          if (SI.Size != 0 && SI.Size != S)
2152309124Sdim            Bad = true;
2153309124Sdim          else
2154309124Sdim            SI.Size = S;
2155309124Sdim        }
2156314564Sdim        if (!Bad) {
2157314564Sdim          for (auto *Mo : In.memoperands()) {
2158353358Sdim            if (!Mo->isVolatile() && !Mo->isAtomic())
2159314564Sdim              continue;
2160314564Sdim            Bad = true;
2161314564Sdim            break;
2162314564Sdim          }
2163314564Sdim        }
2164309124Sdim        if (Bad)
2165309124Sdim          BadFIs.insert(TFI);
2166309124Sdim      }
2167309124Sdim
2168309124Sdim      // Locate uses of frame indices.
2169309124Sdim      for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
2170309124Sdim        const MachineOperand &Op = In.getOperand(i);
2171309124Sdim        if (!Op.isFI())
2172309124Sdim          continue;
2173309124Sdim        int FI = Op.getIndex();
2174309124Sdim        // Make sure that the following operand is an immediate and that
2175309124Sdim        // it is 0. This is the offset in the stack object.
2176309124Sdim        if (i+1 >= n || !In.getOperand(i+1).isImm() ||
2177309124Sdim            In.getOperand(i+1).getImm() != 0)
2178309124Sdim          BadFIs.insert(FI);
2179309124Sdim        if (BadFIs.count(FI))
2180309124Sdim          continue;
2181309124Sdim
2182309124Sdim        IndexType Index = IndexMap.getIndex(&In);
2183309124Sdim        if (Load) {
2184309124Sdim          if (LastStore[FI] == IndexType::None)
2185309124Sdim            LastStore[FI] = IndexType::Entry;
2186309124Sdim          LastLoad[FI] = Index;
2187309124Sdim        } else if (Store) {
2188309124Sdim          HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2189309124Sdim          if (LastStore[FI] != IndexType::None)
2190309124Sdim            RL.add(LastStore[FI], LastLoad[FI], false, false);
2191309124Sdim          else if (LastLoad[FI] != IndexType::None)
2192309124Sdim            RL.add(IndexType::Entry, LastLoad[FI], false, false);
2193309124Sdim          LastLoad[FI] = IndexType::None;
2194309124Sdim          LastStore[FI] = Index;
2195309124Sdim        } else {
2196309124Sdim          BadFIs.insert(FI);
2197309124Sdim        }
2198309124Sdim      }
2199309124Sdim    }
2200309124Sdim
2201309124Sdim    for (auto &I : LastLoad) {
2202309124Sdim      IndexType LL = I.second;
2203309124Sdim      if (LL == IndexType::None)
2204309124Sdim        continue;
2205309124Sdim      auto &RL = FIRangeMap[I.first].Map[&B];
2206309124Sdim      IndexType &LS = LastStore[I.first];
2207309124Sdim      if (LS != IndexType::None)
2208309124Sdim        RL.add(LS, LL, false, false);
2209309124Sdim      else
2210309124Sdim        RL.add(IndexType::Entry, LL, false, false);
2211309124Sdim      LS = IndexType::None;
2212309124Sdim    }
2213309124Sdim    for (auto &I : LastStore) {
2214309124Sdim      IndexType LS = I.second;
2215309124Sdim      if (LS == IndexType::None)
2216309124Sdim        continue;
2217309124Sdim      auto &RL = FIRangeMap[I.first].Map[&B];
2218309124Sdim      RL.add(LS, IndexType::None, false, false);
2219309124Sdim    }
2220309124Sdim  }
2221309124Sdim
2222341825Sdim  LLVM_DEBUG({
2223309124Sdim    for (auto &P : FIRangeMap) {
2224309124Sdim      dbgs() << "fi#" << P.first;
2225309124Sdim      if (BadFIs.count(P.first))
2226309124Sdim        dbgs() << " (bad)";
2227309124Sdim      dbgs() << "  RC: ";
2228309124Sdim      if (P.second.RC != nullptr)
2229309124Sdim        dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
2230309124Sdim      else
2231309124Sdim        dbgs() << "<null>\n";
2232309124Sdim      for (auto &R : P.second.Map)
2233327952Sdim        dbgs() << "  " << printMBBReference(*R.first) << " { " << R.second
2234327952Sdim               << "}\n";
2235309124Sdim    }
2236309124Sdim  });
2237309124Sdim
2238309124Sdim  // When a slot is loaded from in a block without being stored to in the
2239309124Sdim  // same block, it is live-on-entry to this block. To avoid CFG analysis,
2240309124Sdim  // consider this slot to be live-on-exit from all blocks.
2241309124Sdim  SmallSet<int,4> LoxFIs;
2242309124Sdim
2243309124Sdim  std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
2244309124Sdim
2245309124Sdim  for (auto &P : FIRangeMap) {
2246309124Sdim    // P = pair(FI, map: BB->RangeList)
2247309124Sdim    if (BadFIs.count(P.first))
2248309124Sdim      continue;
2249309124Sdim    for (auto &B : MF) {
2250309124Sdim      auto F = P.second.Map.find(&B);
2251309124Sdim      // F = pair(BB, RangeList)
2252309124Sdim      if (F == P.second.Map.end() || F->second.empty())
2253309124Sdim        continue;
2254309124Sdim      HexagonBlockRanges::IndexRange &IR = F->second.front();
2255309124Sdim      if (IR.start() == IndexType::Entry)
2256309124Sdim        LoxFIs.insert(P.first);
2257309124Sdim      BlockFIMap[&B].push_back(P.first);
2258309124Sdim    }
2259309124Sdim  }
2260309124Sdim
2261341825Sdim  LLVM_DEBUG({
2262309124Sdim    dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
2263309124Sdim    for (auto &P : BlockFIMap) {
2264309124Sdim      auto &FIs = P.second;
2265309124Sdim      if (FIs.empty())
2266309124Sdim        continue;
2267327952Sdim      dbgs() << "  " << printMBBReference(*P.first) << ": {";
2268309124Sdim      for (auto I : FIs) {
2269309124Sdim        dbgs() << " fi#" << I;
2270309124Sdim        if (LoxFIs.count(I))
2271309124Sdim          dbgs() << '*';
2272309124Sdim      }
2273309124Sdim      dbgs() << " }\n";
2274309124Sdim    }
2275309124Sdim  });
2276309124Sdim
2277314564Sdim#ifndef NDEBUG
2278314564Sdim  bool HasOptLimit = SpillOptMax.getPosition();
2279314564Sdim#endif
2280314564Sdim
2281309124Sdim  // eliminate loads, when all loads eliminated, eliminate all stores.
2282309124Sdim  for (auto &B : MF) {
2283309124Sdim    auto F = BlockIndexes.find(&B);
2284309124Sdim    assert(F != BlockIndexes.end());
2285309124Sdim    HexagonBlockRanges::InstrIndexMap &IM = F->second;
2286309124Sdim    HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
2287309124Sdim    HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
2288341825Sdim    LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n"
2289341825Sdim                      << HexagonBlockRanges::PrintRangeMap(DM, HRI));
2290309124Sdim
2291309124Sdim    for (auto FI : BlockFIMap[&B]) {
2292309124Sdim      if (BadFIs.count(FI))
2293309124Sdim        continue;
2294341825Sdim      LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n');
2295309124Sdim      HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2296309124Sdim      for (auto &Range : RL) {
2297341825Sdim        LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n');
2298309124Sdim        if (!IndexType::isInstr(Range.start()) ||
2299309124Sdim            !IndexType::isInstr(Range.end()))
2300309124Sdim          continue;
2301314564Sdim        MachineInstr &SI = *IM.getInstr(Range.start());
2302314564Sdim        MachineInstr &EI = *IM.getInstr(Range.end());
2303314564Sdim        assert(SI.mayStore() && "Unexpected start instruction");
2304314564Sdim        assert(EI.mayLoad() && "Unexpected end instruction");
2305314564Sdim        MachineOperand &SrcOp = SI.getOperand(2);
2306309124Sdim
2307309124Sdim        HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
2308309124Sdim                                                  SrcOp.getSubReg() };
2309314564Sdim        auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
2310309124Sdim        // The this-> is needed to unconfuse MSVC.
2311309124Sdim        unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
2312341825Sdim        LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
2313341825Sdim                          << '\n');
2314309124Sdim        if (FoundR == 0)
2315309124Sdim          continue;
2316314564Sdim#ifndef NDEBUG
2317314564Sdim        if (HasOptLimit) {
2318314564Sdim          if (SpillOptCount >= SpillOptMax)
2319314564Sdim            return;
2320314564Sdim          SpillOptCount++;
2321314564Sdim        }
2322314564Sdim#endif
2323309124Sdim
2324309124Sdim        // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
2325314564Sdim        MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;
2326309124Sdim        MachineInstr *CopyIn = nullptr;
2327309124Sdim        if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
2328314564Sdim          const DebugLoc &DL = SI.getDebugLoc();
2329309124Sdim          CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
2330321369Sdim                       .add(SrcOp);
2331309124Sdim        }
2332309124Sdim
2333309124Sdim        ++StartIt;
2334309124Sdim        // Check if this is a last store and the FI is live-on-exit.
2335309124Sdim        if (LoxFIs.count(FI) && (&Range == &RL.back())) {
2336309124Sdim          // Update store's source register.
2337309124Sdim          if (unsigned SR = SrcOp.getSubReg())
2338309124Sdim            SrcOp.setReg(HRI.getSubReg(FoundR, SR));
2339309124Sdim          else
2340309124Sdim            SrcOp.setReg(FoundR);
2341309124Sdim          SrcOp.setSubReg(0);
2342309124Sdim          // We are keeping this register live.
2343309124Sdim          SrcOp.setIsKill(false);
2344309124Sdim        } else {
2345314564Sdim          B.erase(&SI);
2346314564Sdim          IM.replaceInstr(&SI, CopyIn);
2347309124Sdim        }
2348309124Sdim
2349314564Sdim        auto EndIt = std::next(EI.getIterator());
2350309124Sdim        for (auto It = StartIt; It != EndIt; It = NextIt) {
2351314564Sdim          MachineInstr &MI = *It;
2352309124Sdim          NextIt = std::next(It);
2353309124Sdim          int TFI;
2354314564Sdim          if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
2355309124Sdim            continue;
2356360784Sdim          Register DstR = MI.getOperand(0).getReg();
2357314564Sdim          assert(MI.getOperand(0).getSubReg() == 0);
2358309124Sdim          MachineInstr *CopyOut = nullptr;
2359309124Sdim          if (DstR != FoundR) {
2360314564Sdim            DebugLoc DL = MI.getDebugLoc();
2361327952Sdim            unsigned MemSize = HII.getMemAccessSize(MI);
2362309124Sdim            assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
2363309124Sdim            unsigned CopyOpc = TargetOpcode::COPY;
2364314564Sdim            if (HII.isSignExtendingLoad(MI))
2365309124Sdim              CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
2366314564Sdim            else if (HII.isZeroExtendingLoad(MI))
2367309124Sdim              CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
2368309124Sdim            CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
2369314564Sdim                        .addReg(FoundR, getKillRegState(&MI == &EI));
2370309124Sdim          }
2371314564Sdim          IM.replaceInstr(&MI, CopyOut);
2372309124Sdim          B.erase(It);
2373309124Sdim        }
2374309124Sdim
2375309124Sdim        // Update the dead map.
2376309124Sdim        HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
2377309124Sdim        for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
2378309124Sdim          DM[RR].subtract(Range);
2379309124Sdim      } // for Range in range list
2380309124Sdim    }
2381309124Sdim  }
2382309124Sdim}
2383309124Sdim
2384288943Sdimvoid HexagonFrameLowering::expandAlloca(MachineInstr *AI,
2385288943Sdim      const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
2386288943Sdim  MachineBasicBlock &MB = *AI->getParent();
2387288943Sdim  DebugLoc DL = AI->getDebugLoc();
2388288943Sdim  unsigned A = AI->getOperand(2).getImm();
2389288943Sdim
2390288943Sdim  // Have
2391288943Sdim  //    Rd  = alloca Rs, #A
2392288943Sdim  //
2393288943Sdim  // If Rs and Rd are different registers, use this sequence:
2394288943Sdim  //    Rd  = sub(r29, Rs)
2395288943Sdim  //    r29 = sub(r29, Rs)
2396288943Sdim  //    Rd  = and(Rd, #-A)    ; if necessary
2397288943Sdim  //    r29 = and(r29, #-A)   ; if necessary
2398288943Sdim  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2399288943Sdim  // otherwise, do
2400288943Sdim  //    Rd  = sub(r29, Rs)
2401288943Sdim  //    Rd  = and(Rd, #-A)    ; if necessary
2402288943Sdim  //    r29 = Rd
2403288943Sdim  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2404288943Sdim
2405288943Sdim  MachineOperand &RdOp = AI->getOperand(0);
2406288943Sdim  MachineOperand &RsOp = AI->getOperand(1);
2407288943Sdim  unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
2408288943Sdim
2409288943Sdim  // Rd = sub(r29, Rs)
2410288943Sdim  BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
2411288943Sdim      .addReg(SP)
2412288943Sdim      .addReg(Rs);
2413288943Sdim  if (Rs != Rd) {
2414288943Sdim    // r29 = sub(r29, Rs)
2415288943Sdim    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
2416288943Sdim        .addReg(SP)
2417288943Sdim        .addReg(Rs);
2418249423Sdim  }
2419288943Sdim  if (A > 8) {
2420288943Sdim    // Rd  = and(Rd, #-A)
2421288943Sdim    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2422288943Sdim        .addReg(Rd)
2423288943Sdim        .addImm(-int64_t(A));
2424288943Sdim    if (Rs != Rd)
2425288943Sdim      BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
2426288943Sdim          .addReg(SP)
2427288943Sdim          .addImm(-int64_t(A));
2428288943Sdim  }
2429288943Sdim  if (Rs == Rd) {
2430288943Sdim    // r29 = Rd
2431288943Sdim    BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
2432288943Sdim        .addReg(Rd);
2433288943Sdim  }
2434288943Sdim  if (CF > 0) {
2435288943Sdim    // Rd = add(Rd, #CF)
2436288943Sdim    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
2437288943Sdim        .addReg(Rd)
2438288943Sdim        .addImm(CF);
2439288943Sdim  }
2440249423Sdim}
2441249423Sdim
2442288943Sdimbool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
2443314564Sdim  const MachineFrameInfo &MFI = MF.getFrameInfo();
2444314564Sdim  if (!MFI.hasVarSizedObjects())
2445288943Sdim    return false;
2446360784Sdim  // Do not check for max stack object alignment here, because the stack
2447360784Sdim  // may not be complete yet. Assume that we will need PS_aligna if there
2448360784Sdim  // are variable-sized objects.
2449288943Sdim  return true;
2450234285Sdim}
2451288943Sdim
2452296417Sdimconst MachineInstr *HexagonFrameLowering::getAlignaInstr(
2453296417Sdim      const MachineFunction &MF) const {
2454288943Sdim  for (auto &B : MF)
2455288943Sdim    for (auto &I : B)
2456314564Sdim      if (I.getOpcode() == Hexagon::PS_aligna)
2457288943Sdim        return &I;
2458288943Sdim  return nullptr;
2459288943Sdim}
2460288943Sdim
2461309124Sdim/// Adds all callee-saved registers as implicit uses or defs to the
2462309124Sdim/// instruction.
2463309124Sdimvoid HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
2464309124Sdim      const CSIVect &CSI, bool IsDef, bool IsKill) const {
2465309124Sdim  // Add the callee-saved registers as implicit uses.
2466309124Sdim  for (auto &R : CSI)
2467309124Sdim    MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
2468288943Sdim}
2469288943Sdim
2470288943Sdim/// Determine whether the callee-saved register saves and restores should
2471288943Sdim/// be generated via inline code. If this function returns "true", inline
2472288943Sdim/// code will be generated. If this function returns "false", additional
2473288943Sdim/// checks are performed, which may still lead to the inline code.
2474321369Sdimbool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
2475288943Sdim      const CSIVect &CSI) const {
2476288943Sdim  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
2477288943Sdim    return true;
2478321369Sdim  if (!hasFP(MF))
2479321369Sdim    return true;
2480288943Sdim  if (!isOptSize(MF) && !isMinSize(MF))
2481288943Sdim    if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
2482288943Sdim      return true;
2483288943Sdim
2484288943Sdim  // Check if CSI only has double registers, and if the registers form
2485288943Sdim  // a contiguous block starting from D8.
2486288943Sdim  BitVector Regs(Hexagon::NUM_TARGET_REGS);
2487288943Sdim  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
2488288943Sdim    unsigned R = CSI[i].getReg();
2489288943Sdim    if (!Hexagon::DoubleRegsRegClass.contains(R))
2490288943Sdim      return true;
2491288943Sdim    Regs[R] = true;
2492288943Sdim  }
2493288943Sdim  int F = Regs.find_first();
2494288943Sdim  if (F != Hexagon::D8)
2495288943Sdim    return true;
2496288943Sdim  while (F >= 0) {
2497288943Sdim    int N = Regs.find_next(F);
2498288943Sdim    if (N >= 0 && N != F+1)
2499288943Sdim      return true;
2500288943Sdim    F = N;
2501288943Sdim  }
2502288943Sdim
2503288943Sdim  return false;
2504288943Sdim}
2505288943Sdim
2506321369Sdimbool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
2507288943Sdim      const CSIVect &CSI) const {
2508288943Sdim  if (shouldInlineCSR(MF, CSI))
2509288943Sdim    return false;
2510288943Sdim  unsigned NumCSI = CSI.size();
2511288943Sdim  if (NumCSI <= 1)
2512288943Sdim    return false;
2513288943Sdim
2514288943Sdim  unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
2515288943Sdim                                     : SpillFuncThreshold;
2516288943Sdim  return Threshold < NumCSI;
2517288943Sdim}
2518288943Sdim
2519321369Sdimbool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
2520288943Sdim      const CSIVect &CSI) const {
2521288943Sdim  if (shouldInlineCSR(MF, CSI))
2522288943Sdim    return false;
2523309124Sdim  // The restore functions do a bit more than just restoring registers.
2524309124Sdim  // The non-returning versions will go back directly to the caller's
2525309124Sdim  // caller, others will clean up the stack frame in preparation for
2526309124Sdim  // a tail call. Using them can still save code size even if only one
2527309124Sdim  // register is getting restores. Make the decision based on -Oz:
2528309124Sdim  // using -Os will use inline restore for a single register.
2529309124Sdim  if (isMinSize(MF))
2530309124Sdim    return true;
2531288943Sdim  unsigned NumCSI = CSI.size();
2532309124Sdim  if (NumCSI <= 1)
2533309124Sdim    return false;
2534309124Sdim
2535288943Sdim  unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
2536288943Sdim                                     : SpillFuncThreshold;
2537288943Sdim  return Threshold < NumCSI;
2538288943Sdim}
2539314564Sdim
2540314564Sdimbool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
2541314564Sdim  unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);
2542314564Sdim  auto &HST = MF.getSubtarget<HexagonSubtarget>();
2543314564Sdim  // A fairly simplistic guess as to whether a potential load/store to a
2544321369Sdim  // stack location could require an extra register.
2545321369Sdim  if (HST.useHVXOps() && StackSize > 256)
2546321369Sdim    return true;
2547321369Sdim
2548321369Sdim  // Check if the function has store-immediate instructions that access
2549321369Sdim  // the stack. Since the offset field is not extendable, if the stack
2550321369Sdim  // size exceeds the offset limit (6 bits, shifted), the stores will
2551321369Sdim  // require a new base register.
2552321369Sdim  bool HasImmStack = false;
2553321369Sdim  unsigned MinLS = ~0u;   // Log_2 of the memory access size.
2554321369Sdim
2555321369Sdim  for (const MachineBasicBlock &B : MF) {
2556321369Sdim    for (const MachineInstr &MI : B) {
2557321369Sdim      unsigned LS = 0;
2558321369Sdim      switch (MI.getOpcode()) {
2559321369Sdim        case Hexagon::S4_storeirit_io:
2560321369Sdim        case Hexagon::S4_storeirif_io:
2561321369Sdim        case Hexagon::S4_storeiri_io:
2562321369Sdim          ++LS;
2563321369Sdim          LLVM_FALLTHROUGH;
2564321369Sdim        case Hexagon::S4_storeirht_io:
2565321369Sdim        case Hexagon::S4_storeirhf_io:
2566321369Sdim        case Hexagon::S4_storeirh_io:
2567321369Sdim          ++LS;
2568321369Sdim          LLVM_FALLTHROUGH;
2569321369Sdim        case Hexagon::S4_storeirbt_io:
2570321369Sdim        case Hexagon::S4_storeirbf_io:
2571321369Sdim        case Hexagon::S4_storeirb_io:
2572321369Sdim          if (MI.getOperand(0).isFI())
2573321369Sdim            HasImmStack = true;
2574321369Sdim          MinLS = std::min(MinLS, LS);
2575321369Sdim          break;
2576321369Sdim      }
2577321369Sdim    }
2578321369Sdim  }
2579321369Sdim
2580321369Sdim  if (HasImmStack)
2581321369Sdim    return !isUInt<6>(StackSize >> MinLS);
2582321369Sdim
2583314564Sdim  return false;
2584314564Sdim}
2585