HexagonFrameLowering.cpp revision 327952
1//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//
9//===----------------------------------------------------------------------===//
10
11#include "HexagonFrameLowering.h"
12#include "HexagonBlockRanges.h"
13#include "HexagonInstrInfo.h"
14#include "HexagonMachineFunctionInfo.h"
15#include "HexagonRegisterInfo.h"
16#include "HexagonSubtarget.h"
17#include "HexagonTargetMachine.h"
18#include "MCTargetDesc/HexagonBaseInfo.h"
19#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/None.h"
22#include "llvm/ADT/Optional.h"
23#include "llvm/ADT/PostOrderIterator.h"
24#include "llvm/ADT/SetVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/CodeGen/LivePhysRegs.h"
28#include "llvm/CodeGen/MachineBasicBlock.h"
29#include "llvm/CodeGen/MachineDominators.h"
30#include "llvm/CodeGen/MachineFrameInfo.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineFunctionPass.h"
33#include "llvm/CodeGen/MachineInstr.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineMemOperand.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineOperand.h"
38#include "llvm/CodeGen/MachinePostDominators.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/RegisterScavenging.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/Function.h"
45#include "llvm/MC/MCDwarf.h"
46#include "llvm/MC/MCRegisterInfo.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/CodeGen.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/Compiler.h"
51#include "llvm/Support/Debug.h"
52#include "llvm/Support/ErrorHandling.h"
53#include "llvm/Support/MathExtras.h"
54#include "llvm/Support/raw_ostream.h"
55#include "llvm/Target/TargetMachine.h"
56#include "llvm/Target/TargetOptions.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <limits>
62#include <map>
63#include <utility>
64#include <vector>
65
66#define DEBUG_TYPE "hexagon-pei"
67
68// Hexagon stack frame layout as defined by the ABI:
69//
70//                                                       Incoming arguments
71//                                                       passed via stack
72//                                                                      |
73//                                                                      |
74//        SP during function's                 FP during function's     |
75//    +-- runtime (top of stack)               runtime (bottom) --+     |
76//    |                                                           |     |
77// --++---------------------+------------------+-----------------++-+-------
78//   |  parameter area for  |  variable-size   |   fixed-size    |LR|  arg
79//   |   called functions   |  local objects   |  local objects  |FP|
80// --+----------------------+------------------+-----------------+--+-------
81//    <-    size known    -> <- size unknown -> <- size known  ->
82//
83// Low address                                                 High address
84//
85// <--- stack growth
86//
87//
88// - In any circumstances, the outgoing function arguments are always accessi-
89//   ble using the SP, and the incoming arguments are accessible using the FP.
90// - If the local objects are not aligned, they can always be accessed using
91//   the FP.
92// - If there are no variable-sized objects, the local objects can always be
93//   accessed using the SP, regardless whether they are aligned or not. (The
94//   alignment padding will be at the bottom of the stack (highest address),
95//   and so the offset with respect to the SP will be known at the compile-
96//   -time.)
97//
98// The only complication occurs if there are both, local aligned objects, and
99// dynamically allocated (variable-sized) objects. The alignment pad will be
100// placed between the FP and the local objects, thus preventing the use of the
101// FP to access the local objects. At the same time, the variable-sized objects
102// will be between the SP and the local objects, thus introducing an unknown
103// distance from the SP to the locals.
104//
105// To avoid this problem, a new register is created that holds the aligned
106// address of the bottom of the stack, referred in the sources as AP (aligned
107// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad
108// that aligns AP to the required boundary (a maximum of the alignments of
109// all stack objects, fixed- and variable-sized). All local objects[1] will
110// then use AP as the base pointer.
111// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
112// their name from being allocated at fixed locations on the stack, relative
113// to the FP. In the presence of dynamic allocation and local alignment, such
114// objects can only be accessed through the FP.
115//
116// Illustration of the AP:
117//                                                                FP --+
118//                                                                     |
119// ---------------+---------------------+-----+-----------------------++-+--
120//   Rest of the  | Local stack objects | Pad |  Fixed stack objects  |LR|
121//   stack frame  | (aligned)           |     |  (CSR, spills, etc.)  |FP|
122// ---------------+---------------------+-----+-----------------+-----+--+--
123//                                      |<-- Multiple of the -->|
124//                                           stack alignment    +-- AP
125//
126// The AP is set up at the beginning of the function. Since it is not a dedi-
127// cated (reserved) register, it needs to be kept live throughout the function
128// to be available as the base register for local object accesses.
129// Normally, an address of a stack objects is obtained by a pseudo-instruction
130// PS_fi. To access local objects with the AP register present, a different
131// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra
132// argument compared to PS_fi: the first input register is the AP register.
133// This keeps the register live between its definition and its uses.
134
135// The AP register is originally set up using pseudo-instruction PS_aligna:
136//   AP = PS_aligna A
137// where
138//   A  - required stack alignment
139// The alignment value must be the maximum of all alignments required by
140// any stack object.
141
142// The dynamic allocation uses a pseudo-instruction PS_alloca:
143//   Rd = PS_alloca Rs, A
144// where
145//   Rd - address of the allocated space
146//   Rs - minimum size (the actual allocated can be larger to accommodate
147//        alignment)
148//   A  - required alignment
149
150using namespace llvm;
151
152static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
153    cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
154
155static cl::opt<unsigned> NumberScavengerSlots("number-scavenger-slots",
156    cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2),
157    cl::ZeroOrMore);
158
159static cl::opt<int> SpillFuncThreshold("spill-func-threshold",
160    cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"),
161    cl::init(6), cl::ZeroOrMore);
162
163static cl::opt<int> SpillFuncThresholdOs("spill-func-threshold-Os",
164    cl::Hidden, cl::desc("Specify Os spill func threshold"),
165    cl::init(1), cl::ZeroOrMore);
166
167static cl::opt<bool> EnableStackOVFSanitizer("enable-stackovf-sanitizer",
168    cl::Hidden, cl::desc("Enable runtime checks for stack overflow."),
169    cl::init(false), cl::ZeroOrMore);
170
171static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
172    cl::init(true), cl::Hidden, cl::ZeroOrMore,
173    cl::desc("Enable stack frame shrink wrapping"));
174
175static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit",
176    cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore,
177    cl::desc("Max count of stack frame shrink-wraps"));
178
179static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
180    cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
181    cl::init(false), cl::ZeroOrMore);
182
183static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
184    cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
185
186static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
187    cl::init(true), cl::desc("Optimize spill slots"));
188
189#ifndef NDEBUG
190static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,
191    cl::init(std::numeric_limits<unsigned>::max()));
192static unsigned SpillOptCount = 0;
193#endif
194
195namespace llvm {
196
197  void initializeHexagonCallFrameInformationPass(PassRegistry&);
198  FunctionPass *createHexagonCallFrameInformation();
199
200} // end namespace llvm
201
202namespace {
203
204  class HexagonCallFrameInformation : public MachineFunctionPass {
205  public:
206    static char ID;
207
208    HexagonCallFrameInformation() : MachineFunctionPass(ID) {
209      PassRegistry &PR = *PassRegistry::getPassRegistry();
210      initializeHexagonCallFrameInformationPass(PR);
211    }
212
213    bool runOnMachineFunction(MachineFunction &MF) override;
214
215    MachineFunctionProperties getRequiredProperties() const override {
216      return MachineFunctionProperties().set(
217          MachineFunctionProperties::Property::NoVRegs);
218    }
219  };
220
221  char HexagonCallFrameInformation::ID = 0;
222
223} // end anonymous namespace
224
225bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
226  auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
227  bool NeedCFI = MF.getMMI().hasDebugInfo() ||
228                 MF.getFunction().needsUnwindTableEntry();
229
230  if (!NeedCFI)
231    return false;
232  HFI.insertCFIInstructions(MF);
233  return true;
234}
235
236INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
237                "Hexagon call frame information", false, false)
238
239FunctionPass *llvm::createHexagonCallFrameInformation() {
240  return new HexagonCallFrameInformation();
241}
242
243/// Map a register pair Reg to the subregister that has the greater "number",
244/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
245static unsigned getMax32BitSubRegister(unsigned Reg,
246                                       const TargetRegisterInfo &TRI,
247                                       bool hireg = true) {
248    if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
249      return Reg;
250
251    unsigned RegNo = 0;
252    for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
253      if (hireg) {
254        if (*SubRegs > RegNo)
255          RegNo = *SubRegs;
256      } else {
257        if (!RegNo || *SubRegs < RegNo)
258          RegNo = *SubRegs;
259      }
260    }
261    return RegNo;
262}
263
264/// Returns the callee saved register with the largest id in the vector.
265static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
266                                     const TargetRegisterInfo &TRI) {
267    static_assert(Hexagon::R1 > 0,
268                  "Assume physical registers are encoded as positive integers");
269    if (CSI.empty())
270      return 0;
271
272    unsigned Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
273    for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
274      unsigned Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);
275      if (Reg > Max)
276        Max = Reg;
277    }
278    return Max;
279}
280
281/// Checks if the basic block contains any instruction that needs a stack
282/// frame to be already in place.
283static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
284                            const HexagonRegisterInfo &HRI) {
285    for (auto &I : MBB) {
286      const MachineInstr *MI = &I;
287      if (MI->isCall())
288        return true;
289      unsigned Opc = MI->getOpcode();
290      switch (Opc) {
291        case Hexagon::PS_alloca:
292        case Hexagon::PS_aligna:
293          return true;
294        default:
295          break;
296      }
297      // Check individual operands.
298      for (const MachineOperand &MO : MI->operands()) {
299        // While the presence of a frame index does not prove that a stack
300        // frame will be required, all frame indexes should be within alloc-
301        // frame/deallocframe. Otherwise, the code that translates a frame
302        // index into an offset would have to be aware of the placement of
303        // the frame creation/destruction instructions.
304        if (MO.isFI())
305          return true;
306        if (MO.isReg()) {
307          unsigned R = MO.getReg();
308          // Virtual registers will need scavenging, which then may require
309          // a stack slot.
310          if (TargetRegisterInfo::isVirtualRegister(R))
311            return true;
312          for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
313            if (CSR[*S])
314              return true;
315          continue;
316        }
317        if (MO.isRegMask()) {
318          // A regmask would normally have all callee-saved registers marked
319          // as preserved, so this check would not be needed, but in case of
320          // ever having other regmasks (for other calling conventions),
321          // make sure they would be processed correctly.
322          const uint32_t *BM = MO.getRegMask();
323          for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
324            unsigned R = x;
325            // If this regmask does not preserve a CSR, a frame will be needed.
326            if (!(BM[R/32] & (1u << (R%32))))
327              return true;
328          }
329        }
330      }
331    }
332    return false;
333}
334
335  /// Returns true if MBB has a machine instructions that indicates a tail call
336  /// in the block.
337static bool hasTailCall(const MachineBasicBlock &MBB) {
338    MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
339    if (I == MBB.end())
340      return false;
341    unsigned RetOpc = I->getOpcode();
342    return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;
343}
344
345/// Returns true if MBB contains an instruction that returns.
346static bool hasReturn(const MachineBasicBlock &MBB) {
347    for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
348      if (I->isReturn())
349        return true;
350    return false;
351}
352
353/// Returns the "return" instruction from this block, or nullptr if there
354/// isn't any.
355static MachineInstr *getReturn(MachineBasicBlock &MBB) {
356    for (auto &I : MBB)
357      if (I.isReturn())
358        return &I;
359    return nullptr;
360}
361
362static bool isRestoreCall(unsigned Opc) {
363    switch (Opc) {
364      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
365      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
366      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:
367      case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:
368      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:
369      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:
370      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
371      case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
372        return true;
373    }
374    return false;
375}
376
377static inline bool isOptNone(const MachineFunction &MF) {
378    return MF.getFunction().hasFnAttribute(Attribute::OptimizeNone) ||
379           MF.getTarget().getOptLevel() == CodeGenOpt::None;
380}
381
382static inline bool isOptSize(const MachineFunction &MF) {
383    const Function &F = MF.getFunction();
384    return F.optForSize() && !F.optForMinSize();
385}
386
387static inline bool isMinSize(const MachineFunction &MF) {
388    return MF.getFunction().optForMinSize();
389}
390
391/// Implements shrink-wrapping of the stack frame. By default, stack frame
392/// is created in the function entry block, and is cleaned up in every block
393/// that returns. This function finds alternate blocks: one for the frame
394/// setup (prolog) and one for the cleanup (epilog).
395void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
396      MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
397  static unsigned ShrinkCounter = 0;
398
399  if (ShrinkLimit.getPosition()) {
400    if (ShrinkCounter >= ShrinkLimit)
401      return;
402    ShrinkCounter++;
403  }
404
405  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
406
407  MachineDominatorTree MDT;
408  MDT.runOnMachineFunction(MF);
409  MachinePostDominatorTree MPT;
410  MPT.runOnMachineFunction(MF);
411
412  using UnsignedMap = DenseMap<unsigned, unsigned>;
413  using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
414
415  UnsignedMap RPO;
416  RPOTType RPOT(&MF);
417  unsigned RPON = 0;
418  for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
419    RPO[(*I)->getNumber()] = RPON++;
420
421  // Don't process functions that have loops, at least for now. Placement
422  // of prolog and epilog must take loop structure into account. For simpli-
423  // city don't do it right now.
424  for (auto &I : MF) {
425    unsigned BN = RPO[I.getNumber()];
426    for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
427      // If found a back-edge, return.
428      if (RPO[(*SI)->getNumber()] <= BN)
429        return;
430    }
431  }
432
433  // Collect the set of blocks that need a stack frame to execute. Scan
434  // each block for uses/defs of callee-saved registers, calls, etc.
435  SmallVector<MachineBasicBlock*,16> SFBlocks;
436  BitVector CSR(Hexagon::NUM_TARGET_REGS);
437  for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
438    for (MCSubRegIterator S(*P, &HRI, true); S.isValid(); ++S)
439      CSR[*S] = true;
440
441  for (auto &I : MF)
442    if (needsStackFrame(I, CSR, HRI))
443      SFBlocks.push_back(&I);
444
445  DEBUG({
446    dbgs() << "Blocks needing SF: {";
447    for (auto &B : SFBlocks)
448      dbgs() << " " << printMBBReference(*B);
449    dbgs() << " }\n";
450  });
451  // No frame needed?
452  if (SFBlocks.empty())
453    return;
454
455  // Pick a common dominator and a common post-dominator.
456  MachineBasicBlock *DomB = SFBlocks[0];
457  for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
458    DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
459    if (!DomB)
460      break;
461  }
462  MachineBasicBlock *PDomB = SFBlocks[0];
463  for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
464    PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
465    if (!PDomB)
466      break;
467  }
468  DEBUG({
469    dbgs() << "Computed dom block: ";
470    if (DomB)
471      dbgs() << printMBBReference(*DomB);
472    else
473      dbgs() << "<null>";
474    dbgs() << ", computed pdom block: ";
475    if (PDomB)
476      dbgs() << printMBBReference(*PDomB);
477    else
478      dbgs() << "<null>";
479    dbgs() << "\n";
480  });
481  if (!DomB || !PDomB)
482    return;
483
484  // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
485  if (!MDT.dominates(DomB, PDomB)) {
486    DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
487    return;
488  }
489  if (!MPT.dominates(PDomB, DomB)) {
490    DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
491    return;
492  }
493
494  // Finally, everything seems right.
495  PrologB = DomB;
496  EpilogB = PDomB;
497}
498
499/// Perform most of the PEI work here:
500/// - saving/restoring of the callee-saved registers,
501/// - stack frame creation and destruction.
502/// Normally, this work is distributed among various functions, but doing it
503/// in one place allows shrink-wrapping of the stack frame.
504void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
505                                        MachineBasicBlock &MBB) const {
506  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
507
508  MachineFrameInfo &MFI = MF.getFrameInfo();
509  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
510
511  MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
512  if (EnableShrinkWrapping)
513    findShrunkPrologEpilog(MF, PrologB, EpilogB);
514
515  bool PrologueStubs = false;
516  insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
517  insertPrologueInBlock(*PrologB, PrologueStubs);
518  updateEntryPaths(MF, *PrologB);
519
520  if (EpilogB) {
521    insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
522    insertEpilogueInBlock(*EpilogB);
523  } else {
524    for (auto &B : MF)
525      if (B.isReturnBlock())
526        insertCSRRestoresInBlock(B, CSI, HRI);
527
528    for (auto &B : MF)
529      if (B.isReturnBlock())
530        insertEpilogueInBlock(B);
531
532    for (auto &B : MF) {
533      if (B.empty())
534        continue;
535      MachineInstr *RetI = getReturn(B);
536      if (!RetI || isRestoreCall(RetI->getOpcode()))
537        continue;
538      for (auto &R : CSI)
539        RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
540    }
541  }
542
543  if (EpilogB) {
544    // If there is an epilog block, it may not have a return instruction.
545    // In such case, we need to add the callee-saved registers as live-ins
546    // in all blocks on all paths from the epilog to any return block.
547    unsigned MaxBN = MF.getNumBlockIDs();
548    BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
549    updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);
550  }
551}
552
553void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
554      bool PrologueStubs) const {
555  MachineFunction &MF = *MBB.getParent();
556  MachineFrameInfo &MFI = MF.getFrameInfo();
557  auto &HST = MF.getSubtarget<HexagonSubtarget>();
558  auto &HII = *HST.getInstrInfo();
559  auto &HRI = *HST.getRegisterInfo();
560
561  unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
562
563  // Calculate the total stack frame size.
564  // Get the number of bytes to allocate from the FrameInfo.
565  unsigned FrameSize = MFI.getStackSize();
566  // Round up the max call frame size to the max alignment on the stack.
567  unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);
568  MFI.setMaxCallFrameSize(MaxCFA);
569
570  FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
571  MFI.setStackSize(FrameSize);
572
573  bool AlignStack = (MaxAlign > getStackAlignment());
574
575  // Get the number of bytes to allocate from the FrameInfo.
576  unsigned NumBytes = MFI.getStackSize();
577  unsigned SP = HRI.getStackRegister();
578  unsigned MaxCF = MFI.getMaxCallFrameSize();
579  MachineBasicBlock::iterator InsertPt = MBB.begin();
580
581  SmallVector<MachineInstr *, 4> AdjustRegs;
582  for (auto &MBB : MF)
583    for (auto &MI : MBB)
584      if (MI.getOpcode() == Hexagon::PS_alloca)
585        AdjustRegs.push_back(&MI);
586
587  for (auto MI : AdjustRegs) {
588    assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");
589    expandAlloca(MI, HII, SP, MaxCF);
590    MI->eraseFromParent();
591  }
592
593  DebugLoc dl = MBB.findDebugLoc(InsertPt);
594
595  if (hasFP(MF)) {
596    insertAllocframe(MBB, InsertPt, NumBytes);
597    if (AlignStack) {
598      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
599          .addReg(SP)
600          .addImm(-int64_t(MaxAlign));
601    }
602    // If the stack-checking is enabled, and we spilled the callee-saved
603    // registers inline (i.e. did not use a spill function), then call
604    // the stack checker directly.
605    if (EnableStackOVFSanitizer && !PrologueStubs)
606      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
607             .addExternalSymbol("__runtime_stack_check");
608  } else if (NumBytes > 0) {
609    assert(alignTo(NumBytes, 8) == NumBytes);
610    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
611      .addReg(SP)
612      .addImm(-int(NumBytes));
613  }
614}
615
616void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
617  MachineFunction &MF = *MBB.getParent();
618  auto &HST = MF.getSubtarget<HexagonSubtarget>();
619  auto &HII = *HST.getInstrInfo();
620  auto &HRI = *HST.getRegisterInfo();
621  unsigned SP = HRI.getStackRegister();
622
623  MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
624  DebugLoc dl = MBB.findDebugLoc(InsertPt);
625
626  if (!hasFP(MF)) {
627    MachineFrameInfo &MFI = MF.getFrameInfo();
628    if (unsigned NumBytes = MFI.getStackSize()) {
629      BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
630        .addReg(SP)
631        .addImm(NumBytes);
632    }
633    return;
634  }
635
636  MachineInstr *RetI = getReturn(MBB);
637  unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
638
639  // Handle EH_RETURN.
640  if (RetOpc == Hexagon::EH_RETURN_JMPR) {
641    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
642        .addDef(Hexagon::D15)
643        .addReg(Hexagon::R30);
644    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
645        .addReg(SP)
646        .addReg(Hexagon::R28);
647    return;
648  }
649
650  // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
651  // frame instruction if we encounter it.
652  if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
653      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||
654      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||
655      RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) {
656    MachineBasicBlock::iterator It = RetI;
657    ++It;
658    // Delete all instructions after the RESTORE (except labels).
659    while (It != MBB.end()) {
660      if (!It->isLabel())
661        It = MBB.erase(It);
662      else
663        ++It;
664    }
665    return;
666  }
667
668  // It is possible that the restoring code is a call to a library function.
669  // All of the restore* functions include "deallocframe", so we need to make
670  // sure that we don't add an extra one.
671  bool NeedsDeallocframe = true;
672  if (!MBB.empty() && InsertPt != MBB.begin()) {
673    MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
674    unsigned COpc = PrevIt->getOpcode();
675    if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
676        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||
677        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
678        COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
679        COpc == Hexagon::PS_call_nr || COpc == Hexagon::PS_callr_nr)
680      NeedsDeallocframe = false;
681  }
682
683  if (!NeedsDeallocframe)
684    return;
685  // If the returning instruction is PS_jmpret, replace it with dealloc_return,
686  // otherwise just add deallocframe. The function could be returning via a
687  // tail call.
688  if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
689    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
690      .addDef(Hexagon::D15)
691      .addReg(Hexagon::R30);
692    return;
693  }
694  unsigned NewOpc = Hexagon::L4_return;
695  MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
696      .addDef(Hexagon::D15)
697      .addReg(Hexagon::R30);
698  // Transfer the function live-out registers.
699  NewI->copyImplicitOps(MF, *RetI);
700  MBB.erase(RetI);
701}
702
703void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
704      MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
705  MachineFunction &MF = *MBB.getParent();
706  auto &HST = MF.getSubtarget<HexagonSubtarget>();
707  auto &HII = *HST.getInstrInfo();
708  auto &HRI = *HST.getRegisterInfo();
709
710  // Check for overflow.
711  // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
712  const unsigned int ALLOCFRAME_MAX = 16384;
713
714  // Create a dummy memory operand to avoid allocframe from being treated as
715  // a volatile memory reference.
716  auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
717                                      MachineMemOperand::MOStore, 4, 4);
718
719  DebugLoc dl = MBB.findDebugLoc(InsertPt);
720  unsigned SP = HRI.getStackRegister();
721
722  if (NumBytes >= ALLOCFRAME_MAX) {
723    // Emit allocframe(#0).
724    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
725      .addDef(SP)
726      .addReg(SP)
727      .addImm(0)
728      .addMemOperand(MMO);
729
730    // Subtract the size from the stack pointer.
731    unsigned SP = HRI.getStackRegister();
732    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
733      .addReg(SP)
734      .addImm(-int(NumBytes));
735  } else {
736    BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
737      .addDef(SP)
738      .addReg(SP)
739      .addImm(NumBytes)
740      .addMemOperand(MMO);
741  }
742}
743
744void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
745      MachineBasicBlock &SaveB) const {
746  SetVector<unsigned> Worklist;
747
748  MachineBasicBlock &EntryB = MF.front();
749  Worklist.insert(EntryB.getNumber());
750
751  unsigned SaveN = SaveB.getNumber();
752  auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();
753
754  for (unsigned i = 0; i < Worklist.size(); ++i) {
755    unsigned BN = Worklist[i];
756    MachineBasicBlock &MBB = *MF.getBlockNumbered(BN);
757    for (auto &R : CSI)
758      if (!MBB.isLiveIn(R.getReg()))
759        MBB.addLiveIn(R.getReg());
760    if (BN != SaveN)
761      for (auto &SB : MBB.successors())
762        Worklist.insert(SB->getNumber());
763  }
764}
765
766bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
767      MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,
768      BitVector &Path) const {
769  assert(MBB.getNumber() >= 0);
770  unsigned BN = MBB.getNumber();
771  if (Path[BN] || DoneF[BN])
772    return false;
773  if (DoneT[BN])
774    return true;
775
776  auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();
777
778  Path[BN] = true;
779  bool ReachedExit = false;
780  for (auto &SB : MBB.successors())
781    ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);
782
783  if (!MBB.empty() && MBB.back().isReturn()) {
784    // Add implicit uses of all callee-saved registers to the reached
785    // return instructions. This is to prevent the anti-dependency breaker
786    // from renaming these registers.
787    MachineInstr &RetI = MBB.back();
788    if (!isRestoreCall(RetI.getOpcode()))
789      for (auto &R : CSI)
790        RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
791    ReachedExit = true;
792  }
793
794  // We don't want to add unnecessary live-ins to the restore block: since
795  // the callee-saved registers are being defined in it, the entry of the
796  // restore block cannot be on the path from the definitions to any exit.
797  if (ReachedExit && &MBB != &RestoreB) {
798    for (auto &R : CSI)
799      if (!MBB.isLiveIn(R.getReg()))
800        MBB.addLiveIn(R.getReg());
801    DoneT[BN] = true;
802  }
803  if (!ReachedExit)
804    DoneF[BN] = true;
805
806  Path[BN] = false;
807  return ReachedExit;
808}
809
810static Optional<MachineBasicBlock::iterator>
811findCFILocation(MachineBasicBlock &B) {
812    // The CFI instructions need to be inserted right after allocframe.
813    // An exception to this is a situation where allocframe is bundled
814    // with a call: then the CFI instructions need to be inserted before
815    // the packet with the allocframe+call (in case the call throws an
816    // exception).
817    auto End = B.instr_end();
818
819    for (MachineInstr &I : B) {
820      MachineBasicBlock::iterator It = I.getIterator();
821      if (!I.isBundle()) {
822        if (I.getOpcode() == Hexagon::S2_allocframe)
823          return std::next(It);
824        continue;
825      }
826      // I is a bundle.
827      bool HasCall = false, HasAllocFrame = false;
828      auto T = It.getInstrIterator();
829      while (++T != End && T->isBundled()) {
830        if (T->getOpcode() == Hexagon::S2_allocframe)
831          HasAllocFrame = true;
832        else if (T->isCall())
833          HasCall = true;
834      }
835      if (HasAllocFrame)
836        return HasCall ? It : std::next(It);
837    }
838    return None;
839}
840
841void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
842  for (auto &B : MF) {
843    auto At = findCFILocation(B);
844    if (At.hasValue())
845      insertCFIInstructionsAt(B, At.getValue());
846  }
847}
848
849void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
850      MachineBasicBlock::iterator At) const {
851  MachineFunction &MF = *MBB.getParent();
852  MachineFrameInfo &MFI = MF.getFrameInfo();
853  MachineModuleInfo &MMI = MF.getMMI();
854  auto &HST = MF.getSubtarget<HexagonSubtarget>();
855  auto &HII = *HST.getInstrInfo();
856  auto &HRI = *HST.getRegisterInfo();
857
858  // If CFI instructions have debug information attached, something goes
859  // wrong with the final assembly generation: the prolog_end is placed
860  // in a wrong location.
861  DebugLoc DL;
862  const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
863
864  MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
865  bool HasFP = hasFP(MF);
866
867  if (HasFP) {
868    unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
869    unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
870
871    // Define CFA via an offset from the value of FP.
872    //
873    //  -8   -4    0 (SP)
874    // --+----+----+---------------------
875    //   | FP | LR |          increasing addresses -->
876    // --+----+----+---------------------
877    //   |         +-- Old SP (before allocframe)
878    //   +-- New FP (after allocframe)
879    //
880    // MCCFIInstruction::createDefCfa subtracts the offset from the register.
881    // MCCFIInstruction::createOffset takes the offset without sign change.
882    auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
883    BuildMI(MBB, At, DL, CFID)
884        .addCFIIndex(MF.addFrameInst(DefCfa));
885    // R31 (return addr) = CFA - 4
886    auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
887    BuildMI(MBB, At, DL, CFID)
888        .addCFIIndex(MF.addFrameInst(OffR31));
889    // R30 (frame ptr) = CFA - 8
890    auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
891    BuildMI(MBB, At, DL, CFID)
892        .addCFIIndex(MF.addFrameInst(OffR30));
893  }
894
895  static unsigned int RegsToMove[] = {
896    Hexagon::R1,  Hexagon::R0,  Hexagon::R3,  Hexagon::R2,
897    Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
898    Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
899    Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
900    Hexagon::D0,  Hexagon::D1,  Hexagon::D8,  Hexagon::D9,
901    Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
902    Hexagon::NoRegister
903  };
904
905  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
906
907  for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
908    unsigned Reg = RegsToMove[i];
909    auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
910      return C.getReg() == Reg;
911    };
912    auto F = find_if(CSI, IfR);
913    if (F == CSI.end())
914      continue;
915
916    int64_t Offset;
917    if (HasFP) {
918      // If the function has a frame pointer (i.e. has an allocframe),
919      // then the CFA has been defined in terms of FP. Any offsets in
920      // the following CFI instructions have to be defined relative
921      // to FP, which points to the bottom of the stack frame.
922      // The function getFrameIndexReference can still choose to use SP
923      // for the offset calculation, so we cannot simply call it here.
924      // Instead, get the offset (relative to the FP) directly.
925      Offset = MFI.getObjectOffset(F->getFrameIdx());
926    } else {
927      unsigned FrameReg;
928      Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg);
929    }
930    // Subtract 8 to make room for R30 and R31, which are added above.
931    Offset -= 8;
932
933    if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
934      unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
935      auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
936                                                   Offset);
937      BuildMI(MBB, At, DL, CFID)
938          .addCFIIndex(MF.addFrameInst(OffReg));
939    } else {
940      // Split the double regs into subregs, and generate appropriate
941      // cfi_offsets.
942      // The only reason, we are split double regs is, llvm-mc does not
943      // understand paired registers for cfi_offset.
944      // Eg .cfi_offset r1:0, -64
945
946      unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
947      unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
948      unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
949      unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
950      auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
951                                                  Offset+4);
952      BuildMI(MBB, At, DL, CFID)
953          .addCFIIndex(MF.addFrameInst(OffHi));
954      auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
955                                                  Offset);
956      BuildMI(MBB, At, DL, CFID)
957          .addCFIIndex(MF.addFrameInst(OffLo));
958    }
959  }
960}
961
962bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
963  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
964    return false;
965
966  auto &MFI = MF.getFrameInfo();
967  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
968  bool HasExtraAlign = HRI.needsStackRealignment(MF);
969  bool HasAlloca = MFI.hasVarSizedObjects();
970
971  // Insert ALLOCFRAME if we need to or at -O0 for the debugger.  Think
972  // that this shouldn't be required, but doing so now because gcc does and
973  // gdb can't break at the start of the function without it.  Will remove if
974  // this turns out to be a gdb bug.
975  //
976  if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
977    return true;
978
979  // By default we want to use SP (since it's always there). FP requires
980  // some setup (i.e. ALLOCFRAME).
981  // Both, alloca and stack alignment modify the stack pointer by an
982  // undetermined value, so we need to save it at the entry to the function
983  // (i.e. use allocframe).
984  if (HasAlloca || HasExtraAlign)
985    return true;
986
987  if (MFI.getStackSize() > 0) {
988    // If FP-elimination is disabled, we have to use FP at this point.
989    const TargetMachine &TM = MF.getTarget();
990    if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
991      return true;
992    if (EnableStackOVFSanitizer)
993      return true;
994  }
995
996  const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
997  if (MFI.hasCalls() || HMFI.hasClobberLR())
998    return true;
999
1000  return false;
1001}
1002
1003enum SpillKind {
1004  SK_ToMem,
1005  SK_FromMem,
1006  SK_FromMemTailcall
1007};
1008
1009static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
1010      bool Stkchk = false) {
1011  const char * V4SpillToMemoryFunctions[] = {
1012    "__save_r16_through_r17",
1013    "__save_r16_through_r19",
1014    "__save_r16_through_r21",
1015    "__save_r16_through_r23",
1016    "__save_r16_through_r25",
1017    "__save_r16_through_r27" };
1018
1019  const char * V4SpillToMemoryStkchkFunctions[] = {
1020    "__save_r16_through_r17_stkchk",
1021    "__save_r16_through_r19_stkchk",
1022    "__save_r16_through_r21_stkchk",
1023    "__save_r16_through_r23_stkchk",
1024    "__save_r16_through_r25_stkchk",
1025    "__save_r16_through_r27_stkchk" };
1026
1027  const char * V4SpillFromMemoryFunctions[] = {
1028    "__restore_r16_through_r17_and_deallocframe",
1029    "__restore_r16_through_r19_and_deallocframe",
1030    "__restore_r16_through_r21_and_deallocframe",
1031    "__restore_r16_through_r23_and_deallocframe",
1032    "__restore_r16_through_r25_and_deallocframe",
1033    "__restore_r16_through_r27_and_deallocframe" };
1034
1035  const char * V4SpillFromMemoryTailcallFunctions[] = {
1036    "__restore_r16_through_r17_and_deallocframe_before_tailcall",
1037    "__restore_r16_through_r19_and_deallocframe_before_tailcall",
1038    "__restore_r16_through_r21_and_deallocframe_before_tailcall",
1039    "__restore_r16_through_r23_and_deallocframe_before_tailcall",
1040    "__restore_r16_through_r25_and_deallocframe_before_tailcall",
1041    "__restore_r16_through_r27_and_deallocframe_before_tailcall"
1042  };
1043
1044  const char **SpillFunc = nullptr;
1045
1046  switch(SpillType) {
1047  case SK_ToMem:
1048    SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions
1049                       : V4SpillToMemoryFunctions;
1050    break;
1051  case SK_FromMem:
1052    SpillFunc = V4SpillFromMemoryFunctions;
1053    break;
1054  case SK_FromMemTailcall:
1055    SpillFunc = V4SpillFromMemoryTailcallFunctions;
1056    break;
1057  }
1058  assert(SpillFunc && "Unknown spill kind");
1059
1060  // Spill all callee-saved registers up to the highest register used.
1061  switch (MaxReg) {
1062  case Hexagon::R17:
1063    return SpillFunc[0];
1064  case Hexagon::R19:
1065    return SpillFunc[1];
1066  case Hexagon::R21:
1067    return SpillFunc[2];
1068  case Hexagon::R23:
1069    return SpillFunc[3];
1070  case Hexagon::R25:
1071    return SpillFunc[4];
1072  case Hexagon::R27:
1073    return SpillFunc[5];
1074  default:
1075    llvm_unreachable("Unhandled maximum callee save register");
1076  }
1077  return nullptr;
1078}
1079
1080int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1081      int FI, unsigned &FrameReg) const {
1082  auto &MFI = MF.getFrameInfo();
1083  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1084
1085  int Offset = MFI.getObjectOffset(FI);
1086  bool HasAlloca = MFI.hasVarSizedObjects();
1087  bool HasExtraAlign = HRI.needsStackRealignment(MF);
1088  bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
1089
1090  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1091  unsigned FrameSize = MFI.getStackSize();
1092  unsigned SP = HRI.getStackRegister();
1093  unsigned FP = HRI.getFrameRegister();
1094  unsigned AP = HMFI.getStackAlignBasePhysReg();
1095  // It may happen that AP will be absent even HasAlloca && HasExtraAlign
1096  // is true. HasExtraAlign may be set because of vector spills, without
1097  // aligned locals or aligned outgoing function arguments. Since vector
1098  // spills will ultimately be "unaligned", it is safe to use FP as the
1099  // base register.
1100  // In fact, in such a scenario the stack is actually not required to be
1101  // aligned, although it may end up being aligned anyway, since this
1102  // particular case is not easily detectable. The alignment will be
1103  // unnecessary, but not incorrect.
1104  // Unfortunately there is no quick way to verify that the above is
1105  // indeed the case (and that it's not a result of an error), so just
1106  // assume that missing AP will be replaced by FP.
1107  // (A better fix would be to rematerialize AP from FP and always align
1108  // vector spills.)
1109  if (AP == 0)
1110    AP = FP;
1111
1112  bool UseFP = false, UseAP = false;  // Default: use SP (except at -O0).
1113  // Use FP at -O0, except when there are objects with extra alignment.
1114  // That additional alignment requirement may cause a pad to be inserted,
1115  // which will make it impossible to use FP to access objects located
1116  // past the pad.
1117  if (NoOpt && !HasExtraAlign)
1118    UseFP = true;
1119  if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
1120    // Fixed and preallocated objects will be located before any padding
1121    // so FP must be used to access them.
1122    UseFP |= (HasAlloca || HasExtraAlign);
1123  } else {
1124    if (HasAlloca) {
1125      if (HasExtraAlign)
1126        UseAP = true;
1127      else
1128        UseFP = true;
1129    }
1130  }
1131
1132  // If FP was picked, then there had better be FP.
1133  bool HasFP = hasFP(MF);
1134  assert((HasFP || !UseFP) && "This function must have frame pointer");
1135
1136  // Having FP implies allocframe. Allocframe will store extra 8 bytes:
1137  // FP/LR. If the base register is used to access an object across these
1138  // 8 bytes, then the offset will need to be adjusted by 8.
1139  //
1140  // After allocframe:
1141  //                    HexagonISelLowering adds 8 to ---+
1142  //                    the offsets of all stack-based   |
1143  //                    arguments (*)                    |
1144  //                                                     |
1145  //   getObjectOffset < 0   0     8  getObjectOffset >= 8
1146  // ------------------------+-----+------------------------> increasing
1147  //     <local objects>     |FP/LR|    <input arguments>     addresses
1148  // -----------------+------+-----+------------------------>
1149  //                  |      |
1150  //    SP/AP point --+      +-- FP points here (**)
1151  //    somewhere on
1152  //    this side of FP/LR
1153  //
1154  // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
1155  // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
1156
1157  // The lowering assumes that FP/LR is present, and so the offsets of
1158  // the formal arguments start at 8. If FP/LR is not there we need to
1159  // reduce the offset by 8.
1160  if (Offset > 0 && !HasFP)
1161    Offset -= 8;
1162
1163  if (UseFP)
1164    FrameReg = FP;
1165  else if (UseAP)
1166    FrameReg = AP;
1167  else
1168    FrameReg = SP;
1169
1170  // Calculate the actual offset in the instruction. If there is no FP
1171  // (in other words, no allocframe), then SP will not be adjusted (i.e.
1172  // there will be no SP -= FrameSize), so the frame size should not be
1173  // added to the calculated offset.
1174  int RealOffset = Offset;
1175  if (!UseFP && !UseAP)
1176    RealOffset = FrameSize+Offset;
1177  return RealOffset;
1178}
1179
1180bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
1181      const CSIVect &CSI, const HexagonRegisterInfo &HRI,
1182      bool &PrologueStubs) const {
1183  if (CSI.empty())
1184    return true;
1185
1186  MachineBasicBlock::iterator MI = MBB.begin();
1187  PrologueStubs = false;
1188  MachineFunction &MF = *MBB.getParent();
1189  auto &HST = MF.getSubtarget<HexagonSubtarget>();
1190  auto &HII = *HST.getInstrInfo();
1191
1192  if (useSpillFunction(MF, CSI)) {
1193    PrologueStubs = true;
1194    unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
1195    bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
1196    const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
1197                                               StkOvrFlowEnabled);
1198    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1199    bool IsPIC = HTM.isPositionIndependent();
1200    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1201
1202    // Call spill function.
1203    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1204    unsigned SpillOpc;
1205    if (StkOvrFlowEnabled) {
1206      if (LongCalls)
1207        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC
1208                         : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
1209      else
1210        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
1211                         : Hexagon::SAVE_REGISTERS_CALL_V4STK;
1212    } else {
1213      if (LongCalls)
1214        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC
1215                         : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
1216      else
1217        SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
1218                         : Hexagon::SAVE_REGISTERS_CALL_V4;
1219    }
1220
1221    MachineInstr *SaveRegsCall =
1222        BuildMI(MBB, MI, DL, HII.get(SpillOpc))
1223          .addExternalSymbol(SpillFun);
1224
1225    // Add callee-saved registers as use.
1226    addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
1227    // Add live in registers.
1228    for (unsigned I = 0; I < CSI.size(); ++I)
1229      MBB.addLiveIn(CSI[I].getReg());
1230    return true;
1231  }
1232
1233  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1234    unsigned Reg = CSI[i].getReg();
1235    // Add live in registers. We treat eh_return callee saved register r0 - r3
1236    // specially. They are not really callee saved registers as they are not
1237    // supposed to be killed.
1238    bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
1239    int FI = CSI[i].getFrameIdx();
1240    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1241    HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
1242    if (IsKill)
1243      MBB.addLiveIn(Reg);
1244  }
1245  return true;
1246}
1247
1248bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
1249      const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
1250  if (CSI.empty())
1251    return false;
1252
1253  MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
1254  MachineFunction &MF = *MBB.getParent();
1255  auto &HST = MF.getSubtarget<HexagonSubtarget>();
1256  auto &HII = *HST.getInstrInfo();
1257
1258  if (useRestoreFunction(MF, CSI)) {
1259    bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
1260    unsigned MaxR = getMaxCalleeSavedReg(CSI, HRI);
1261    SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;
1262    const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
1263    auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1264    bool IsPIC = HTM.isPositionIndependent();
1265    bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1266
1267    // Call spill function.
1268    DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
1269                                  : MBB.getLastNonDebugInstr()->getDebugLoc();
1270    MachineInstr *DeallocCall = nullptr;
1271
1272    if (HasTC) {
1273      unsigned RetOpc;
1274      if (LongCalls)
1275        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC
1276                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;
1277      else
1278        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
1279                       : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
1280      DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))
1281          .addExternalSymbol(RestoreFn);
1282    } else {
1283      // The block has a return.
1284      MachineBasicBlock::iterator It = MBB.getFirstTerminator();
1285      assert(It->isReturn() && std::next(It) == MBB.end());
1286      unsigned RetOpc;
1287      if (LongCalls)
1288        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC
1289                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;
1290      else
1291        RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
1292                       : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
1293      DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))
1294          .addExternalSymbol(RestoreFn);
1295      // Transfer the function live-out registers.
1296      DeallocCall->copyImplicitOps(MF, *It);
1297    }
1298    addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);
1299    return true;
1300  }
1301
1302  for (unsigned i = 0; i < CSI.size(); ++i) {
1303    unsigned Reg = CSI[i].getReg();
1304    const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1305    int FI = CSI[i].getFrameIdx();
1306    HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
1307  }
1308
1309  return true;
1310}
1311
1312MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(
1313    MachineFunction &MF, MachineBasicBlock &MBB,
1314    MachineBasicBlock::iterator I) const {
1315  MachineInstr &MI = *I;
1316  unsigned Opc = MI.getOpcode();
1317  (void)Opc; // Silence compiler warning.
1318  assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
1319         "Cannot handle this call frame pseudo instruction");
1320  return MBB.erase(I);
1321}
1322
1323void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
1324    MachineFunction &MF, RegScavenger *RS) const {
1325  // If this function has uses aligned stack and also has variable sized stack
1326  // objects, then we need to map all spill slots to fixed positions, so that
1327  // they can be accessed through FP. Otherwise they would have to be accessed
1328  // via AP, which may not be available at the particular place in the program.
1329  MachineFrameInfo &MFI = MF.getFrameInfo();
1330  bool HasAlloca = MFI.hasVarSizedObjects();
1331  bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment());
1332
1333  if (!HasAlloca || !NeedsAlign)
1334    return;
1335
1336  unsigned LFS = MFI.getLocalFrameSize();
1337  for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
1338    if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i))
1339      continue;
1340    unsigned S = MFI.getObjectSize(i);
1341    // Reduce the alignment to at most 8. This will require unaligned vector
1342    // stores if they happen here.
1343    unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
1344    MFI.setObjectAlignment(i, 8);
1345    LFS = alignTo(LFS+S, A);
1346    MFI.mapLocalFrameObject(i, -LFS);
1347  }
1348
1349  MFI.setLocalFrameSize(LFS);
1350  unsigned A = MFI.getLocalFrameMaxAlign();
1351  assert(A <= 8 && "Unexpected local frame alignment");
1352  if (A == 0)
1353    MFI.setLocalFrameMaxAlign(8);
1354  MFI.setUseLocalStackAllocationBlock(true);
1355
1356  // Set the physical aligned-stack base address register.
1357  unsigned AP = 0;
1358  if (const MachineInstr *AI = getAlignaInstr(MF))
1359    AP = AI->getOperand(0).getReg();
1360  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1361  HMFI.setStackAlignBasePhysReg(AP);
1362}
1363
1364/// Returns true if there are no caller-saved registers available in class RC.
1365static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
1366      const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {
1367  MachineRegisterInfo &MRI = MF.getRegInfo();
1368
1369  auto IsUsed = [&HRI,&MRI] (unsigned Reg) -> bool {
1370    for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); ++AI)
1371      if (MRI.isPhysRegUsed(*AI))
1372        return true;
1373    return false;
1374  };
1375
1376  // Check for an unused caller-saved register. Callee-saved registers
1377  // have become pristine by now.
1378  for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; ++P)
1379    if (!IsUsed(*P))
1380      return false;
1381
1382  // All caller-saved registers are used.
1383  return true;
1384}
1385
1386#ifndef NDEBUG
1387static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
1388  dbgs() << '{';
1389  for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
1390    unsigned R = x;
1391    dbgs() << ' ' << printReg(R, &TRI);
1392  }
1393  dbgs() << " }";
1394}
1395#endif
1396
1397bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
1398      const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
1399  DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');
1400  MachineFrameInfo &MFI = MF.getFrameInfo();
1401  BitVector SRegs(Hexagon::NUM_TARGET_REGS);
1402
1403  // Generate a set of unique, callee-saved registers (SRegs), where each
1404  // register in the set is maximal in terms of sub-/super-register relation,
1405  // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
1406
1407  // (1) For each callee-saved register, add that register and all of its
1408  // sub-registers to SRegs.
1409  DEBUG(dbgs() << "Initial CS registers: {");
1410  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1411    unsigned R = CSI[i].getReg();
1412    DEBUG(dbgs() << ' ' << printReg(R, TRI));
1413    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
1414      SRegs[*SR] = true;
1415  }
1416  DEBUG(dbgs() << " }\n");
1417  DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1418
1419  // (2) For each reserved register, remove that register and all of its
1420  // sub- and super-registers from SRegs.
1421  BitVector Reserved = TRI->getReservedRegs(MF);
1422  for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
1423    unsigned R = x;
1424    for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
1425      SRegs[*SR] = false;
1426  }
1427  DEBUG(dbgs() << "Res:     "; dump_registers(Reserved, *TRI); dbgs() << "\n");
1428  DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1429
1430  // (3) Collect all registers that have at least one sub-register in SRegs,
1431  // and also have no sub-registers that are reserved. These will be the can-
1432  // didates for saving as a whole instead of their individual sub-registers.
1433  // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
1434  BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
1435  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1436    unsigned R = x;
1437    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
1438      TmpSup[*SR] = true;
1439  }
1440  for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
1441    unsigned R = x;
1442    for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
1443      if (!Reserved[*SR])
1444        continue;
1445      TmpSup[R] = false;
1446      break;
1447    }
1448  }
1449  DEBUG(dbgs() << "TmpSup:  "; dump_registers(TmpSup, *TRI); dbgs() << "\n");
1450
1451  // (4) Include all super-registers found in (3) into SRegs.
1452  SRegs |= TmpSup;
1453  DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1454
1455  // (5) For each register R in SRegs, if any super-register of R is in SRegs,
1456  // remove R from SRegs.
1457  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1458    unsigned R = x;
1459    for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
1460      if (!SRegs[*SR])
1461        continue;
1462      SRegs[R] = false;
1463      break;
1464    }
1465  }
1466  DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
1467
1468  // Now, for each register that has a fixed stack slot, create the stack
1469  // object for it.
1470  CSI.clear();
1471
1472  using SpillSlot = TargetFrameLowering::SpillSlot;
1473
1474  unsigned NumFixed;
1475  int MinOffset = 0;  // CS offsets are negative.
1476  const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
1477  for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
1478    if (!SRegs[S->Reg])
1479      continue;
1480    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
1481    int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);
1482    MinOffset = std::min(MinOffset, S->Offset);
1483    CSI.push_back(CalleeSavedInfo(S->Reg, FI));
1484    SRegs[S->Reg] = false;
1485  }
1486
1487  // There can be some registers that don't have fixed slots. For example,
1488  // we need to store R0-R3 in functions with exception handling. For each
1489  // such register, create a non-fixed stack object.
1490  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1491    unsigned R = x;
1492    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
1493    unsigned Size = TRI->getSpillSize(*RC);
1494    int Off = MinOffset - Size;
1495    unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment());
1496    assert(isPowerOf2_32(Align));
1497    Off &= -Align;
1498    int FI = MFI.CreateFixedSpillStackObject(Size, Off);
1499    MinOffset = std::min(MinOffset, Off);
1500    CSI.push_back(CalleeSavedInfo(R, FI));
1501    SRegs[R] = false;
1502  }
1503
1504  DEBUG({
1505    dbgs() << "CS information: {";
1506    for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
1507      int FI = CSI[i].getFrameIdx();
1508      int Off = MFI.getObjectOffset(FI);
1509      dbgs() << ' ' << printReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
1510      if (Off >= 0)
1511        dbgs() << '+';
1512      dbgs() << Off;
1513    }
1514    dbgs() << " }\n";
1515  });
1516
1517#ifndef NDEBUG
1518  // Verify that all registers were handled.
1519  bool MissedReg = false;
1520  for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1521    unsigned R = x;
1522    dbgs() << printReg(R, TRI) << ' ';
1523    MissedReg = true;
1524  }
1525  if (MissedReg)
1526    llvm_unreachable("...there are unhandled callee-saved registers!");
1527#endif
1528
1529  return true;
1530}
1531
1532bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
1533      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1534      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1535  MachineInstr *MI = &*It;
1536  DebugLoc DL = MI->getDebugLoc();
1537  unsigned DstR = MI->getOperand(0).getReg();
1538  unsigned SrcR = MI->getOperand(1).getReg();
1539  if (!Hexagon::ModRegsRegClass.contains(DstR) ||
1540      !Hexagon::ModRegsRegClass.contains(SrcR))
1541    return false;
1542
1543  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1544  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
1545  BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
1546    .addReg(TmpR, RegState::Kill);
1547
1548  NewRegs.push_back(TmpR);
1549  B.erase(It);
1550  return true;
1551}
1552
1553bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
1554      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1555      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1556  MachineInstr *MI = &*It;
1557  if (!MI->getOperand(0).isFI())
1558    return false;
1559
1560  DebugLoc DL = MI->getDebugLoc();
1561  unsigned Opc = MI->getOpcode();
1562  unsigned SrcR = MI->getOperand(2).getReg();
1563  bool IsKill = MI->getOperand(2).isKill();
1564  int FI = MI->getOperand(0).getIndex();
1565
1566  // TmpR = C2_tfrpr SrcR   if SrcR is a predicate register
1567  // TmpR = A2_tfrcrr SrcR  if SrcR is a modifier register
1568  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1569  unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
1570                                                 : Hexagon::A2_tfrcrr;
1571  BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
1572    .addReg(SrcR, getKillRegState(IsKill));
1573
1574  // S2_storeri_io FI, 0, TmpR
1575  BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
1576    .addFrameIndex(FI)
1577    .addImm(0)
1578    .addReg(TmpR, RegState::Kill)
1579    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1580
1581  NewRegs.push_back(TmpR);
1582  B.erase(It);
1583  return true;
1584}
1585
1586bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
1587      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1588      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1589  MachineInstr *MI = &*It;
1590  if (!MI->getOperand(1).isFI())
1591    return false;
1592
1593  DebugLoc DL = MI->getDebugLoc();
1594  unsigned Opc = MI->getOpcode();
1595  unsigned DstR = MI->getOperand(0).getReg();
1596  int FI = MI->getOperand(1).getIndex();
1597
1598  // TmpR = L2_loadri_io FI, 0
1599  unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1600  BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
1601    .addFrameIndex(FI)
1602    .addImm(0)
1603    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1604
1605  // DstR = C2_tfrrp TmpR   if DstR is a predicate register
1606  // DstR = A2_tfrrcr TmpR  if DstR is a modifier register
1607  unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp
1608                                                 : Hexagon::A2_tfrrcr;
1609  BuildMI(B, It, DL, HII.get(TfrOpc), DstR)
1610    .addReg(TmpR, RegState::Kill);
1611
1612  NewRegs.push_back(TmpR);
1613  B.erase(It);
1614  return true;
1615}
1616
1617bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
1618      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1619      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1620  MachineInstr *MI = &*It;
1621  if (!MI->getOperand(0).isFI())
1622    return false;
1623
1624  DebugLoc DL = MI->getDebugLoc();
1625  unsigned SrcR = MI->getOperand(2).getReg();
1626  bool IsKill = MI->getOperand(2).isKill();
1627  int FI = MI->getOperand(0).getIndex();
1628  auto *RC = &Hexagon::HvxVRRegClass;
1629
1630  // Insert transfer to general vector register.
1631  //   TmpR0 = A2_tfrsi 0x01010101
1632  //   TmpR1 = V6_vandqrt Qx, TmpR0
1633  //   store FI, 0, TmpR1
1634  unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1635  unsigned TmpR1 = MRI.createVirtualRegister(RC);
1636
1637  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1638    .addImm(0x01010101);
1639
1640  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1)
1641    .addReg(SrcR, getKillRegState(IsKill))
1642    .addReg(TmpR0, RegState::Kill);
1643
1644  auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
1645  HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI);
1646  expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);
1647
1648  NewRegs.push_back(TmpR0);
1649  NewRegs.push_back(TmpR1);
1650  B.erase(It);
1651  return true;
1652}
1653
1654bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
1655      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1656      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1657  MachineInstr *MI = &*It;
1658  if (!MI->getOperand(1).isFI())
1659    return false;
1660
1661  DebugLoc DL = MI->getDebugLoc();
1662  unsigned DstR = MI->getOperand(0).getReg();
1663  int FI = MI->getOperand(1).getIndex();
1664  auto *RC = &Hexagon::HvxVRRegClass;
1665
1666  // TmpR0 = A2_tfrsi 0x01010101
1667  // TmpR1 = load FI, 0
1668  // DstR = V6_vandvrt TmpR1, TmpR0
1669  unsigned TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
1670  unsigned TmpR1 = MRI.createVirtualRegister(RC);
1671
1672  BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
1673    .addImm(0x01010101);
1674  MachineFunction &MF = *B.getParent();
1675  auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1676  HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI);
1677  expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);
1678
1679  BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
1680    .addReg(TmpR1, RegState::Kill)
1681    .addReg(TmpR0, RegState::Kill);
1682
1683  NewRegs.push_back(TmpR0);
1684  NewRegs.push_back(TmpR1);
1685  B.erase(It);
1686  return true;
1687}
1688
1689bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
1690      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1691      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1692  MachineFunction &MF = *B.getParent();
1693  auto &MFI = MF.getFrameInfo();
1694  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1695  MachineInstr *MI = &*It;
1696  if (!MI->getOperand(0).isFI())
1697    return false;
1698
1699  // It is possible that the double vector being stored is only partially
1700  // defined. From the point of view of the liveness tracking, it is ok to
1701  // store it as a whole, but if we break it up we may end up storing a
1702  // register that is entirely undefined.
1703  LivePhysRegs LPR(HRI);
1704  LPR.addLiveIns(B);
1705  SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
1706  for (auto R = B.begin(); R != It; ++R) {
1707    Clobbers.clear();
1708    LPR.stepForward(*R, Clobbers);
1709    // Dead defs are recorded in Clobbers, but are not automatically removed
1710    // from the live set.
1711    for (auto &C : Clobbers)
1712      if (C.second->isReg() && C.second->isDead())
1713        LPR.removeReg(C.first);
1714  }
1715
1716  DebugLoc DL = MI->getDebugLoc();
1717  unsigned SrcR = MI->getOperand(2).getReg();
1718  unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
1719  unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
1720  bool IsKill = MI->getOperand(2).isKill();
1721  int FI = MI->getOperand(0).getIndex();
1722
1723  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1724  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1725  unsigned HasAlign = MFI.getObjectAlignment(FI);
1726  unsigned StoreOpc;
1727
1728  // Store low part.
1729  if (LPR.contains(SrcLo)) {
1730    StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1731                                     : Hexagon::V6_vS32Ub_ai;
1732    BuildMI(B, It, DL, HII.get(StoreOpc))
1733      .addFrameIndex(FI)
1734      .addImm(0)
1735      .addReg(SrcLo, getKillRegState(IsKill))
1736      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1737  }
1738
1739  // Store high part.
1740  if (LPR.contains(SrcHi)) {
1741    StoreOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vS32b_ai
1742                                                     : Hexagon::V6_vS32Ub_ai;
1743    BuildMI(B, It, DL, HII.get(StoreOpc))
1744      .addFrameIndex(FI)
1745      .addImm(Size)
1746      .addReg(SrcHi, getKillRegState(IsKill))
1747      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1748  }
1749
1750  B.erase(It);
1751  return true;
1752}
1753
1754bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
1755      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1756      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1757  MachineFunction &MF = *B.getParent();
1758  auto &MFI = MF.getFrameInfo();
1759  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1760  MachineInstr *MI = &*It;
1761  if (!MI->getOperand(1).isFI())
1762    return false;
1763
1764  DebugLoc DL = MI->getDebugLoc();
1765  unsigned DstR = MI->getOperand(0).getReg();
1766  unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
1767  unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
1768  int FI = MI->getOperand(1).getIndex();
1769
1770  unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
1771  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1772  unsigned HasAlign = MFI.getObjectAlignment(FI);
1773  unsigned LoadOpc;
1774
1775  // Load low part.
1776  LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1777                                  : Hexagon::V6_vL32Ub_ai;
1778  BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
1779    .addFrameIndex(FI)
1780    .addImm(0)
1781    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1782
1783  // Load high part.
1784  LoadOpc = NeedAlign <= MinAlign(HasAlign, Size) ? Hexagon::V6_vL32b_ai
1785                                                  : Hexagon::V6_vL32Ub_ai;
1786  BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
1787    .addFrameIndex(FI)
1788    .addImm(Size)
1789    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1790
1791  B.erase(It);
1792  return true;
1793}
1794
1795bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
1796      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1797      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1798  MachineFunction &MF = *B.getParent();
1799  auto &MFI = MF.getFrameInfo();
1800  MachineInstr *MI = &*It;
1801  if (!MI->getOperand(0).isFI())
1802    return false;
1803
1804  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1805  DebugLoc DL = MI->getDebugLoc();
1806  unsigned SrcR = MI->getOperand(2).getReg();
1807  bool IsKill = MI->getOperand(2).isKill();
1808  int FI = MI->getOperand(0).getIndex();
1809
1810  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1811  unsigned HasAlign = MFI.getObjectAlignment(FI);
1812  unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
1813                                            : Hexagon::V6_vS32Ub_ai;
1814  BuildMI(B, It, DL, HII.get(StoreOpc))
1815    .addFrameIndex(FI)
1816    .addImm(0)
1817    .addReg(SrcR, getKillRegState(IsKill))
1818    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1819
1820  B.erase(It);
1821  return true;
1822}
1823
1824bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
1825      MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
1826      const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
1827  MachineFunction &MF = *B.getParent();
1828  auto &MFI = MF.getFrameInfo();
1829  MachineInstr *MI = &*It;
1830  if (!MI->getOperand(1).isFI())
1831    return false;
1832
1833  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1834  DebugLoc DL = MI->getDebugLoc();
1835  unsigned DstR = MI->getOperand(0).getReg();
1836  int FI = MI->getOperand(1).getIndex();
1837
1838  unsigned NeedAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
1839  unsigned HasAlign = MFI.getObjectAlignment(FI);
1840  unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
1841                                           : Hexagon::V6_vL32Ub_ai;
1842  BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
1843    .addFrameIndex(FI)
1844    .addImm(0)
1845    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
1846
1847  B.erase(It);
1848  return true;
1849}
1850
1851bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
1852      SmallVectorImpl<unsigned> &NewRegs) const {
1853  auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
1854  MachineRegisterInfo &MRI = MF.getRegInfo();
1855  bool Changed = false;
1856
1857  for (auto &B : MF) {
1858    // Traverse the basic block.
1859    MachineBasicBlock::iterator NextI;
1860    for (auto I = B.begin(), E = B.end(); I != E; I = NextI) {
1861      MachineInstr *MI = &*I;
1862      NextI = std::next(I);
1863      unsigned Opc = MI->getOpcode();
1864
1865      switch (Opc) {
1866        case TargetOpcode::COPY:
1867          Changed |= expandCopy(B, I, MRI, HII, NewRegs);
1868          break;
1869        case Hexagon::STriw_pred:
1870        case Hexagon::STriw_mod:
1871          Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
1872          break;
1873        case Hexagon::LDriw_pred:
1874        case Hexagon::LDriw_mod:
1875          Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
1876          break;
1877        case Hexagon::PS_vstorerq_ai:
1878          Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
1879          break;
1880        case Hexagon::PS_vloadrq_ai:
1881          Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
1882          break;
1883        case Hexagon::PS_vloadrw_ai:
1884        case Hexagon::PS_vloadrwu_ai:
1885          Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
1886          break;
1887        case Hexagon::PS_vstorerw_ai:
1888        case Hexagon::PS_vstorerwu_ai:
1889          Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
1890          break;
1891      }
1892    }
1893  }
1894
1895  return Changed;
1896}
1897
1898void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
1899                                                BitVector &SavedRegs,
1900                                                RegScavenger *RS) const {
1901  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1902
1903  SavedRegs.resize(HRI.getNumRegs());
1904
1905  // If we have a function containing __builtin_eh_return we want to spill and
1906  // restore all callee saved registers. Pretend that they are used.
1907  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
1908    for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R)
1909      SavedRegs.set(*R);
1910
1911  // Replace predicate register pseudo spill code.
1912  SmallVector<unsigned,8> NewRegs;
1913  expandSpillMacros(MF, NewRegs);
1914  if (OptimizeSpillSlots && !isOptNone(MF))
1915    optimizeSpillSlots(MF, NewRegs);
1916
1917  // We need to reserve a a spill slot if scavenging could potentially require
1918  // spilling a scavenged register.
1919  if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {
1920    MachineFrameInfo &MFI = MF.getFrameInfo();
1921    MachineRegisterInfo &MRI = MF.getRegInfo();
1922    SetVector<const TargetRegisterClass*> SpillRCs;
1923    // Reserve an int register in any case, because it could be used to hold
1924    // the stack offset in case it does not fit into a spill instruction.
1925    SpillRCs.insert(&Hexagon::IntRegsRegClass);
1926
1927    for (unsigned VR : NewRegs)
1928      SpillRCs.insert(MRI.getRegClass(VR));
1929
1930    for (auto *RC : SpillRCs) {
1931      if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
1932        continue;
1933      unsigned Num = RC == &Hexagon::IntRegsRegClass ? NumberScavengerSlots : 1;
1934      unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC);
1935      for (unsigned i = 0; i < Num; i++) {
1936        int NewFI = MFI.CreateSpillStackObject(S, A);
1937        RS->addScavengingFrameIndex(NewFI);
1938      }
1939    }
1940  }
1941
1942  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1943}
1944
1945unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
1946      HexagonBlockRanges::IndexRange &FIR,
1947      HexagonBlockRanges::InstrIndexMap &IndexMap,
1948      HexagonBlockRanges::RegToRangeMap &DeadMap,
1949      const TargetRegisterClass *RC) const {
1950  auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1951  auto &MRI = MF.getRegInfo();
1952
1953  auto isDead = [&FIR,&DeadMap] (unsigned Reg) -> bool {
1954    auto F = DeadMap.find({Reg,0});
1955    if (F == DeadMap.end())
1956      return false;
1957    for (auto &DR : F->second)
1958      if (DR.contains(FIR))
1959        return true;
1960    return false;
1961  };
1962
1963  for (unsigned Reg : RC->getRawAllocationOrder(MF)) {
1964    bool Dead = true;
1965    for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
1966      if (isDead(R.Reg))
1967        continue;
1968      Dead = false;
1969      break;
1970    }
1971    if (Dead)
1972      return Reg;
1973  }
1974  return 0;
1975}
1976
1977void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
1978      SmallVectorImpl<unsigned> &VRegs) const {
1979  auto &HST = MF.getSubtarget<HexagonSubtarget>();
1980  auto &HII = *HST.getInstrInfo();
1981  auto &HRI = *HST.getRegisterInfo();
1982  auto &MRI = MF.getRegInfo();
1983  HexagonBlockRanges HBR(MF);
1984
1985  using BlockIndexMap =
1986      std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>;
1987  using BlockRangeMap =
1988      std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>;
1989  using IndexType = HexagonBlockRanges::IndexType;
1990
1991  struct SlotInfo {
1992    BlockRangeMap Map;
1993    unsigned Size = 0;
1994    const TargetRegisterClass *RC = nullptr;
1995
1996    SlotInfo() = default;
1997  };
1998
1999  BlockIndexMap BlockIndexes;
2000  SmallSet<int,4> BadFIs;
2001  std::map<int,SlotInfo> FIRangeMap;
2002
2003  // Accumulate register classes: get a common class for a pre-existing
2004  // class HaveRC and a new class NewRC. Return nullptr if a common class
2005  // cannot be found, otherwise return the resulting class. If HaveRC is
2006  // nullptr, assume that it is still unset.
2007  auto getCommonRC =
2008      [](const TargetRegisterClass *HaveRC,
2009         const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
2010    if (HaveRC == nullptr || HaveRC == NewRC)
2011      return NewRC;
2012    // Different classes, both non-null. Pick the more general one.
2013    if (HaveRC->hasSubClassEq(NewRC))
2014      return HaveRC;
2015    if (NewRC->hasSubClassEq(HaveRC))
2016      return NewRC;
2017    return nullptr;
2018  };
2019
2020  // Scan all blocks in the function. Check all occurrences of frame indexes,
2021  // and collect relevant information.
2022  for (auto &B : MF) {
2023    std::map<int,IndexType> LastStore, LastLoad;
2024    // Emplace appears not to be supported in gcc 4.7.2-4.
2025    //auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
2026    auto P = BlockIndexes.insert(
2027                std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));
2028    auto &IndexMap = P.first->second;
2029    DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"
2030                 << IndexMap << '\n');
2031
2032    for (auto &In : B) {
2033      int LFI, SFI;
2034      bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In);
2035      bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In);
2036      if (Load && Store) {
2037        // If it's both a load and a store, then we won't handle it.
2038        BadFIs.insert(LFI);
2039        BadFIs.insert(SFI);
2040        continue;
2041      }
2042      // Check for register classes of the register used as the source for
2043      // the store, and the register used as the destination for the load.
2044      // Also, only accept base+imm_offset addressing modes. Other addressing
2045      // modes can have side-effects (post-increments, etc.). For stack
2046      // slots they are very unlikely, so there is not much loss due to
2047      // this restriction.
2048      if (Load || Store) {
2049        int TFI = Load ? LFI : SFI;
2050        unsigned AM = HII.getAddrMode(In);
2051        SlotInfo &SI = FIRangeMap[TFI];
2052        bool Bad = (AM != HexagonII::BaseImmOffset);
2053        if (!Bad) {
2054          // If the addressing mode is ok, check the register class.
2055          unsigned OpNum = Load ? 0 : 2;
2056          auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);
2057          RC = getCommonRC(SI.RC, RC);
2058          if (RC == nullptr)
2059            Bad = true;
2060          else
2061            SI.RC = RC;
2062        }
2063        if (!Bad) {
2064          // Check sizes.
2065          unsigned S = HII.getMemAccessSize(In);
2066          if (SI.Size != 0 && SI.Size != S)
2067            Bad = true;
2068          else
2069            SI.Size = S;
2070        }
2071        if (!Bad) {
2072          for (auto *Mo : In.memoperands()) {
2073            if (!Mo->isVolatile())
2074              continue;
2075            Bad = true;
2076            break;
2077          }
2078        }
2079        if (Bad)
2080          BadFIs.insert(TFI);
2081      }
2082
2083      // Locate uses of frame indices.
2084      for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
2085        const MachineOperand &Op = In.getOperand(i);
2086        if (!Op.isFI())
2087          continue;
2088        int FI = Op.getIndex();
2089        // Make sure that the following operand is an immediate and that
2090        // it is 0. This is the offset in the stack object.
2091        if (i+1 >= n || !In.getOperand(i+1).isImm() ||
2092            In.getOperand(i+1).getImm() != 0)
2093          BadFIs.insert(FI);
2094        if (BadFIs.count(FI))
2095          continue;
2096
2097        IndexType Index = IndexMap.getIndex(&In);
2098        if (Load) {
2099          if (LastStore[FI] == IndexType::None)
2100            LastStore[FI] = IndexType::Entry;
2101          LastLoad[FI] = Index;
2102        } else if (Store) {
2103          HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2104          if (LastStore[FI] != IndexType::None)
2105            RL.add(LastStore[FI], LastLoad[FI], false, false);
2106          else if (LastLoad[FI] != IndexType::None)
2107            RL.add(IndexType::Entry, LastLoad[FI], false, false);
2108          LastLoad[FI] = IndexType::None;
2109          LastStore[FI] = Index;
2110        } else {
2111          BadFIs.insert(FI);
2112        }
2113      }
2114    }
2115
2116    for (auto &I : LastLoad) {
2117      IndexType LL = I.second;
2118      if (LL == IndexType::None)
2119        continue;
2120      auto &RL = FIRangeMap[I.first].Map[&B];
2121      IndexType &LS = LastStore[I.first];
2122      if (LS != IndexType::None)
2123        RL.add(LS, LL, false, false);
2124      else
2125        RL.add(IndexType::Entry, LL, false, false);
2126      LS = IndexType::None;
2127    }
2128    for (auto &I : LastStore) {
2129      IndexType LS = I.second;
2130      if (LS == IndexType::None)
2131        continue;
2132      auto &RL = FIRangeMap[I.first].Map[&B];
2133      RL.add(LS, IndexType::None, false, false);
2134    }
2135  }
2136
2137  DEBUG({
2138    for (auto &P : FIRangeMap) {
2139      dbgs() << "fi#" << P.first;
2140      if (BadFIs.count(P.first))
2141        dbgs() << " (bad)";
2142      dbgs() << "  RC: ";
2143      if (P.second.RC != nullptr)
2144        dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
2145      else
2146        dbgs() << "<null>\n";
2147      for (auto &R : P.second.Map)
2148        dbgs() << "  " << printMBBReference(*R.first) << " { " << R.second
2149               << "}\n";
2150    }
2151  });
2152
2153  // When a slot is loaded from in a block without being stored to in the
2154  // same block, it is live-on-entry to this block. To avoid CFG analysis,
2155  // consider this slot to be live-on-exit from all blocks.
2156  SmallSet<int,4> LoxFIs;
2157
2158  std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
2159
2160  for (auto &P : FIRangeMap) {
2161    // P = pair(FI, map: BB->RangeList)
2162    if (BadFIs.count(P.first))
2163      continue;
2164    for (auto &B : MF) {
2165      auto F = P.second.Map.find(&B);
2166      // F = pair(BB, RangeList)
2167      if (F == P.second.Map.end() || F->second.empty())
2168        continue;
2169      HexagonBlockRanges::IndexRange &IR = F->second.front();
2170      if (IR.start() == IndexType::Entry)
2171        LoxFIs.insert(P.first);
2172      BlockFIMap[&B].push_back(P.first);
2173    }
2174  }
2175
2176  DEBUG({
2177    dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
2178    for (auto &P : BlockFIMap) {
2179      auto &FIs = P.second;
2180      if (FIs.empty())
2181        continue;
2182      dbgs() << "  " << printMBBReference(*P.first) << ": {";
2183      for (auto I : FIs) {
2184        dbgs() << " fi#" << I;
2185        if (LoxFIs.count(I))
2186          dbgs() << '*';
2187      }
2188      dbgs() << " }\n";
2189    }
2190  });
2191
2192#ifndef NDEBUG
2193  bool HasOptLimit = SpillOptMax.getPosition();
2194#endif
2195
2196  // eliminate loads, when all loads eliminated, eliminate all stores.
2197  for (auto &B : MF) {
2198    auto F = BlockIndexes.find(&B);
2199    assert(F != BlockIndexes.end());
2200    HexagonBlockRanges::InstrIndexMap &IM = F->second;
2201    HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
2202    HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
2203    DEBUG(dbgs() << printMBBReference(B) << " dead map\n"
2204                 << HexagonBlockRanges::PrintRangeMap(DM, HRI));
2205
2206    for (auto FI : BlockFIMap[&B]) {
2207      if (BadFIs.count(FI))
2208        continue;
2209      DEBUG(dbgs() << "Working on fi#" << FI << '\n');
2210      HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2211      for (auto &Range : RL) {
2212        DEBUG(dbgs() << "--Examining range:" << RL << '\n');
2213        if (!IndexType::isInstr(Range.start()) ||
2214            !IndexType::isInstr(Range.end()))
2215          continue;
2216        MachineInstr &SI = *IM.getInstr(Range.start());
2217        MachineInstr &EI = *IM.getInstr(Range.end());
2218        assert(SI.mayStore() && "Unexpected start instruction");
2219        assert(EI.mayLoad() && "Unexpected end instruction");
2220        MachineOperand &SrcOp = SI.getOperand(2);
2221
2222        HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
2223                                                  SrcOp.getSubReg() };
2224        auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
2225        // The this-> is needed to unconfuse MSVC.
2226        unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
2227        DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI) << '\n');
2228        if (FoundR == 0)
2229          continue;
2230#ifndef NDEBUG
2231        if (HasOptLimit) {
2232          if (SpillOptCount >= SpillOptMax)
2233            return;
2234          SpillOptCount++;
2235        }
2236#endif
2237
2238        // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
2239        MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;
2240        MachineInstr *CopyIn = nullptr;
2241        if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
2242          const DebugLoc &DL = SI.getDebugLoc();
2243          CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
2244                       .add(SrcOp);
2245        }
2246
2247        ++StartIt;
2248        // Check if this is a last store and the FI is live-on-exit.
2249        if (LoxFIs.count(FI) && (&Range == &RL.back())) {
2250          // Update store's source register.
2251          if (unsigned SR = SrcOp.getSubReg())
2252            SrcOp.setReg(HRI.getSubReg(FoundR, SR));
2253          else
2254            SrcOp.setReg(FoundR);
2255          SrcOp.setSubReg(0);
2256          // We are keeping this register live.
2257          SrcOp.setIsKill(false);
2258        } else {
2259          B.erase(&SI);
2260          IM.replaceInstr(&SI, CopyIn);
2261        }
2262
2263        auto EndIt = std::next(EI.getIterator());
2264        for (auto It = StartIt; It != EndIt; It = NextIt) {
2265          MachineInstr &MI = *It;
2266          NextIt = std::next(It);
2267          int TFI;
2268          if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
2269            continue;
2270          unsigned DstR = MI.getOperand(0).getReg();
2271          assert(MI.getOperand(0).getSubReg() == 0);
2272          MachineInstr *CopyOut = nullptr;
2273          if (DstR != FoundR) {
2274            DebugLoc DL = MI.getDebugLoc();
2275            unsigned MemSize = HII.getMemAccessSize(MI);
2276            assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
2277            unsigned CopyOpc = TargetOpcode::COPY;
2278            if (HII.isSignExtendingLoad(MI))
2279              CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
2280            else if (HII.isZeroExtendingLoad(MI))
2281              CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
2282            CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
2283                        .addReg(FoundR, getKillRegState(&MI == &EI));
2284          }
2285          IM.replaceInstr(&MI, CopyOut);
2286          B.erase(It);
2287        }
2288
2289        // Update the dead map.
2290        HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
2291        for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
2292          DM[RR].subtract(Range);
2293      } // for Range in range list
2294    }
2295  }
2296}
2297
2298void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
2299      const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
2300  MachineBasicBlock &MB = *AI->getParent();
2301  DebugLoc DL = AI->getDebugLoc();
2302  unsigned A = AI->getOperand(2).getImm();
2303
2304  // Have
2305  //    Rd  = alloca Rs, #A
2306  //
2307  // If Rs and Rd are different registers, use this sequence:
2308  //    Rd  = sub(r29, Rs)
2309  //    r29 = sub(r29, Rs)
2310  //    Rd  = and(Rd, #-A)    ; if necessary
2311  //    r29 = and(r29, #-A)   ; if necessary
2312  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2313  // otherwise, do
2314  //    Rd  = sub(r29, Rs)
2315  //    Rd  = and(Rd, #-A)    ; if necessary
2316  //    r29 = Rd
2317  //    Rd  = add(Rd, #CF)    ; CF size aligned to at most A
2318
2319  MachineOperand &RdOp = AI->getOperand(0);
2320  MachineOperand &RsOp = AI->getOperand(1);
2321  unsigned Rd = RdOp.getReg(), Rs = RsOp.getReg();
2322
2323  // Rd = sub(r29, Rs)
2324  BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
2325      .addReg(SP)
2326      .addReg(Rs);
2327  if (Rs != Rd) {
2328    // r29 = sub(r29, Rs)
2329    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)
2330        .addReg(SP)
2331        .addReg(Rs);
2332  }
2333  if (A > 8) {
2334    // Rd  = and(Rd, #-A)
2335    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2336        .addReg(Rd)
2337        .addImm(-int64_t(A));
2338    if (Rs != Rd)
2339      BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
2340          .addReg(SP)
2341          .addImm(-int64_t(A));
2342  }
2343  if (Rs == Rd) {
2344    // r29 = Rd
2345    BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)
2346        .addReg(Rd);
2347  }
2348  if (CF > 0) {
2349    // Rd = add(Rd, #CF)
2350    BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)
2351        .addReg(Rd)
2352        .addImm(CF);
2353  }
2354}
2355
2356bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
2357  const MachineFrameInfo &MFI = MF.getFrameInfo();
2358  if (!MFI.hasVarSizedObjects())
2359    return false;
2360  unsigned MaxA = MFI.getMaxAlignment();
2361  if (MaxA <= getStackAlignment())
2362    return false;
2363  return true;
2364}
2365
2366const MachineInstr *HexagonFrameLowering::getAlignaInstr(
2367      const MachineFunction &MF) const {
2368  for (auto &B : MF)
2369    for (auto &I : B)
2370      if (I.getOpcode() == Hexagon::PS_aligna)
2371        return &I;
2372  return nullptr;
2373}
2374
2375/// Adds all callee-saved registers as implicit uses or defs to the
2376/// instruction.
2377void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
2378      const CSIVect &CSI, bool IsDef, bool IsKill) const {
2379  // Add the callee-saved registers as implicit uses.
2380  for (auto &R : CSI)
2381    MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
2382}
2383
2384/// Determine whether the callee-saved register saves and restores should
2385/// be generated via inline code. If this function returns "true", inline
2386/// code will be generated. If this function returns "false", additional
2387/// checks are performed, which may still lead to the inline code.
2388bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
2389      const CSIVect &CSI) const {
2390  if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
2391    return true;
2392  if (!hasFP(MF))
2393    return true;
2394  if (!isOptSize(MF) && !isMinSize(MF))
2395    if (MF.getTarget().getOptLevel() > CodeGenOpt::Default)
2396      return true;
2397
2398  // Check if CSI only has double registers, and if the registers form
2399  // a contiguous block starting from D8.
2400  BitVector Regs(Hexagon::NUM_TARGET_REGS);
2401  for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
2402    unsigned R = CSI[i].getReg();
2403    if (!Hexagon::DoubleRegsRegClass.contains(R))
2404      return true;
2405    Regs[R] = true;
2406  }
2407  int F = Regs.find_first();
2408  if (F != Hexagon::D8)
2409    return true;
2410  while (F >= 0) {
2411    int N = Regs.find_next(F);
2412    if (N >= 0 && N != F+1)
2413      return true;
2414    F = N;
2415  }
2416
2417  return false;
2418}
2419
2420bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
2421      const CSIVect &CSI) const {
2422  if (shouldInlineCSR(MF, CSI))
2423    return false;
2424  unsigned NumCSI = CSI.size();
2425  if (NumCSI <= 1)
2426    return false;
2427
2428  unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
2429                                     : SpillFuncThreshold;
2430  return Threshold < NumCSI;
2431}
2432
2433bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
2434      const CSIVect &CSI) const {
2435  if (shouldInlineCSR(MF, CSI))
2436    return false;
2437  // The restore functions do a bit more than just restoring registers.
2438  // The non-returning versions will go back directly to the caller's
2439  // caller, others will clean up the stack frame in preparation for
2440  // a tail call. Using them can still save code size even if only one
2441  // register is getting restores. Make the decision based on -Oz:
2442  // using -Os will use inline restore for a single register.
2443  if (isMinSize(MF))
2444    return true;
2445  unsigned NumCSI = CSI.size();
2446  if (NumCSI <= 1)
2447    return false;
2448
2449  unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
2450                                     : SpillFuncThreshold;
2451  return Threshold < NumCSI;
2452}
2453
2454bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
2455  unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);
2456  auto &HST = MF.getSubtarget<HexagonSubtarget>();
2457  // A fairly simplistic guess as to whether a potential load/store to a
2458  // stack location could require an extra register.
2459  if (HST.useHVXOps() && StackSize > 256)
2460    return true;
2461
2462  // Check if the function has store-immediate instructions that access
2463  // the stack. Since the offset field is not extendable, if the stack
2464  // size exceeds the offset limit (6 bits, shifted), the stores will
2465  // require a new base register.
2466  bool HasImmStack = false;
2467  unsigned MinLS = ~0u;   // Log_2 of the memory access size.
2468
2469  for (const MachineBasicBlock &B : MF) {
2470    for (const MachineInstr &MI : B) {
2471      unsigned LS = 0;
2472      switch (MI.getOpcode()) {
2473        case Hexagon::S4_storeirit_io:
2474        case Hexagon::S4_storeirif_io:
2475        case Hexagon::S4_storeiri_io:
2476          ++LS;
2477          LLVM_FALLTHROUGH;
2478        case Hexagon::S4_storeirht_io:
2479        case Hexagon::S4_storeirhf_io:
2480        case Hexagon::S4_storeirh_io:
2481          ++LS;
2482          LLVM_FALLTHROUGH;
2483        case Hexagon::S4_storeirbt_io:
2484        case Hexagon::S4_storeirbf_io:
2485        case Hexagon::S4_storeirb_io:
2486          if (MI.getOperand(0).isFI())
2487            HasImmStack = true;
2488          MinLS = std::min(MinLS, LS);
2489          break;
2490      }
2491    }
2492  }
2493
2494  if (HasImmStack)
2495    return !isUInt<6>(StackSize >> MinLS);
2496
2497  return false;
2498}
2499