X86FrameLowering.cpp revision 363496
1//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86FrameLowering.h"
14#include "X86InstrBuilder.h"
15#include "X86InstrInfo.h"
16#include "X86MachineFunctionInfo.h"
17#include "X86Subtarget.h"
18#include "X86TargetMachine.h"
19#include "llvm/ADT/SmallSet.h"
20#include "llvm/Analysis/EHPersonalities.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineModuleInfo.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/WinEHFuncInfo.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Function.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCSymbol.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Target/TargetOptions.h"
33#include <cstdlib>
34
35using namespace llvm;
36
37X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
38                                   MaybeAlign StackAlignOverride)
39    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
40                          STI.is64Bit() ? -8 : -4),
41      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
42  // Cache a bunch of frame-related predicates for this subtarget.
43  SlotSize = TRI->getSlotSize();
44  Is64Bit = STI.is64Bit();
45  IsLP64 = STI.isTarget64BitLP64();
46  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit ones.
47  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
48  StackPtr = TRI->getStackRegister();
49}
50
51bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
52  return !MF.getFrameInfo().hasVarSizedObjects() &&
53         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
54}
55
56/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
57/// call frame pseudos can be simplified.  Having a FP, as in the default
58/// implementation, is not sufficient here since we can't always use it.
59/// Use a more nuanced condition.
60bool
61X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
62  return hasReservedCallFrame(MF) ||
63         (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
64         TRI->hasBasePointer(MF);
65}
66
67// needsFrameIndexResolution - Do we need to perform FI resolution for
68// this function. Normally, this is required only when the function
69// has any stack objects. However, FI resolution actually has another job,
70// not apparent from the title - it resolves callframesetup/destroy
71// that were not simplified earlier.
72// So, this is required for x86 functions that have push sequences even
73// when there are no stack objects.
74bool
75X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
76  return MF.getFrameInfo().hasStackObjects() ||
77         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
78}
79
80/// hasFP - Return true if the specified function should have a dedicated frame
81/// pointer register.  This is true if the function has variable sized allocas
82/// or if frame pointer elimination is disabled.
83bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
84  const MachineFrameInfo &MFI = MF.getFrameInfo();
85  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
86          TRI->needsStackRealignment(MF) ||
87          MFI.hasVarSizedObjects() ||
88          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
89          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
90          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
91          MFI.hasStackMap() || MFI.hasPatchPoint() ||
92          MFI.hasCopyImplyingStackAdjustment());
93}
94
95static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
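  // Pick the narrowest encoding for the immediate: values that fit in a
  // signed 8-bit field (e.g. 8) select the ri8 forms; anything larger
  // (e.g. 4096) falls back to the 32-bit-immediate forms.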
96  if (IsLP64) {
97    if (isInt<8>(Imm))
98      return X86::SUB64ri8;
99    return X86::SUB64ri32;
100  } else {
101    if (isInt<8>(Imm))
102      return X86::SUB32ri8;
103    return X86::SUB32ri;
104  }
105}
106
107static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
108  if (IsLP64) {
109    if (isInt<8>(Imm))
110      return X86::ADD64ri8;
111    return X86::ADD64ri32;
112  } else {
113    if (isInt<8>(Imm))
114      return X86::ADD32ri8;
115    return X86::ADD32ri;
116  }
117}
118
119static unsigned getSUBrrOpcode(bool IsLP64) {
120  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
121}
122
123static unsigned getADDrrOpcode(bool IsLP64) {
124  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
125}
126
127static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
128  if (IsLP64) {
129    if (isInt<8>(Imm))
130      return X86::AND64ri8;
131    return X86::AND64ri32;
132  }
133  if (isInt<8>(Imm))
134    return X86::AND32ri8;
135  return X86::AND32ri;
136}
137
138static unsigned getLEArOpcode(bool IsLP64) {
139  return IsLP64 ? X86::LEA64r : X86::LEA32r;
140}
141
142/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
143/// when it reaches the "return" instruction. We can then pop a stack object
144/// to this register without worrying about clobbering it.
145static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
146                                       MachineBasicBlock::iterator &MBBI,
147                                       const X86RegisterInfo *TRI,
148                                       bool Is64Bit) {
149  const MachineFunction *MF = MBB.getParent();
150  if (MF->callsEHReturn())
151    return 0;
152
153  const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);
154
155  if (MBBI == MBB.end())
156    return 0;
157
158  switch (MBBI->getOpcode()) {
159  default: return 0;
160  case TargetOpcode::PATCHABLE_RET:
161  case X86::RET:
162  case X86::RETL:
163  case X86::RETQ:
164  case X86::RETIL:
165  case X86::RETIQ:
166  case X86::TCRETURNdi:
167  case X86::TCRETURNri:
168  case X86::TCRETURNmi:
169  case X86::TCRETURNdi64:
170  case X86::TCRETURNri64:
171  case X86::TCRETURNmi64:
172  case X86::EH_RETURN:
173  case X86::EH_RETURN64: {
174    SmallSet<uint16_t, 8> Uses;
175    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
176      MachineOperand &MO = MBBI->getOperand(i);
177      if (!MO.isReg() || MO.isDef())
178        continue;
179      Register Reg = MO.getReg();
180      if (!Reg)
181        continue;
182      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
183        Uses.insert(*AI);
184    }
185
186    for (auto CS : AvailableRegs)
187      if (!Uses.count(CS) && CS != X86::RIP && CS != X86::RSP &&
188          CS != X86::ESP)
189        return CS;
190  }
191  }
192
193  return 0;
194}
195
196static bool isEAXLiveIn(MachineBasicBlock &MBB) {
197  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
198    unsigned Reg = RegMask.PhysReg;
199
200    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
201        Reg == X86::AH || Reg == X86::AL)
202      return true;
203  }
204
205  return false;
206}
207
208/// Check if the flags need to be preserved before the terminators.
209/// This would be the case, if the eflags is live-in of the region
210/// composed by the terminators or live-out of that region, without
211/// being defined by a terminator.
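/// For example (illustrative): a block whose terminators are
///   JCC_1 %bb.1, <cond>, implicit $eflags
///   JMP_1 %bb.2
/// reads EFLAGS without defining it there, so whatever set the flags before
/// the terminators must be preserved.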
212static bool
213flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
214  for (const MachineInstr &MI : MBB.terminators()) {
215    bool BreakNext = false;
216    for (const MachineOperand &MO : MI.operands()) {
217      if (!MO.isReg())
218        continue;
219      Register Reg = MO.getReg();
220      if (Reg != X86::EFLAGS)
221        continue;
222
223      // This terminator needs an eflags value that is not defined
224      // by a previous terminator:
225      // EFLAGS is live-in of the region composed by the terminators.
226      if (!MO.isDef())
227        return true;
228      // This terminator defines the eflags, i.e., we don't need to preserve it.
229      // However, we still need to check this specific terminator does not
230      // read a live-in value.
231      BreakNext = true;
232    }
233    // We found a definition of the eflags, no need to preserve them.
234    if (BreakNext)
235      return false;
236  }
237
238  // None of the terminators use or define the eflags.
239  // Check if they are live-out, that would imply we need to preserve them.
240  for (const MachineBasicBlock *Succ : MBB.successors())
241    if (Succ->isLiveIn(X86::EFLAGS))
242      return true;
243
244  return false;
245}
246
247/// emitSPUpdate - Emit a series of instructions to increment / decrement the
248/// stack pointer by a constant value.
249void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
250                                    MachineBasicBlock::iterator &MBBI,
251                                    const DebugLoc &DL,
252                                    int64_t NumBytes, bool InEpilogue) const {
253  bool isSub = NumBytes < 0;
254  uint64_t Offset = isSub ? -NumBytes : NumBytes;
255  MachineInstr::MIFlag Flag =
256      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
257
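  // The largest adjustment a single 32-bit-immediate ADD/SUB can encode.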
258  uint64_t Chunk = (1LL << 31) - 1;
259
260  if (Offset > Chunk) {
261    // Rather than emit a long series of instructions for large offsets,
262    // load the offset into a register and do one sub/add
263    unsigned Reg = 0;
264    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
265
266    if (isSub && !isEAXLiveIn(MBB))
267      Reg = Rax;
268    else
269      Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
270
271    unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
272    unsigned AddSubRROpc =
273        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
274    if (Reg) {
275      BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg)
276          .addImm(Offset)
277          .setMIFlag(Flag);
278      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
279                             .addReg(StackPtr)
280                             .addReg(Reg);
281      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
282      return;
283    } else if (Offset > 8 * Chunk) {
284      // If we would need more than 8 add or sub instructions (a >16GB stack
285      // frame), it's worth spilling RAX to materialize this immediate.
286      //   pushq %rax
287      //   movabsq +-$Offset+-SlotSize, %rax
288      //   addq %rsp, %rax
289      //   xchg %rax, (%rsp)
290      //   movq (%rsp), %rsp
291      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
292      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
293          .addReg(Rax, RegState::Kill)
294          .setMIFlag(Flag);
295      // Subtract is not commutative, so negate the offset and always use add.
296      // Subtract 8 less and add 8 more to account for the PUSH we just did.
297      if (isSub)
298        Offset = -(Offset - SlotSize);
299      else
300        Offset = Offset + SlotSize;
301      BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax)
302          .addImm(Offset)
303          .setMIFlag(Flag);
304      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
305                             .addReg(Rax)
306                             .addReg(StackPtr);
307      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
308      // Exchange the new SP in RAX with the top of the stack.
309      addRegOffset(
310          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
311          StackPtr, false, 0);
312      // Load new SP from the top of the stack into RSP.
313      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
314                   StackPtr, false, 0);
315      return;
316    }
317  }
318
319  while (Offset) {
320    uint64_t ThisVal = std::min(Offset, Chunk);
321    if (ThisVal == SlotSize) {
322      // Use push / pop for slot sized adjustments as a size optimization. We
323      // need to find a dead register when using pop.
324      unsigned Reg = isSub
325        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
326        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
327      if (Reg) {
328        unsigned Opc = isSub
329          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
330          : (Is64Bit ? X86::POP64r  : X86::POP32r);
331        BuildMI(MBB, MBBI, DL, TII.get(Opc))
332            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
333            .setMIFlag(Flag);
334        Offset -= ThisVal;
335        continue;
336      }
337    }
338
339    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
340        .setMIFlag(Flag);
341
342    Offset -= ThisVal;
343  }
344}
345
346MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
347    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
348    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
349  assert(Offset != 0 && "zero offset stack adjustment requested");
350
351  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
352  // is tricky.
353  bool UseLEA;
354  if (!InEpilogue) {
355    // Check if inserting the prologue at the beginning
356    // of MBB would require using LEA operations.
357    // We need to use LEA operations if EFLAGS is live in, because
358    // it means an instruction will read it before it gets defined.
359    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
360  } else {
361    // If we can use LEA for SP but we shouldn't, check that none
362    // of the terminators uses the eflags. Otherwise we will insert
363    // an ADD that will redefine the eflags and break the condition.
364    // Alternatively, we could move the ADD, but this may not be possible
365    // and is an optimization anyway.
366    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
367    if (UseLEA && !STI.useLeaForSP())
368      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
369    // If that assert fires, it means we are not doing the right thing
370    // in canUseAsEpilogue.
371    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
372           "We shouldn't have allowed this insertion point");
373  }
374
375  MachineInstrBuilder MI;
376  if (UseLEA) {
377    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
378                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
379                              StackPtr),
380                      StackPtr, false, Offset);
381  } else {
382    bool IsSub = Offset < 0;
383    uint64_t AbsOffset = IsSub ? -Offset : Offset;
384    unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
385                         : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
386    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
387             .addReg(StackPtr)
388             .addImm(AbsOffset);
389    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
390  }
391  return MI;
392}
393
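// mergeSPUpdates - If the instruction before (doMergeWithPrevious) or at MBBI
// is an ADD/SUB/LEA that adjusts the stack pointer, erase it and return its
// net effect on SP (SUB immediates are negated) so the caller can fold it
// into a single adjustment. A CFI instruction following the erased update is
// erased as well.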
394int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
395                                     MachineBasicBlock::iterator &MBBI,
396                                     bool doMergeWithPrevious) const {
397  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
398      (!doMergeWithPrevious && MBBI == MBB.end()))
399    return 0;
400
401  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
402
403  PI = skipDebugInstructionsBackward(PI, MBB.begin());
404  // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
405  // instruction, and that there are no DBG_VALUE or other instructions between
406  // ADD/SUB/LEA and its corresponding CFI instruction.
407  /* TODO: Add support for the case where there are multiple CFI instructions
408    below the ADD/SUB/LEA, e.g.:
409    ...
410    add
411    cfi_def_cfa_offset
412    cfi_offset
413    ...
414  */
415  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
416    PI = std::prev(PI);
417
418  unsigned Opc = PI->getOpcode();
419  int Offset = 0;
420
421  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
422       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
423      PI->getOperand(0).getReg() == StackPtr){
424    assert(PI->getOperand(1).getReg() == StackPtr);
425    Offset = PI->getOperand(2).getImm();
426  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
427             PI->getOperand(0).getReg() == StackPtr &&
428             PI->getOperand(1).getReg() == StackPtr &&
429             PI->getOperand(2).getImm() == 1 &&
430             PI->getOperand(3).getReg() == X86::NoRegister &&
431             PI->getOperand(5).getReg() == X86::NoRegister) {
432    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
433    Offset = PI->getOperand(4).getImm();
434  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
435              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
436             PI->getOperand(0).getReg() == StackPtr) {
437    assert(PI->getOperand(1).getReg() == StackPtr);
438    Offset = -PI->getOperand(2).getImm();
439  } else
440    return 0;
441
442  PI = MBB.erase(PI);
443  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
444  if (!doMergeWithPrevious)
445    MBBI = skipDebugInstructionsForward(PI, MBB.end());
446
447  return Offset;
448}
449
450void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
451                                MachineBasicBlock::iterator MBBI,
452                                const DebugLoc &DL,
453                                const MCCFIInstruction &CFIInst) const {
454  MachineFunction &MF = *MBB.getParent();
455  unsigned CFIIndex = MF.addFrameInst(CFIInst);
456  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
457      .addCFIIndex(CFIIndex);
458}
459
460void X86FrameLowering::emitCalleeSavedFrameMoves(
461    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
462    const DebugLoc &DL) const {
463  MachineFunction &MF = *MBB.getParent();
464  MachineFrameInfo &MFI = MF.getFrameInfo();
465  MachineModuleInfo &MMI = MF.getMMI();
466  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
467
468  // Add callee saved registers to move list.
469  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
470  if (CSI.empty()) return;
471
472  // Calculate offsets.
473  for (std::vector<CalleeSavedInfo>::const_iterator
474         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
475    int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
476    unsigned Reg = I->getReg();
477
478    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
479    BuildCFI(MBB, MBBI, DL,
480             MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
481  }
482}
483
484void X86FrameLowering::emitStackProbe(MachineFunction &MF,
485                                      MachineBasicBlock &MBB,
486                                      MachineBasicBlock::iterator MBBI,
487                                      const DebugLoc &DL, bool InProlog) const {
488  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
489  if (STI.isTargetWindowsCoreCLR()) {
490    if (InProlog) {
491      emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
492    } else {
493      emitStackProbeInline(MF, MBB, MBBI, DL, false);
494    }
495  } else {
496    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
497  }
498}
499
500void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
501                                        MachineBasicBlock &PrologMBB) const {
502  const StringRef ChkStkStubSymbol = "__chkstk_stub";
503  MachineInstr *ChkStkStub = nullptr;
504
505  for (MachineInstr &MI : PrologMBB) {
506    if (MI.isCall() && MI.getOperand(0).isSymbol() &&
507        ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) {
508      ChkStkStub = &MI;
509      break;
510    }
511  }
512
513  if (ChkStkStub != nullptr) {
514    assert(!ChkStkStub->isBundled() &&
515           "Not expecting bundled instructions here");
516    MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
517    assert(std::prev(MBBI) == ChkStkStub &&
518           "MBBI expected after __chkstk_stub.");
519    DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
520    emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
521    ChkStkStub->eraseFromParent();
522  }
523}
524
525void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
526                                            MachineBasicBlock &MBB,
527                                            MachineBasicBlock::iterator MBBI,
528                                            const DebugLoc &DL,
529                                            bool InProlog) const {
530  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
531  assert(STI.is64Bit() && "different expansion needed for 32 bit");
532  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
533  const TargetInstrInfo &TII = *STI.getInstrInfo();
534  const BasicBlock *LLVM_BB = MBB.getBasicBlock();
535
536  // RAX contains the number of bytes of desired stack adjustment.
537  // The handling here assumes this value has already been updated so as to
538  // maintain stack alignment.
539  //
540  // We need to exit with RSP modified by this amount and execute suitable
541  // page touches to notify the OS that we're growing the stack responsibly.
542  // All stack probing must be done without modifying RSP.
543  //
544  // MBB:
545  //    SizeReg = RAX;
546  //    ZeroReg = 0
547  //    CopyReg = RSP
548  //    Flags, TestReg = CopyReg - SizeReg
549  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
550  //    LimitReg = gs magic thread env access
551  //    if FinalReg >= LimitReg goto ContinueMBB
552  // RoundBB:
553  //    RoundReg = page address of FinalReg
554  // LoopMBB:
555  //    LoopReg = PHI(LimitReg,ProbeReg)
556  //    ProbeReg = LoopReg - PageSize
557  //    [ProbeReg] = 0
558  //    if (ProbeReg > RoundReg) goto LoopMBB
559  // ContinueMBB:
560  //    RSP = RSP - RAX
561  //    [rest of original MBB]
562
563  // Set up the new basic blocks
564  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
565  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
566  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
567
568  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
569  MF.insert(MBBIter, RoundMBB);
570  MF.insert(MBBIter, LoopMBB);
571  MF.insert(MBBIter, ContinueMBB);
572
573  // Split MBB and move the tail portion down to ContinueMBB.
574  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
575  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
576  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);
577
578  // Some useful constants
579  const int64_t ThreadEnvironmentStackLimit = 0x10;
580  const int64_t PageSize = 0x1000;
581  const int64_t PageMask = ~(PageSize - 1);
582
583  // Registers we need. For the normal case we use virtual
584  // registers. For the prolog expansion we use RAX, RCX and RDX.
585  MachineRegisterInfo &MRI = MF.getRegInfo();
586  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
587  const Register SizeReg = InProlog ? X86::RAX
588                                    : MRI.createVirtualRegister(RegClass),
589                 ZeroReg = InProlog ? X86::RCX
590                                    : MRI.createVirtualRegister(RegClass),
591                 CopyReg = InProlog ? X86::RDX
592                                    : MRI.createVirtualRegister(RegClass),
593                 TestReg = InProlog ? X86::RDX
594                                    : MRI.createVirtualRegister(RegClass),
595                 FinalReg = InProlog ? X86::RDX
596                                     : MRI.createVirtualRegister(RegClass),
597                 RoundedReg = InProlog ? X86::RDX
598                                       : MRI.createVirtualRegister(RegClass),
599                 LimitReg = InProlog ? X86::RCX
600                                     : MRI.createVirtualRegister(RegClass),
601                 JoinReg = InProlog ? X86::RCX
602                                    : MRI.createVirtualRegister(RegClass),
603                 ProbeReg = InProlog ? X86::RCX
604                                     : MRI.createVirtualRegister(RegClass);
605
606  // SP-relative offsets where we can save RCX and RDX.
607  int64_t RCXShadowSlot = 0;
608  int64_t RDXShadowSlot = 0;
609
610  // If inlining in the prolog, save RCX and RDX.
611  if (InProlog) {
612    // Compute the offsets. We need to account for things already
613    // pushed onto the stack at this point: return address, frame
614    // pointer (if used), and callee saves.
615    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
616    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
617    const bool HasFP = hasFP(MF);
618
619    // Check if we need to spill RCX and/or RDX.
620    // Here we assume that no earlier prologue instruction changes RCX and/or
621    // RDX, so checking the block live-ins is enough.
622    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
623    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
624    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
625    // Assign the initial slot to both registers, then change RDX's slot if both
626    // need to be spilled.
627    if (IsRCXLiveIn)
628      RCXShadowSlot = InitSlot;
629    if (IsRDXLiveIn)
630      RDXShadowSlot = InitSlot;
631    if (IsRDXLiveIn && IsRCXLiveIn)
632      RDXShadowSlot += 8;
633    // Emit the saves if needed.
634    if (IsRCXLiveIn)
635      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
636                   RCXShadowSlot)
637          .addReg(X86::RCX);
638    if (IsRDXLiveIn)
639      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
640                   RDXShadowSlot)
641          .addReg(X86::RDX);
642  } else {
643    // Not in the prolog. Copy RAX to a virtual reg.
644    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
645  }
646
647  // Add code to MBB to check for overflow and set the new target stack pointer
648  // to zero if so.
649  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
650      .addReg(ZeroReg, RegState::Undef)
651      .addReg(ZeroReg, RegState::Undef);
652  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
653  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
654      .addReg(CopyReg)
655      .addReg(SizeReg);
656  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
657      .addReg(TestReg)
658      .addReg(ZeroReg)
659      .addImm(X86::COND_B);
660
661  // FinalReg now holds final stack pointer value, or zero if
662  // allocation would overflow. Compare against the current stack
663  // limit from the thread environment block. Note this limit is the
664  // lowest touched page on the stack, not the point at which the OS
665  // will cause an overflow exception, so this is just an optimization
666  // to avoid unnecessarily touching pages that are below the current
667  // SP but already committed to the stack by the OS.
668  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
669      .addReg(0)
670      .addImm(1)
671      .addReg(0)
672      .addImm(ThreadEnvironmentStackLimit)
673      .addReg(X86::GS);
674  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
675  // Jump if the desired stack pointer is at or above the stack limit.
676  BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);
677
678  // Add code to roundMBB to round the final stack pointer to a page boundary.
679  RoundMBB->addLiveIn(FinalReg);
680  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
681      .addReg(FinalReg)
682      .addImm(PageMask);
683  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
684
685  // LimitReg now holds the current stack limit and RoundedReg the page-rounded
686  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
687  // and probe until we reach RoundedReg.
688  if (!InProlog) {
689    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
690        .addReg(LimitReg)
691        .addMBB(RoundMBB)
692        .addReg(ProbeReg)
693        .addMBB(LoopMBB);
694  }
695
696  LoopMBB->addLiveIn(JoinReg);
697  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
698               false, -PageSize);
699
700  // Probe by storing a byte onto the stack.
701  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
702      .addReg(ProbeReg)
703      .addImm(1)
704      .addReg(0)
705      .addImm(0)
706      .addReg(0)
707      .addImm(0);
708
709  LoopMBB->addLiveIn(RoundedReg);
710  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
711      .addReg(RoundedReg)
712      .addReg(ProbeReg);
713  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);
714
715  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
716
717  // If in prolog, restore RDX and RCX.
718  if (InProlog) {
719    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
720      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
721                           TII.get(X86::MOV64rm), X86::RCX),
722                   X86::RSP, false, RCXShadowSlot);
723    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
724      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
725                           TII.get(X86::MOV64rm), X86::RDX),
726                   X86::RSP, false, RDXShadowSlot);
727  }
728
729  // Now that the probing is done, add code to continueMBB to update
730  // the stack pointer for real.
731  ContinueMBB->addLiveIn(SizeReg);
732  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
733      .addReg(X86::RSP)
734      .addReg(SizeReg);
735
736  // Add the control flow edges we need.
737  MBB.addSuccessor(ContinueMBB);
738  MBB.addSuccessor(RoundMBB);
739  RoundMBB->addSuccessor(LoopMBB);
740  LoopMBB->addSuccessor(ContinueMBB);
741  LoopMBB->addSuccessor(LoopMBB);
742
743  // Mark all the instructions added to the prolog as frame setup.
744  if (InProlog) {
745    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
746      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
747    }
748    for (MachineInstr &MI : *RoundMBB) {
749      MI.setFlag(MachineInstr::FrameSetup);
750    }
751    for (MachineInstr &MI : *LoopMBB) {
752      MI.setFlag(MachineInstr::FrameSetup);
753    }
754    for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
755         CMBBI != ContinueMBBI; ++CMBBI) {
756      CMBBI->setFlag(MachineInstr::FrameSetup);
757    }
758  }
759}
760
761void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
762                                          MachineBasicBlock &MBB,
763                                          MachineBasicBlock::iterator MBBI,
764                                          const DebugLoc &DL,
765                                          bool InProlog) const {
766  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
767
768  // FIXME: Add indirect thunk support and remove this.
769  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
770    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
771                       "code model and indirect thunks not yet implemented.");
772
773  unsigned CallOp;
774  if (Is64Bit)
775    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
776  else
777    CallOp = X86::CALLpcrel32;
778
779  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);
780
781  MachineInstrBuilder CI;
782  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
783
784  // All current stack probes take AX and SP as input, clobber flags, and
785  // preserve all registers. x86_64 probes leave RSP unmodified.
786  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
787    // For the large code model, we have to call through a register. Use R11,
788    // as it is scratch in all supported calling conventions.
789    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
790        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
791    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
792  } else {
793    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
794        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
795  }
796
797  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
798  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
799  CI.addReg(AX, RegState::Implicit)
800      .addReg(SP, RegState::Implicit)
801      .addReg(AX, RegState::Define | RegState::Implicit)
802      .addReg(SP, RegState::Define | RegState::Implicit)
803      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
804
805  if (STI.isTargetWin64() || !STI.isOSWindows()) {
806    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
807    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
808    // themselves. They also do not clobber %rax so we can reuse it when
809    // adjusting %rsp.
810    // All other platforms do not specify a particular ABI for the stack probe
811    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
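    // For example, on Win64 this emits "subq %rax, %rsp" right after the
    // probe call.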
812    BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
813        .addReg(SP)
814        .addReg(AX);
815  }
816
817  if (InProlog) {
818    // Apply the frame setup flag to all inserted instrs.
819    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
820      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
821  }
822}
823
824void X86FrameLowering::emitStackProbeInlineStub(
825    MachineFunction &MF, MachineBasicBlock &MBB,
826    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
827
828  assert(InProlog && "ChkStkStub called outside prolog!");
829
830  BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
831      .addExternalSymbol("__chkstk_stub");
832}
833
834static unsigned calculateSetFPREG(uint64_t SPAdjust) {
835  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
836  // and might require smaller successive adjustments.
837  const uint64_t Win64MaxSEHOffset = 128;
838  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
839  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
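  // E.g. an SPAdjust of 0x58 yields 0x58 & -16 == 0x50; anything at or above
  // 128 is clamped to 128 first.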
840  return SEHFrameOffset & -16;
841}
842
843// If we're forcing a stack realignment we can't rely on just the frame
844// info; we need to know the ABI stack alignment as well in case we
845// have a call out.  Otherwise just make sure we have some alignment - we'll
846// go with the minimum SlotSize.
847uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
848  const MachineFrameInfo &MFI = MF.getFrameInfo();
849  uint64_t MaxAlign = MFI.getMaxAlignment(); // Desired stack alignment.
850  unsigned StackAlign = getStackAlignment();
851  if (MF.getFunction().hasFnAttribute("stackrealign")) {
852    if (MFI.hasCalls())
853      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
854    else if (MaxAlign < SlotSize)
855      MaxAlign = SlotSize;
856  }
857  return MaxAlign;
858}
859
860void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
861                                          MachineBasicBlock::iterator MBBI,
862                                          const DebugLoc &DL, unsigned Reg,
863                                          uint64_t MaxAlign) const {
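  // Align Reg down to a multiple of MaxAlign by masking off the low bits;
  // e.g. for MaxAlign == 32 the immediate is -32, which clears the low five
  // bits of the register.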
864  uint64_t Val = -MaxAlign;
865  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
866  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
867                         .addReg(Reg)
868                         .addImm(Val)
869                         .setMIFlag(MachineInstr::FrameSetup);
870
871  // The EFLAGS implicit def is dead.
872  MI->getOperand(3).setIsDead();
873}
874
875bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
876  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
877  // clobbered by any interrupt handler.
878  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
879         "MF used frame lowering for wrong subtarget");
880  const Function &Fn = MF.getFunction();
881  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
882  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
883}
884
885
886/// emitPrologue - Push callee-saved registers onto the stack, which
887/// automatically adjusts the stack pointer. Adjust the stack pointer to allocate
888/// space for local variables. Also emit labels used by the exception handler to
889/// generate the exception handling frames.
890
891/*
892  Here's a gist of what gets emitted:
893
894  ; Establish frame pointer, if needed
895  [if needs FP]
896      push  %rbp
897      .cfi_def_cfa_offset 16
898      .cfi_offset %rbp, -16
899      .seh_pushreg %rbp
900      mov  %rsp, %rbp
901      .cfi_def_cfa_register %rbp
902
903  ; Spill general-purpose registers
904  [for all callee-saved GPRs]
905      pushq %<reg>
906      [if not needs FP]
907         .cfi_def_cfa_offset (offset from RETADDR)
908      .seh_pushreg %<reg>
909
910  ; If the required stack alignment > default stack alignment
911  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
912  ; of unknown size in the stack frame.
913  [if stack needs re-alignment]
914      and  $MASK, %rsp
915
916  ; Allocate space for locals
917  [if target is Windows and allocated space > 4096 bytes]
918      ; Windows needs special care for allocations larger
919      ; than one page.
920      mov $NNN, %rax
921      call ___chkstk_ms/___chkstk
922      sub  %rax, %rsp
923  [else]
924      sub  $NNN, %rsp
925
926  [if needs FP]
927      .seh_stackalloc (size of XMM spill slots)
928      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
929  [else]
930      .seh_stackalloc NNN
931
932  ; Spill XMMs
933  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
934  ; they may get spilled on any platform if the current function
935  ; calls @llvm.eh.unwind.init
936  [if needs FP]
937      [for all callee-saved XMM registers]
938          movaps  %<xmm reg>, -MMM(%rbp)
939      [for all callee-saved XMM registers]
940          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
941              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
942  [else]
943      [for all callee-saved XMM registers]
944          movaps  %<xmm reg>, KKK(%rsp)
945      [for all callee-saved XMM registers]
946          .seh_savexmm %<xmm reg>, KKK
947
948  .seh_endprologue
949
950  [if needs base pointer]
951      mov  %rsp, %rbx
952      [if needs to restore base pointer]
953          mov %rsp, -MMM(%rbp)
954
955  ; Emit CFI info
956  [if needs FP]
957      [for all callee-saved registers]
958          .cfi_offset %<reg>, (offset from %rbp)
959  [else]
960       .cfi_def_cfa_offset (offset from RETADDR)
961      [for all callee-saved registers]
962          .cfi_offset %<reg>, (offset from %rsp)
963
964  Notes:
965  - .seh directives are emitted only for Windows 64 ABI
966  - .cv_fpo directives are emitted on win32 when emitting CodeView
967  - .cfi directives are emitted for all other ABIs
968  - for 32-bit code, substitute %e?? registers for %r??
969*/
970
971void X86FrameLowering::emitPrologue(MachineFunction &MF,
972                                    MachineBasicBlock &MBB) const {
973  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
974         "MF used frame lowering for wrong subtarget");
975  MachineBasicBlock::iterator MBBI = MBB.begin();
976  MachineFrameInfo &MFI = MF.getFrameInfo();
977  const Function &Fn = MF.getFunction();
978  MachineModuleInfo &MMI = MF.getMMI();
979  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
980  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
981  uint64_t StackSize = MFI.getStackSize();    // Number of bytes to allocate.
982  bool IsFunclet = MBB.isEHFuncletEntry();
983  EHPersonality Personality = EHPersonality::Unknown;
984  if (Fn.hasPersonalityFn())
985    Personality = classifyEHPersonality(Fn.getPersonalityFn());
986  bool FnHasClrFunclet =
987      MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
988  bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
989  bool HasFP = hasFP(MF);
990  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
991  bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry();
992  // FIXME: Emit FPO data for EH funclets.
993  bool NeedsWinFPO =
994      !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag();
995  bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO;
996  bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves();
997  Register FramePtr = TRI->getFrameRegister(MF);
998  const Register MachineFramePtr =
999      STI.isTarget64BitILP32()
1000          ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
1001  Register BasePtr = TRI->getBaseRegister();
1002  bool HasWinCFI = false;
1003
1004  // Debug location must be unknown since the first debug location is used
1005  // to determine the end of the prologue.
1006  DebugLoc DL;
1007
1008  // Add RETADDR move area to callee saved frame size.
1009  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1010  if (TailCallReturnAddrDelta && IsWin64Prologue)
1011    report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1012
1013  if (TailCallReturnAddrDelta < 0)
1014    X86FI->setCalleeSavedFrameSize(
1015      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
1016
1017  bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
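  // The probe interval is normally one page (4096 bytes); it can be
  // overridden per-function with the "stack-probe-size" attribute.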
1018  unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
1019
1020  // Re-align the stack on 64-bit if the x86-interrupt calling convention is
1021  // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
1022  // stack alignment.
1023  if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
1024      Fn.arg_size() == 2) {
1025    StackSize += 8;
1026    MFI.setStackSize(StackSize);
1027    emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
1028  }
1029
1030  // If this is x86-64, the Red Zone is not disabled, and we are a leaf
1031  // function that uses up to 128 bytes of stack space and has no frame
1032  // pointer, calls, or dynamic allocas, then we do not need to adjust the
1033  // stack pointer (we fit in the Red Zone). We also check that we don't
1034  // push and pop from the stack.
1035  if (has128ByteRedZone(MF) &&
1036      !TRI->needsStackRealignment(MF) &&
1037      !MFI.hasVarSizedObjects() &&             // No dynamic alloca.
1038      !MFI.adjustsStack() &&                   // No calls.
1039      !UseStackProbe &&                        // No stack probes.
1040      !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
1041      !MF.shouldSplitStack()) {                // Regular stack
1042    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
1043    if (HasFP) MinSize += SlotSize;
1044    X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
1045    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
1046    MFI.setStackSize(StackSize);
1047  }
1048
1049  // Insert stack pointer adjustment for later moving of return addr.  Only
1050  // applies to tail call optimized functions where the callee argument stack
1051  // size is bigger than the caller's.
1052  if (TailCallReturnAddrDelta < 0) {
1053    BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
1054                         /*InEpilogue=*/false)
1055        .setMIFlag(MachineInstr::FrameSetup);
1056  }
1057
1058  // Mapping for machine moves:
1059  //
1060  //   DST: VirtualFP AND
1061  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
1062  //        ELSE                        => DW_CFA_def_cfa
1063  //
1064  //   SRC: VirtualFP AND
1065  //        DST: Register               => DW_CFA_def_cfa_register
1066  //
1067  //   ELSE
1068  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
1069  //        REG < 64                    => DW_CFA_offset + Reg
1070  //        ELSE                        => DW_CFA_offset_extended
1071
1072  uint64_t NumBytes = 0;
1073  int stackGrowth = -SlotSize;
1074
1075  // Find the funclet establisher parameter
1076  Register Establisher = X86::NoRegister;
1077  if (IsClrFunclet)
1078    Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
1079  else if (IsFunclet)
1080    Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
1081
1082  if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
1083    // Immediately spill establisher into the home slot.
1084    // The runtime cares about this.
1085    // MOV64mr %rdx, 16(%rsp)
1086    unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1087    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
1088        .addReg(Establisher)
1089        .setMIFlag(MachineInstr::FrameSetup);
1090    MBB.addLiveIn(Establisher);
1091  }
1092
1093  if (HasFP) {
1094    assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");
1095
1096    // Calculate required stack adjustment.
1097    uint64_t FrameSize = StackSize - SlotSize;
1098    // If required, include space for extra hidden slot for stashing base pointer.
1099    if (X86FI->getRestoreBasePointer())
1100      FrameSize += SlotSize;
1101
1102    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
1103
1104    // Callee-saved registers are pushed on stack before the stack is realigned.
1105    if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
1106      NumBytes = alignTo(NumBytes, MaxAlign);
1107
1108    // Save EBP/RBP into the appropriate stack slot.
1109    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
1110      .addReg(MachineFramePtr, RegState::Kill)
1111      .setMIFlag(MachineInstr::FrameSetup);
1112
1113    if (NeedsDwarfCFI) {
1114      // Mark the place where EBP/RBP was saved.
1115      // Define the current CFA rule to use the provided offset.
1116      assert(StackSize);
1117      BuildCFI(MBB, MBBI, DL,
1118               MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
1119
1120      // Change the rule for the FramePtr to be an "offset" rule.
1121      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1122      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset(
1123                                  nullptr, DwarfFramePtr, 2 * stackGrowth));
1124    }
1125
1126    if (NeedsWinCFI) {
1127      HasWinCFI = true;
1128      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1129          .addImm(FramePtr)
1130          .setMIFlag(MachineInstr::FrameSetup);
1131    }
1132
1133    if (!IsWin64Prologue && !IsFunclet) {
1134      // Update EBP with the new base value.
1135      BuildMI(MBB, MBBI, DL,
1136              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
1137              FramePtr)
1138          .addReg(StackPtr)
1139          .setMIFlag(MachineInstr::FrameSetup);
1140
1141      if (NeedsDwarfCFI) {
1142        // Mark effective beginning of when frame pointer becomes valid.
1143        // Define the current CFA to use the EBP/RBP register.
1144        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
1145        BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
1146                                    nullptr, DwarfFramePtr));
1147      }
1148
1149      if (NeedsWinFPO) {
1150        // .cv_fpo_setframe $FramePtr
1151        HasWinCFI = true;
1152        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1153            .addImm(FramePtr)
1154            .addImm(0)
1155            .setMIFlag(MachineInstr::FrameSetup);
1156      }
1157    }
1158  } else {
1159    assert(!IsFunclet && "funclets without FPs not yet implemented");
1160    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
1161  }
1162
1163  // Update the offset adjustment, which is mainly used by codeview to translate
1164  // from ESP to VFRAME relative local variable offsets.
1165  if (!IsFunclet) {
1166    if (HasFP && TRI->needsStackRealignment(MF))
1167      MFI.setOffsetAdjustment(-NumBytes);
1168    else
1169      MFI.setOffsetAdjustment(-StackSize);
1170  }
1171
1172  // For EH funclets, only allocate enough space for outgoing calls. Save the
1173  // NumBytes value that we would've used for the parent frame.
1174  unsigned ParentFrameNumBytes = NumBytes;
1175  if (IsFunclet)
1176    NumBytes = getWinEHFuncletFrameSize(MF);
1177
1178  // Skip the callee-saved push instructions.
1179  bool PushedRegs = false;
1180  int StackOffset = 2 * stackGrowth;
1181
1182  while (MBBI != MBB.end() &&
1183         MBBI->getFlag(MachineInstr::FrameSetup) &&
1184         (MBBI->getOpcode() == X86::PUSH32r ||
1185          MBBI->getOpcode() == X86::PUSH64r)) {
1186    PushedRegs = true;
1187    Register Reg = MBBI->getOperand(0).getReg();
1188    ++MBBI;
1189
1190    if (!HasFP && NeedsDwarfCFI) {
1191      // Mark callee-saved push instruction.
1192      // Define the current CFA rule to use the provided offset.
1193      assert(StackSize);
1194      BuildCFI(MBB, MBBI, DL,
1195               MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
1196      StackOffset += stackGrowth;
1197    }
1198
1199    if (NeedsWinCFI) {
1200      HasWinCFI = true;
1201      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1202          .addImm(Reg)
1203          .setMIFlag(MachineInstr::FrameSetup);
1204    }
1205  }
1206
1207  // Realign stack after we pushed callee-saved registers (so that we'll be
1208  // able to calculate their offsets from the frame pointer).
1209  // Don't do this for Win64, it needs to realign the stack after the prologue.
1210  if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
1211    assert(HasFP && "There should be a frame pointer if stack is realigned.");
1212    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1213
1214    if (NeedsWinCFI) {
1215      HasWinCFI = true;
1216      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1217          .addImm(MaxAlign)
1218          .setMIFlag(MachineInstr::FrameSetup);
1219    }
1220  }
1221
1222  // If there is an SUB32ri of ESP immediately before this instruction, merge
1223  // the two. This can be the case when tail call elimination is enabled and
1224  // the callee has more arguments than the caller.
1225  NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1226
1227  // Adjust stack pointer: ESP -= numbytes.
1228
1229  // Windows and cygwin/mingw require a prologue helper routine when allocating
1230  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
1231  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
1232  // stack and adjust the stack pointer in one go.  The 64-bit version of
1233  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
1234  // responsible for adjusting the stack pointer.  Touching the stack at 4K
1235  // increments is necessary to ensure that the guard pages used by the OS
1236  // virtual memory manager are allocated in correct sequence.
1237  uint64_t AlignedNumBytes = NumBytes;
1238  if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
1239    AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1240  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
1241    assert(!X86FI->getUsesRedZone() &&
1242           "The Red Zone is not accounted for in stack probes");
1243
1244    // Check whether EAX is livein for this block.
1245    bool isEAXAlive = isEAXLiveIn(MBB);
1246
1247    if (isEAXAlive) {
1248      if (Is64Bit) {
1249        // Save RAX
1250        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1251          .addReg(X86::RAX, RegState::Kill)
1252          .setMIFlag(MachineInstr::FrameSetup);
1253      } else {
1254        // Save EAX
1255        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1256          .addReg(X86::EAX, RegState::Kill)
1257          .setMIFlag(MachineInstr::FrameSetup);
1258      }
1259    }
1260
1261    if (Is64Bit) {
1262      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1263      // Function prologue is responsible for adjusting the stack pointer.
1264      int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1265      if (isUInt<32>(Alloc)) {
1266        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1267            .addImm(Alloc)
1268            .setMIFlag(MachineInstr::FrameSetup);
1269      } else if (isInt<32>(Alloc)) {
1270        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
1271            .addImm(Alloc)
1272            .setMIFlag(MachineInstr::FrameSetup);
1273      } else {
1274        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
1275            .addImm(Alloc)
1276            .setMIFlag(MachineInstr::FrameSetup);
1277      }
1278    } else {
1279      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1280      // We'll also use 4 already allocated bytes for EAX.
1281      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1282          .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1283          .setMIFlag(MachineInstr::FrameSetup);
1284    }
1285
1286    // Call __chkstk, __chkstk_ms, or __alloca.
1287    emitStackProbe(MF, MBB, MBBI, DL, true);
1288
1289    if (isEAXAlive) {
1290      // Restore RAX/EAX
1291      MachineInstr *MI;
1292      if (Is64Bit)
1293        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
1294                          StackPtr, false, NumBytes - 8);
1295      else
1296        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
1297                          StackPtr, false, NumBytes - 4);
1298      MI->setFlag(MachineInstr::FrameSetup);
1299      MBB.insert(MBBI, MI);
1300    }
1301  } else if (NumBytes) {
1302    emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
1303  }
1304
1305  if (NeedsWinCFI && NumBytes) {
1306    HasWinCFI = true;
1307    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
1308        .addImm(NumBytes)
1309        .setMIFlag(MachineInstr::FrameSetup);
1310  }
1311
1312  int SEHFrameOffset = 0;
1313  unsigned SPOrEstablisher;
1314  if (IsFunclet) {
1315    if (IsClrFunclet) {
1316      // The establisher parameter passed to a CLR funclet is actually a pointer
1317      // to the (mostly empty) frame of its nearest enclosing funclet; we have
1318      // to find the root function establisher frame by loading the PSPSym from
1319      // the intermediate frame.
1320      unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1321      MachinePointerInfo NoInfo;
1322      MBB.addLiveIn(Establisher);
1323      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
1324                   Establisher, false, PSPSlotOffset)
1325          .addMemOperand(MF.getMachineMemOperand(
1326              NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize));
1327
1328      // Save the root establisher back into the current funclet's (mostly
1329      // empty) frame, in case a sub-funclet or the GC needs it.
1330      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
1331                   false, PSPSlotOffset)
1332          .addReg(Establisher)
1333          .addMemOperand(
1334              MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore |
1335                                                  MachineMemOperand::MOVolatile,
1336                                      SlotSize, SlotSize));
1337    }
1338    SPOrEstablisher = Establisher;
1339  } else {
1340    SPOrEstablisher = StackPtr;
1341  }
1342
1343  if (IsWin64Prologue && HasFP) {
1344    // Set RBP to a small fixed offset from RSP. In the funclet case, we base
1345    // this calculation on the incoming establisher, which holds the value of
1346    // RSP from the parent frame at the end of the prologue.
1347    SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
1348    if (SEHFrameOffset)
1349      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
1350                   SPOrEstablisher, false, SEHFrameOffset);
1351    else
1352      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
1353          .addReg(SPOrEstablisher);
1354
1355    // If this is not a funclet, emit the CFI describing our frame pointer.
1356    if (NeedsWinCFI && !IsFunclet) {
1357      assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
1358      HasWinCFI = true;
1359      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
1360          .addImm(FramePtr)
1361          .addImm(SEHFrameOffset)
1362          .setMIFlag(MachineInstr::FrameSetup);
1363      if (isAsynchronousEHPersonality(Personality))
1364        MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
1365    }
1366  } else if (IsFunclet && STI.is32Bit()) {
1367    // Reset EBP / ESI to something good for funclets.
1368    MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
1369    // If we're a catch funclet, we can be returned to via catchret. Save ESP
1370    // into the registration node so that the runtime will restore it for us.
1371    if (!MBB.isCleanupFuncletEntry()) {
1372      assert(Personality == EHPersonality::MSVC_CXX);
1373      unsigned FrameReg;
1374      int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
1375      int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg);
1376      // ESP is the first field, so no extra displacement is needed.
1377      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
1378                   false, EHRegOffset)
1379          .addReg(X86::ESP);
1380    }
1381  }
1382
1383  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
1384    const MachineInstr &FrameInstr = *MBBI;
1385    ++MBBI;
1386
1387    if (NeedsWinCFI) {
1388      int FI;
1389      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
1390        if (X86::FR64RegClass.contains(Reg)) {
1391          int Offset;
1392          unsigned IgnoredFrameReg;
1393          if (IsWin64Prologue && IsFunclet)
1394            Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
1395          else
1396            Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg) +
1397                     SEHFrameOffset;
1398
1399          HasWinCFI = true;
1400          assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
1401          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
1402              .addImm(Reg)
1403              .addImm(Offset)
1404              .setMIFlag(MachineInstr::FrameSetup);
1405        }
1406      }
1407    }
1408  }
1409
1410  if (NeedsWinCFI && HasWinCFI)
1411    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
1412        .setMIFlag(MachineInstr::FrameSetup);
1413
1414  if (FnHasClrFunclet && !IsFunclet) {
1415    // Save the so-called Initial-SP (i.e. the value of the stack pointer
1416    // immediately after the prologue) into the PSPSlot so that funclets
1417    // and the GC can recover it.
1418    unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
1419    auto PSPInfo = MachinePointerInfo::getFixedStack(
1420        MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
1421    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
1422                 PSPSlotOffset)
1423        .addReg(StackPtr)
1424        .addMemOperand(MF.getMachineMemOperand(
1425            PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
1426            SlotSize, SlotSize));
1427  }
1428
1429  // Realign stack after we spilled callee-saved registers (so that we'll be
1430  // able to calculate their offsets from the frame pointer).
1431  // Win64 requires aligning the stack after the prologue.
1432  if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
1433    assert(HasFP && "There should be a frame pointer if stack is realigned.");
1434    BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
1435  }
1436
1437  // We already dealt with stack realignment and funclets above.
1438  if (IsFunclet && STI.is32Bit())
1439    return;
1440
1441  // If we need a base pointer, set it up here. It's whatever the value
1442  // of the stack pointer is at this point. Any variable size objects
1443  // will be allocated after this, so we can still use the base pointer
1444  // to reference locals.
1445  if (TRI->hasBasePointer(MF)) {
1446    // Update the base pointer with the current stack pointer.
1447    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
1448    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
1449      .addReg(SPOrEstablisher)
1450      .setMIFlag(MachineInstr::FrameSetup);
1451    if (X86FI->getRestoreBasePointer()) {
1452      // Stash the value of the base pointer. Saving RSP instead of EBP
1453      // shortens the dependence chain. Used by SjLj EH.
1454      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1455      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
1456                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
1457        .addReg(SPOrEstablisher)
1458        .setMIFlag(MachineInstr::FrameSetup);
1459    }
1460
1461    if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
1462      // Stash the value of the frame pointer relative to the base pointer. This
1463      // supports Win32 EH, which does the inverse of the above: it recovers the
1464      // frame pointer from the base pointer rather than the other way
1465      // around.
1466      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
1467      unsigned UsedReg;
1468      int Offset =
1469          getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
1470      assert(UsedReg == BasePtr);
1471      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
1472          .addReg(FramePtr)
1473          .setMIFlag(MachineInstr::FrameSetup);
1474    }
1475  }
1476
1477  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
1478    // Mark end of stack pointer adjustment.
1479    if (!HasFP && NumBytes) {
1480      // Define the current CFA rule to use the provided offset.
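      // Illustrative example (hypothetical numbers, assuming stackGrowth is
      // -SlotSize, i.e. -8 on x86-64, as set up earlier in this prologue):
      // with StackSize = 40 the value passed below is -40 + (-8) = -48, i.e.
      // the CFA sits 48 bytes above the post-prologue stack pointer.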
1481      assert(StackSize);
1482      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
1483                                  nullptr, -StackSize + stackGrowth));
1484    }
1485
1486    // Emit DWARF info specifying the offsets of the callee-saved registers.
1487    emitCalleeSavedFrameMoves(MBB, MBBI, DL);
1488  }
1489
1490  // An x86 interrupt handler cannot assume anything about the direction flag
1491  // (DF in the EFLAGS register), so clear it by emitting a "cld" instruction in
1492  // the prologue of every interrupt handler.
1493  //
1494  // FIXME: Emit the "cld" instruction only in these cases:
1495  // 1. The interrupt handler uses any of the "rep" instructions.
1496  // 2. The interrupt handler calls another function.
1497  //
1498  if (Fn.getCallingConv() == CallingConv::X86_INTR)
1499    BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
1500        .setMIFlag(MachineInstr::FrameSetup);
1501
1502  // At this point we know if the function has WinCFI or not.
1503  MF.setHasWinCFI(HasWinCFI);
1504}
1505
1506bool X86FrameLowering::canUseLEAForSPInEpilogue(
1507    const MachineFunction &MF) const {
1508  // We can't use LEA instructions for adjusting the stack pointer if we don't
1509  // have a frame pointer in the Win64 ABI.  Only ADD instructions may be used
1510  // to deallocate the stack.
1511  // This means that we can use LEA for SP in two situations:
1512  // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
1513  // 2. We *have* a frame pointer which means we are permitted to use LEA.
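  // For illustration only (offsets are hypothetical), the two shapes are:
  //   addq $40, %rsp          ; always acceptable to the Win64 unwinder
  //   leaq 16(%rbp), %rsp     ; only usable when a frame pointer exists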
1514  return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
1515}
1516
1517static bool isFuncletReturnInstr(MachineInstr &MI) {
1518  switch (MI.getOpcode()) {
1519  case X86::CATCHRET:
1520  case X86::CLEANUPRET:
1521    return true;
1522  default:
1523    return false;
1524  }
1525  llvm_unreachable("impossible");
1526}
1527
1528// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
1529// stack. It holds a pointer to the bottom of the root function frame.  The
1530// establisher frame pointer passed to a nested funclet may point to the
1531// (mostly empty) frame of its parent funclet, but it will need to find
1532// the frame of the root function to access locals.  To facilitate this,
1533// every funclet copies the pointer to the bottom of the root function
1534// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
1535// same offset for the PSPSym in the root function frame that's used in the
1536// funclets' frames allows each funclet to dynamically accept any ancestor
1537// frame as its establisher argument (the runtime doesn't guarantee the
1538// immediate parent for some reason lost to history), and also allows the GC,
1539// which uses the PSPSym for some bookkeeping, to find it in any funclet's
1540// frame with only a single offset reported for the entire method.
1541unsigned
1542X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
1543  const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
1544  unsigned SPReg;
1545  int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
1546                                              /*IgnoreSPUpdates*/ true);
1547  assert(Offset >= 0 && SPReg == TRI->getStackRegister());
1548  return static_cast<unsigned>(Offset);
1549}
1550
1551unsigned
1552X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
1553  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1554  // This is the size of the pushed CSRs.
1555  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
1556  // This is the size of callee saved XMMs.
1557  const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
1558  unsigned XMMSize = WinEHXMMSlotInfo.size() *
1559                     TRI->getSpillSize(X86::VR128RegClass);
1560  // This is the amount of stack a funclet needs to allocate.
1561  unsigned UsedSize;
1562  EHPersonality Personality =
1563      classifyEHPersonality(MF.getFunction().getPersonalityFn());
1564  if (Personality == EHPersonality::CoreCLR) {
1565    // CLR funclets need to hold enough space to include the PSPSym, at the
1566    // same offset from the stack pointer (immediately after the prolog) as it
1567    // resides at in the main function.
1568    UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
1569  } else {
1570    // Other funclets just need enough stack for outgoing call arguments.
1571    UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
1572  }
1573  // RBP is not included in the callee saved register block. After pushing RBP,
1574  // everything is 16 byte aligned. Everything we allocate before an outgoing
1575  // call must also be 16 byte aligned.
1576  unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlignment());
1577  // Subtract out the size of the callee saved registers. This is how much stack
1578  // each funclet will allocate.
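  // Illustrative example (hypothetical numbers): with CSSize = 40, UsedSize =
  // 32 and a 16-byte stack alignment, FrameSizeMinusRBP = alignTo(72, 16) = 80,
  // so the funclet allocates 80 + XMMSize - 40 bytes below.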
1579  return FrameSizeMinusRBP + XMMSize - CSSize;
1580}
1581
1582static bool isTailCallOpcode(unsigned Opc) {
1583    return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
1584        Opc == X86::TCRETURNmi ||
1585        Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 ||
1586        Opc == X86::TCRETURNmi64;
1587}
1588
1589void X86FrameLowering::emitEpilogue(MachineFunction &MF,
1590                                    MachineBasicBlock &MBB) const {
1591  const MachineFrameInfo &MFI = MF.getFrameInfo();
1592  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1593  MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
1594  MachineBasicBlock::iterator MBBI = Terminator;
1595  DebugLoc DL;
1596  if (MBBI != MBB.end())
1597    DL = MBBI->getDebugLoc();
1598  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
1599  const bool Is64BitILP32 = STI.isTarget64BitILP32();
1600  Register FramePtr = TRI->getFrameRegister(MF);
1601  unsigned MachineFramePtr =
1602      Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;
1603
1604  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1605  bool NeedsWin64CFI =
1606      IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
1607  bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
1608
1609  // Get the number of bytes to allocate from the FrameInfo.
1610  uint64_t StackSize = MFI.getStackSize();
1611  uint64_t MaxAlign = calculateMaxStackAlign(MF);
1612  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
1613  bool HasFP = hasFP(MF);
1614  uint64_t NumBytes = 0;
1615
1616  bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
1617                        !MF.getTarget().getTargetTriple().isOSWindows()) &&
1618                       MF.needsFrameMoves();
1619
1620  if (IsFunclet) {
1621    assert(HasFP && "EH funclets without FP not yet implemented");
1622    NumBytes = getWinEHFuncletFrameSize(MF);
1623  } else if (HasFP) {
1624    // Calculate required stack adjustment.
1625    uint64_t FrameSize = StackSize - SlotSize;
1626    NumBytes = FrameSize - CSSize;
1627
1628    // Callee-saved registers were pushed on stack before the stack was
1629    // realigned.
1630    if (TRI->needsStackRealignment(MF) && !IsWin64Prologue)
1631      NumBytes = alignTo(FrameSize, MaxAlign);
1632  } else {
1633    NumBytes = StackSize - CSSize;
1634  }
1635  uint64_t SEHStackAllocAmt = NumBytes;
1636
1637  if (HasFP) {
1638    // Pop EBP.
1639    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
1640            MachineFramePtr)
1641        .setMIFlag(MachineInstr::FrameDestroy);
1642    if (NeedsDwarfCFI) {
1643      unsigned DwarfStackPtr =
1644          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
1645      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
1646                                  nullptr, DwarfStackPtr, -SlotSize));
1647      --MBBI;
1648    }
1649  }
1650
1651  MachineBasicBlock::iterator FirstCSPop = MBBI;
1652  // Skip the callee-saved pop instructions.
1653  while (MBBI != MBB.begin()) {
1654    MachineBasicBlock::iterator PI = std::prev(MBBI);
1655    unsigned Opc = PI->getOpcode();
1656
1657    if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
1658      if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
1659          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)))
1660        break;
1661      FirstCSPop = PI;
1662    }
1663
1664    --MBBI;
1665  }
1666  MBBI = FirstCSPop;
1667
1668  if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
1669    emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
1670
1671  if (MBBI != MBB.end())
1672    DL = MBBI->getDebugLoc();
1673
1674  // If there is an ADD32ri or SUB32ri of ESP immediately before this
1675  // instruction, merge the two instructions.
1676  if (NumBytes || MFI.hasVarSizedObjects())
1677    NumBytes += mergeSPUpdates(MBB, MBBI, true);
1678
1679  // If dynamic alloca is used, then reset esp to point to the last callee-saved
1680  // slot before popping them off! The same applies when the stack was
1681  // realigned. Don't do this if this was a funclet epilogue, since the funclets
1682  // will not do realignment or dynamic stack allocation.
1683  if ((TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) &&
1684      !IsFunclet) {
1685    if (TRI->needsStackRealignment(MF))
1686      MBBI = FirstCSPop;
1687    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
1688    uint64_t LEAAmount =
1689        IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
1690
1691    // There are only two legal forms of epilogue:
1692    // - add SEHAllocationSize, %rsp
1693    // - lea SEHAllocationSize(%FramePtr), %rsp
1694    //
1695    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
1696    // However, we may use this sequence if we have a frame pointer because the
1697    // effects of the prologue can safely be undone.
1698    if (LEAAmount != 0) {
1699      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
1700      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
1701                   FramePtr, false, LEAAmount);
1702      --MBBI;
1703    } else {
1704      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
1705      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
1706        .addReg(FramePtr);
1707      --MBBI;
1708    }
1709  } else if (NumBytes) {
1710    // Adjust stack pointer back: ESP += numbytes.
1711    emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
1712    if (!hasFP(MF) && NeedsDwarfCFI) {
1713      // Define the current CFA rule to use the provided offset.
1714      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaOffset(
1715                                  nullptr, -CSSize - SlotSize));
1716    }
1717    --MBBI;
1718  }
1719
1720  // Windows unwinder will not invoke function's exception handler if IP is
1721  // either in prologue or in epilogue.  This behavior causes a problem when a
1722  // call immediately precedes an epilogue, because the return address points
1723  // into the epilogue.  To cope with that, we insert an epilogue marker here,
1724  // then replace it with a 'nop' if it ends up immediately after a CALL in the
1725  // final emitted code.
1726  if (NeedsWin64CFI && MF.hasWinCFI())
1727    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
1728
1729  if (!hasFP(MF) && NeedsDwarfCFI) {
1730    MBBI = FirstCSPop;
1731    int64_t Offset = -CSSize - SlotSize;
1732    // Mark callee-saved pop instruction.
1733    // Define the current CFA rule to use the provided offset.
1734    while (MBBI != MBB.end()) {
1735      MachineBasicBlock::iterator PI = MBBI;
1736      unsigned Opc = PI->getOpcode();
1737      ++MBBI;
1738      if (Opc == X86::POP32r || Opc == X86::POP64r) {
1739        Offset += SlotSize;
1740        BuildCFI(MBB, MBBI, DL,
1741                 MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
1742      }
1743    }
1744  }
1745
1746  if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
1747    // Add the return addr area delta back since we are not tail calling.
1748    int Offset = -1 * X86FI->getTCReturnAddrDelta();
1749    assert(Offset >= 0 && "TCDelta should never be positive");
1750    if (Offset) {
1751      // Check for possible merge with preceding ADD instruction.
1752      Offset += mergeSPUpdates(MBB, Terminator, true);
1753      emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
1754    }
1755  }
1756}
1757
1758int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
1759                                             unsigned &FrameReg) const {
1760  const MachineFrameInfo &MFI = MF.getFrameInfo();
1761
1762  bool IsFixed = MFI.isFixedObjectIndex(FI);
1763  // We can't calculate offset from frame pointer if the stack is realigned,
1764  // so enforce usage of stack/base pointer.  The base pointer is used when we
1765  // have dynamic allocas in addition to dynamic realignment.
1766  if (TRI->hasBasePointer(MF))
1767    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
1768  else if (TRI->needsStackRealignment(MF))
1769    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
1770  else
1771    FrameReg = TRI->getFrameRegister(MF);
1772
1773  // Offset will hold the offset from the stack pointer at function entry to the
1774  // object.
1775  // We need to factor in additional offsets applied during the prologue to the
1776  // frame, base, and stack pointer depending on which is used.
1777  int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
1778  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1779  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
1780  uint64_t StackSize = MFI.getStackSize();
1781  bool HasFP = hasFP(MF);
1782  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
1783  int64_t FPDelta = 0;
1784
1785  // In an x86 interrupt, remove the offset we added to account for the return
1786  // address from any stack object allocated in the caller's frame. Interrupts
1787  // do not have a standard return address. Fixed objects in the current frame,
1788  // such as SSE register spills, should not get this treatment.
1789  if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
1790      Offset >= 0) {
1791    Offset += getOffsetOfLocalArea();
1792  }
1793
1794  if (IsWin64Prologue) {
1795    assert(!MFI.hasCalls() || (StackSize % 16) == 8);
1796
1797    // Calculate required stack adjustment.
1798    uint64_t FrameSize = StackSize - SlotSize;
1799    // If required, include space for an extra hidden slot for stashing the base pointer.
1800    if (X86FI->getRestoreBasePointer())
1801      FrameSize += SlotSize;
1802    uint64_t NumBytes = FrameSize - CSSize;
1803
1804    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
1805    if (FI && FI == X86FI->getFAIndex())
1806      return -SEHFrameOffset;
1807
1808    // FPDelta is the offset between the "traditional" FP location (the old base
1809    // pointer followed by the return address) and the location required by the
1810    // restricted Win64 prologue.
1811    // Add FPDelta to all offsets below that go through the frame pointer.
1812    FPDelta = FrameSize - SEHFrameOffset;
1813    assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
1814           "FPDelta isn't aligned per the Win64 ABI!");
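    // Illustrative example (hypothetical numbers): with FrameSize = 272 and
    // SEHFrameOffset = 240, FPDelta = 32, so every frame-pointer-relative
    // offset computed below is biased by an extra 32 bytes.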
1815  }
1816
1817
1818  if (TRI->hasBasePointer(MF)) {
1819    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
1820    if (FI < 0) {
1821      // Skip the saved EBP.
1822      return Offset + SlotSize + FPDelta;
1823    } else {
1824      assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
1825      return Offset + StackSize;
1826    }
1827  } else if (TRI->needsStackRealignment(MF)) {
1828    if (FI < 0) {
1829      // Skip the saved EBP.
1830      return Offset + SlotSize + FPDelta;
1831    } else {
1832      assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
1833      return Offset + StackSize;
1834    }
1835    // FIXME: Support tail calls
1836  } else {
1837    if (!HasFP)
1838      return Offset + StackSize;
1839
1840    // Skip the saved EBP.
1841    Offset += SlotSize;
1842
1843    // Skip the RETADDR move area
1844    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1845    if (TailCallReturnAddrDelta < 0)
1846      Offset -= TailCallReturnAddrDelta;
1847  }
1848
1849  return Offset + FPDelta;
1850}
1851
1852int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF,
1853                                              int FI, unsigned &FrameReg) const {
1854  const MachineFrameInfo &MFI = MF.getFrameInfo();
1855  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1856  const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
1857  const auto it = WinEHXMMSlotInfo.find(FI);
1858
1859  if (it == WinEHXMMSlotInfo.end())
1860    return getFrameIndexReference(MF, FI, FrameReg);
1861
1862  FrameReg = TRI->getStackRegister();
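  // Illustrative example (hypothetical numbers): with MaxCallFrameSize = 40, a
  // 16-byte stack alignment and an XMM slot offset of 16, this returns
  // alignDown(40, 16) + 16 = 48 bytes above the stack pointer.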
1863  return alignDown(MFI.getMaxCallFrameSize(), getStackAlignment()) + it->second;
1864}
1865
1866int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
1867                                               int FI, unsigned &FrameReg,
1868                                               int Adjustment) const {
1869  const MachineFrameInfo &MFI = MF.getFrameInfo();
1870  FrameReg = TRI->getStackRegister();
1871  return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment;
1872}
1873
1874int
1875X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
1876                                                 int FI, unsigned &FrameReg,
1877                                                 bool IgnoreSPUpdates) const {
1878
1879  const MachineFrameInfo &MFI = MF.getFrameInfo();
1880  // Does not include any dynamic realign.
1881  const uint64_t StackSize = MFI.getStackSize();
1882  // LLVM arranges the stack as follows:
1883  //   ...
1884  //   ARG2
1885  //   ARG1
1886  //   RETADDR
1887  //   PUSH RBP   <-- RBP points here
1888  //   PUSH CSRs
1889  //   ~~~~~~~    <-- possible stack realignment (non-win64)
1890  //   ...
1891  //   STACK OBJECTS
1892  //   ...        <-- RSP after prologue points here
1893  //   ~~~~~~~    <-- possible stack realignment (win64)
1894  //
1895  // if (hasVarSizedObjects()):
1896  //   ...        <-- "base pointer" (ESI/RBX) points here
1897  //   DYNAMIC ALLOCAS
1898  //   ...        <-- RSP points here
1899  //
1900  // Case 1: In the simple case of no stack realignment and no dynamic
1901  // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
1902  // with fixed offsets from RSP.
1903  //
1904  // Case 2: In the case of stack realignment with no dynamic allocas, fixed
1905  // stack objects are addressed with RBP and regular stack objects with RSP.
1906  //
1907  // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
1908  // to address stack arguments for outgoing calls and nothing else. The "base
1909  // pointer" points to local variables, and RBP points to fixed objects.
1910  //
1911  // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
1912  // answer we give is relative to the SP after the prologue, and not the
1913  // SP in the middle of the function.
1914
1915  if (MFI.isFixedObjectIndex(FI) && TRI->needsStackRealignment(MF) &&
1916      !STI.isTargetWin64())
1917    return getFrameIndexReference(MF, FI, FrameReg);
1918
1919  // If !hasReservedCallFrame the function might have SP adjustment in the
1920  // body.  So, even though the offset is statically known, it depends on where
1921  // we are in the function.
1922  if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
1923    return getFrameIndexReference(MF, FI, FrameReg);
1924
1925  // We don't handle tail calls, and shouldn't be seeing them either.
1926  assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
1927         "we don't handle this case!");
1928
1929  // This is how the math works out:
1930  //
1931  //  %rsp grows (i.e. gets lower) left to right. Each box below is
1932  //  one word (eight bytes).  Obj0 is the stack slot we're trying to
1933  //  get to.
1934  //
1935  //    ----------------------------------
1936  //    | BP | Obj0 | Obj1 | ... | ObjN |
1937  //    ----------------------------------
1938  //    ^    ^      ^                   ^
1939  //    A    B      C                   E
1940  //
1941  // A is the incoming stack pointer.
1942  // (B - A) is the local area offset (-8 for x86-64) [1]
1943  // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
1944  //
1945  // |(E - B)| is the StackSize (absolute value, positive).  For a
1946  //  stack that grows down, this works out to be (B - E). [3]
1947  //
1948  // E is also the value of %rsp after stack has been set up, and we
1949  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
1950  // (C - E) == (C - A) - (B - A) + (B - E)
1951  //            { Using [1], [2] and [3] above }
1952  //         == getObjectOffset - LocalAreaOffset + StackSize
1953
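  // Worked example (hypothetical numbers, x86-64): with getObjectOffset(FI) =
  // -24, LocalAreaOffset = -8 and StackSize = 40, the SP-relative offset is
  // -24 - (-8) + 40 = 24, i.e. the object lives 24 bytes above the
  // post-prologue %rsp.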
1954  return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
1955}
1956
1957bool X86FrameLowering::assignCalleeSavedSpillSlots(
1958    MachineFunction &MF, const TargetRegisterInfo *TRI,
1959    std::vector<CalleeSavedInfo> &CSI) const {
1960  MachineFrameInfo &MFI = MF.getFrameInfo();
1961  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
1962
1963  unsigned CalleeSavedFrameSize = 0;
1964  unsigned XMMCalleeSavedFrameSize = 0;
1965  auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
1966  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
1967
1968  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
1969
1970  if (TailCallReturnAddrDelta < 0) {
1971    // create RETURNADDR area
1972    //   arg
1973    //   arg
1974    //   RETADDR
1975    //   { ...
1976    //     RETADDR area
1977    //     ...
1978    //   }
1979    //   [EBP]
1980    MFI.CreateFixedObject(-TailCallReturnAddrDelta,
1981                           TailCallReturnAddrDelta - SlotSize, true);
1982  }
1983
1984  // Spill the BasePtr if it's used.
1985  if (this->TRI->hasBasePointer(MF)) {
1986    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
1987    if (MF.hasEHFunclets()) {
1988      int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
1989      X86FI->setHasSEHFramePtrSave(true);
1990      X86FI->setSEHFramePtrSaveIndex(FI);
1991    }
1992  }
1993
1994  if (hasFP(MF)) {
1995    // emitPrologue always spills the frame register first.
1996    SpillSlotOffset -= SlotSize;
1997    MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
1998
1999    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
2000    // the frame register, we can delete it from the CSI list and not have to worry
2001    // about avoiding it later.
2002    Register FPReg = TRI->getFrameRegister(MF);
2003    for (unsigned i = 0; i < CSI.size(); ++i) {
2004      if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
2005        CSI.erase(CSI.begin() + i);
2006        break;
2007      }
2008    }
2009  }
2010
2011  // Assign slots for GPRs. It increases frame size.
2012  for (unsigned i = CSI.size(); i != 0; --i) {
2013    unsigned Reg = CSI[i - 1].getReg();
2014
2015    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2016      continue;
2017
2018    SpillSlotOffset -= SlotSize;
2019    CalleeSavedFrameSize += SlotSize;
2020
2021    int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
2022    CSI[i - 1].setFrameIdx(SlotIndex);
2023  }
2024
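  // Illustrative example (hypothetical numbers, x86-64): starting from
  // SpillSlotOffset = -8, with the frame pointer already given the -16 slot
  // above, three GPR CSRs receive fixed slots at -24, -32 and -40, and
  // CalleeSavedFrameSize becomes 24.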
2025  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
2026  MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
2027
2028  // Assign slots for XMMs.
2029  for (unsigned i = CSI.size(); i != 0; --i) {
2030    unsigned Reg = CSI[i - 1].getReg();
2031    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2032      continue;
2033
2034    // If this is k-register make sure we lookup via the largest legal type.
2035    MVT VT = MVT::Other;
2036    if (X86::VK16RegClass.contains(Reg))
2037      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2038
2039    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2040    unsigned Size = TRI->getSpillSize(*RC);
2041    unsigned Align = TRI->getSpillAlignment(*RC);
2042    // ensure alignment
2043    assert(SpillSlotOffset < 0 && "SpillSlotOffset should always be < 0 on X86");
2044    SpillSlotOffset = -alignTo(-SpillSlotOffset, Align);
2045
2046    // spill into slot
2047    SpillSlotOffset -= Size;
2048    int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
2049    CSI[i - 1].setFrameIdx(SlotIndex);
2050    MFI.ensureMaxAlignment(Align);
2051
2052    // Save the start offset and size of XMM in stack frame for funclets.
2053    if (X86::VR128RegClass.contains(Reg)) {
2054      WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
2055      XMMCalleeSavedFrameSize += Size;
2056    }
2057  }
2058
2059  return true;
2060}
2061
2062bool X86FrameLowering::spillCalleeSavedRegisters(
2063    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2064    const std::vector<CalleeSavedInfo> &CSI,
2065    const TargetRegisterInfo *TRI) const {
2066  DebugLoc DL = MBB.findDebugLoc(MI);
2067
2068  // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
2069  // for us, and there are no XMM CSRs on Win32.
2070  if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
2071    return true;
2072
2073  // Push GPRs. It increases frame size.
2074  const MachineFunction &MF = *MBB.getParent();
2075  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
2076  for (unsigned i = CSI.size(); i != 0; --i) {
2077    unsigned Reg = CSI[i - 1].getReg();
2078
2079    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
2080      continue;
2081
2082    const MachineRegisterInfo &MRI = MF.getRegInfo();
2083    bool isLiveIn = MRI.isLiveIn(Reg);
2084    if (!isLiveIn)
2085      MBB.addLiveIn(Reg);
2086
2087    // Decide whether we can add a kill flag to the use.
2088    bool CanKill = !isLiveIn;
2089    // Check if any register aliasing Reg (e.g. a subregister) is live-in.
2090    if (CanKill) {
2091      for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
2092        if (MRI.isLiveIn(*AReg)) {
2093          CanKill = false;
2094          break;
2095        }
2096      }
2097    }
2098
2099    // Do not set a kill flag on values that are also marked as live-in. This
2100    // happens with the @llvm.returnaddress intrinsic and with arguments
2101    // passed in callee saved registers.
2102    // Omitting the kill flags is conservatively correct even if the live-in
2103    // is not used after all.
2104    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
2105      .setMIFlag(MachineInstr::FrameSetup);
2106  }
2107
2108  // Spill the XMM registers. X86 has no push/pop instructions for XMM
2109  // registers, so store them to the stack frame instead.
2110  for (unsigned i = CSI.size(); i != 0; --i) {
2111    unsigned Reg = CSI[i-1].getReg();
2112    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
2113      continue;
2114
2115    // If this is k-register make sure we lookup via the largest legal type.
2116    MVT VT = MVT::Other;
2117    if (X86::VK16RegClass.contains(Reg))
2118      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2119
2120    // Add the callee-saved register as live-in. It's killed at the spill.
2121    MBB.addLiveIn(Reg);
2122    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2123
2124    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
2125                            TRI);
2126    --MI;
2127    MI->setFlag(MachineInstr::FrameSetup);
2128    ++MI;
2129  }
2130
2131  return true;
2132}
2133
2134void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
2135                                               MachineBasicBlock::iterator MBBI,
2136                                               MachineInstr *CatchRet) const {
2137  // SEH shouldn't use catchret.
2138  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2139             MBB.getParent()->getFunction().getPersonalityFn())) &&
2140         "SEH should not use CATCHRET");
2141  DebugLoc DL = CatchRet->getDebugLoc();
2142  MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();
2143
2144  // Fill EAX/RAX with the address of the target block.
2145  if (STI.is64Bit()) {
2146    // LEA64r CatchRetTarget(%rip), %rax
2147    BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
2148        .addReg(X86::RIP)
2149        .addImm(0)
2150        .addReg(0)
2151        .addMBB(CatchRetTarget)
2152        .addReg(0);
2153  } else {
2154    // MOV32ri $CatchRetTarget, %eax
2155    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
2156        .addMBB(CatchRetTarget);
2157  }
2158
2159  // Record that we've taken the address of CatchRetTarget and no longer just
2160  // reference it in a terminator.
2161  CatchRetTarget->setHasAddressTaken();
2162}
2163
2164bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2165                                               MachineBasicBlock::iterator MI,
2166                                          std::vector<CalleeSavedInfo> &CSI,
2167                                          const TargetRegisterInfo *TRI) const {
2168  if (CSI.empty())
2169    return false;
2170
2171  if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
2172    // Don't restore CSRs in 32-bit EH funclets. Matches
2173    // spillCalleeSavedRegisters.
2174    if (STI.is32Bit())
2175      return true;
2176    // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
2177    // funclets. emitEpilogue transforms these to normal jumps.
2178    if (MI->getOpcode() == X86::CATCHRET) {
2179      const Function &F = MBB.getParent()->getFunction();
2180      bool IsSEH = isAsynchronousEHPersonality(
2181          classifyEHPersonality(F.getPersonalityFn()));
2182      if (IsSEH)
2183        return true;
2184    }
2185  }
2186
2187  DebugLoc DL = MBB.findDebugLoc(MI);
2188
2189  // Reload XMMs from stack frame.
2190  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2191    unsigned Reg = CSI[i].getReg();
2192    if (X86::GR64RegClass.contains(Reg) ||
2193        X86::GR32RegClass.contains(Reg))
2194      continue;
2195
2196    // If this is k-register make sure we lookup via the largest legal type.
2197    MVT VT = MVT::Other;
2198    if (X86::VK16RegClass.contains(Reg))
2199      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
2200
2201    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
2202    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
2203  }
2204
2205  // POP GPRs.
2206  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
2207  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2208    unsigned Reg = CSI[i].getReg();
2209    if (!X86::GR64RegClass.contains(Reg) &&
2210        !X86::GR32RegClass.contains(Reg))
2211      continue;
2212
2213    BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
2214        .setMIFlag(MachineInstr::FrameDestroy);
2215  }
2216  return true;
2217}
2218
2219void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
2220                                            BitVector &SavedRegs,
2221                                            RegScavenger *RS) const {
2222  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2223
2224  // Spill the BasePtr if it's used.
2225  if (TRI->hasBasePointer(MF)){
2226    Register BasePtr = TRI->getBaseRegister();
2227    if (STI.isTarget64BitILP32())
2228      BasePtr = getX86SubSuperRegister(BasePtr, 64);
2229    SavedRegs.set(BasePtr);
2230  }
2231}
2232
2233static bool
2234HasNestArgument(const MachineFunction *MF) {
2235  const Function &F = MF->getFunction();
2236  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
2237       I != E; I++) {
2238    if (I->hasNestAttr() && !I->use_empty())
2239      return true;
2240  }
2241  return false;
2242}
2243
2244/// GetScratchRegister - Get a temp register for performing work in the
2245/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
2246/// and the properties of the function either one or two registers will be
2247/// needed. Set primary to true for the first register, false for the second.
2248static unsigned
2249GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
2250  CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();
2251
2252  // Erlang stuff.
2253  if (CallingConvention == CallingConv::HiPE) {
2254    if (Is64Bit)
2255      return Primary ? X86::R14 : X86::R13;
2256    else
2257      return Primary ? X86::EBX : X86::EDI;
2258  }
2259
2260  if (Is64Bit) {
2261    if (IsLP64)
2262      return Primary ? X86::R11 : X86::R12;
2263    else
2264      return Primary ? X86::R11D : X86::R12D;
2265  }
2266
2267  bool IsNested = HasNestArgument(&MF);
2268
2269  if (CallingConvention == CallingConv::X86_FastCall ||
2270      CallingConvention == CallingConv::Fast ||
2271      CallingConvention == CallingConv::Tail) {
2272    if (IsNested)
2273      report_fatal_error("Segmented stacks do not support fastcall with "
2274                         "nested functions.");
2275    return Primary ? X86::EAX : X86::ECX;
2276  }
2277  if (IsNested)
2278    return Primary ? X86::EDX : X86::EAX;
2279  return Primary ? X86::ECX : X86::EAX;
2280}
2281
2282// The stack limit in the TCB is set to this many bytes above the actual stack
2283// limit.
2284static const uint64_t kSplitStackAvailable = 256;
2285
2286void X86FrameLowering::adjustForSegmentedStacks(
2287    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2288  MachineFrameInfo &MFI = MF.getFrameInfo();
2289  uint64_t StackSize;
2290  unsigned TlsReg, TlsOffset;
2291  DebugLoc DL;
2292
2293  // To support shrink-wrapping we would need to insert the new blocks
2294  // at the right place and update the branches to PrologueMBB.
2295  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
2296
2297  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
2298  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
2299         "Scratch register is live-in");
2300
2301  if (MF.getFunction().isVarArg())
2302    report_fatal_error("Segmented stacks do not support vararg functions.");
2303  if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
2304      !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
2305      !STI.isTargetDragonFly())
2306    report_fatal_error("Segmented stacks not supported on this platform.");
2307
2308  // Eventually StackSize will be calculated by a link-time pass, which will
2309  // also decide whether checking code needs to be injected into this particular
2310  // prologue.
2311  StackSize = MFI.getStackSize();
2312
2313  // Do not generate a prologue for leaf functions with a stack of size zero.
2314  // For non-leaf functions we have to allow for the possibility that the
2315  // call is to a non-split function, as in PR37807. This function could also
2316  // take the address of a non-split function. When the linker tries to adjust
2317  // its non-existent prologue, it would fail with an error. Mark the object
2318  // file so that such failures are not errors. See this Go language bug-report
2319  // https://go-review.googlesource.com/c/go/+/148819/
2320  if (StackSize == 0 && !MFI.hasTailCall()) {
2321    MF.getMMI().setHasNosplitStack(true);
2322    return;
2323  }
2324
2325  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
2326  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
2327  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2328  bool IsNested = false;
2329
2330  // We need to know if the function has a nest argument only in 64 bit mode.
2331  if (Is64Bit)
2332    IsNested = HasNestArgument(&MF);
2333
2334  // The MOV R10, RAX needs to be in a different block, since the RET we emit in
2335  // allocMBB needs to be the last (terminating) instruction.
2336
2337  for (const auto &LI : PrologueMBB.liveins()) {
2338    allocMBB->addLiveIn(LI);
2339    checkMBB->addLiveIn(LI);
2340  }
2341
2342  if (IsNested)
2343    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
2344
2345  MF.push_front(allocMBB);
2346  MF.push_front(checkMBB);
2347
2348  // When the frame size is less than 256 we just compare the stack
2349  // boundary directly to the value of the stack pointer, per gcc.
2350  bool CompareStackPointer = StackSize < kSplitStackAvailable;
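  // For example (illustrative sizes): a 64-byte frame compares the stack
  // pointer directly against the TLS stack limit below, while a 4096-byte
  // frame first materializes SP - 4096 into the scratch register and compares
  // that instead.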
2351
2352  // Read the limit of the current stacklet from the stack_guard location.
2353  if (Is64Bit) {
2354    if (STI.isTargetLinux()) {
2355      TlsReg = X86::FS;
2356      TlsOffset = IsLP64 ? 0x70 : 0x40;
2357    } else if (STI.isTargetDarwin()) {
2358      TlsReg = X86::GS;
2359      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
2360    } else if (STI.isTargetWin64()) {
2361      TlsReg = X86::GS;
2362      TlsOffset = 0x28; // pvArbitrary, reserved for application use
2363    } else if (STI.isTargetFreeBSD()) {
2364      TlsReg = X86::FS;
2365      TlsOffset = 0x18;
2366    } else if (STI.isTargetDragonFly()) {
2367      TlsReg = X86::FS;
2368      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
2369    } else {
2370      report_fatal_error("Segmented stacks not supported on this platform.");
2371    }
2372
2373    if (CompareStackPointer)
2374      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
2375    else
2376      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
2377        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
2378
2379    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
2380      .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
2381  } else {
2382    if (STI.isTargetLinux()) {
2383      TlsReg = X86::GS;
2384      TlsOffset = 0x30;
2385    } else if (STI.isTargetDarwin()) {
2386      TlsReg = X86::GS;
2387      TlsOffset = 0x48 + 90*4;
2388    } else if (STI.isTargetWin32()) {
2389      TlsReg = X86::FS;
2390      TlsOffset = 0x14; // pvArbitrary, reserved for application use
2391    } else if (STI.isTargetDragonFly()) {
2392      TlsReg = X86::FS;
2393      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
2394    } else if (STI.isTargetFreeBSD()) {
2395      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
2396    } else {
2397      report_fatal_error("Segmented stacks not supported on this platform.");
2398    }
2399
2400    if (CompareStackPointer)
2401      ScratchReg = X86::ESP;
2402    else
2403      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
2404        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
2405
2406    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
2407        STI.isTargetDragonFly()) {
2408      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
2409        .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
2410    } else if (STI.isTargetDarwin()) {
2411
2412      // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
2413      unsigned ScratchReg2;
2414      bool SaveScratch2;
2415      if (CompareStackPointer) {
2416        // The primary scratch register is available for holding the TLS offset.
2417        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
2418        SaveScratch2 = false;
2419      } else {
2420        // Need to use a second register to hold the TLS offset
2421        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
2422
2423        // Unfortunately, with fastcc the second scratch register may hold an
2424        // argument.
2425        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
2426      }
2427
2428      // If Scratch2 is live-in then it needs to be saved.
2429      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
2430             "Scratch register is live-in and not saved");
2431
2432      if (SaveScratch2)
2433        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
2434          .addReg(ScratchReg2, RegState::Kill);
2435
2436      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
2437        .addImm(TlsOffset);
2438      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
2439        .addReg(ScratchReg)
2440        .addReg(ScratchReg2).addImm(1).addReg(0)
2441        .addImm(0)
2442        .addReg(TlsReg);
2443
2444      if (SaveScratch2)
2445        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
2446    }
2447  }
2448
2449  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
2450  // It jumps to normal execution of the function body.
2451  BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);
2452
2453  // On 32 bit we first push the arguments size and then the frame size. On 64
2454  // bit, we pass the stack frame size in r10 and the argument size in r11.
2455  if (Is64Bit) {
2456    // Functions with nested arguments use R10, so it needs to be saved across
2457    // the call to __morestack.
2458
2459    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
2460    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
2461    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
2462    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
2463    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
2464
2465    if (IsNested)
2466      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
2467
2468    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
2469      .addImm(StackSize);
2470    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
2471      .addImm(X86FI->getArgumentStackSize());
2472  } else {
2473    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
2474      .addImm(X86FI->getArgumentStackSize());
2475    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
2476      .addImm(StackSize);
2477  }
2478
2479  // __morestack is in libgcc
2480  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
2481    // Under the large code model, we cannot assume that __morestack lives
2482    // within 2^31 bytes of the call site, so we cannot use pc-relative
2483    // addressing. We cannot perform the call via a temporary register,
2484    // as the rax register may be used to store the static chain, and all
2485    // other suitable registers may be either callee-save or used for
2486    // parameter passing. We cannot use the stack at this point either
2487    // because __morestack manipulates the stack directly.
2488    //
2489    // To avoid these issues, perform an indirect call via a read-only memory
2490    // location containing the address.
2491    //
2492    // This solution is not perfect, as it assumes that the .rodata section
2493    // is laid out within 2^31 bytes of each function body, but this seems
2494    // to be sufficient for JIT.
2495    // FIXME: Add retpoline support and remove the error here.
2496    if (STI.useIndirectThunkCalls())
2497      report_fatal_error("Emitting morestack calls on 64-bit with the large "
2498                         "code model and thunks not yet implemented.");
2499    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
2500        .addReg(X86::RIP)
2501        .addImm(0)
2502        .addReg(0)
2503        .addExternalSymbol("__morestack_addr")
2504        .addReg(0);
2505    MF.getMMI().setUsesMorestackAddr(true);
2506  } else {
2507    if (Is64Bit)
2508      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
2509        .addExternalSymbol("__morestack");
2510    else
2511      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
2512        .addExternalSymbol("__morestack");
2513  }
2514
2515  if (IsNested)
2516    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
2517  else
2518    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
2519
2520  allocMBB->addSuccessor(&PrologueMBB);
2521
2522  checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
2523  checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());
2524
2525#ifdef EXPENSIVE_CHECKS
2526  MF.verify();
2527#endif
2528}
2529
2530/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
2531/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
2532/// to fields it needs, through a named metadata node "hipe.literals" containing
2533/// name-value pairs.
2534static unsigned getHiPELiteral(
2535    NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) {
2536  for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
2537    MDNode *Node = HiPELiteralsMD->getOperand(i);
2538    if (Node->getNumOperands() != 2) continue;
2539    MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
2540    ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
2541    if (!NodeName || !NodeVal) continue;
2542    ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
2543    if (ValConst && NodeName->getString() == LiteralName) {
2544      return ValConst->getZExtValue();
2545    }
2546  }
2547
2548  report_fatal_error("HiPE literal " + LiteralName
2549                     + " required but not provided");
2550}
2551
2552// Return true if there are no non-ehpad successors to MBB and there are no
2553// non-meta instructions between MBBI and MBB.end().
2554static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
2555                                  MachineBasicBlock::const_iterator MBBI) {
2556  return std::all_of(
2557             MBB.succ_begin(), MBB.succ_end(),
2558             [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
2559         std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
2560           return MI.isMetaInstruction();
2561         });
2562}
2563
2564/// Erlang programs may need a special prologue to handle the stack size they
2565/// might need at runtime. That is because Erlang/OTP does not implement a C
2566/// stack but uses a custom implementation of a hybrid stack/heap architecture.
2567/// (for more information see Eric Stenman's Ph.D. thesis:
2568/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
2569///
2570/// CheckStack:
2571///       temp0 = sp - MaxStack
2572///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
2573/// OldStart:
2574///       ...
2575/// IncStack:
2576///       call inc_stack   # doubles the stack space
2577///       temp0 = sp - MaxStack
2578///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
2579void X86FrameLowering::adjustForHiPEPrologue(
2580    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2581  MachineFrameInfo &MFI = MF.getFrameInfo();
2582  DebugLoc DL;
2583
2584  // To support shrink-wrapping we would need to insert the new blocks
2585  // at the right place and update the branches to PrologueMBB.
2586  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
2587
2588  // HiPE-specific values
2589  NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
2590    ->getNamedMetadata("hipe.literals");
2591  if (!HiPELiteralsMD)
2592    report_fatal_error(
2593        "Can't generate HiPE prologue without runtime parameters");
2594  const unsigned HipeLeafWords
2595    = getHiPELiteral(HiPELiteralsMD,
2596                     Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
2597  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
2598  const unsigned Guaranteed = HipeLeafWords * SlotSize;
2599  unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
2600                            MF.getFunction().arg_size() - CCRegisteredArgs : 0;
2601  unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
2602
2603  assert(STI.isTargetLinux() &&
2604         "HiPE prologue is only supported on Linux operating systems.");
2605
2606  // Compute the largest caller's frame that is needed to fit the callees'
2607  // frames. This 'MaxStack' is computed from:
2608  //
2609  // a) the fixed frame size, which is the space needed for all spilled temps,
2610  // b) outgoing on-stack parameter areas, and
2611  // c) the minimum stack space this function needs to make available for the
2612  //    functions it calls (a tunable ABI property).
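  // Illustrative example (hypothetical numbers, x86-64): with a 40-byte frame
  // and a caller taking 10 arguments (CallerStkArity = 10 - 6 = 4), MaxStack
  // starts at 40 + 4*8 + 8 = 80 bytes before MoreStackForCalls is added below.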
2613  if (MFI.hasCalls()) {
2614    unsigned MoreStackForCalls = 0;
2615
2616    for (auto &MBB : MF) {
2617      for (auto &MI : MBB) {
2618        if (!MI.isCall())
2619          continue;
2620
2621        // Get callee operand.
2622        const MachineOperand &MO = MI.getOperand(0);
2623
2624        // Only take account of global function calls (no closures etc.).
2625        if (!MO.isGlobal())
2626          continue;
2627
2628        const Function *F = dyn_cast<Function>(MO.getGlobal());
2629        if (!F)
2630          continue;
2631
2632        // Do not update 'MaxStack' for primitive and built-in functions
2633        // (encoded with names either starting with "erlang."/"bif_" or not
2634        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
2635        // "_", such as the BIF "suspend_0") as they are executed on another
2636        // stack.
2637        if (F->getName().find("erlang.") != StringRef::npos ||
2638            F->getName().find("bif_") != StringRef::npos ||
2639            F->getName().find_first_of("._") == StringRef::npos)
2640          continue;
2641
2642        unsigned CalleeStkArity =
2643          F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
2644        if (HipeLeafWords - 1 > CalleeStkArity)
2645          MoreStackForCalls = std::max(MoreStackForCalls,
2646                               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
2647      }
2648    }
2649    MaxStack += MoreStackForCalls;
2650  }
2651
2652  // If the stack frame needed is larger than the guaranteed size, then runtime
2653  // checks and calls to the "inc_stack_0" BIF should be inserted in the prologue.
2654  if (MaxStack > Guaranteed) {
2655    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
2656    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
2657
2658    for (const auto &LI : PrologueMBB.liveins()) {
2659      stackCheckMBB->addLiveIn(LI);
2660      incStackMBB->addLiveIn(LI);
2661    }
2662
2663    MF.push_front(incStackMBB);
2664    MF.push_front(stackCheckMBB);
2665
2666    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
2667    unsigned LEAop, CMPop, CALLop;
2668    SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
2669    if (Is64Bit) {
2670      SPReg = X86::RSP;
2671      PReg  = X86::RBP;
2672      LEAop = X86::LEA64r;
2673      CMPop = X86::CMP64rm;
2674      CALLop = X86::CALL64pcrel32;
2675    } else {
2676      SPReg = X86::ESP;
2677      PReg  = X86::EBP;
2678      LEAop = X86::LEA32r;
2679      CMPop = X86::CMP32rm;
2680      CALLop = X86::CALLpcrel32;
2681    }
2682
2683    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
2684    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
2685           "HiPE prologue scratch register is live-in");
2686
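    // Roughly, the two blocks built below correspond to the following
    // sequence (64-bit case; Intel-style pseudo assembly, illustrative only):
    //   stackCheckMBB:
    //     lea  ScratchReg, [SPReg - MaxStack]
    //     cmp  ScratchReg, [PReg + SPLimitOffset]
    //     jae  PrologueMBB
    //   incStackMBB:
    //     call inc_stack_0
    //     lea  ScratchReg, [SPReg - MaxStack]
    //     cmp  ScratchReg, [PReg + SPLimitOffset]
    //     jle  incStackMBB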
2687    // Create new MBB for StackCheck:
2688    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
2689                 SPReg, false, -MaxStack);
2690    // SPLimitOffset is in a fixed heap location (pointed by BP).
2691    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
2692                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
2693    BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);
2694
2695    // Create new MBB for IncStack:
2696    BuildMI(incStackMBB, DL, TII.get(CALLop)).
2697      addExternalSymbol("inc_stack_0");
2698    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
2699                 SPReg, false, -MaxStack);
2700    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
2701                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
2702    BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);
2703
2704    stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
2705    stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
2706    incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
2707    incStackMBB->addSuccessor(incStackMBB, {1, 100});
2708  }
2709#ifdef EXPENSIVE_CHECKS
2710  MF.verify();
2711#endif
2712}
2713
2714bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
2715                                           MachineBasicBlock::iterator MBBI,
2716                                           const DebugLoc &DL,
2717                                           int Offset) const {
2718
2719  if (Offset <= 0)
2720    return false;
2721
2722  if (Offset % SlotSize)
2723    return false;
2724
2725  int NumPops = Offset / SlotSize;
2726  // This is only worth it if we have at most 2 pops.
2727  if (NumPops != 1 && NumPops != 2)
2728    return false;
2729
2730  // Handle only the trivial case where the adjustment directly follows
2731  // a call. This is the most common one, anyway.
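  // For example (illustrative only), an 8-byte adjustment on x86-64 that
  // directly follows a call may be rewritten from 'add rsp, 8' into a single
  // 'pop' of a register the call already clobbers.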
2732  if (MBBI == MBB.begin())
2733    return false;
2734  MachineBasicBlock::iterator Prev = std::prev(MBBI);
2735  if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
2736    return false;
2737
2738  unsigned Regs[2];
2739  unsigned FoundRegs = 0;
2740
2741  auto &MRI = MBB.getParent()->getRegInfo();
2742  auto RegMask = Prev->getOperand(1);
2743
2744  auto &RegClass =
2745      Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
2746  // Try to find up to NumPops free registers.
2747  for (auto Candidate : RegClass) {
2748
2749    // Poor man's liveness:
2750    // Since we're immediately after a call, any register that is clobbered
2751    // by the call and not defined by it can be considered dead.
2752    if (!RegMask.clobbersPhysReg(Candidate))
2753      continue;
2754
2755    // Don't clobber reserved registers
2756    if (MRI.isReserved(Candidate))
2757      continue;
2758
2759    bool IsDef = false;
2760    for (const MachineOperand &MO : Prev->implicit_operands()) {
2761      if (MO.isReg() && MO.isDef() &&
2762          TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
2763        IsDef = true;
2764        break;
2765      }
2766    }
2767
2768    if (IsDef)
2769      continue;
2770
2771    Regs[FoundRegs++] = Candidate;
2772    if (FoundRegs == (unsigned)NumPops)
2773      break;
2774  }
2775
2776  if (FoundRegs == 0)
2777    return false;
2778
2779  // If we found only one free register, but need two, reuse the same one twice.
2780  while (FoundRegs < (unsigned)NumPops)
2781    Regs[FoundRegs++] = Regs[0];
2782
2783  for (int i = 0; i < NumPops; ++i)
2784    BuildMI(MBB, MBBI, DL,
2785            TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
2786
2787  return true;
2788}
2789
2790MachineBasicBlock::iterator X86FrameLowering::
2791eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2792                              MachineBasicBlock::iterator I) const {
2793  bool reserveCallFrame = hasReservedCallFrame(MF);
2794  unsigned Opcode = I->getOpcode();
2795  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
2796  DebugLoc DL = I->getDebugLoc();
2797  uint64_t Amount = TII.getFrameSize(*I);
2798  uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
2799  I = MBB.erase(I);
2800  auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
2801
2802  if (!reserveCallFrame) {
2803    // If the stack pointer can be changed after prologue, turn the
2804    // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
2805    // adjcallstackup instruction into an 'add ESP, <amt>'.
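    //
    // For example (illustrative only):
    //   ADJCALLSTACKDOWN 16   ->  sub esp, 16
    //   ...call...
    //   ADJCALLSTACKUP 16, 0  ->  add esp, 16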
2806
2807    // We need to keep the stack aligned properly.  To do this, we round the
2808    // amount of space needed for the outgoing arguments up to the next
2809    // alignment boundary.
2810    unsigned StackAlign = getStackAlignment();
2811    Amount = alignTo(Amount, StackAlign);
2812
2813    const Function &F = MF.getFunction();
2814    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
2815    bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();
2816
2817    // If we have any exception handlers in this function, and we adjust
2818    // the SP before calls, we may need to indicate this to the unwinder
2819    // using GNU_ARGS_SIZE. Note that this may be necessary even when
2820    // Amount == 0, because the preceding function may have set a non-0
2821    // GNU_ARGS_SIZE.
2822    // TODO: We don't need to reset this between subsequent functions,
2823    // if it didn't change.
2824    bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
2825
2826    if (HasDwarfEHHandlers && !isDestroy &&
2827        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
2828      BuildCFI(MBB, InsertPos, DL,
2829               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
2830
2831    if (Amount == 0)
2832      return I;
2833
2834    // Factor out the amount that gets handled inside the sequence
2835    // (Pushes of arguments for frame setup, callee pops for frame destroy).
2836    Amount -= InternalAmt;
2837
2838    // TODO: This is needed only if we require precise CFA.
2839    // If this is a callee-pop calling convention, emit a CFA adjust for
2840    // the amount the callee popped.
2841    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
2842      BuildCFI(MBB, InsertPos, DL,
2843               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
2844
2845    // Add Amount to SP to destroy a frame, or subtract to setup.
2846    int64_t StackAdjustment = isDestroy ? Amount : -Amount;
2847
2848    if (StackAdjustment) {
2849      // Merge with any previous or following adjustment instruction. Note: the
2850      // instructions merged with here do not have CFI, so their stack
2851      // adjustments do not feed into CfaAdjustment.
2852      StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
2853      StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
2854
2855      if (StackAdjustment) {
2856        if (!(F.hasMinSize() &&
2857              adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
2858          BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
2859                               /*InEpilogue=*/false);
2860      }
2861    }
2862
2863    if (DwarfCFI && !hasFP(MF)) {
2864      // If we don't have FP, but need to generate unwind information,
2865      // we need to set the correct CFA offset after the stack adjustment.
2866      // How much we adjust the CFA offset depends on whether we're emitting
2867      // CFI only for EH purposes or for debugging. EH only requires the CFA
2868      // offset to be correct at each call site, while for debugging we want
2869      // it to be more precise.
2870
2871      int64_t CfaAdjustment = -StackAdjustment;
2872      // TODO: When not using precise CFA, we also need to adjust for the
2873      // InternalAmt here.
2874      if (CfaAdjustment) {
2875        BuildCFI(MBB, InsertPos, DL,
2876                 MCCFIInstruction::createAdjustCfaOffset(nullptr,
2877                                                         CfaAdjustment));
2878      }
2879    }
2880
2881    return I;
2882  }
2883
2884  if (isDestroy && InternalAmt && !blockEndIsUnreachable(MBB, I)) {
2885    // If we are performing frame pointer elimination and if the callee pops
2886    // something off the stack pointer, add it back.  We do this until we have
2887    // more advanced stack pointer tracking ability.
2888    // We are not tracking the stack pointer adjustment by the callee, so make
2889    // sure we restore the stack pointer immediately after the call, there may
2890    // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
2891    MachineBasicBlock::iterator CI = I;
2892    MachineBasicBlock::iterator B = MBB.begin();
2893    while (CI != B && !std::prev(CI)->isCall())
2894      --CI;
2895    BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
2896  }
2897
2898  return I;
2899}
2900
2901bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
2902  assert(MBB.getParent() && "Block is not attached to a function!");
2903  const MachineFunction &MF = *MBB.getParent();
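  // Stack realignment is done with an AND on the stack pointer, which
  // clobbers EFLAGS, so a block with EFLAGS live-in cannot host the prologue
  // when realignment is needed.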
2904  return !TRI->needsStackRealignment(MF) || !MBB.isLiveIn(X86::EFLAGS);
2905}
2906
2907bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
2908  assert(MBB.getParent() && "Block is not attached to a function!");
2909
2910  // Win64 has strict requirements on epilogues, and we are not
2911  // taking a chance at messing with them.
2912  // I.e., unless this block is already an exit block, we can't use
2913  // it as an epilogue.
2914  if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
2915    return false;
2916
2917  if (canUseLEAForSPInEpilogue(*MBB.getParent()))
2918    return true;
2919
2920  // If we cannot use LEA to adjust SP, we may need to use ADD, which
2921  // clobbers EFLAGS. Check that we do not need to preserve it;
2922  // otherwise, conservatively assume it is not safe to insert the
2923  // epilogue here.
2924  return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
2925}
2926
2927bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2928  // If we may need to emit frameless compact unwind information, give
2929  // up as this is currently broken: PR25614.
2930  return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) &&
2931         // The lowering of segmented stack and HiPE only support entry blocks
2932         // as prologue blocks: PR26107.
2933         // This limitation may be lifted if we fix:
2934         // - adjustForSegmentedStacks
2935         // - adjustForHiPEPrologue
2936         MF.getFunction().getCallingConv() != CallingConv::HiPE &&
2937         !MF.shouldSplitStack();
2938}
2939
2940MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
2941    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
2942    const DebugLoc &DL, bool RestoreSP) const {
2943  assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
2944  assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
2945  assert(STI.is32Bit() && !Uses64BitFramePtr &&
2946         "restoring EBP/ESI on non-32-bit target");
2947
2948  MachineFunction &MF = *MBB.getParent();
2949  Register FramePtr = TRI->getFrameRegister(MF);
2950  Register BasePtr = TRI->getBaseRegister();
2951  WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
2952  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2953  MachineFrameInfo &MFI = MF.getFrameInfo();
2954
2955  // FIXME: Don't set FrameSetup flag in catchret case.
2956
2957  int FI = FuncInfo.EHRegNodeFrameIndex;
2958  int EHRegSize = MFI.getObjectSize(FI);
2959
2960  if (RestoreSP) {
2961    // MOV32rm -EHRegSize(%ebp), %esp
2962    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
2963                 X86::EBP, true, -EHRegSize)
2964        .setMIFlag(MachineInstr::FrameSetup);
2965  }
2966
2967  unsigned UsedReg;
2968  int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
2969  int EndOffset = -EHRegOffset - EHRegSize;
2970  FuncInfo.EHRegNodeEndOffset = EndOffset;
2971
2972  if (UsedReg == FramePtr) {
2973    // ADD $offset, %ebp
2974    unsigned ADDri = getADDriOpcode(false, EndOffset);
2975    BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
2976        .addReg(FramePtr)
2977        .addImm(EndOffset)
2978        .setMIFlag(MachineInstr::FrameSetup)
2979        ->getOperand(3)
2980        .setIsDead();
2981    assert(EndOffset >= 0 &&
2982           "end of registration object above normal EBP position!");
2983  } else if (UsedReg == BasePtr) {
2984    // LEA offset(%ebp), %esi
2985    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
2986                 FramePtr, false, EndOffset)
2987        .setMIFlag(MachineInstr::FrameSetup);
2988    // MOV32rm SavedEBPOffset(%esi), %ebp
2989    assert(X86FI->getHasSEHFramePtrSave());
2990    int Offset =
2991        getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
2992    assert(UsedReg == BasePtr);
2993    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
2994                 UsedReg, true, Offset)
2995        .setMIFlag(MachineInstr::FrameSetup);
2996  } else {
2997    llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
2998  }
2999  return MBBI;
3000}
3001
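// At function entry the CFA sits just above the return address pushed by the
// call, i.e. at SP + SlotSize, so the initial CFA offset is one slot and the
// initial CFA register is the stack pointer.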
3002int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
3003  return TRI->getSlotSize();
3004}
3005
3006unsigned X86FrameLowering::getInitialCFARegister(const MachineFunction &MF)
3007    const {
3008  return TRI->getDwarfRegNum(StackPtr, true);
3009}
3010
3011namespace {
3012// Struct used by orderFrameObjects to help sort the stack objects.
3013struct X86FrameSortingObject {
3014  bool IsValid = false;         // true if we care about this Object.
3015  unsigned ObjectIndex = 0;     // Index of Object into MFI list.
3016  unsigned ObjectSize = 0;      // Size of Object in bytes.
3017  unsigned ObjectAlignment = 1; // Alignment of Object in bytes.
3018  unsigned ObjectNumUses = 0;   // Object static number of uses.
3019};
3020
3021// The comparison function we use for std::sort to order our local
3022// stack symbols. The current algorithm is to use an estimated
3023// "density". This takes into consideration the size and number of
3024// uses each object has in order to roughly minimize code size.
3025// So, for example, an object of size 16B that is referenced 5 times
3026// will get higher priority than 4 4B objects referenced 1 time each.
3027// It's not perfect and we may be able to squeeze a few more bytes out of
3028// it (for example: 0(esp) requires fewer bytes, symbols allocated at the
3029// fringe end can have special consideration, given their size is less
3030// important, etc.), but the algorithmic complexity grows too much to be
3031// worth the extra gains we get. This gets us pretty close.
3032// The final order leaves us with objects with highest priority going
3033// at the end of our list.
3034struct X86FrameSortingComparator {
3035  inline bool operator()(const X86FrameSortingObject &A,
3036                         const X86FrameSortingObject &B) {
3037    uint64_t DensityAScaled, DensityBScaled;
3038
3039    // For consistency in our comparison, all invalid objects are placed
3040    // at the end. This also allows us to stop walking when we hit the
3041    // first invalid item after it's all sorted.
3042    if (!A.IsValid)
3043      return false;
3044    if (!B.IsValid)
3045      return true;
3046
3047    // The density is calculated by doing :
3048    //     (double)DensityA = A.ObjectNumUses / A.ObjectSize
3049    //     (double)DensityB = B.ObjectNumUses / B.ObjectSize
3050    // Since this approach may cause inconsistencies in
3051    // the floating point <, >, == comparisons, depending on the floating
3052    // point model with which the compiler was built, we're going
3053    // to scale both sides by multiplying with
3054    // A.ObjectSize * B.ObjectSize. This ends up factoring away
3055    // the division and, with it, the need for any floating point
3056    // arithmetic.
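    // Worked example (illustrative numbers): A = {NumUses: 5, Size: 16} and
    // B = {NumUses: 1, Size: 4} give DensityAScaled = 5 * 4 = 20 and
    // DensityBScaled = 1 * 16 = 16, so A compares "greater" and is placed
    // later in the sorted list, i.e. it gets higher priority.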
3057    DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
3058      static_cast<uint64_t>(B.ObjectSize);
3059    DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
3060      static_cast<uint64_t>(A.ObjectSize);
3061
3062    // If the two densities are equal, prioritize highest alignment
3063    // objects. This allows for similar alignment objects
3064    // to be packed together (given the same density).
3065    // There's room for improvement here, also, since we can pack
3066    // similar alignment (different density) objects next to each
3067    // other to save padding. This will also require further
3068    // complexity/iterations, and the overall gain isn't worth it,
3069    // in general. Something to keep in mind, though.
3070    if (DensityAScaled == DensityBScaled)
3071      return A.ObjectAlignment < B.ObjectAlignment;
3072
3073    return DensityAScaled < DensityBScaled;
3074  }
3075};
3076} // namespace
3077
3078// Order the symbols in the local stack.
3079// We want to place the local stack objects in some sort of sensible order.
3080// The heuristic we use is to try and pack them according to static number
3081// of uses and size of object in order to minimize code size.
3082void X86FrameLowering::orderFrameObjects(
3083    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
3084  const MachineFrameInfo &MFI = MF.getFrameInfo();
3085
3086  // Don't waste time if there's nothing to do.
3087  if (ObjectsToAllocate.empty())
3088    return;
3089
3090  // Create an array of all MFI objects. We won't need all of these
3091  // objects, but we're going to create a full array of them to make
3092  // it easier to index into when we're counting "uses" down below.
3093  // We want to be able to easily/cheaply access an object by simply
3094  // indexing into it, instead of having to search for it every time.
3095  std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
3096
3097  // Walk the objects we care about and mark them as such in our working
3098  // struct.
3099  for (auto &Obj : ObjectsToAllocate) {
3100    SortingObjects[Obj].IsValid = true;
3101    SortingObjects[Obj].ObjectIndex = Obj;
3102    SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlignment(Obj);
3103    // Set the size.
3104    int ObjectSize = MFI.getObjectSize(Obj);
3105    if (ObjectSize == 0)
3106      // Variable size. Just use 4.
3107      SortingObjects[Obj].ObjectSize = 4;
3108    else
3109      SortingObjects[Obj].ObjectSize = ObjectSize;
3110  }
3111
3112  // Count the number of uses for each object.
3113  for (auto &MBB : MF) {
3114    for (auto &MI : MBB) {
3115      if (MI.isDebugInstr())
3116        continue;
3117      for (const MachineOperand &MO : MI.operands()) {
3118        // Check to see if it's a local stack symbol.
3119        if (!MO.isFI())
3120          continue;
3121        int Index = MO.getIndex();
3122        // Check to see if it falls within our range, and is tagged
3123        // to require ordering.
3124        if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
3125            SortingObjects[Index].IsValid)
3126          SortingObjects[Index].ObjectNumUses++;
3127      }
3128    }
3129  }
3130
3131  // Sort the objects using X86FrameSortingAlgorithm (see its comment for
3132  // info).
3133  llvm::stable_sort(SortingObjects, X86FrameSortingComparator());
3134
3135  // Now modify the original list to represent the final order that
3136  // we want. The order will depend on whether we're going to access them
3137  // from the stack pointer or the frame pointer. For SP, objects that we
3138  // want at smaller offsets should end up at the END of the list.
3139  // For FP, the order should be flipped.
3140  int i = 0;
3141  for (auto &Obj : SortingObjects) {
3142    // All invalid items are sorted at the end, so it's safe to stop.
3143    if (!Obj.IsValid)
3144      break;
3145    ObjectsToAllocate[i++] = Obj.ObjectIndex;
3146  }
3147
3148  // Flip it if we're accessing off of the FP.
3149  if (!TRI->needsStackRealignment(MF) && hasFP(MF))
3150    std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
3151}
3152
3153
3154unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
3155  // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
3156  unsigned Offset = 16;
3157  // RBP is immediately pushed.
3158  Offset += SlotSize;
3159  // All callee-saved registers are then pushed.
3160  Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
3161  // Every funclet allocates enough stack space for the largest outgoing call.
3162  Offset += getWinEHFuncletFrameSize(MF);
3163  return Offset;
3164}
3165
3166void X86FrameLowering::processFunctionBeforeFrameFinalized(
3167    MachineFunction &MF, RegScavenger *RS) const {
3168  // Mark the function as not having WinCFI. We will set it back to true in
3169  // emitPrologue if it gets called and emits CFI.
3170  MF.setHasWinCFI(false);
3171
3172  // If this function isn't doing Win64-style C++ EH, we don't need to do
3173  // anything.
3174  const Function &F = MF.getFunction();
3175  if (!STI.is64Bit() || !MF.hasEHFunclets() ||
3176      classifyEHPersonality(F.getPersonalityFn()) != EHPersonality::MSVC_CXX)
3177    return;
3178
3179  // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
3180  // relative to RSP after the prologue.  Find the offset of the last fixed
3181  // object, so that we can allocate a slot immediately following it. If there
3182  // were no fixed objects, use offset -SlotSize, which is immediately after the
3183  // return address. Fixed objects have negative frame indices.
3184  MachineFrameInfo &MFI = MF.getFrameInfo();
3185  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
3186  int64_t MinFixedObjOffset = -SlotSize;
3187  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
3188    MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
3189
3190  for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
3191    for (WinEHHandlerType &H : TBME.HandlerArray) {
3192      int FrameIndex = H.CatchObj.FrameIndex;
3193      if (FrameIndex != INT_MAX) {
3194        // Ensure alignment.
3195        unsigned Align = MFI.getObjectAlignment(FrameIndex);
3196        MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
3197        MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
3198        MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
3199      }
3200    }
3201  }
3202
3203  // Ensure alignment.
3204  MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
3205  int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
3206  int UnwindHelpFI =
3207      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
3208  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
3209
3210  // Store -2 into UnwindHelp on function entry. We have to scan forwards past
3211  // other frame setup instructions.
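  // The store emitted below is roughly 'mov qword ptr [UnwindHelp slot], -2'
  // (illustrative), i.e. the UnwindHelp slot is initialized to -2 before the
  // function body runs.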
3212  MachineBasicBlock &MBB = MF.front();
3213  auto MBBI = MBB.begin();
3214  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
3215    ++MBBI;
3216
3217  DebugLoc DL = MBB.findDebugLoc(MBBI);
3218  addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
3219                    UnwindHelpFI)
3220      .addImm(-2);
3221}
3222