1//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PPC implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "MCTargetDesc/PPCPredicates.h"
14#include "PPCFrameLowering.h"
15#include "PPCInstrBuilder.h"
16#include "PPCInstrInfo.h"
17#include "PPCMachineFunctionInfo.h"
18#include "PPCSubtarget.h"
19#include "PPCTargetMachine.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineModuleInfo.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/RegisterScavenging.h"
27#include "llvm/IR/Function.h"
28#include "llvm/Target/TargetOptions.h"
29
30using namespace llvm;
31
32#define DEBUG_TYPE "framelowering"
// Pass statistics registered under DEBUG_TYPE. Each counter's meaning is given
// by its description string.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");
36
// Hidden command-line switch "-ppc-enable-pe-vector-spills"; defaults to false.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
41
/// VRRegNo - Map from a numbered VR register to its enum value.
///
/// The table has 32 entries; the entry at index N is PPC::V<N>.
static const MCPhysReg VRRegNo[] = {
 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
50
51static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
52  if (STI.isAIXABI())
53    return STI.isPPC64() ? 16 : 8;
54  // SVR4 ABI:
55  return STI.isPPC64() ? 16 : 4;
56}
57
58static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
59  if (STI.isAIXABI())
60    return STI.isPPC64() ? 40 : 20;
61  return STI.isELFv2ABI() ? 24 : 40;
62}
63
64static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
65  // First slot in the general register save area.
66  return STI.isPPC64() ? -8U : -4U;
67}
68
69static unsigned computeLinkageSize(const PPCSubtarget &STI) {
70  if (STI.isAIXABI() || STI.isPPC64())
71    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
72
73  // 32-bit SVR4 ABI:
74  return 8;
75}
76
77static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
78  // Third slot in the general purpose register save area.
79  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
80    return -12U;
81
82  // Second slot in the general purpose register save area.
83  return STI.isPPC64() ? -16U : -8U;
84}
85
86static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
87  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
88}
89
90PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
91    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
92                          STI.getPlatformStackAlignment(), 0),
93      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
94      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
95      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
96      LinkageSize(computeLinkageSize(Subtarget)),
97      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
98      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
99
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
//
// Returns a pointer to a function-local static table of {register, offset}
// spill slots and stores the number of entries of that table in NumEntries.
// The table is chosen by the subtarget: 64-bit ELF, 32-bit ELF, 64-bit AIX or
// 32-bit AIX. Any other ABI trips the assertion below.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// The macros below expand to comma-separated SpillSlot initializers so the
// same register lists can be shared between the per-ABI tables.

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  // Table for the 32-bit ELF ABI: FPRs, 32-bit GPRs, CR, VRSAVE, VRs and the
  // SPE registers.
  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset.  We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  // Table for the 64-bit ELF ABI: FPRs, 64-bit GPRs, VRSAVE and VRs.
  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  // Table for 32-bit AIX: FPRs, 32-bit GPRs and R13.
  static const SpillSlot AIXOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,
      // Add AIX's extra CSR.
      {PPC::R13, -76},
      // TODO: Update when we add vector support for AIX.
  };

  // Table for 64-bit AIX: FPRs and 64-bit GPRs.
  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,
      // TODO: Update when we add vector support for AIX.
  };

  // Pick the table matching the subtarget; the ELF ABIs are checked first.
  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  // Anything that is not ELF must be AIX.
  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}
264
265/// RemoveVRSaveCode - We have found that this function does not need any code
266/// to manipulate the VRSAVE register, even though it uses vector registers.
267/// This can happen when the only registers used are known to be live in or out
268/// of the function.  Remove all of the VRSAVE related code from the function.
269/// FIXME: The removal of the code results in a compile failure at -O0 when the
270/// function contains a function call, as the GPR containing original VRSAVE
271/// contents is spilled and reloaded around the call.  Without the prolog code,
272/// the spill instruction refers to an undefined register.  This code needs
273/// to account for all uses of that GPR.
274static void RemoveVRSaveCode(MachineInstr &MI) {
275  MachineBasicBlock *Entry = MI.getParent();
276  MachineFunction *MF = Entry->getParent();
277
278  // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
279  MachineBasicBlock::iterator MBBI = MI;
280  ++MBBI;
281  assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
282  MBBI->eraseFromParent();
283
284  bool RemovedAllMTVRSAVEs = true;
285  // See if we can find and remove the MTVRSAVE instruction from all of the
286  // epilog blocks.
287  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
288    // If last instruction is a return instruction, add an epilogue
289    if (I->isReturnBlock()) {
290      bool FoundIt = false;
291      for (MBBI = I->end(); MBBI != I->begin(); ) {
292        --MBBI;
293        if (MBBI->getOpcode() == PPC::MTVRSAVE) {
294          MBBI->eraseFromParent();  // remove it.
295          FoundIt = true;
296          break;
297        }
298      }
299      RemovedAllMTVRSAVEs &= FoundIt;
300    }
301  }
302
303  // If we found and removed all MTVRSAVE instructions, remove the read of
304  // VRSAVE as well.
305  if (RemovedAllMTVRSAVEs) {
306    MBBI = MI;
307    assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
308    --MBBI;
309    assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
310    MBBI->eraseFromParent();
311  }
312
313  // Finally, nuke the UPDATE_VRSAVE.
314  MI.eraseFromParent();
315}
316
317// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
318// instruction selector.  Based on the vector registers that have been used,
319// transform this into the appropriate ORI instruction.
320static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
321  MachineFunction *MF = MI.getParent()->getParent();
322  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
323  DebugLoc dl = MI.getDebugLoc();
324
325  const MachineRegisterInfo &MRI = MF->getRegInfo();
326  unsigned UsedRegMask = 0;
327  for (unsigned i = 0; i != 32; ++i)
328    if (MRI.isPhysRegModified(VRRegNo[i]))
329      UsedRegMask |= 1 << (31-i);
330
331  // Live in and live out values already must be in the mask, so don't bother
332  // marking them.
333  for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
334    unsigned RegNo = TRI->getEncodingValue(LI.first);
335    if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
336      UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
337  }
338
339  // Live out registers appear as use operands on return instructions.
340  for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
341       UsedRegMask != 0 && BI != BE; ++BI) {
342    const MachineBasicBlock &MBB = *BI;
343    if (!MBB.isReturnBlock())
344      continue;
345    const MachineInstr &Ret = MBB.back();
346    for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
347      const MachineOperand &MO = Ret.getOperand(I);
348      if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
349        continue;
350      unsigned RegNo = TRI->getEncodingValue(MO.getReg());
351      UsedRegMask &= ~(1 << (31-RegNo));
352    }
353  }
354
355  // If no registers are used, turn this into a copy.
356  if (UsedRegMask == 0) {
357    // Remove all VRSAVE code.
358    RemoveVRSaveCode(MI);
359    return;
360  }
361
362  Register SrcReg = MI.getOperand(1).getReg();
363  Register DstReg = MI.getOperand(0).getReg();
364
365  if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
366    if (DstReg != SrcReg)
367      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
368          .addReg(SrcReg)
369          .addImm(UsedRegMask);
370    else
371      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
372          .addReg(SrcReg, RegState::Kill)
373          .addImm(UsedRegMask);
374  } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
375    if (DstReg != SrcReg)
376      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
377          .addReg(SrcReg)
378          .addImm(UsedRegMask >> 16);
379    else
380      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
381          .addReg(SrcReg, RegState::Kill)
382          .addImm(UsedRegMask >> 16);
383  } else {
384    if (DstReg != SrcReg)
385      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
386          .addReg(SrcReg)
387          .addImm(UsedRegMask >> 16);
388    else
389      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
390          .addReg(SrcReg, RegState::Kill)
391          .addImm(UsedRegMask >> 16);
392
393    BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
394        .addReg(DstReg, RegState::Kill)
395        .addImm(UsedRegMask & 0xFFFF);
396  }
397
398  // Remove the old UPDATE_VRSAVE instruction.
399  MI.eraseFromParent();
400}
401
402static bool spillsCR(const MachineFunction &MF) {
403  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
404  return FuncInfo->isCRSpilled();
405}
406
407static bool spillsVRSAVE(const MachineFunction &MF) {
408  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
409  return FuncInfo->isVRSAVESpilled();
410}
411
412static bool hasSpills(const MachineFunction &MF) {
413  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
414  return FuncInfo->hasSpills();
415}
416
417static bool hasNonRISpills(const MachineFunction &MF) {
418  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
419  return FuncInfo->hasNonRISpills();
420}
421
422/// MustSaveLR - Return true if this function requires that we save the LR
423/// register onto the stack in the prolog and restore it in the epilog of the
424/// function.
425static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
426  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
427
428  // We need a save/restore of LR if there is any def of LR (which is
429  // defined by calls, including the PIC setup sequence), or if there is
430  // some use of the LR stack slot (e.g. for builtin_return_address).
431  // (LR comes in 32 and 64 bit versions.)
432  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
433  return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
434}
435
436/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
437/// call frame size. Update the MachineFunction object with the stack size.
438unsigned
439PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
440                                                bool UseEstimate) const {
441  unsigned NewMaxCallFrameSize = 0;
442  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
443                                            &NewMaxCallFrameSize);
444  MF.getFrameInfo().setStackSize(FrameSize);
445  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
446  return FrameSize;
447}
448
449/// determineFrameLayout - Determine the size of the frame and maximum call
450/// frame size.
451unsigned
452PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
453                                       bool UseEstimate,
454                                       unsigned *NewMaxCallFrameSize) const {
455  const MachineFrameInfo &MFI = MF.getFrameInfo();
456  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
457
458  // Get the number of bytes to allocate from the FrameInfo
459  unsigned FrameSize =
460    UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
461
462  // Get stack alignments. The frame must be aligned to the greatest of these:
463  Align TargetAlign = getStackAlign(); // alignment required per the ABI
464  Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
465  Align Alignment = std::max(TargetAlign, MaxAlign);
466
467  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
468
469  unsigned LR = RegInfo->getRARegister();
470  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
471  bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
472                       !MFI.adjustsStack() &&       // No calls.
473                       !MustSaveLR(MF, LR) &&       // No need to save LR.
474                       !FI->mustSaveTOC() &&        // No need to save TOC.
475                       !RegInfo->hasBasePointer(MF); // No special alignment.
476
477  // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
478  // code if all local vars are reg-allocated.
479  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
480
481  // Check whether we can skip adjusting the stack pointer (by using red zone)
482  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
483    // No need for frame
484    return 0;
485  }
486
487  // Get the maximum call frame size of all the calls.
488  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
489
490  // Maximum call frame needs to be at least big enough for linkage area.
491  unsigned minCallFrameSize = getLinkageSize();
492  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
493
494  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
495  // that allocations will be aligned.
496  if (MFI.hasVarSizedObjects())
497    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
498
499  // Update the new max call frame size if the caller passes in a valid pointer.
500  if (NewMaxCallFrameSize)
501    *NewMaxCallFrameSize = maxCallFrameSize;
502
503  // Include call frame size in total.
504  FrameSize += maxCallFrameSize;
505
506  // Make sure the frame is aligned.
507  FrameSize = alignTo(FrameSize, Alignment);
508
509  return FrameSize;
510}
511
512// hasFP - Return true if the specified function actually has a dedicated frame
513// pointer register.
514bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
515  const MachineFrameInfo &MFI = MF.getFrameInfo();
516  // FIXME: This is pretty much broken by design: hasFP() might be called really
517  // early, before the stack layout was calculated and thus hasFP() might return
518  // true or false here depending on the time of call.
519  return (MFI.getStackSize()) && needsFP(MF);
520}
521
522// needsFP - Return true if the specified function should have a dedicated frame
523// pointer register.  This is true if the function has variable sized allocas or
524// if frame pointer elimination is disabled.
525bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
526  const MachineFrameInfo &MFI = MF.getFrameInfo();
527
528  // Naked functions have no stack frame pushed, so we don't have a frame
529  // pointer.
530  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
531    return false;
532
533  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
534    MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
535    (MF.getTarget().Options.GuaranteedTailCallOpt &&
536     MF.getInfo<PPCFunctionInfo>()->hasFastCall());
537}
538
539void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
540  bool is31 = needsFP(MF);
541  unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
542  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
543
544  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
545  bool HasBP = RegInfo->hasBasePointer(MF);
546  unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
547  unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
548
549  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
550       BI != BE; ++BI)
551    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
552      --MBBI;
553      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
554        MachineOperand &MO = MBBI->getOperand(I);
555        if (!MO.isReg())
556          continue;
557
558        switch (MO.getReg()) {
559        case PPC::FP:
560          MO.setReg(FPReg);
561          break;
562        case PPC::FP8:
563          MO.setReg(FP8Reg);
564          break;
565        case PPC::BP:
566          MO.setReg(BPReg);
567          break;
568        case PPC::BP8:
569          MO.setReg(BP8Reg);
570          break;
571
572        }
573      }
574    }
575}
576
577/*  This function will do the following:
578    - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
579      respectively (defaults recommended by the ABI) and return true
580    - If MBB is not an entry block, initialize the register scavenger and look
581      for available registers.
582    - If the defaults (R0/R12) are available, return true
583    - If TwoUniqueRegsRequired is set to true, it looks for two unique
584      registers. Otherwise, look for a single available register.
585      - If the required registers are found, set SR1 and SR2 and return true.
586      - If the required registers are not found, set SR2 or both SR1 and SR2 to
587        PPC::NoRegister and return false.
588
589    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
590    is not set, this function will attempt to find two different registers, but
591    still return true if only one register is available (and set SR1 == SR2).
592*/
593bool
594PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
595                                      bool UseAtEnd,
596                                      bool TwoUniqueRegsRequired,
597                                      Register *SR1,
598                                      Register *SR2) const {
599  RegScavenger RS;
600  Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
601  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
602
603  // Set the defaults for the two scratch registers.
604  if (SR1)
605    *SR1 = R0;
606
607  if (SR2) {
608    assert (SR1 && "Asking for the second scratch register but not the first?");
609    *SR2 = R12;
610  }
611
612  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
613  if ((UseAtEnd && MBB->isReturnBlock()) ||
614      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
615    return true;
616
617  RS.enterBasicBlock(*MBB);
618
619  if (UseAtEnd && !MBB->empty()) {
620    // The scratch register will be used at the end of the block, so must
621    // consider all registers used within the block
622
623    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
624    // If no terminator, back iterator up to previous instruction.
625    if (MBBI == MBB->end())
626      MBBI = std::prev(MBBI);
627
628    if (MBBI != MBB->begin())
629      RS.forward(MBBI);
630  }
631
632  // If the two registers are available, we're all good.
633  // Note that we only return here if both R0 and R12 are available because
634  // although the function may not require two unique registers, it may benefit
635  // from having two so we should try to provide them.
636  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
637    return true;
638
639  // Get the list of callee-saved registers for the target.
640  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
641  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
642
643  // Get all the available registers in the block.
644  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
645                                     &PPC::GPRCRegClass);
646
647  // We shouldn't use callee-saved registers as scratch registers as they may be
648  // available when looking for a candidate block for shrink wrapping but not
649  // available when the actual prologue/epilogue is being emitted because they
650  // were added as live-in to the prologue block by PrologueEpilogueInserter.
651  for (int i = 0; CSRegs[i]; ++i)
652    BV.reset(CSRegs[i]);
653
654  // Set the first scratch register to the first available one.
655  if (SR1) {
656    int FirstScratchReg = BV.find_first();
657    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
658  }
659
660  // If there is another one available, set the second scratch register to that.
661  // Otherwise, set it to either PPC::NoRegister if this function requires two
662  // or to whatever SR1 is set to if this function doesn't require two.
663  if (SR2) {
664    int SecondScratchReg = BV.find_next(*SR1);
665    if (SecondScratchReg != -1)
666      *SR2 = SecondScratchReg;
667    else
668      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
669  }
670
671  // Now that we've done our best to provide both registers, double check
672  // whether we were unable to provide enough.
673  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
674    return false;
675
676  return true;
677}
678
679// We need a scratch register for spilling LR and for spilling CR. By default,
680// we use two scratch registers to hide latency. However, if only one scratch
681// register is available, we can adjust for that by not overlapping the spill
682// code. However, if we need to realign the stack (i.e. have a base pointer)
683// and the stack frame is large, we need two scratch registers.
684bool
685PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
686  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
687  MachineFunction &MF = *(MBB->getParent());
688  bool HasBP = RegInfo->hasBasePointer(MF);
689  unsigned FrameSize = determineFrameLayout(MF);
690  int NegFrameSize = -FrameSize;
691  bool IsLargeFrame = !isInt<16>(NegFrameSize);
692  MachineFrameInfo &MFI = MF.getFrameInfo();
693  Align MaxAlign = MFI.getMaxAlign();
694  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
695
696  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
697}
698
699bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
700  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
701
702  return findScratchRegister(TmpMBB, false,
703                             twoUniqueScratchRegsRequired(TmpMBB));
704}
705
706bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
707  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
708
709  return findScratchRegister(TmpMBB, true);
710}
711
712bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
713  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
714  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
715
716  // Abort if there is no register info or function info.
717  if (!RegInfo || !FI)
718    return false;
719
720  // Only move the stack update on ELFv2 ABI and PPC64.
721  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
722    return false;
723
724  // Check the frame size first and return false if it does not fit the
725  // requirements.
726  // We need a non-zero frame size as well as a frame that will fit in the red
727  // zone. This is because by moving the stack pointer update we are now storing
728  // to the red zone until the stack pointer is updated. If we get an interrupt
729  // inside the prologue but before the stack update we now have a number of
730  // stores to the red zone and those stores must all fit.
731  MachineFrameInfo &MFI = MF.getFrameInfo();
732  unsigned FrameSize = MFI.getStackSize();
733  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
734    return false;
735
736  // Frame pointers and base pointers complicate matters so don't do anything
737  // if we have them. For example having a frame pointer will sometimes require
738  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
739  // difficult.
740  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
741    return false;
742
743  // Calls to fast_cc functions use different rules for passing parameters on
744  // the stack from the ABI and using PIC base in the function imposes
745  // similar restrictions to using the base pointer. It is not generally safe
746  // to move the stack pointer update in these situations.
747  if (FI->hasFastCall() || FI->usesPICBase())
748    return false;
749
750  // Finally we can move the stack update if we do not require register
751  // scavenging. Register scavenging can introduce more spills and so
752  // may make the frame size larger than we have computed.
753  return !RegInfo->requiresFrameIndexScavenging(MF);
754}
755
756void PPCFrameLowering::emitPrologue(MachineFunction &MF,
757                                    MachineBasicBlock &MBB) const {
758  MachineBasicBlock::iterator MBBI = MBB.begin();
759  MachineFrameInfo &MFI = MF.getFrameInfo();
760  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
761  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
762  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
763
764  MachineModuleInfo &MMI = MF.getMMI();
765  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
766  DebugLoc dl;
767  // AIX assembler does not support cfi directives.
768  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
769
770  // Get processor type.
771  bool isPPC64 = Subtarget.isPPC64();
772  // Get the ABI.
773  bool isSVR4ABI = Subtarget.isSVR4ABI();
774  bool isAIXABI = Subtarget.isAIXABI();
775  bool isELFv2ABI = Subtarget.isELFv2ABI();
776  assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
777
778  // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
779  // process it.
780  if (!isSVR4ABI)
781    for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
782      if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
783        if (isAIXABI)
784          report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
785        HandleVRSaveUpdate(*MBBI, TII);
786        break;
787      }
788    }
789
790  // Move MBBI back to the beginning of the prologue block.
791  MBBI = MBB.begin();
792
793  // Work out frame sizes.
794  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
795  int NegFrameSize = -FrameSize;
796  if (!isInt<32>(NegFrameSize))
797    llvm_unreachable("Unhandled stack size!");
798
799  if (MFI.isFrameAddressTaken())
800    replaceFPWithRealFP(MF);
801
802  // Check if the link register (LR) must be saved.
803  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
804  bool MustSaveLR = FI->mustSaveLR();
805  bool MustSaveTOC = FI->mustSaveTOC();
806  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
807  bool MustSaveCR = !MustSaveCRs.empty();
808  // Do we have a frame pointer and/or base pointer for this function?
809  bool HasFP = hasFP(MF);
810  bool HasBP = RegInfo->hasBasePointer(MF);
811  bool HasRedZone = isPPC64 || !isSVR4ABI;
812
813  Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
814  Register BPReg = RegInfo->getBaseRegister(MF);
815  Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
816  Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
817  Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
818  Register ScratchReg;
819  Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
820  //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
821  const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
822                                                : PPC::MFLR );
823  const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
824                                                 : PPC::STW );
825  const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
826                                                     : PPC::STWU );
827  const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
828                                                        : PPC::STWUX);
829  const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
830                                                          : PPC::LIS );
831  const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
832                                                 : PPC::ORI );
833  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
834                                              : PPC::OR );
835  const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
836                                                            : PPC::SUBFC);
837  const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
838                                                               : PPC::SUBFIC);
839  const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
840                                                           : PPC::MFCR);
841  const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
842
843  // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
844  // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
845  // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
846  // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
847  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
848         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
849
850  // Using the same bool variable as below to suppress compiler warnings.
851  // Stack probe requires two scratch registers, one for old sp, one for large
852  // frame and large probe size.
853  bool SingleScratchReg = findScratchRegister(
854      &MBB, false,
855      twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF),
856      &ScratchReg, &TempReg);
857  assert(SingleScratchReg &&
858         "Required number of registers not available in this block");
859
860  SingleScratchReg = ScratchReg == TempReg;
861
862  int LROffset = getReturnSaveOffset();
863
864  int FPOffset = 0;
865  if (HasFP) {
866    if (isSVR4ABI) {
867      MachineFrameInfo &MFI = MF.getFrameInfo();
868      int FPIndex = FI->getFramePointerSaveIndex();
869      assert(FPIndex && "No Frame Pointer Save Slot!");
870      FPOffset = MFI.getObjectOffset(FPIndex);
871    } else {
872      FPOffset = getFramePointerSaveOffset();
873    }
874  }
875
876  int BPOffset = 0;
877  if (HasBP) {
878    if (isSVR4ABI) {
879      MachineFrameInfo &MFI = MF.getFrameInfo();
880      int BPIndex = FI->getBasePointerSaveIndex();
881      assert(BPIndex && "No Base Pointer Save Slot!");
882      BPOffset = MFI.getObjectOffset(BPIndex);
883    } else {
884      BPOffset = getBasePointerSaveOffset();
885    }
886  }
887
888  int PBPOffset = 0;
889  if (FI->usesPICBase()) {
890    MachineFrameInfo &MFI = MF.getFrameInfo();
891    int PBPIndex = FI->getPICBasePointerSaveIndex();
892    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
893    PBPOffset = MFI.getObjectOffset(PBPIndex);
894  }
895
896  // Get stack alignments.
897  Align MaxAlign = MFI.getMaxAlign();
898  if (HasBP && MaxAlign > 1)
899    assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
900
901  // Frames of 32KB & larger require special handling because they cannot be
902  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
903  bool isLargeFrame = !isInt<16>(NegFrameSize);
904
905  // Check if we can move the stack update instruction (stdu) down the prologue
906  // past the callee saves. Hopefully this will avoid the situation where the
907  // saves are waiting for the update on the store with update to complete.
908  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
909  bool MovingStackUpdateDown = false;
910
911  // Check if we can move the stack update.
912  if (stackUpdateCanBeMoved(MF)) {
913    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
914    for (CalleeSavedInfo CSI : Info) {
915      int FrIdx = CSI.getFrameIdx();
916      // If the frame index is not negative the callee saved info belongs to a
917      // stack object that is not a fixed stack object. We ignore non-fixed
918      // stack objects because we won't move the stack update pointer past them.
919      if (FrIdx >= 0)
920        continue;
921
922      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
923        StackUpdateLoc++;
924        MovingStackUpdateDown = true;
925      } else {
926        // We need all of the Frame Indices to meet these conditions.
927        // If they do not, abort the whole operation.
928        StackUpdateLoc = MBBI;
929        MovingStackUpdateDown = false;
930        break;
931      }
932    }
933
934    // If the operation was not aborted then update the object offset.
935    if (MovingStackUpdateDown) {
936      for (CalleeSavedInfo CSI : Info) {
937        int FrIdx = CSI.getFrameIdx();
938        if (FrIdx < 0)
939          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
940      }
941    }
942  }
943
944  // Where in the prologue we move the CR fields depends on how many scratch
945  // registers we have, and if we need to save the link register or not. This
946  // lambda is to avoid duplicating the logic in 2 places.
947  auto BuildMoveFromCR = [&]() {
948    if (isELFv2ABI && MustSaveCRs.size() == 1) {
949    // In the ELFv2 ABI, we are not required to save all CR fields.
950    // If only one CR field is clobbered, it is more efficient to use
951    // mfocrf to selectively save just that field, because mfocrf has short
952    // latency compares to mfcr.
953      assert(isPPC64 && "V2 ABI is 64-bit only.");
954      MachineInstrBuilder MIB =
955          BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
956      MIB.addReg(MustSaveCRs[0], RegState::Kill);
957    } else {
958      MachineInstrBuilder MIB =
959          BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
960      for (unsigned CRfield : MustSaveCRs)
961        MIB.addReg(CRfield, RegState::ImplicitKill);
962    }
963  };
964
965  // If we need to spill the CR and the LR but we don't have two separate
966  // registers available, we must spill them one at a time
967  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
968    BuildMoveFromCR();
969    BuildMI(MBB, MBBI, dl, StoreWordInst)
970        .addReg(TempReg, getKillRegState(true))
971        .addImm(CRSaveOffset)
972        .addReg(SPReg);
973  }
974
975  if (MustSaveLR)
976    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
977
978  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
979    BuildMoveFromCR();
980
981  if (HasRedZone) {
982    if (HasFP)
983      BuildMI(MBB, MBBI, dl, StoreInst)
984        .addReg(FPReg)
985        .addImm(FPOffset)
986        .addReg(SPReg);
987    if (FI->usesPICBase())
988      BuildMI(MBB, MBBI, dl, StoreInst)
989        .addReg(PPC::R30)
990        .addImm(PBPOffset)
991        .addReg(SPReg);
992    if (HasBP)
993      BuildMI(MBB, MBBI, dl, StoreInst)
994        .addReg(BPReg)
995        .addImm(BPOffset)
996        .addReg(SPReg);
997  }
998
999  if (MustSaveLR)
1000    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1001      .addReg(ScratchReg, getKillRegState(true))
1002      .addImm(LROffset)
1003      .addReg(SPReg);
1004
1005  if (MustSaveCR &&
1006      !(SingleScratchReg && MustSaveLR)) {
1007    assert(HasRedZone && "A red zone is always available on PPC64");
1008    BuildMI(MBB, MBBI, dl, StoreWordInst)
1009      .addReg(TempReg, getKillRegState(true))
1010      .addImm(CRSaveOffset)
1011      .addReg(SPReg);
1012  }
1013
1014  // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1015  if (!FrameSize)
1016    return;
1017
1018  // Adjust stack pointer: r1 += NegFrameSize.
1019  // If there is a preferred stack alignment, align R1 now
1020
1021  if (HasBP && HasRedZone) {
1022    // Save a copy of r1 as the base pointer.
1023    BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1024      .addReg(SPReg)
1025      .addReg(SPReg);
1026  }
1027
1028  // Have we generated a STUX instruction to claim stack frame? If so,
1029  // the negated frame size will be placed in ScratchReg.
1030  bool HasSTUX = false;
1031
1032  // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
1033  // pointer is always stored at SP, we will get a free probe due to an essential
1034  // STU(X) instruction.
1035  if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
1036    // To be consistent with other targets, a pseudo instruction is emitted and
1037    // will be later expanded in `inlineStackProbe`.
1038    BuildMI(MBB, MBBI, dl,
1039            TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
1040                            : PPC::PROBED_STACKALLOC_32))
1041        .addDef(ScratchReg)
1042        .addDef(TempReg) // TempReg stores the old sp.
1043        .addImm(NegFrameSize);
1044    // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
1045    // update the ScratchReg to meet the assumption that ScratchReg contains
1046    // the NegFrameSize. This solution is rather tricky.
1047    if (!HasRedZone) {
1048      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1049          .addReg(TempReg)
1050          .addReg(SPReg);
1051      HasSTUX = true;
1052    }
1053  } else {
1054    // This condition must be kept in sync with canUseAsPrologue.
1055    if (HasBP && MaxAlign > 1) {
1056      if (isPPC64)
1057        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1058            .addReg(SPReg)
1059            .addImm(0)
1060            .addImm(64 - Log2(MaxAlign));
1061      else // PPC32...
1062        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1063            .addReg(SPReg)
1064            .addImm(0)
1065            .addImm(32 - Log2(MaxAlign))
1066            .addImm(31);
1067      if (!isLargeFrame) {
1068        BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1069            .addReg(ScratchReg, RegState::Kill)
1070            .addImm(NegFrameSize);
1071      } else {
1072        assert(!SingleScratchReg && "Only a single scratch reg available");
1073        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1074            .addImm(NegFrameSize >> 16);
1075        BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1076            .addReg(TempReg, RegState::Kill)
1077            .addImm(NegFrameSize & 0xFFFF);
1078        BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1079            .addReg(ScratchReg, RegState::Kill)
1080            .addReg(TempReg, RegState::Kill);
1081      }
1082
1083      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1084          .addReg(SPReg, RegState::Kill)
1085          .addReg(SPReg)
1086          .addReg(ScratchReg);
1087      HasSTUX = true;
1088
1089    } else if (!isLargeFrame) {
1090      BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1091          .addReg(SPReg)
1092          .addImm(NegFrameSize)
1093          .addReg(SPReg);
1094
1095    } else {
1096      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1097          .addImm(NegFrameSize >> 16);
1098      BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1099          .addReg(ScratchReg, RegState::Kill)
1100          .addImm(NegFrameSize & 0xFFFF);
1101      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1102          .addReg(SPReg, RegState::Kill)
1103          .addReg(SPReg)
1104          .addReg(ScratchReg);
1105      HasSTUX = true;
1106    }
1107  }
1108
1109  // Save the TOC register after the stack pointer update if a prologue TOC
1110  // save is required for the function.
1111  if (MustSaveTOC) {
1112    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1113    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1114      .addReg(TOCReg, getKillRegState(true))
1115      .addImm(TOCSaveOffset)
1116      .addReg(SPReg);
1117  }
1118
1119  if (!HasRedZone) {
1120    assert(!isPPC64 && "A red zone is always available on PPC64");
1121    if (HasSTUX) {
1122      // The negated frame size is in ScratchReg, and the SPReg has been
1123      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1124      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1125      // the stack frame (i.e. the old SP), ideally, we would put the old
1126      // SP into a register and use it as the base for the stores. The
1127      // problem is that the only available register may be ScratchReg,
1128      // which could be R0, and R0 cannot be used as a base address.
1129
1130      // First, set ScratchReg to the old SP. This may need to be modified
1131      // later.
1132      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1133        .addReg(ScratchReg, RegState::Kill)
1134        .addReg(SPReg);
1135
1136      if (ScratchReg == PPC::R0) {
1137        // R0 cannot be used as a base register, but it can be used as an
1138        // index in a store-indexed.
1139        int LastOffset = 0;
1140        if (HasFP)  {
1141          // R0 += (FPOffset-LastOffset).
1142          // Need addic, since addi treats R0 as 0.
1143          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1144            .addReg(ScratchReg)
1145            .addImm(FPOffset-LastOffset);
1146          LastOffset = FPOffset;
1147          // Store FP into *R0.
1148          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1149            .addReg(FPReg, RegState::Kill)  // Save FP.
1150            .addReg(PPC::ZERO)
1151            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1152        }
1153        if (FI->usesPICBase()) {
1154          // R0 += (PBPOffset-LastOffset).
1155          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1156            .addReg(ScratchReg)
1157            .addImm(PBPOffset-LastOffset);
1158          LastOffset = PBPOffset;
1159          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1160            .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1161            .addReg(PPC::ZERO)
1162            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1163        }
1164        if (HasBP) {
1165          // R0 += (BPOffset-LastOffset).
1166          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1167            .addReg(ScratchReg)
1168            .addImm(BPOffset-LastOffset);
1169          LastOffset = BPOffset;
1170          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1171            .addReg(BPReg, RegState::Kill)  // Save BP.
1172            .addReg(PPC::ZERO)
1173            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1174          // BP = R0-LastOffset
1175          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1176            .addReg(ScratchReg, RegState::Kill)
1177            .addImm(-LastOffset);
1178        }
1179      } else {
1180        // ScratchReg is not R0, so use it as the base register. It is
1181        // already set to the old SP, so we can use the offsets directly.
1182
1183        // Now that the stack frame has been allocated, save all the necessary
1184        // registers using ScratchReg as the base address.
1185        if (HasFP)
1186          BuildMI(MBB, MBBI, dl, StoreInst)
1187            .addReg(FPReg)
1188            .addImm(FPOffset)
1189            .addReg(ScratchReg);
1190        if (FI->usesPICBase())
1191          BuildMI(MBB, MBBI, dl, StoreInst)
1192            .addReg(PPC::R30)
1193            .addImm(PBPOffset)
1194            .addReg(ScratchReg);
1195        if (HasBP) {
1196          BuildMI(MBB, MBBI, dl, StoreInst)
1197            .addReg(BPReg)
1198            .addImm(BPOffset)
1199            .addReg(ScratchReg);
1200          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1201            .addReg(ScratchReg, RegState::Kill)
1202            .addReg(ScratchReg);
1203        }
1204      }
1205    } else {
1206      // The frame size is a known 16-bit constant (fitting in the immediate
1207      // field of STWU). To be here we have to be compiling for PPC32.
1208      // Since the SPReg has been decreased by FrameSize, add it back to each
1209      // offset.
1210      if (HasFP)
1211        BuildMI(MBB, MBBI, dl, StoreInst)
1212          .addReg(FPReg)
1213          .addImm(FrameSize + FPOffset)
1214          .addReg(SPReg);
1215      if (FI->usesPICBase())
1216        BuildMI(MBB, MBBI, dl, StoreInst)
1217          .addReg(PPC::R30)
1218          .addImm(FrameSize + PBPOffset)
1219          .addReg(SPReg);
1220      if (HasBP) {
1221        BuildMI(MBB, MBBI, dl, StoreInst)
1222          .addReg(BPReg)
1223          .addImm(FrameSize + BPOffset)
1224          .addReg(SPReg);
1225        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1226          .addReg(SPReg)
1227          .addImm(FrameSize);
1228      }
1229    }
1230  }
1231
1232  // Add Call Frame Information for the instructions we generated above.
1233  if (needsCFI) {
1234    unsigned CFIIndex;
1235
1236    if (HasBP) {
1237      // Define CFA in terms of BP. Do this in preference to using FP/SP,
1238      // because if the stack needed aligning then CFA won't be at a fixed
1239      // offset from FP/SP.
1240      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1241      CFIIndex = MF.addFrameInst(
1242          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1243    } else {
1244      // Adjust the definition of CFA to account for the change in SP.
1245      assert(NegFrameSize);
1246      CFIIndex = MF.addFrameInst(
1247          MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1248    }
1249    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1250        .addCFIIndex(CFIIndex);
1251
1252    if (HasFP) {
1253      // Describe where FP was saved, at a fixed offset from CFA.
1254      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1255      CFIIndex = MF.addFrameInst(
1256          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1257      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1258          .addCFIIndex(CFIIndex);
1259    }
1260
1261    if (FI->usesPICBase()) {
1262      // Describe where FP was saved, at a fixed offset from CFA.
1263      unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1264      CFIIndex = MF.addFrameInst(
1265          MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1266      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1267          .addCFIIndex(CFIIndex);
1268    }
1269
1270    if (HasBP) {
1271      // Describe where BP was saved, at a fixed offset from CFA.
1272      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1273      CFIIndex = MF.addFrameInst(
1274          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1275      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1276          .addCFIIndex(CFIIndex);
1277    }
1278
1279    if (MustSaveLR) {
1280      // Describe where LR was saved, at a fixed offset from CFA.
1281      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1282      CFIIndex = MF.addFrameInst(
1283          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1284      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1285          .addCFIIndex(CFIIndex);
1286    }
1287  }
1288
1289  // If there is a frame pointer, copy R1 into R31
1290  if (HasFP) {
1291    BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1292      .addReg(SPReg)
1293      .addReg(SPReg);
1294
1295    if (!HasBP && needsCFI) {
1296      // Change the definition of CFA from SP+offset to FP+offset, because SP
1297      // will change at every alloca.
1298      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1299      unsigned CFIIndex = MF.addFrameInst(
1300          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1301
1302      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1303          .addCFIIndex(CFIIndex);
1304    }
1305  }
1306
1307  if (needsCFI) {
1308    // Describe where callee saved registers were saved, at fixed offsets from
1309    // CFA.
1310    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1311    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1312      unsigned Reg = CSI[I].getReg();
1313      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1314
1315      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1316      // subregisters of CR2. We just need to emit a move of CR2.
1317      if (PPC::CRBITRCRegClass.contains(Reg))
1318        continue;
1319
1320      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1321        continue;
1322
1323      // For SVR4, don't emit a move for the CR spill slot if we haven't
1324      // spilled CRs.
1325      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1326          && !MustSaveCR)
1327        continue;
1328
1329      // For 64-bit SVR4 when we have spilled CRs, the spill location
1330      // is SP+8, not a frame-relative slot.
1331      if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1332        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1333        // the whole CR word.  In the ELFv2 ABI, every CR that was
1334        // actually saved gets its own CFI record.
1335        unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1336        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1337            nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1338        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1339            .addCFIIndex(CFIIndex);
1340        continue;
1341      }
1342
1343      if (CSI[I].isSpilledToReg()) {
1344        unsigned SpilledReg = CSI[I].getDstReg();
1345        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1346            nullptr, MRI->getDwarfRegNum(Reg, true),
1347            MRI->getDwarfRegNum(SpilledReg, true)));
1348        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1349          .addCFIIndex(CFIRegister);
1350      } else {
1351        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1352        // We have changed the object offset above but we do not want to change
1353        // the actual offsets in the CFI instruction so we have to undo the
1354        // offset change here.
1355        if (MovingStackUpdateDown)
1356          Offset -= NegFrameSize;
1357
1358        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1359            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1360        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1361            .addCFIIndex(CFIIndex);
1362      }
1363    }
1364  }
1365}
1366
1367void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1368                                        MachineBasicBlock &PrologMBB) const {
1369  // TODO: Generate CFI instructions.
1370  bool isPPC64 = Subtarget.isPPC64();
1371  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1372  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1373  MachineFrameInfo &MFI = MF.getFrameInfo();
1374  MachineModuleInfo &MMI = MF.getMMI();
1375  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1376  // AIX assembler does not support cfi directives.
1377  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1378  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1379    int Opc = MI.getOpcode();
1380    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1381  });
1382  if (StackAllocMIPos == PrologMBB.end())
1383    return;
1384  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1385  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1386  MachineInstr &MI = *StackAllocMIPos;
1387  int64_t NegFrameSize = MI.getOperand(2).getImm();
1388  int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF);
1389  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
1390  int64_t NumBlocks = NegFrameSize / NegProbeSize;
1391  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1392  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1393  Register ScratchReg = MI.getOperand(0).getReg();
1394  Register FPReg = MI.getOperand(1).getReg();
1395  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1396  bool HasBP = RegInfo->hasBasePointer(MF);
1397  Align MaxAlign = MFI.getMaxAlign();
1398  // Initialize current frame pointer.
1399  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1400  BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1401  // Subroutines to generate .cfi_* directives.
1402  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1403                            MachineBasicBlock::iterator MBBI, Register Reg) {
1404    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1405    unsigned CFIIndex = MF.addFrameInst(
1406        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1407    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1408        .addCFIIndex(CFIIndex);
1409  };
1410  auto buildDefCFA = [&](MachineBasicBlock &MBB,
1411                         MachineBasicBlock::iterator MBBI, Register Reg,
1412                         int Offset) {
1413    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1414    unsigned CFIIndex = MBB.getParent()->addFrameInst(
1415        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1416    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1417        .addCFIIndex(CFIIndex);
1418  };
1419  // Subroutine to determine if we can use the Imm as part of d-form.
1420  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1421  // Subroutine to materialize the Imm into TempReg.
1422  auto MaterializeImm = [&](MachineBasicBlock &MBB,
1423                            MachineBasicBlock::iterator MBBI, int64_t Imm,
1424                            Register &TempReg) {
1425    assert(isInt<32>(Imm) && "Unhandled imm");
1426    if (isInt<16>(Imm))
1427      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1428          .addImm(Imm);
1429    else {
1430      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1431          .addImm(Imm >> 16);
1432      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1433          .addReg(TempReg)
1434          .addImm(Imm & 0xFFFF);
1435    }
1436  };
1437  // Subroutine to store frame pointer and decrease stack pointer by probe size.
1438  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1439                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
1440                              Register NegSizeReg, bool UseDForm) {
1441    if (UseDForm)
1442      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1443          .addReg(FPReg)
1444          .addImm(NegSize)
1445          .addReg(SPReg);
1446    else
1447      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1448          .addReg(FPReg)
1449          .addReg(SPReg)
1450          .addReg(NegSizeReg);
1451  };
1452  // Use FPReg to calculate CFA.
1453  if (needsCFI)
1454    buildDefCFA(PrologMBB, {MI}, FPReg, 0);
1455  // For case HasBP && MaxAlign > 1, we have to align the SP by performing
1456  // SP = SP - SP % MaxAlign.
1457  if (HasBP && MaxAlign > 1) {
1458    if (isPPC64)
1459      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1460          .addReg(FPReg)
1461          .addImm(0)
1462          .addImm(64 - Log2(MaxAlign));
1463    else
1464      BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1465          .addReg(FPReg)
1466          .addImm(0)
1467          .addImm(32 - Log2(MaxAlign))
1468          .addImm(31);
1469    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC),
1470            SPReg)
1471        .addReg(ScratchReg)
1472        .addReg(SPReg);
1473  }
1474  // Probe residual part.
1475  if (NegResidualSize) {
1476    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1477    if (!ResidualUseDForm)
1478      MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg);
1479    allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg,
1480                     ResidualUseDForm);
1481  }
1482  bool UseDForm = CanUseDForm(NegProbeSize);
1483  // If number of blocks is small, just probe them directly.
1484  if (NumBlocks < 3) {
1485    if (!UseDForm)
1486      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1487    for (int i = 0; i < NumBlocks; ++i)
1488      allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm);
1489    if (needsCFI) {
1490      // Restore using SPReg to calculate CFA.
1491      buildDefCFAReg(PrologMBB, {MI}, SPReg);
1492    }
1493  } else {
1494    // Since CTR is a volatile register and current shrinkwrap implementation
1495    // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1496    // CTR loop to probe.
1497    // Calculate trip count and stores it in CTRReg.
1498    MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg);
1499    BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1500        .addReg(ScratchReg, RegState::Kill);
1501    if (!UseDForm)
1502      MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg);
1503    // Create MBBs of the loop.
1504    MachineFunction::iterator MBBInsertPoint =
1505        std::next(PrologMBB.getIterator());
1506    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1507    MF.insert(MBBInsertPoint, LoopMBB);
1508    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1509    MF.insert(MBBInsertPoint, ExitMBB);
1510    // Synthesize the loop body.
1511    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1512                     UseDForm);
1513    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1514        .addMBB(LoopMBB);
1515    LoopMBB->addSuccessor(ExitMBB);
1516    LoopMBB->addSuccessor(LoopMBB);
1517    // Synthesize the exit MBB.
1518    ExitMBB->splice(ExitMBB->end(), &PrologMBB,
1519                    std::next(MachineBasicBlock::iterator(MI)),
1520                    PrologMBB.end());
1521    ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB);
1522    PrologMBB.addSuccessor(LoopMBB);
1523    if (needsCFI) {
1524      // Restore using SPReg to calculate CFA.
1525      buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1526    }
1527    // Update liveins.
1528    recomputeLiveIns(*LoopMBB);
1529    recomputeLiveIns(*ExitMBB);
1530  }
1531  ++NumPrologProbed;
1532  MI.eraseFromParent();
1533}
1534
1535void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1536                                    MachineBasicBlock &MBB) const {
1537  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1538  DebugLoc dl;
1539
1540  if (MBBI != MBB.end())
1541    dl = MBBI->getDebugLoc();
1542
1543  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1544  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1545
1546  // Get alignment info so we know how to restore the SP.
1547  const MachineFrameInfo &MFI = MF.getFrameInfo();
1548
1549  // Get the number of bytes allocated from the FrameInfo.
1550  int FrameSize = MFI.getStackSize();
1551
1552  // Get processor type.
1553  bool isPPC64 = Subtarget.isPPC64();
1554  // Get the ABI.
1555  bool isSVR4ABI = Subtarget.isSVR4ABI();
1556
1557  // Check if the link register (LR) has been saved.
1558  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1559  bool MustSaveLR = FI->mustSaveLR();
1560  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1561  bool MustSaveCR = !MustSaveCRs.empty();
1562  // Do we have a frame pointer and/or base pointer for this function?
1563  bool HasFP = hasFP(MF);
1564  bool HasBP = RegInfo->hasBasePointer(MF);
1565  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1566
1567  Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1568  Register BPReg = RegInfo->getBaseRegister(MF);
1569  Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1570  Register ScratchReg;
1571  Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1572  const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1573                                                 : PPC::MTLR );
1574  const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1575                                                 : PPC::LWZ );
1576  const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1577                                                           : PPC::LIS );
1578  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1579                                              : PPC::OR );
1580  const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1581                                                  : PPC::ORI );
1582  const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1583                                                   : PPC::ADDI );
1584  const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1585                                                : PPC::ADD4 );
1586  const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1587                                                     : PPC::LWZ);
1588  const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1589                                                     : PPC::MTOCRF);
1590  int LROffset = getReturnSaveOffset();
1591
1592  int FPOffset = 0;
1593
1594  // Using the same bool variable as below to suppress compiler warnings.
1595  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1596                                              &TempReg);
1597  assert(SingleScratchReg &&
1598         "Could not find an available scratch register");
1599
1600  SingleScratchReg = ScratchReg == TempReg;
1601
1602  if (HasFP) {
1603    if (isSVR4ABI) {
1604      int FPIndex = FI->getFramePointerSaveIndex();
1605      assert(FPIndex && "No Frame Pointer Save Slot!");
1606      FPOffset = MFI.getObjectOffset(FPIndex);
1607    } else {
1608      FPOffset = getFramePointerSaveOffset();
1609    }
1610  }
1611
1612  int BPOffset = 0;
1613  if (HasBP) {
1614    if (isSVR4ABI) {
1615      int BPIndex = FI->getBasePointerSaveIndex();
1616      assert(BPIndex && "No Base Pointer Save Slot!");
1617      BPOffset = MFI.getObjectOffset(BPIndex);
1618    } else {
1619      BPOffset = getBasePointerSaveOffset();
1620    }
1621  }
1622
1623  int PBPOffset = 0;
1624  if (FI->usesPICBase()) {
1625    int PBPIndex = FI->getPICBasePointerSaveIndex();
1626    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1627    PBPOffset = MFI.getObjectOffset(PBPIndex);
1628  }
1629
1630  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1631
1632  if (IsReturnBlock) {
1633    unsigned RetOpcode = MBBI->getOpcode();
1634    bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1635                      RetOpcode == PPC::TCRETURNdi ||
1636                      RetOpcode == PPC::TCRETURNai ||
1637                      RetOpcode == PPC::TCRETURNri8 ||
1638                      RetOpcode == PPC::TCRETURNdi8 ||
1639                      RetOpcode == PPC::TCRETURNai8;
1640
1641    if (UsesTCRet) {
1642      int MaxTCRetDelta = FI->getTailCallSPDelta();
1643      MachineOperand &StackAdjust = MBBI->getOperand(1);
1644      assert(StackAdjust.isImm() && "Expecting immediate value.");
1645      // Adjust stack pointer.
1646      int StackAdj = StackAdjust.getImm();
1647      int Delta = StackAdj - MaxTCRetDelta;
1648      assert((Delta >= 0) && "Delta must be positive");
1649      if (MaxTCRetDelta>0)
1650        FrameSize += (StackAdj +Delta);
1651      else
1652        FrameSize += StackAdj;
1653    }
1654  }
1655
1656  // Frames of 32KB & larger require special handling because they cannot be
1657  // indexed into with a simple LD/LWZ immediate offset operand.
1658  bool isLargeFrame = !isInt<16>(FrameSize);
1659
1660  // On targets without red zone, the SP needs to be restored last, so that
1661  // all live contents of the stack frame are upwards of the SP. This means
1662  // that we cannot restore SP just now, since there may be more registers
1663  // to restore from the stack frame (e.g. R31). If the frame size is not
1664  // a simple immediate value, we will need a spare register to hold the
1665  // restored SP. If the frame size is known and small, we can simply adjust
1666  // the offsets of the registers to be restored, and still use SP to restore
1667  // them. In such case, the final update of SP will be to add the frame
1668  // size to it.
1669  // To simplify the code, set RBReg to the base register used to restore
1670  // values from the stack, and set SPAdd to the value that needs to be added
1671  // to the SP at the end. The default values are as if red zone was present.
1672  unsigned RBReg = SPReg;
1673  unsigned SPAdd = 0;
1674
1675  // Check if we can move the stack update instruction up the epilogue
1676  // past the callee saves. This will allow the move to LR instruction
1677  // to be executed before the restores of the callee saves which means
1678  // that the callee saves can hide the latency from the MTLR instrcution.
1679  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1680  if (stackUpdateCanBeMoved(MF)) {
1681    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1682    for (CalleeSavedInfo CSI : Info) {
1683      int FrIdx = CSI.getFrameIdx();
1684      // If the frame index is not negative the callee saved info belongs to a
1685      // stack object that is not a fixed stack object. We ignore non-fixed
1686      // stack objects because we won't move the update of the stack pointer
1687      // past them.
1688      if (FrIdx >= 0)
1689        continue;
1690
1691      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1692        StackUpdateLoc--;
1693      else {
1694        // Abort the operation as we can't update all CSR restores.
1695        StackUpdateLoc = MBBI;
1696        break;
1697      }
1698    }
1699  }
1700
1701  if (FrameSize) {
1702    // In the prologue, the loaded (or persistent) stack pointer value is
1703    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1704    // zone add this offset back now.
1705
1706    // If this function contained a fastcc call and GuaranteedTailCallOpt is
1707    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1708    // call which invalidates the stack pointer value in SP(0). So we use the
1709    // value of R31 in this case.
1710    if (FI->hasFastCall()) {
1711      assert(HasFP && "Expecting a valid frame pointer.");
1712      if (!HasRedZone)
1713        RBReg = FPReg;
1714      if (!isLargeFrame) {
1715        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1716          .addReg(FPReg).addImm(FrameSize);
1717      } else {
1718        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1719          .addImm(FrameSize >> 16);
1720        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1721          .addReg(ScratchReg, RegState::Kill)
1722          .addImm(FrameSize & 0xFFFF);
1723        BuildMI(MBB, MBBI, dl, AddInst)
1724          .addReg(RBReg)
1725          .addReg(FPReg)
1726          .addReg(ScratchReg);
1727      }
1728    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1729      if (HasRedZone) {
1730        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1731          .addReg(SPReg)
1732          .addImm(FrameSize);
1733      } else {
1734        // Make sure that adding FrameSize will not overflow the max offset
1735        // size.
1736        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1737               "Local offsets should be negative");
1738        SPAdd = FrameSize;
1739        FPOffset += FrameSize;
1740        BPOffset += FrameSize;
1741        PBPOffset += FrameSize;
1742      }
1743    } else {
1744      // We don't want to use ScratchReg as a base register, because it
1745      // could happen to be R0. Use FP instead, but make sure to preserve it.
1746      if (!HasRedZone) {
1747        // If FP is not saved, copy it to ScratchReg.
1748        if (!HasFP)
1749          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1750            .addReg(FPReg)
1751            .addReg(FPReg);
1752        RBReg = FPReg;
1753      }
1754      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1755        .addImm(0)
1756        .addReg(SPReg);
1757    }
1758  }
1759  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1760  // If there is no red zone, ScratchReg may be needed for holding a useful
1761  // value (although not the base register). Make sure it is not overwritten
1762  // too early.
1763
1764  // If we need to restore both the LR and the CR and we only have one
1765  // available scratch register, we must do them one at a time.
1766  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1767    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1768    // is live here.
1769    assert(HasRedZone && "Expecting red zone");
1770    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1771      .addImm(CRSaveOffset)
1772      .addReg(SPReg);
1773    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1774      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1775        .addReg(TempReg, getKillRegState(i == e-1));
1776  }
1777
1778  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1779  // LR is stored in the caller's stack frame. ScratchReg will be needed
1780  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1781  // a base register anyway, because it may happen to be R0.
1782  bool LoadedLR = false;
1783  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1784    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1785      .addImm(LROffset+SPAdd)
1786      .addReg(RBReg);
1787    LoadedLR = true;
1788  }
1789
1790  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1791    assert(RBReg == SPReg && "Should be using SP as a base register");
1792    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1793      .addImm(CRSaveOffset)
1794      .addReg(RBReg);
1795  }
1796
1797  if (HasFP) {
1798    // If there is red zone, restore FP directly, since SP has already been
1799    // restored. Otherwise, restore the value of FP into ScratchReg.
1800    if (HasRedZone || RBReg == SPReg)
1801      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1802        .addImm(FPOffset)
1803        .addReg(SPReg);
1804    else
1805      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1806        .addImm(FPOffset)
1807        .addReg(RBReg);
1808  }
1809
1810  if (FI->usesPICBase())
1811    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1812      .addImm(PBPOffset)
1813      .addReg(RBReg);
1814
1815  if (HasBP)
1816    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1817      .addImm(BPOffset)
1818      .addReg(RBReg);
1819
1820  // There is nothing more to be loaded from the stack, so now we can
1821  // restore SP: SP = RBReg + SPAdd.
1822  if (RBReg != SPReg || SPAdd != 0) {
1823    assert(!HasRedZone && "This should not happen with red zone");
1824    // If SPAdd is 0, generate a copy.
1825    if (SPAdd == 0)
1826      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1827        .addReg(RBReg)
1828        .addReg(RBReg);
1829    else
1830      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1831        .addReg(RBReg)
1832        .addImm(SPAdd);
1833
1834    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1835    if (RBReg == FPReg)
1836      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1837        .addReg(ScratchReg)
1838        .addReg(ScratchReg);
1839
1840    // Now load the LR from the caller's stack frame.
1841    if (MustSaveLR && !LoadedLR)
1842      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1843        .addImm(LROffset)
1844        .addReg(SPReg);
1845  }
1846
1847  if (MustSaveCR &&
1848      !(SingleScratchReg && MustSaveLR))
1849    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1850      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1851        .addReg(TempReg, getKillRegState(i == e-1));
1852
1853  if (MustSaveLR)
1854    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1855
1856  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1857  // call optimization
1858  if (IsReturnBlock) {
1859    unsigned RetOpcode = MBBI->getOpcode();
1860    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1861        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1862        MF.getFunction().getCallingConv() == CallingConv::Fast) {
1863      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1864      unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1865
1866      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1867        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1868          .addReg(SPReg).addImm(CallerAllocatedAmt);
1869      } else {
1870        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1871          .addImm(CallerAllocatedAmt >> 16);
1872        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1873          .addReg(ScratchReg, RegState::Kill)
1874          .addImm(CallerAllocatedAmt & 0xFFFF);
1875        BuildMI(MBB, MBBI, dl, AddInst)
1876          .addReg(SPReg)
1877          .addReg(FPReg)
1878          .addReg(ScratchReg);
1879      }
1880    } else {
1881      createTailCallBranchInstr(MBB);
1882    }
1883  }
1884}
1885
1886void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1887  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1888
1889  // If we got this far a first terminator should exist.
1890  assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1891
1892  DebugLoc dl = MBBI->getDebugLoc();
1893  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1894
1895  // Create branch instruction for pseudo tail call return instruction.
1896  // The TCRETURNdi variants are direct calls. Valid targets for those are
1897  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1898  // since we can tail call external functions with PC-Rel (i.e. we don't need
1899  // to worry about different TOC pointers). Some of the external functions will
1900  // be MO_GlobalAddress while others like memcpy for example, are going to
1901  // be MO_ExternalSymbol.
1902  unsigned RetOpcode = MBBI->getOpcode();
1903  if (RetOpcode == PPC::TCRETURNdi) {
1904    MBBI = MBB.getLastNonDebugInstr();
1905    MachineOperand &JumpTarget = MBBI->getOperand(0);
1906    if (JumpTarget.isGlobal())
1907      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1908        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1909    else if (JumpTarget.isSymbol())
1910      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1911        addExternalSymbol(JumpTarget.getSymbolName());
1912    else
1913      llvm_unreachable("Expecting Global or External Symbol");
1914  } else if (RetOpcode == PPC::TCRETURNri) {
1915    MBBI = MBB.getLastNonDebugInstr();
1916    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1917    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1918  } else if (RetOpcode == PPC::TCRETURNai) {
1919    MBBI = MBB.getLastNonDebugInstr();
1920    MachineOperand &JumpTarget = MBBI->getOperand(0);
1921    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1922  } else if (RetOpcode == PPC::TCRETURNdi8) {
1923    MBBI = MBB.getLastNonDebugInstr();
1924    MachineOperand &JumpTarget = MBBI->getOperand(0);
1925    if (JumpTarget.isGlobal())
1926      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1927        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1928    else if (JumpTarget.isSymbol())
1929      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1930        addExternalSymbol(JumpTarget.getSymbolName());
1931    else
1932      llvm_unreachable("Expecting Global or External Symbol");
1933  } else if (RetOpcode == PPC::TCRETURNri8) {
1934    MBBI = MBB.getLastNonDebugInstr();
1935    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1936    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1937  } else if (RetOpcode == PPC::TCRETURNai8) {
1938    MBBI = MBB.getLastNonDebugInstr();
1939    MachineOperand &JumpTarget = MBBI->getOperand(0);
1940    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1941  }
1942}
1943
1944void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1945                                            BitVector &SavedRegs,
1946                                            RegScavenger *RS) const {
1947  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1948
1949  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1950
1951  //  Save and clear the LR state.
1952  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1953  unsigned LR = RegInfo->getRARegister();
1954  FI->setMustSaveLR(MustSaveLR(MF, LR));
1955  SavedRegs.reset(LR);
1956
1957  //  Save R31 if necessary
1958  int FPSI = FI->getFramePointerSaveIndex();
1959  const bool isPPC64 = Subtarget.isPPC64();
1960  MachineFrameInfo &MFI = MF.getFrameInfo();
1961
1962  // If the frame pointer save index hasn't been defined yet.
1963  if (!FPSI && needsFP(MF)) {
1964    // Find out what the fix offset of the frame pointer save area.
1965    int FPOffset = getFramePointerSaveOffset();
1966    // Allocate the frame index for frame pointer save area.
1967    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1968    // Save the result.
1969    FI->setFramePointerSaveIndex(FPSI);
1970  }
1971
1972  int BPSI = FI->getBasePointerSaveIndex();
1973  if (!BPSI && RegInfo->hasBasePointer(MF)) {
1974    int BPOffset = getBasePointerSaveOffset();
1975    // Allocate the frame index for the base pointer save area.
1976    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1977    // Save the result.
1978    FI->setBasePointerSaveIndex(BPSI);
1979  }
1980
1981  // Reserve stack space for the PIC Base register (R30).
1982  // Only used in SVR4 32-bit.
1983  if (FI->usesPICBase()) {
1984    int PBPSI = MFI.CreateFixedObject(4, -8, true);
1985    FI->setPICBasePointerSaveIndex(PBPSI);
1986  }
1987
1988  // Make sure we don't explicitly spill r31, because, for example, we have
1989  // some inline asm which explicitly clobbers it, when we otherwise have a
1990  // frame pointer and are using r31's spill slot for the prologue/epilogue
1991  // code. Same goes for the base pointer and the PIC base register.
1992  if (needsFP(MF))
1993    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1994  if (RegInfo->hasBasePointer(MF))
1995    SavedRegs.reset(RegInfo->getBaseRegister(MF));
1996  if (FI->usesPICBase())
1997    SavedRegs.reset(PPC::R30);
1998
1999  // Reserve stack space to move the linkage area to in case of a tail call.
2000  int TCSPDelta = 0;
2001  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2002      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2003    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2004  }
2005
2006  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2007  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
2008  // object at the offset of the CR-save slot in the linkage area. The actual
2009  // save and restore of the condition register will be created as part of the
2010  // prologue and epilogue insertion, but the FixedStack object is needed to
2011  // keep the CalleSavedInfo valid.
2012  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2013       SavedRegs.test(PPC::CR4))) {
2014    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2015    const int64_t SpillOffset =
2016        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2017    int FrameIdx =
2018        MFI.CreateFixedObject(SpillSize, SpillOffset,
2019                              /* IsImmutable */ true, /* IsAliased */ false);
2020    FI->setCRSpillFrameIndex(FrameIdx);
2021  }
2022}
2023
2024void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2025                                                       RegScavenger *RS) const {
2026  // Get callee saved register information.
2027  MachineFrameInfo &MFI = MF.getFrameInfo();
2028  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2029
2030  // If the function is shrink-wrapped, and if the function has a tail call, the
2031  // tail call might not be in the new RestoreBlock, so real branch instruction
2032  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
2033  // RestoreBlock. So we handle this case here.
2034  if (MFI.getSavePoint() && MFI.hasTailCall()) {
2035    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2036    for (MachineBasicBlock &MBB : MF) {
2037      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2038        createTailCallBranchInstr(MBB);
2039    }
2040  }
2041
2042  // Early exit if no callee saved registers are modified!
2043  if (CSI.empty() && !needsFP(MF)) {
2044    addScavengingSpillSlot(MF, RS);
2045    return;
2046  }
2047
2048  unsigned MinGPR = PPC::R31;
2049  unsigned MinG8R = PPC::X31;
2050  unsigned MinFPR = PPC::F31;
2051  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2052
2053  bool HasGPSaveArea = false;
2054  bool HasG8SaveArea = false;
2055  bool HasFPSaveArea = false;
2056  bool HasVRSAVESaveArea = false;
2057  bool HasVRSaveArea = false;
2058
2059  SmallVector<CalleeSavedInfo, 18> GPRegs;
2060  SmallVector<CalleeSavedInfo, 18> G8Regs;
2061  SmallVector<CalleeSavedInfo, 18> FPRegs;
2062  SmallVector<CalleeSavedInfo, 18> VRegs;
2063
2064  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2065    unsigned Reg = CSI[i].getReg();
2066    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2067            (Reg != PPC::X2 && Reg != PPC::R2)) &&
2068           "Not expecting to try to spill R2 in a function that must save TOC");
2069    if (PPC::GPRCRegClass.contains(Reg)) {
2070      HasGPSaveArea = true;
2071
2072      GPRegs.push_back(CSI[i]);
2073
2074      if (Reg < MinGPR) {
2075        MinGPR = Reg;
2076      }
2077    } else if (PPC::G8RCRegClass.contains(Reg)) {
2078      HasG8SaveArea = true;
2079
2080      G8Regs.push_back(CSI[i]);
2081
2082      if (Reg < MinG8R) {
2083        MinG8R = Reg;
2084      }
2085    } else if (PPC::F8RCRegClass.contains(Reg)) {
2086      HasFPSaveArea = true;
2087
2088      FPRegs.push_back(CSI[i]);
2089
2090      if (Reg < MinFPR) {
2091        MinFPR = Reg;
2092      }
2093    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2094               PPC::CRRCRegClass.contains(Reg)) {
2095      ; // do nothing, as we already know whether CRs are spilled
2096    } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
2097      HasVRSAVESaveArea = true;
2098    } else if (PPC::VRRCRegClass.contains(Reg) ||
2099               PPC::SPERCRegClass.contains(Reg)) {
2100      // Altivec and SPE are mutually exclusive, but have the same stack
2101      // alignment requirements, so overload the save area for both cases.
2102      HasVRSaveArea = true;
2103
2104      VRegs.push_back(CSI[i]);
2105
2106      if (Reg < MinVR) {
2107        MinVR = Reg;
2108      }
2109    } else {
2110      llvm_unreachable("Unknown RegisterClass!");
2111    }
2112  }
2113
2114  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2115  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2116
2117  int64_t LowerBound = 0;
2118
2119  // Take into account stack space reserved for tail calls.
2120  int TCSPDelta = 0;
2121  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2122      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2123    LowerBound = TCSPDelta;
2124  }
2125
2126  // The Floating-point register save area is right below the back chain word
2127  // of the previous stack frame.
2128  if (HasFPSaveArea) {
2129    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2130      int FI = FPRegs[i].getFrameIdx();
2131
2132      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2133    }
2134
2135    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2136  }
2137
2138  // Check whether the frame pointer register is allocated. If so, make sure it
2139  // is spilled to the correct offset.
2140  if (needsFP(MF)) {
2141    int FI = PFI->getFramePointerSaveIndex();
2142    assert(FI && "No Frame Pointer Save Slot!");
2143    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2144    // FP is R31/X31, so no need to update MinGPR/MinG8R.
2145    HasGPSaveArea = true;
2146  }
2147
2148  if (PFI->usesPICBase()) {
2149    int FI = PFI->getPICBasePointerSaveIndex();
2150    assert(FI && "No PIC Base Pointer Save Slot!");
2151    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2152
2153    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2154    HasGPSaveArea = true;
2155  }
2156
2157  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2158  if (RegInfo->hasBasePointer(MF)) {
2159    int FI = PFI->getBasePointerSaveIndex();
2160    assert(FI && "No Base Pointer Save Slot!");
2161    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2162
2163    Register BP = RegInfo->getBaseRegister(MF);
2164    if (PPC::G8RCRegClass.contains(BP)) {
2165      MinG8R = std::min<unsigned>(MinG8R, BP);
2166      HasG8SaveArea = true;
2167    } else if (PPC::GPRCRegClass.contains(BP)) {
2168      MinGPR = std::min<unsigned>(MinGPR, BP);
2169      HasGPSaveArea = true;
2170    }
2171  }
2172
2173  // General register save area starts right below the Floating-point
2174  // register save area.
2175  if (HasGPSaveArea || HasG8SaveArea) {
2176    // Move general register save area spill slots down, taking into account
2177    // the size of the Floating-point register save area.
2178    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2179      if (!GPRegs[i].isSpilledToReg()) {
2180        int FI = GPRegs[i].getFrameIdx();
2181        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2182      }
2183    }
2184
2185    // Move general register save area spill slots down, taking into account
2186    // the size of the Floating-point register save area.
2187    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2188      if (!G8Regs[i].isSpilledToReg()) {
2189        int FI = G8Regs[i].getFrameIdx();
2190        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2191      }
2192    }
2193
2194    unsigned MinReg =
2195      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2196                         TRI->getEncodingValue(MinG8R));
2197
2198    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2199    LowerBound -= (31 - MinReg + 1) * GPRegSize;
2200  }
2201
2202  // For 32-bit only, the CR save area is below the general register
2203  // save area.  For 64-bit SVR4, the CR save area is addressed relative
2204  // to the stack pointer and hence does not need an adjustment here.
2205  // Only CR2 (the first nonvolatile spilled) has an associated frame
2206  // index so that we have a single uniform save area.
2207  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2208    // Adjust the frame index of the CR spill slot.
2209    for (const auto &CSInfo : CSI) {
2210      if (CSInfo.getReg() == PPC::CR2) {
2211        int FI = CSInfo.getFrameIdx();
2212        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2213        break;
2214      }
2215    }
2216
2217    LowerBound -= 4; // The CR save area is always 4 bytes long.
2218  }
2219
2220  if (HasVRSAVESaveArea) {
2221    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2222    //             which have the VRSAVE register class?
2223    // Adjust the frame index of the VRSAVE spill slot.
2224    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2225      unsigned Reg = CSI[i].getReg();
2226
2227      if (PPC::VRSAVERCRegClass.contains(Reg)) {
2228        int FI = CSI[i].getFrameIdx();
2229
2230        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2231      }
2232    }
2233
2234    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2235  }
2236
2237  // Both Altivec and SPE have the same alignment and padding requirements
2238  // within the stack frame.
2239  if (HasVRSaveArea) {
2240    // Insert alignment padding, we need 16-byte alignment. Note: for positive
2241    // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2242    // we are using negative number here (the stack grows downward). We should
2243    // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2244    // is the alignment size ( n = 16 here) and y is the size after aligning.
2245    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2246    LowerBound &= ~(15);
2247
2248    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2249      int FI = VRegs[i].getFrameIdx();
2250
2251      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2252    }
2253  }
2254
2255  addScavengingSpillSlot(MF, RS);
2256}
2257
2258void
2259PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2260                                         RegScavenger *RS) const {
2261  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2262  // a large stack, which will require scavenging a register to materialize a
2263  // large offset.
2264
2265  // We need to have a scavenger spill slot for spills if the frame size is
2266  // large. In case there is no free register for large-offset addressing,
2267  // this slot is used for the necessary emergency spill. Also, we need the
2268  // slot for dynamic stack allocations.
2269
2270  // The scavenger might be invoked if the frame offset does not fit into
2271  // the 16-bit immediate. We don't know the complete frame size here
2272  // because we've not yet computed callee-saved register spills or the
2273  // needed alignment padding.
2274  unsigned StackSize = determineFrameLayout(MF, true);
2275  MachineFrameInfo &MFI = MF.getFrameInfo();
2276  if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2277      hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2278    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2279    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2280    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2281    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2282    unsigned Size = TRI.getSpillSize(RC);
2283    Align Alignment = TRI.getSpillAlign(RC);
2284    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2285
2286    // Might we have over-aligned allocas?
2287    bool HasAlVars =
2288        MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2289
2290    // These kinds of spills might need two registers.
2291    if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2292      RS->addScavengingFrameIndex(
2293          MFI.CreateStackObject(Size, Alignment, false));
2294  }
2295}
2296
2297// This function checks if a callee saved gpr can be spilled to a volatile
2298// vector register. This occurs for leaf functions when the option
2299// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2300// which were not spilled to vectors, return false so the target independent
2301// code can handle them by assigning a FrameIdx to a stack slot.
2302bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2303    MachineFunction &MF, const TargetRegisterInfo *TRI,
2304    std::vector<CalleeSavedInfo> &CSI) const {
2305
2306  if (CSI.empty())
2307    return true; // Early exit if no callee saved registers are modified!
2308
2309  // Early exit if cannot spill gprs to volatile vector registers.
2310  MachineFrameInfo &MFI = MF.getFrameInfo();
2311  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2312    return false;
2313
2314  // Build a BitVector of VSRs that can be used for spilling GPRs.
2315  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2316  BitVector BVCalleeSaved(TRI->getNumRegs());
2317  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2318  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2319  for (unsigned i = 0; CSRegs[i]; ++i)
2320    BVCalleeSaved.set(CSRegs[i]);
2321
2322  for (unsigned Reg : BVAllocatable.set_bits()) {
2323    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2324    // used in the function.
2325    if (BVCalleeSaved[Reg] ||
2326        (!PPC::F8RCRegClass.contains(Reg) &&
2327         !PPC::VFRCRegClass.contains(Reg)) ||
2328        (MF.getRegInfo().isPhysRegUsed(Reg)))
2329      BVAllocatable.reset(Reg);
2330  }
2331
2332  bool AllSpilledToReg = true;
2333  for (auto &CS : CSI) {
2334    if (BVAllocatable.none())
2335      return false;
2336
2337    unsigned Reg = CS.getReg();
2338    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2339      AllSpilledToReg = false;
2340      continue;
2341    }
2342
2343    unsigned VolatileVFReg = BVAllocatable.find_first();
2344    if (VolatileVFReg < BVAllocatable.size()) {
2345      CS.setDstReg(VolatileVFReg);
2346      BVAllocatable.reset(VolatileVFReg);
2347    } else {
2348      AllSpilledToReg = false;
2349    }
2350  }
2351  return AllSpilledToReg;
2352}
2353
2354bool PPCFrameLowering::spillCalleeSavedRegisters(
2355    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2356    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2357
2358  MachineFunction *MF = MBB.getParent();
2359  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2360  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2361  bool MustSaveTOC = FI->mustSaveTOC();
2362  DebugLoc DL;
2363  bool CRSpilled = false;
2364  MachineInstrBuilder CRMIB;
2365
2366  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2367    unsigned Reg = CSI[i].getReg();
2368    // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2369    if (Reg == PPC::VRSAVE)
2370      continue;
2371
2372    // CR2 through CR4 are the nonvolatile CR fields.
2373    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2374
2375    // Add the callee-saved register as live-in; it's killed at the spill.
2376    // Do not do this for callee-saved registers that are live-in to the
2377    // function because they will already be marked live-in and this will be
2378    // adding it for a second time. It is an error to add the same register
2379    // to the set more than once.
2380    const MachineRegisterInfo &MRI = MF->getRegInfo();
2381    bool IsLiveIn = MRI.isLiveIn(Reg);
2382    if (!IsLiveIn)
2383       MBB.addLiveIn(Reg);
2384
2385    if (CRSpilled && IsCRField) {
2386      CRMIB.addReg(Reg, RegState::ImplicitKill);
2387      continue;
2388    }
2389
2390    // The actual spill will happen in the prologue.
2391    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2392      continue;
2393
2394    // Insert the spill to the stack frame.
2395    if (IsCRField) {
2396      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2397      if (!Subtarget.is32BitELFABI()) {
2398        // The actual spill will happen at the start of the prologue.
2399        FuncInfo->addMustSaveCR(Reg);
2400      } else {
2401        CRSpilled = true;
2402        FuncInfo->setSpillsCR();
2403
2404        // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2405        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2406        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2407                  .addReg(Reg, RegState::ImplicitKill);
2408
2409        MBB.insert(MI, CRMIB);
2410        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2411                                         .addReg(PPC::R12,
2412                                                 getKillRegState(true)),
2413                                         CSI[i].getFrameIdx()));
2414      }
2415    } else {
2416      if (CSI[i].isSpilledToReg()) {
2417        NumPESpillVSR++;
2418        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2419          .addReg(Reg, getKillRegState(true));
2420      } else {
2421        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2422        // Use !IsLiveIn for the kill flag.
2423        // We do not want to kill registers that are live in this function
2424        // before their use because they will become undefined registers.
2425        // Functions without NoUnwind need to preserve the order of elements in
2426        // saved vector registers.
2427        if (Subtarget.needsSwapsForVSXMemOps() &&
2428            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2429          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2430                                       CSI[i].getFrameIdx(), RC, TRI);
2431        else
2432          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2433                                  RC, TRI);
2434      }
2435    }
2436  }
2437  return true;
2438}
2439
2440static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2441                       bool CR4Spilled, MachineBasicBlock &MBB,
2442                       MachineBasicBlock::iterator MI,
2443                       ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2444
2445  MachineFunction *MF = MBB.getParent();
2446  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2447  DebugLoc DL;
2448  unsigned MoveReg = PPC::R12;
2449
2450  // 32-bit:  FP-relative
2451  MBB.insert(MI,
2452             addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2453                               CSI[CSIIndex].getFrameIdx()));
2454
2455  unsigned RestoreOp = PPC::MTOCRF;
2456  if (CR2Spilled)
2457    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2458               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2459
2460  if (CR3Spilled)
2461    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2462               .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2463
2464  if (CR4Spilled)
2465    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2466               .addReg(MoveReg, getKillRegState(true)));
2467}
2468
2469MachineBasicBlock::iterator PPCFrameLowering::
2470eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2471                              MachineBasicBlock::iterator I) const {
2472  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2473  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2474      I->getOpcode() == PPC::ADJCALLSTACKUP) {
2475    // Add (actually subtract) back the amount the callee popped on return.
2476    if (int CalleeAmt =  I->getOperand(1).getImm()) {
2477      bool is64Bit = Subtarget.isPPC64();
2478      CalleeAmt *= -1;
2479      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2480      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2481      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2482      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2483      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2484      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2485      const DebugLoc &dl = I->getDebugLoc();
2486
2487      if (isInt<16>(CalleeAmt)) {
2488        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2489          .addReg(StackReg, RegState::Kill)
2490          .addImm(CalleeAmt);
2491      } else {
2492        MachineBasicBlock::iterator MBBI = I;
2493        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2494          .addImm(CalleeAmt >> 16);
2495        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2496          .addReg(TmpReg, RegState::Kill)
2497          .addImm(CalleeAmt & 0xFFFF);
2498        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2499          .addReg(StackReg, RegState::Kill)
2500          .addReg(TmpReg);
2501      }
2502    }
2503  }
2504  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2505  return MBB.erase(I);
2506}
2507
2508static bool isCalleeSavedCR(unsigned Reg) {
2509  return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2510}
2511
2512bool PPCFrameLowering::restoreCalleeSavedRegisters(
2513    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2514    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2515  MachineFunction *MF = MBB.getParent();
2516  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2517  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2518  bool MustSaveTOC = FI->mustSaveTOC();
2519  bool CR2Spilled = false;
2520  bool CR3Spilled = false;
2521  bool CR4Spilled = false;
2522  unsigned CSIIndex = 0;
2523
2524  // Initialize insertion-point logic; we will be restoring in reverse
2525  // order of spill.
2526  MachineBasicBlock::iterator I = MI, BeforeI = I;
2527  bool AtStart = I == MBB.begin();
2528
2529  if (!AtStart)
2530    --BeforeI;
2531
2532  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2533    unsigned Reg = CSI[i].getReg();
2534
2535    // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2536    if (Reg == PPC::VRSAVE)
2537      continue;
2538
2539    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2540      continue;
2541
2542    // Restore of callee saved condition register field is handled during
2543    // epilogue insertion.
2544    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2545      continue;
2546
2547    if (Reg == PPC::CR2) {
2548      CR2Spilled = true;
2549      // The spill slot is associated only with CR2, which is the
2550      // first nonvolatile spilled.  Save it here.
2551      CSIIndex = i;
2552      continue;
2553    } else if (Reg == PPC::CR3) {
2554      CR3Spilled = true;
2555      continue;
2556    } else if (Reg == PPC::CR4) {
2557      CR4Spilled = true;
2558      continue;
2559    } else {
2560      // On 32-bit ELF when we first encounter a non-CR register after seeing at
2561      // least one CR register, restore all spilled CRs together.
2562      if (CR2Spilled || CR3Spilled || CR4Spilled) {
2563        bool is31 = needsFP(*MF);
2564        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2565                   CSIIndex);
2566        CR2Spilled = CR3Spilled = CR4Spilled = false;
2567      }
2568
2569      if (CSI[i].isSpilledToReg()) {
2570        DebugLoc DL;
2571        NumPEReloadVSR++;
2572        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2573            .addReg(CSI[i].getDstReg(), getKillRegState(true));
2574      } else {
2575       // Default behavior for non-CR saves.
2576        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2577
2578        // Functions without NoUnwind need to preserve the order of elements in
2579        // saved vector registers.
2580        if (Subtarget.needsSwapsForVSXMemOps() &&
2581            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2582          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2583                                        TRI);
2584        else
2585          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2586
2587        assert(I != MBB.begin() &&
2588               "loadRegFromStackSlot didn't insert any code!");
2589      }
2590    }
2591
2592    // Insert in reverse order.
2593    if (AtStart)
2594      I = MBB.begin();
2595    else {
2596      I = BeforeI;
2597      ++I;
2598    }
2599  }
2600
2601  // If we haven't yet spilled the CRs, do so now.
2602  if (CR2Spilled || CR3Spilled || CR4Spilled) {
2603    assert(Subtarget.is32BitELFABI() &&
2604           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2605    bool is31 = needsFP(*MF);
2606    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2607  }
2608
2609  return true;
2610}
2611
/// Returns the value of the TOCSaveOffset member.
/// NOTE(review): presumably the offset of the TOC save slot; the member is
/// initialized outside this part of the file, so confirm there.
unsigned PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}
2615
/// Returns the value of the FramePointerSaveOffset member.
/// NOTE(review): presumably the offset of the frame pointer save slot; the
/// member is initialized outside this part of the file, so confirm there.
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}
2619
/// Returns the value of the BasePointerSaveOffset member.
/// NOTE(review): presumably the offset of the base pointer save slot; the
/// member is initialized outside this part of the file, so confirm there.
unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
  return BasePointerSaveOffset;
}
2623
2624bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2625  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2626    return false;
2627  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2628          MF.getSubtarget<PPCSubtarget>().isPPC64());
2629}
2630