1//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the PPC implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCFrameLowering.h"
14#include "PPCInstrBuilder.h"
15#include "PPCInstrInfo.h"
16#include "PPCMachineFunctionInfo.h"
17#include "PPCSubtarget.h"
18#include "PPCTargetMachine.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineModuleInfo.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/RegisterScavenging.h"
26#include "llvm/IR/Function.h"
27#include "llvm/Target/TargetOptions.h"
28
29using namespace llvm;
30
// Debug/statistics category name used by the STATISTIC counters below.
#define DEBUG_TYPE "framelowering"
// Counters for prologue spills to, and epilogue reloads from, vector registers
// (see the description strings).
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");

// Hidden command-line flag "ppc-enable-pe-vector-spills"; it defaults to
// false.  Per its description it enables prologue spills to vector registers.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
39
/// VRRegNo - Map from a vector register number to its register enum value,
/// i.e. VRRegNo[N] is PPC::V<N>.  The table has exactly 32 entries, so it must
/// only be indexed with values in the range [0, 31].
///
static const MCPhysReg VRRegNo[] = {
 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
48
49static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50  if (STI.isDarwinABI() || STI.isAIXABI())
51    return STI.isPPC64() ? 16 : 8;
52  // SVR4 ABI:
53  return STI.isPPC64() ? 16 : 4;
54}
55
56static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57  if (STI.isAIXABI())
58    return STI.isPPC64() ? 40 : 20;
59  return STI.isELFv2ABI() ? 24 : 40;
60}
61
62static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63  // For the Darwin ABI:
64  // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
65  // for saving the frame pointer (if needed.)  While the published ABI has
66  // not used this slot since at least MacOSX 10.2, there is older code
67  // around that does use it, and that needs to continue to work.
68  if (STI.isDarwinABI())
69    return STI.isPPC64() ? -8U : -4U;
70
71  // SVR4 ABI: First slot in the general register save area.
72  return STI.isPPC64() ? -8U : -4U;
73}
74
75static unsigned computeLinkageSize(const PPCSubtarget &STI) {
76  if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
77    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
78
79  // 32-bit SVR4 ABI:
80  return 8;
81}
82
83static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
84  if (STI.isDarwinABI())
85    return STI.isPPC64() ? -16U : -8U;
86
87  // SVR4 ABI: First slot in the general register save area.
88  return STI.isPPC64()
89             ? -16U
90             : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
91}
92
/// Return the condition register save offset (currently the same for every
/// subtarget).
static unsigned computeCRSaveOffset() {
  // NOTE: AIX PPC32 still needs its own value here; this one has not been
  // updated for it yet.
  const unsigned CRSaveSlotOffset = 8;
  return CRSaveSlotOffset;
}
97
// Construct the frame lowering object for subtarget STI.  The base class is
// told that the stack grows down, is given the platform stack alignment and a
// third argument of 0.  All ABI-dependent save offsets and the linkage size
// are computed once here from the subtarget and cached in members.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset()) {}
107
108// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
109const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
110    unsigned &NumEntries) const {
111  if (Subtarget.isDarwinABI()) {
112    NumEntries = 1;
113    if (Subtarget.isPPC64()) {
114      static const SpillSlot darwin64Offsets = {PPC::X31, -8};
115      return &darwin64Offsets;
116    } else {
117      static const SpillSlot darwinOffsets = {PPC::R31, -4};
118      return &darwinOffsets;
119    }
120  }
121
122  // Early exit if not using the SVR4 ABI.
123  if (!Subtarget.isSVR4ABI()) {
124    NumEntries = 0;
125    return nullptr;
126  }
127
128  // Note that the offsets here overlap, but this is fixed up in
129  // processFunctionBeforeFrameFinalized.
130
131  static const SpillSlot Offsets[] = {
132      // Floating-point register save area offsets.
133      {PPC::F31, -8},
134      {PPC::F30, -16},
135      {PPC::F29, -24},
136      {PPC::F28, -32},
137      {PPC::F27, -40},
138      {PPC::F26, -48},
139      {PPC::F25, -56},
140      {PPC::F24, -64},
141      {PPC::F23, -72},
142      {PPC::F22, -80},
143      {PPC::F21, -88},
144      {PPC::F20, -96},
145      {PPC::F19, -104},
146      {PPC::F18, -112},
147      {PPC::F17, -120},
148      {PPC::F16, -128},
149      {PPC::F15, -136},
150      {PPC::F14, -144},
151
152      // General register save area offsets.
153      {PPC::R31, -4},
154      {PPC::R30, -8},
155      {PPC::R29, -12},
156      {PPC::R28, -16},
157      {PPC::R27, -20},
158      {PPC::R26, -24},
159      {PPC::R25, -28},
160      {PPC::R24, -32},
161      {PPC::R23, -36},
162      {PPC::R22, -40},
163      {PPC::R21, -44},
164      {PPC::R20, -48},
165      {PPC::R19, -52},
166      {PPC::R18, -56},
167      {PPC::R17, -60},
168      {PPC::R16, -64},
169      {PPC::R15, -68},
170      {PPC::R14, -72},
171
172      // CR save area offset.  We map each of the nonvolatile CR fields
173      // to the slot for CR2, which is the first of the nonvolatile CR
174      // fields to be assigned, so that we only allocate one save slot.
175      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
176      {PPC::CR2, -4},
177
178      // VRSAVE save area offset.
179      {PPC::VRSAVE, -4},
180
181      // Vector register save area
182      {PPC::V31, -16},
183      {PPC::V30, -32},
184      {PPC::V29, -48},
185      {PPC::V28, -64},
186      {PPC::V27, -80},
187      {PPC::V26, -96},
188      {PPC::V25, -112},
189      {PPC::V24, -128},
190      {PPC::V23, -144},
191      {PPC::V22, -160},
192      {PPC::V21, -176},
193      {PPC::V20, -192},
194
195      // SPE register save area (overlaps Vector save area).
196      {PPC::S31, -8},
197      {PPC::S30, -16},
198      {PPC::S29, -24},
199      {PPC::S28, -32},
200      {PPC::S27, -40},
201      {PPC::S26, -48},
202      {PPC::S25, -56},
203      {PPC::S24, -64},
204      {PPC::S23, -72},
205      {PPC::S22, -80},
206      {PPC::S21, -88},
207      {PPC::S20, -96},
208      {PPC::S19, -104},
209      {PPC::S18, -112},
210      {PPC::S17, -120},
211      {PPC::S16, -128},
212      {PPC::S15, -136},
213      {PPC::S14, -144}};
214
215  static const SpillSlot Offsets64[] = {
216      // Floating-point register save area offsets.
217      {PPC::F31, -8},
218      {PPC::F30, -16},
219      {PPC::F29, -24},
220      {PPC::F28, -32},
221      {PPC::F27, -40},
222      {PPC::F26, -48},
223      {PPC::F25, -56},
224      {PPC::F24, -64},
225      {PPC::F23, -72},
226      {PPC::F22, -80},
227      {PPC::F21, -88},
228      {PPC::F20, -96},
229      {PPC::F19, -104},
230      {PPC::F18, -112},
231      {PPC::F17, -120},
232      {PPC::F16, -128},
233      {PPC::F15, -136},
234      {PPC::F14, -144},
235
236      // General register save area offsets.
237      {PPC::X31, -8},
238      {PPC::X30, -16},
239      {PPC::X29, -24},
240      {PPC::X28, -32},
241      {PPC::X27, -40},
242      {PPC::X26, -48},
243      {PPC::X25, -56},
244      {PPC::X24, -64},
245      {PPC::X23, -72},
246      {PPC::X22, -80},
247      {PPC::X21, -88},
248      {PPC::X20, -96},
249      {PPC::X19, -104},
250      {PPC::X18, -112},
251      {PPC::X17, -120},
252      {PPC::X16, -128},
253      {PPC::X15, -136},
254      {PPC::X14, -144},
255
256      // VRSAVE save area offset.
257      {PPC::VRSAVE, -4},
258
259      // Vector register save area
260      {PPC::V31, -16},
261      {PPC::V30, -32},
262      {PPC::V29, -48},
263      {PPC::V28, -64},
264      {PPC::V27, -80},
265      {PPC::V26, -96},
266      {PPC::V25, -112},
267      {PPC::V24, -128},
268      {PPC::V23, -144},
269      {PPC::V22, -160},
270      {PPC::V21, -176},
271      {PPC::V20, -192}};
272
273  if (Subtarget.isPPC64()) {
274    NumEntries = array_lengthof(Offsets64);
275
276    return Offsets64;
277  } else {
278    NumEntries = array_lengthof(Offsets);
279
280    return Offsets;
281  }
282}
283
284/// RemoveVRSaveCode - We have found that this function does not need any code
285/// to manipulate the VRSAVE register, even though it uses vector registers.
286/// This can happen when the only registers used are known to be live in or out
287/// of the function.  Remove all of the VRSAVE related code from the function.
288/// FIXME: The removal of the code results in a compile failure at -O0 when the
289/// function contains a function call, as the GPR containing original VRSAVE
290/// contents is spilled and reloaded around the call.  Without the prolog code,
291/// the spill instruction refers to an undefined register.  This code needs
292/// to account for all uses of that GPR.
293static void RemoveVRSaveCode(MachineInstr &MI) {
294  MachineBasicBlock *Entry = MI.getParent();
295  MachineFunction *MF = Entry->getParent();
296
297  // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
298  MachineBasicBlock::iterator MBBI = MI;
299  ++MBBI;
300  assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
301  MBBI->eraseFromParent();
302
303  bool RemovedAllMTVRSAVEs = true;
304  // See if we can find and remove the MTVRSAVE instruction from all of the
305  // epilog blocks.
306  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
307    // If last instruction is a return instruction, add an epilogue
308    if (I->isReturnBlock()) {
309      bool FoundIt = false;
310      for (MBBI = I->end(); MBBI != I->begin(); ) {
311        --MBBI;
312        if (MBBI->getOpcode() == PPC::MTVRSAVE) {
313          MBBI->eraseFromParent();  // remove it.
314          FoundIt = true;
315          break;
316        }
317      }
318      RemovedAllMTVRSAVEs &= FoundIt;
319    }
320  }
321
322  // If we found and removed all MTVRSAVE instructions, remove the read of
323  // VRSAVE as well.
324  if (RemovedAllMTVRSAVEs) {
325    MBBI = MI;
326    assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
327    --MBBI;
328    assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
329    MBBI->eraseFromParent();
330  }
331
332  // Finally, nuke the UPDATE_VRSAVE.
333  MI.eraseFromParent();
334}
335
336// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
337// instruction selector.  Based on the vector registers that have been used,
338// transform this into the appropriate ORI instruction.
339static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
340  MachineFunction *MF = MI.getParent()->getParent();
341  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
342  DebugLoc dl = MI.getDebugLoc();
343
344  const MachineRegisterInfo &MRI = MF->getRegInfo();
345  unsigned UsedRegMask = 0;
346  for (unsigned i = 0; i != 32; ++i)
347    if (MRI.isPhysRegModified(VRRegNo[i]))
348      UsedRegMask |= 1 << (31-i);
349
350  // Live in and live out values already must be in the mask, so don't bother
351  // marking them.
352  for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
353    unsigned RegNo = TRI->getEncodingValue(LI.first);
354    if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
355      UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
356  }
357
358  // Live out registers appear as use operands on return instructions.
359  for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
360       UsedRegMask != 0 && BI != BE; ++BI) {
361    const MachineBasicBlock &MBB = *BI;
362    if (!MBB.isReturnBlock())
363      continue;
364    const MachineInstr &Ret = MBB.back();
365    for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
366      const MachineOperand &MO = Ret.getOperand(I);
367      if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
368        continue;
369      unsigned RegNo = TRI->getEncodingValue(MO.getReg());
370      UsedRegMask &= ~(1 << (31-RegNo));
371    }
372  }
373
374  // If no registers are used, turn this into a copy.
375  if (UsedRegMask == 0) {
376    // Remove all VRSAVE code.
377    RemoveVRSaveCode(MI);
378    return;
379  }
380
381  Register SrcReg = MI.getOperand(1).getReg();
382  Register DstReg = MI.getOperand(0).getReg();
383
384  if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
385    if (DstReg != SrcReg)
386      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
387          .addReg(SrcReg)
388          .addImm(UsedRegMask);
389    else
390      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
391          .addReg(SrcReg, RegState::Kill)
392          .addImm(UsedRegMask);
393  } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
394    if (DstReg != SrcReg)
395      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
396          .addReg(SrcReg)
397          .addImm(UsedRegMask >> 16);
398    else
399      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
400          .addReg(SrcReg, RegState::Kill)
401          .addImm(UsedRegMask >> 16);
402  } else {
403    if (DstReg != SrcReg)
404      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
405          .addReg(SrcReg)
406          .addImm(UsedRegMask >> 16);
407    else
408      BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
409          .addReg(SrcReg, RegState::Kill)
410          .addImm(UsedRegMask >> 16);
411
412    BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
413        .addReg(DstReg, RegState::Kill)
414        .addImm(UsedRegMask & 0xFFFF);
415  }
416
417  // Remove the old UPDATE_VRSAVE instruction.
418  MI.eraseFromParent();
419}
420
421static bool spillsCR(const MachineFunction &MF) {
422  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
423  return FuncInfo->isCRSpilled();
424}
425
426static bool spillsVRSAVE(const MachineFunction &MF) {
427  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
428  return FuncInfo->isVRSAVESpilled();
429}
430
431static bool hasSpills(const MachineFunction &MF) {
432  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
433  return FuncInfo->hasSpills();
434}
435
436static bool hasNonRISpills(const MachineFunction &MF) {
437  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
438  return FuncInfo->hasNonRISpills();
439}
440
441/// MustSaveLR - Return true if this function requires that we save the LR
442/// register onto the stack in the prolog and restore it in the epilog of the
443/// function.
444static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
445  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
446
447  // We need a save/restore of LR if there is any def of LR (which is
448  // defined by calls, including the PIC setup sequence), or if there is
449  // some use of the LR stack slot (e.g. for builtin_return_address).
450  // (LR comes in 32 and 64 bit versions.)
451  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
452  return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
453}
454
455/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
456/// call frame size. Update the MachineFunction object with the stack size.
457unsigned
458PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
459                                                bool UseEstimate) const {
460  unsigned NewMaxCallFrameSize = 0;
461  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
462                                            &NewMaxCallFrameSize);
463  MF.getFrameInfo().setStackSize(FrameSize);
464  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
465  return FrameSize;
466}
467
468/// determineFrameLayout - Determine the size of the frame and maximum call
469/// frame size.
470unsigned
471PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
472                                       bool UseEstimate,
473                                       unsigned *NewMaxCallFrameSize) const {
474  const MachineFrameInfo &MFI = MF.getFrameInfo();
475  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
476
477  // Get the number of bytes to allocate from the FrameInfo
478  unsigned FrameSize =
479    UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
480
481  // Get stack alignments. The frame must be aligned to the greatest of these:
482  unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
483  unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
484  unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
485
486  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
487
488  unsigned LR = RegInfo->getRARegister();
489  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
490  bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
491                       !MFI.adjustsStack() &&       // No calls.
492                       !MustSaveLR(MF, LR) &&       // No need to save LR.
493                       !FI->mustSaveTOC() &&        // No need to save TOC.
494                       !RegInfo->hasBasePointer(MF); // No special alignment.
495
496  // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
497  // code if all local vars are reg-allocated.
498  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
499
500  // Check whether we can skip adjusting the stack pointer (by using red zone)
501  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
502    // No need for frame
503    return 0;
504  }
505
506  // Get the maximum call frame size of all the calls.
507  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
508
509  // Maximum call frame needs to be at least big enough for linkage area.
510  unsigned minCallFrameSize = getLinkageSize();
511  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
512
513  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
514  // that allocations will be aligned.
515  if (MFI.hasVarSizedObjects())
516    maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
517
518  // Update the new max call frame size if the caller passes in a valid pointer.
519  if (NewMaxCallFrameSize)
520    *NewMaxCallFrameSize = maxCallFrameSize;
521
522  // Include call frame size in total.
523  FrameSize += maxCallFrameSize;
524
525  // Make sure the frame is aligned.
526  FrameSize = (FrameSize + AlignMask) & ~AlignMask;
527
528  return FrameSize;
529}
530
531// hasFP - Return true if the specified function actually has a dedicated frame
532// pointer register.
533bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
534  const MachineFrameInfo &MFI = MF.getFrameInfo();
535  // FIXME: This is pretty much broken by design: hasFP() might be called really
536  // early, before the stack layout was calculated and thus hasFP() might return
537  // true or false here depending on the time of call.
538  return (MFI.getStackSize()) && needsFP(MF);
539}
540
541// needsFP - Return true if the specified function should have a dedicated frame
542// pointer register.  This is true if the function has variable sized allocas or
543// if frame pointer elimination is disabled.
544bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
545  const MachineFrameInfo &MFI = MF.getFrameInfo();
546
547  // Naked functions have no stack frame pushed, so we don't have a frame
548  // pointer.
549  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
550    return false;
551
552  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
553    MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
554    (MF.getTarget().Options.GuaranteedTailCallOpt &&
555     MF.getInfo<PPCFunctionInfo>()->hasFastCall());
556}
557
558void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
559  bool is31 = needsFP(MF);
560  unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
561  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
562
563  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
564  bool HasBP = RegInfo->hasBasePointer(MF);
565  unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
566  unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
567
568  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
569       BI != BE; ++BI)
570    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
571      --MBBI;
572      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
573        MachineOperand &MO = MBBI->getOperand(I);
574        if (!MO.isReg())
575          continue;
576
577        switch (MO.getReg()) {
578        case PPC::FP:
579          MO.setReg(FPReg);
580          break;
581        case PPC::FP8:
582          MO.setReg(FP8Reg);
583          break;
584        case PPC::BP:
585          MO.setReg(BPReg);
586          break;
587        case PPC::BP8:
588          MO.setReg(BP8Reg);
589          break;
590
591        }
592      }
593    }
594}
595
596/*  This function will do the following:
597    - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
598      respectively (defaults recommended by the ABI) and return true
599    - If MBB is not an entry block, initialize the register scavenger and look
600      for available registers.
601    - If the defaults (R0/R12) are available, return true
602    - If TwoUniqueRegsRequired is set to true, it looks for two unique
603      registers. Otherwise, look for a single available register.
604      - If the required registers are found, set SR1 and SR2 and return true.
605      - If the required registers are not found, set SR2 or both SR1 and SR2 to
606        PPC::NoRegister and return false.
607
608    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
609    is not set, this function will attempt to find two different registers, but
610    still return true if only one register is available (and set SR1 == SR2).
611*/
612bool
613PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
614                                      bool UseAtEnd,
615                                      bool TwoUniqueRegsRequired,
616                                      unsigned *SR1,
617                                      unsigned *SR2) const {
618  RegScavenger RS;
619  unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
620  unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
621
622  // Set the defaults for the two scratch registers.
623  if (SR1)
624    *SR1 = R0;
625
626  if (SR2) {
627    assert (SR1 && "Asking for the second scratch register but not the first?");
628    *SR2 = R12;
629  }
630
631  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
632  if ((UseAtEnd && MBB->isReturnBlock()) ||
633      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
634    return true;
635
636  RS.enterBasicBlock(*MBB);
637
638  if (UseAtEnd && !MBB->empty()) {
639    // The scratch register will be used at the end of the block, so must
640    // consider all registers used within the block
641
642    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
643    // If no terminator, back iterator up to previous instruction.
644    if (MBBI == MBB->end())
645      MBBI = std::prev(MBBI);
646
647    if (MBBI != MBB->begin())
648      RS.forward(MBBI);
649  }
650
651  // If the two registers are available, we're all good.
652  // Note that we only return here if both R0 and R12 are available because
653  // although the function may not require two unique registers, it may benefit
654  // from having two so we should try to provide them.
655  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
656    return true;
657
658  // Get the list of callee-saved registers for the target.
659  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
660  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
661
662  // Get all the available registers in the block.
663  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
664                                     &PPC::GPRCRegClass);
665
666  // We shouldn't use callee-saved registers as scratch registers as they may be
667  // available when looking for a candidate block for shrink wrapping but not
668  // available when the actual prologue/epilogue is being emitted because they
669  // were added as live-in to the prologue block by PrologueEpilogueInserter.
670  for (int i = 0; CSRegs[i]; ++i)
671    BV.reset(CSRegs[i]);
672
673  // Set the first scratch register to the first available one.
674  if (SR1) {
675    int FirstScratchReg = BV.find_first();
676    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
677  }
678
679  // If there is another one available, set the second scratch register to that.
680  // Otherwise, set it to either PPC::NoRegister if this function requires two
681  // or to whatever SR1 is set to if this function doesn't require two.
682  if (SR2) {
683    int SecondScratchReg = BV.find_next(*SR1);
684    if (SecondScratchReg != -1)
685      *SR2 = SecondScratchReg;
686    else
687      *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
688  }
689
690  // Now that we've done our best to provide both registers, double check
691  // whether we were unable to provide enough.
692  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
693    return false;
694
695  return true;
696}
697
698// We need a scratch register for spilling LR and for spilling CR. By default,
699// we use two scratch registers to hide latency. However, if only one scratch
700// register is available, we can adjust for that by not overlapping the spill
701// code. However, if we need to realign the stack (i.e. have a base pointer)
702// and the stack frame is large, we need two scratch registers.
703bool
704PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
705  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
706  MachineFunction &MF = *(MBB->getParent());
707  bool HasBP = RegInfo->hasBasePointer(MF);
708  unsigned FrameSize = determineFrameLayout(MF);
709  int NegFrameSize = -FrameSize;
710  bool IsLargeFrame = !isInt<16>(NegFrameSize);
711  MachineFrameInfo &MFI = MF.getFrameInfo();
712  unsigned MaxAlign = MFI.getMaxAlignment();
713  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
714
715  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
716}
717
718bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
719  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
720
721  return findScratchRegister(TmpMBB, false,
722                             twoUniqueScratchRegsRequired(TmpMBB));
723}
724
725bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
726  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
727
728  return findScratchRegister(TmpMBB, true);
729}
730
731bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
732  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
733  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
734
735  // Abort if there is no register info or function info.
736  if (!RegInfo || !FI)
737    return false;
738
739  // Only move the stack update on ELFv2 ABI and PPC64.
740  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
741    return false;
742
743  // Check the frame size first and return false if it does not fit the
744  // requirements.
745  // We need a non-zero frame size as well as a frame that will fit in the red
746  // zone. This is because by moving the stack pointer update we are now storing
747  // to the red zone until the stack pointer is updated. If we get an interrupt
748  // inside the prologue but before the stack update we now have a number of
749  // stores to the red zone and those stores must all fit.
750  MachineFrameInfo &MFI = MF.getFrameInfo();
751  unsigned FrameSize = MFI.getStackSize();
752  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
753    return false;
754
755  // Frame pointers and base pointers complicate matters so don't do anything
756  // if we have them. For example having a frame pointer will sometimes require
757  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
758  // difficult.
759  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
760    return false;
761
762  // Calls to fast_cc functions use different rules for passing parameters on
763  // the stack from the ABI and using PIC base in the function imposes
764  // similar restrictions to using the base pointer. It is not generally safe
765  // to move the stack pointer update in these situations.
766  if (FI->hasFastCall() || FI->usesPICBase())
767    return false;
768
769  // Finally we can move the stack update if we do not require register
770  // scavenging. Register scavenging can introduce more spills and so
771  // may make the frame size larger than we have computed.
772  return !RegInfo->requiresFrameIndexScavenging(MF);
773}
774
775void PPCFrameLowering::emitPrologue(MachineFunction &MF,
776                                    MachineBasicBlock &MBB) const {
777  MachineBasicBlock::iterator MBBI = MBB.begin();
778  MachineFrameInfo &MFI = MF.getFrameInfo();
779  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
780  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
781
782  MachineModuleInfo &MMI = MF.getMMI();
783  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
784  DebugLoc dl;
785  bool needsCFI = MF.needsFrameMoves();
786
787  // Get processor type.
788  bool isPPC64 = Subtarget.isPPC64();
789  // Get the ABI.
790  bool isSVR4ABI = Subtarget.isSVR4ABI();
791  bool isAIXABI = Subtarget.isAIXABI();
792  bool isELFv2ABI = Subtarget.isELFv2ABI();
793  assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
794         "Unsupported PPC ABI.");
795
796  // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
797  // process it.
798  if (!isSVR4ABI)
799    for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
800      if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
801        if (isAIXABI)
802          report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
803        HandleVRSaveUpdate(*MBBI, TII);
804        break;
805      }
806    }
807
808  // Move MBBI back to the beginning of the prologue block.
809  MBBI = MBB.begin();
810
811  // Work out frame sizes.
812  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
813  int NegFrameSize = -FrameSize;
814  if (!isInt<32>(NegFrameSize))
815    llvm_unreachable("Unhandled stack size!");
816
817  if (MFI.isFrameAddressTaken())
818    replaceFPWithRealFP(MF);
819
820  // Check if the link register (LR) must be saved.
821  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
822  bool MustSaveLR = FI->mustSaveLR();
823  bool MustSaveTOC = FI->mustSaveTOC();
824  const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
825  bool MustSaveCR = !MustSaveCRs.empty();
826  // Do we have a frame pointer and/or base pointer for this function?
827  bool HasFP = hasFP(MF);
828  bool HasBP = RegInfo->hasBasePointer(MF);
829  bool HasRedZone = isPPC64 || !isSVR4ABI;
830
831  unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
832  Register BPReg = RegInfo->getBaseRegister(MF);
833  unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
834  unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
835  unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
836  unsigned ScratchReg  = 0;
837  unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
838  //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
839  const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
840                                                : PPC::MFLR );
841  const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
842                                                 : PPC::STW );
843  const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
844                                                     : PPC::STWU );
845  const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
846                                                        : PPC::STWUX);
847  const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
848                                                          : PPC::LIS );
849  const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
850                                                 : PPC::ORI );
851  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
852                                              : PPC::OR );
853  const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
854                                                            : PPC::SUBFC);
855  const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
856                                                               : PPC::SUBFIC);
857
858  // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
859  // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
860  // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
861  // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
862  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
863         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
864
865  // Using the same bool variable as below to suppress compiler warnings.
866  bool SingleScratchReg =
867    findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
868                        &ScratchReg, &TempReg);
869  assert(SingleScratchReg &&
870         "Required number of registers not available in this block");
871
872  SingleScratchReg = ScratchReg == TempReg;
873
874  int LROffset = getReturnSaveOffset();
875
876  int FPOffset = 0;
877  if (HasFP) {
878    if (isSVR4ABI) {
879      MachineFrameInfo &MFI = MF.getFrameInfo();
880      int FPIndex = FI->getFramePointerSaveIndex();
881      assert(FPIndex && "No Frame Pointer Save Slot!");
882      FPOffset = MFI.getObjectOffset(FPIndex);
883    } else {
884      FPOffset = getFramePointerSaveOffset();
885    }
886  }
887
888  int BPOffset = 0;
889  if (HasBP) {
890    if (isSVR4ABI) {
891      MachineFrameInfo &MFI = MF.getFrameInfo();
892      int BPIndex = FI->getBasePointerSaveIndex();
893      assert(BPIndex && "No Base Pointer Save Slot!");
894      BPOffset = MFI.getObjectOffset(BPIndex);
895    } else {
896      BPOffset = getBasePointerSaveOffset();
897    }
898  }
899
900  int PBPOffset = 0;
901  if (FI->usesPICBase()) {
902    MachineFrameInfo &MFI = MF.getFrameInfo();
903    int PBPIndex = FI->getPICBasePointerSaveIndex();
904    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
905    PBPOffset = MFI.getObjectOffset(PBPIndex);
906  }
907
908  // Get stack alignments.
909  unsigned MaxAlign = MFI.getMaxAlignment();
910  if (HasBP && MaxAlign > 1)
911    assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
912           "Invalid alignment!");
913
914  // Frames of 32KB & larger require special handling because they cannot be
915  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
916  bool isLargeFrame = !isInt<16>(NegFrameSize);
917
918  assert((isPPC64 || !MustSaveCR) &&
919         "Prologue CR saving supported only in 64-bit mode");
920
921  if (MustSaveCR && isAIXABI)
922    report_fatal_error("Prologue CR saving is unimplemented on AIX.");
923
924  // Check if we can move the stack update instruction (stdu) down the prologue
925  // past the callee saves. Hopefully this will avoid the situation where the
926  // saves are waiting for the update on the store with update to complete.
927  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
928  bool MovingStackUpdateDown = false;
929
930  // Check if we can move the stack update.
931  if (stackUpdateCanBeMoved(MF)) {
932    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
933    for (CalleeSavedInfo CSI : Info) {
934      int FrIdx = CSI.getFrameIdx();
935      // If the frame index is not negative the callee saved info belongs to a
936      // stack object that is not a fixed stack object. We ignore non-fixed
937      // stack objects because we won't move the stack update pointer past them.
938      if (FrIdx >= 0)
939        continue;
940
941      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
942        StackUpdateLoc++;
943        MovingStackUpdateDown = true;
944      } else {
945        // We need all of the Frame Indices to meet these conditions.
946        // If they do not, abort the whole operation.
947        StackUpdateLoc = MBBI;
948        MovingStackUpdateDown = false;
949        break;
950      }
951    }
952
953    // If the operation was not aborted then update the object offset.
954    if (MovingStackUpdateDown) {
955      for (CalleeSavedInfo CSI : Info) {
956        int FrIdx = CSI.getFrameIdx();
957        if (FrIdx < 0)
958          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
959      }
960    }
961  }
962
963  // If we need to spill the CR and the LR but we don't have two separate
964  // registers available, we must spill them one at a time
965  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
966    // In the ELFv2 ABI, we are not required to save all CR fields.
967    // If only one or two CR fields are clobbered, it is more efficient to use
968    // mfocrf to selectively save just those fields, because mfocrf has short
969    // latency compares to mfcr.
970    unsigned MfcrOpcode = PPC::MFCR8;
971    unsigned CrState = RegState::ImplicitKill;
972    if (isELFv2ABI && MustSaveCRs.size() == 1) {
973      MfcrOpcode = PPC::MFOCRF8;
974      CrState = RegState::Kill;
975    }
976    MachineInstrBuilder MIB =
977      BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
978    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
979      MIB.addReg(MustSaveCRs[i], CrState);
980    BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
981      .addReg(TempReg, getKillRegState(true))
982      .addImm(getCRSaveOffset())
983      .addReg(SPReg);
984  }
985
986  if (MustSaveLR)
987    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
988
989  if (MustSaveCR &&
990      !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
991    // In the ELFv2 ABI, we are not required to save all CR fields.
992    // If only one or two CR fields are clobbered, it is more efficient to use
993    // mfocrf to selectively save just those fields, because mfocrf has short
994    // latency compares to mfcr.
995    unsigned MfcrOpcode = PPC::MFCR8;
996    unsigned CrState = RegState::ImplicitKill;
997    if (isELFv2ABI && MustSaveCRs.size() == 1) {
998      MfcrOpcode = PPC::MFOCRF8;
999      CrState = RegState::Kill;
1000    }
1001    MachineInstrBuilder MIB =
1002      BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
1003    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1004      MIB.addReg(MustSaveCRs[i], CrState);
1005  }
1006
1007  if (HasRedZone) {
1008    if (HasFP)
1009      BuildMI(MBB, MBBI, dl, StoreInst)
1010        .addReg(FPReg)
1011        .addImm(FPOffset)
1012        .addReg(SPReg);
1013    if (FI->usesPICBase())
1014      BuildMI(MBB, MBBI, dl, StoreInst)
1015        .addReg(PPC::R30)
1016        .addImm(PBPOffset)
1017        .addReg(SPReg);
1018    if (HasBP)
1019      BuildMI(MBB, MBBI, dl, StoreInst)
1020        .addReg(BPReg)
1021        .addImm(BPOffset)
1022        .addReg(SPReg);
1023  }
1024
1025  if (MustSaveLR)
1026    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1027      .addReg(ScratchReg, getKillRegState(true))
1028      .addImm(LROffset)
1029      .addReg(SPReg);
1030
1031  if (MustSaveCR &&
1032      !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1033    assert(HasRedZone && "A red zone is always available on PPC64");
1034    BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1035      .addReg(TempReg, getKillRegState(true))
1036      .addImm(getCRSaveOffset())
1037      .addReg(SPReg);
1038  }
1039
1040  // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1041  if (!FrameSize)
1042    return;
1043
1044  // Adjust stack pointer: r1 += NegFrameSize.
1045  // If there is a preferred stack alignment, align R1 now
1046
1047  if (HasBP && HasRedZone) {
1048    // Save a copy of r1 as the base pointer.
1049    BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1050      .addReg(SPReg)
1051      .addReg(SPReg);
1052  }
1053
1054  // Have we generated a STUX instruction to claim stack frame? If so,
1055  // the negated frame size will be placed in ScratchReg.
1056  bool HasSTUX = false;
1057
1058  // This condition must be kept in sync with canUseAsPrologue.
1059  if (HasBP && MaxAlign > 1) {
1060    if (isPPC64)
1061      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1062        .addReg(SPReg)
1063        .addImm(0)
1064        .addImm(64 - Log2_32(MaxAlign));
1065    else // PPC32...
1066      BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1067        .addReg(SPReg)
1068        .addImm(0)
1069        .addImm(32 - Log2_32(MaxAlign))
1070        .addImm(31);
1071    if (!isLargeFrame) {
1072      BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1073        .addReg(ScratchReg, RegState::Kill)
1074        .addImm(NegFrameSize);
1075    } else {
1076      assert(!SingleScratchReg && "Only a single scratch reg available");
1077      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1078        .addImm(NegFrameSize >> 16);
1079      BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1080        .addReg(TempReg, RegState::Kill)
1081        .addImm(NegFrameSize & 0xFFFF);
1082      BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1083        .addReg(ScratchReg, RegState::Kill)
1084        .addReg(TempReg, RegState::Kill);
1085    }
1086
1087    BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1088      .addReg(SPReg, RegState::Kill)
1089      .addReg(SPReg)
1090      .addReg(ScratchReg);
1091    HasSTUX = true;
1092
1093  } else if (!isLargeFrame) {
1094    BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1095      .addReg(SPReg)
1096      .addImm(NegFrameSize)
1097      .addReg(SPReg);
1098
1099  } else {
1100    BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1101      .addImm(NegFrameSize >> 16);
1102    BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1103      .addReg(ScratchReg, RegState::Kill)
1104      .addImm(NegFrameSize & 0xFFFF);
1105    BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1106      .addReg(SPReg, RegState::Kill)
1107      .addReg(SPReg)
1108      .addReg(ScratchReg);
1109    HasSTUX = true;
1110  }
1111
1112  // Save the TOC register after the stack pointer update if a prologue TOC
1113  // save is required for the function.
1114  if (MustSaveTOC) {
1115    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1116    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1117      .addReg(TOCReg, getKillRegState(true))
1118      .addImm(TOCSaveOffset)
1119      .addReg(SPReg);
1120  }
1121
1122  if (!HasRedZone) {
1123    assert(!isPPC64 && "A red zone is always available on PPC64");
1124    if (HasSTUX) {
1125      // The negated frame size is in ScratchReg, and the SPReg has been
1126      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1127      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1128      // the stack frame (i.e. the old SP), ideally, we would put the old
1129      // SP into a register and use it as the base for the stores. The
1130      // problem is that the only available register may be ScratchReg,
1131      // which could be R0, and R0 cannot be used as a base address.
1132
1133      // First, set ScratchReg to the old SP. This may need to be modified
1134      // later.
1135      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1136        .addReg(ScratchReg, RegState::Kill)
1137        .addReg(SPReg);
1138
1139      if (ScratchReg == PPC::R0) {
1140        // R0 cannot be used as a base register, but it can be used as an
1141        // index in a store-indexed.
1142        int LastOffset = 0;
1143        if (HasFP)  {
1144          // R0 += (FPOffset-LastOffset).
1145          // Need addic, since addi treats R0 as 0.
1146          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1147            .addReg(ScratchReg)
1148            .addImm(FPOffset-LastOffset);
1149          LastOffset = FPOffset;
1150          // Store FP into *R0.
1151          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1152            .addReg(FPReg, RegState::Kill)  // Save FP.
1153            .addReg(PPC::ZERO)
1154            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1155        }
1156        if (FI->usesPICBase()) {
1157          // R0 += (PBPOffset-LastOffset).
1158          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1159            .addReg(ScratchReg)
1160            .addImm(PBPOffset-LastOffset);
1161          LastOffset = PBPOffset;
1162          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1163            .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1164            .addReg(PPC::ZERO)
1165            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1166        }
1167        if (HasBP) {
1168          // R0 += (BPOffset-LastOffset).
1169          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1170            .addReg(ScratchReg)
1171            .addImm(BPOffset-LastOffset);
1172          LastOffset = BPOffset;
1173          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1174            .addReg(BPReg, RegState::Kill)  // Save BP.
1175            .addReg(PPC::ZERO)
1176            .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1177          // BP = R0-LastOffset
1178          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1179            .addReg(ScratchReg, RegState::Kill)
1180            .addImm(-LastOffset);
1181        }
1182      } else {
1183        // ScratchReg is not R0, so use it as the base register. It is
1184        // already set to the old SP, so we can use the offsets directly.
1185
1186        // Now that the stack frame has been allocated, save all the necessary
1187        // registers using ScratchReg as the base address.
1188        if (HasFP)
1189          BuildMI(MBB, MBBI, dl, StoreInst)
1190            .addReg(FPReg)
1191            .addImm(FPOffset)
1192            .addReg(ScratchReg);
1193        if (FI->usesPICBase())
1194          BuildMI(MBB, MBBI, dl, StoreInst)
1195            .addReg(PPC::R30)
1196            .addImm(PBPOffset)
1197            .addReg(ScratchReg);
1198        if (HasBP) {
1199          BuildMI(MBB, MBBI, dl, StoreInst)
1200            .addReg(BPReg)
1201            .addImm(BPOffset)
1202            .addReg(ScratchReg);
1203          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1204            .addReg(ScratchReg, RegState::Kill)
1205            .addReg(ScratchReg);
1206        }
1207      }
1208    } else {
1209      // The frame size is a known 16-bit constant (fitting in the immediate
1210      // field of STWU). To be here we have to be compiling for PPC32.
1211      // Since the SPReg has been decreased by FrameSize, add it back to each
1212      // offset.
1213      if (HasFP)
1214        BuildMI(MBB, MBBI, dl, StoreInst)
1215          .addReg(FPReg)
1216          .addImm(FrameSize + FPOffset)
1217          .addReg(SPReg);
1218      if (FI->usesPICBase())
1219        BuildMI(MBB, MBBI, dl, StoreInst)
1220          .addReg(PPC::R30)
1221          .addImm(FrameSize + PBPOffset)
1222          .addReg(SPReg);
1223      if (HasBP) {
1224        BuildMI(MBB, MBBI, dl, StoreInst)
1225          .addReg(BPReg)
1226          .addImm(FrameSize + BPOffset)
1227          .addReg(SPReg);
1228        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1229          .addReg(SPReg)
1230          .addImm(FrameSize);
1231      }
1232    }
1233  }
1234
1235  // Add Call Frame Information for the instructions we generated above.
1236  if (needsCFI) {
1237    unsigned CFIIndex;
1238
1239    if (HasBP) {
1240      // Define CFA in terms of BP. Do this in preference to using FP/SP,
1241      // because if the stack needed aligning then CFA won't be at a fixed
1242      // offset from FP/SP.
1243      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1244      CFIIndex = MF.addFrameInst(
1245          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1246    } else {
1247      // Adjust the definition of CFA to account for the change in SP.
1248      assert(NegFrameSize);
1249      CFIIndex = MF.addFrameInst(
1250          MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1251    }
1252    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1253        .addCFIIndex(CFIIndex);
1254
1255    if (HasFP) {
1256      // Describe where FP was saved, at a fixed offset from CFA.
1257      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1258      CFIIndex = MF.addFrameInst(
1259          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1260      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1261          .addCFIIndex(CFIIndex);
1262    }
1263
1264    if (FI->usesPICBase()) {
1265      // Describe where FP was saved, at a fixed offset from CFA.
1266      unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1267      CFIIndex = MF.addFrameInst(
1268          MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1269      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1270          .addCFIIndex(CFIIndex);
1271    }
1272
1273    if (HasBP) {
1274      // Describe where BP was saved, at a fixed offset from CFA.
1275      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1276      CFIIndex = MF.addFrameInst(
1277          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1278      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1279          .addCFIIndex(CFIIndex);
1280    }
1281
1282    if (MustSaveLR) {
1283      // Describe where LR was saved, at a fixed offset from CFA.
1284      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1285      CFIIndex = MF.addFrameInst(
1286          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1287      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1288          .addCFIIndex(CFIIndex);
1289    }
1290  }
1291
1292  // If there is a frame pointer, copy R1 into R31
1293  if (HasFP) {
1294    BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1295      .addReg(SPReg)
1296      .addReg(SPReg);
1297
1298    if (!HasBP && needsCFI) {
1299      // Change the definition of CFA from SP+offset to FP+offset, because SP
1300      // will change at every alloca.
1301      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1302      unsigned CFIIndex = MF.addFrameInst(
1303          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1304
1305      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1306          .addCFIIndex(CFIIndex);
1307    }
1308  }
1309
1310  if (needsCFI) {
1311    // Describe where callee saved registers were saved, at fixed offsets from
1312    // CFA.
1313    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1314    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1315      unsigned Reg = CSI[I].getReg();
1316      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1317
1318      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1319      // subregisters of CR2. We just need to emit a move of CR2.
1320      if (PPC::CRBITRCRegClass.contains(Reg))
1321        continue;
1322
1323      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1324        continue;
1325
1326      // For SVR4, don't emit a move for the CR spill slot if we haven't
1327      // spilled CRs.
1328      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1329          && !MustSaveCR)
1330        continue;
1331
1332      // For 64-bit SVR4 when we have spilled CRs, the spill location
1333      // is SP+8, not a frame-relative slot.
1334      if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1335        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1336        // the whole CR word.  In the ELFv2 ABI, every CR that was
1337        // actually saved gets its own CFI record.
1338        unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1339        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1340            nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
1341        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1342            .addCFIIndex(CFIIndex);
1343        continue;
1344      }
1345
1346      if (CSI[I].isSpilledToReg()) {
1347        unsigned SpilledReg = CSI[I].getDstReg();
1348        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1349            nullptr, MRI->getDwarfRegNum(Reg, true),
1350            MRI->getDwarfRegNum(SpilledReg, true)));
1351        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1352          .addCFIIndex(CFIRegister);
1353      } else {
1354        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1355        // We have changed the object offset above but we do not want to change
1356        // the actual offsets in the CFI instruction so we have to undo the
1357        // offset change here.
1358        if (MovingStackUpdateDown)
1359          Offset -= NegFrameSize;
1360
1361        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1362            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1363        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1364            .addCFIIndex(CFIIndex);
1365      }
1366    }
1367  }
1368}
1369
1370void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1371                                    MachineBasicBlock &MBB) const {
1372  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1373  DebugLoc dl;
1374
1375  if (MBBI != MBB.end())
1376    dl = MBBI->getDebugLoc();
1377
1378  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1379  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1380
1381  // Get alignment info so we know how to restore the SP.
1382  const MachineFrameInfo &MFI = MF.getFrameInfo();
1383
1384  // Get the number of bytes allocated from the FrameInfo.
1385  int FrameSize = MFI.getStackSize();
1386
1387  // Get processor type.
1388  bool isPPC64 = Subtarget.isPPC64();
1389  // Get the ABI.
1390  bool isSVR4ABI = Subtarget.isSVR4ABI();
1391
1392  // Check if the link register (LR) has been saved.
1393  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1394  bool MustSaveLR = FI->mustSaveLR();
1395  const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1396  bool MustSaveCR = !MustSaveCRs.empty();
1397  // Do we have a frame pointer and/or base pointer for this function?
1398  bool HasFP = hasFP(MF);
1399  bool HasBP = RegInfo->hasBasePointer(MF);
1400  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1401
1402  unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1403  Register BPReg = RegInfo->getBaseRegister(MF);
1404  unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1405  unsigned ScratchReg = 0;
1406  unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1407  const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1408                                                 : PPC::MTLR );
1409  const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1410                                                 : PPC::LWZ );
1411  const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1412                                                           : PPC::LIS );
1413  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1414                                              : PPC::OR );
1415  const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1416                                                  : PPC::ORI );
1417  const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1418                                                   : PPC::ADDI );
1419  const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1420                                                : PPC::ADD4 );
1421
1422  int LROffset = getReturnSaveOffset();
1423
1424  int FPOffset = 0;
1425
1426  // Using the same bool variable as below to suppress compiler warnings.
1427  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1428                                              &TempReg);
1429  assert(SingleScratchReg &&
1430         "Could not find an available scratch register");
1431
1432  SingleScratchReg = ScratchReg == TempReg;
1433
1434  if (HasFP) {
1435    if (isSVR4ABI) {
1436      int FPIndex = FI->getFramePointerSaveIndex();
1437      assert(FPIndex && "No Frame Pointer Save Slot!");
1438      FPOffset = MFI.getObjectOffset(FPIndex);
1439    } else {
1440      FPOffset = getFramePointerSaveOffset();
1441    }
1442  }
1443
1444  int BPOffset = 0;
1445  if (HasBP) {
1446    if (isSVR4ABI) {
1447      int BPIndex = FI->getBasePointerSaveIndex();
1448      assert(BPIndex && "No Base Pointer Save Slot!");
1449      BPOffset = MFI.getObjectOffset(BPIndex);
1450    } else {
1451      BPOffset = getBasePointerSaveOffset();
1452    }
1453  }
1454
1455  int PBPOffset = 0;
1456  if (FI->usesPICBase()) {
1457    int PBPIndex = FI->getPICBasePointerSaveIndex();
1458    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1459    PBPOffset = MFI.getObjectOffset(PBPIndex);
1460  }
1461
1462  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1463
1464  if (IsReturnBlock) {
1465    unsigned RetOpcode = MBBI->getOpcode();
1466    bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1467                      RetOpcode == PPC::TCRETURNdi ||
1468                      RetOpcode == PPC::TCRETURNai ||
1469                      RetOpcode == PPC::TCRETURNri8 ||
1470                      RetOpcode == PPC::TCRETURNdi8 ||
1471                      RetOpcode == PPC::TCRETURNai8;
1472
1473    if (UsesTCRet) {
1474      int MaxTCRetDelta = FI->getTailCallSPDelta();
1475      MachineOperand &StackAdjust = MBBI->getOperand(1);
1476      assert(StackAdjust.isImm() && "Expecting immediate value.");
1477      // Adjust stack pointer.
1478      int StackAdj = StackAdjust.getImm();
1479      int Delta = StackAdj - MaxTCRetDelta;
1480      assert((Delta >= 0) && "Delta must be positive");
1481      if (MaxTCRetDelta>0)
1482        FrameSize += (StackAdj +Delta);
1483      else
1484        FrameSize += StackAdj;
1485    }
1486  }
1487
1488  // Frames of 32KB & larger require special handling because they cannot be
1489  // indexed into with a simple LD/LWZ immediate offset operand.
1490  bool isLargeFrame = !isInt<16>(FrameSize);
1491
1492  // On targets without red zone, the SP needs to be restored last, so that
1493  // all live contents of the stack frame are upwards of the SP. This means
1494  // that we cannot restore SP just now, since there may be more registers
1495  // to restore from the stack frame (e.g. R31). If the frame size is not
1496  // a simple immediate value, we will need a spare register to hold the
1497  // restored SP. If the frame size is known and small, we can simply adjust
1498  // the offsets of the registers to be restored, and still use SP to restore
1499  // them. In such case, the final update of SP will be to add the frame
1500  // size to it.
1501  // To simplify the code, set RBReg to the base register used to restore
1502  // values from the stack, and set SPAdd to the value that needs to be added
1503  // to the SP at the end. The default values are as if red zone was present.
1504  unsigned RBReg = SPReg;
1505  unsigned SPAdd = 0;
1506
1507  // Check if we can move the stack update instruction up the epilogue
1508  // past the callee saves. This will allow the move to LR instruction
1509  // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
1511  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1512  if (stackUpdateCanBeMoved(MF)) {
1513    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1514    for (CalleeSavedInfo CSI : Info) {
1515      int FrIdx = CSI.getFrameIdx();
1516      // If the frame index is not negative the callee saved info belongs to a
1517      // stack object that is not a fixed stack object. We ignore non-fixed
1518      // stack objects because we won't move the update of the stack pointer
1519      // past them.
1520      if (FrIdx >= 0)
1521        continue;
1522
1523      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1524        StackUpdateLoc--;
1525      else {
1526        // Abort the operation as we can't update all CSR restores.
1527        StackUpdateLoc = MBBI;
1528        break;
1529      }
1530    }
1531  }
1532
1533  if (FrameSize) {
1534    // In the prologue, the loaded (or persistent) stack pointer value is
1535    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1536    // zone add this offset back now.
1537
1538    // If this function contained a fastcc call and GuaranteedTailCallOpt is
1539    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1540    // call which invalidates the stack pointer value in SP(0). So we use the
1541    // value of R31 in this case.
1542    if (FI->hasFastCall()) {
1543      assert(HasFP && "Expecting a valid frame pointer.");
1544      if (!HasRedZone)
1545        RBReg = FPReg;
1546      if (!isLargeFrame) {
1547        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1548          .addReg(FPReg).addImm(FrameSize);
1549      } else {
1550        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1551          .addImm(FrameSize >> 16);
1552        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1553          .addReg(ScratchReg, RegState::Kill)
1554          .addImm(FrameSize & 0xFFFF);
1555        BuildMI(MBB, MBBI, dl, AddInst)
1556          .addReg(RBReg)
1557          .addReg(FPReg)
1558          .addReg(ScratchReg);
1559      }
1560    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1561      if (HasRedZone) {
1562        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1563          .addReg(SPReg)
1564          .addImm(FrameSize);
1565      } else {
1566        // Make sure that adding FrameSize will not overflow the max offset
1567        // size.
1568        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1569               "Local offsets should be negative");
1570        SPAdd = FrameSize;
1571        FPOffset += FrameSize;
1572        BPOffset += FrameSize;
1573        PBPOffset += FrameSize;
1574      }
1575    } else {
1576      // We don't want to use ScratchReg as a base register, because it
1577      // could happen to be R0. Use FP instead, but make sure to preserve it.
1578      if (!HasRedZone) {
1579        // If FP is not saved, copy it to ScratchReg.
1580        if (!HasFP)
1581          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1582            .addReg(FPReg)
1583            .addReg(FPReg);
1584        RBReg = FPReg;
1585      }
1586      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1587        .addImm(0)
1588        .addReg(SPReg);
1589    }
1590  }
1591  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1592  // If there is no red zone, ScratchReg may be needed for holding a useful
1593  // value (although not the base register). Make sure it is not overwritten
1594  // too early.
1595
1596  assert((isPPC64 || !MustSaveCR) &&
1597         "Epilogue CR restoring supported only in 64-bit mode");
1598
1599  // If we need to restore both the LR and the CR and we only have one
1600  // available scratch register, we must do them one at a time.
1601  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1602    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1603    // is live here.
1604    assert(HasRedZone && "Expecting red zone");
1605    BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1606      .addImm(getCRSaveOffset())
1607      .addReg(SPReg);
1608    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1609      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1610        .addReg(TempReg, getKillRegState(i == e-1));
1611  }
1612
1613  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1614  // LR is stored in the caller's stack frame. ScratchReg will be needed
1615  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1616  // a base register anyway, because it may happen to be R0.
1617  bool LoadedLR = false;
1618  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1619    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1620      .addImm(LROffset+SPAdd)
1621      .addReg(RBReg);
1622    LoadedLR = true;
1623  }
1624
1625  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1626    // This will only occur for PPC64.
1627    assert(isPPC64 && "Expecting 64-bit mode");
1628    assert(RBReg == SPReg && "Should be using SP as a base register");
1629    BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1630      .addImm(getCRSaveOffset())
1631      .addReg(RBReg);
1632  }
1633
1634  if (HasFP) {
1635    // If there is red zone, restore FP directly, since SP has already been
1636    // restored. Otherwise, restore the value of FP into ScratchReg.
1637    if (HasRedZone || RBReg == SPReg)
1638      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1639        .addImm(FPOffset)
1640        .addReg(SPReg);
1641    else
1642      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1643        .addImm(FPOffset)
1644        .addReg(RBReg);
1645  }
1646
1647  if (FI->usesPICBase())
1648    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1649      .addImm(PBPOffset)
1650      .addReg(RBReg);
1651
1652  if (HasBP)
1653    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1654      .addImm(BPOffset)
1655      .addReg(RBReg);
1656
1657  // There is nothing more to be loaded from the stack, so now we can
1658  // restore SP: SP = RBReg + SPAdd.
1659  if (RBReg != SPReg || SPAdd != 0) {
1660    assert(!HasRedZone && "This should not happen with red zone");
1661    // If SPAdd is 0, generate a copy.
1662    if (SPAdd == 0)
1663      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1664        .addReg(RBReg)
1665        .addReg(RBReg);
1666    else
1667      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1668        .addReg(RBReg)
1669        .addImm(SPAdd);
1670
1671    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1672    if (RBReg == FPReg)
1673      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1674        .addReg(ScratchReg)
1675        .addReg(ScratchReg);
1676
1677    // Now load the LR from the caller's stack frame.
1678    if (MustSaveLR && !LoadedLR)
1679      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1680        .addImm(LROffset)
1681        .addReg(SPReg);
1682  }
1683
1684  if (MustSaveCR &&
1685      !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1686    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1687      BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1688        .addReg(TempReg, getKillRegState(i == e-1));
1689
1690  if (MustSaveLR)
1691    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1692
1693  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1694  // call optimization
1695  if (IsReturnBlock) {
1696    unsigned RetOpcode = MBBI->getOpcode();
1697    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1698        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1699        MF.getFunction().getCallingConv() == CallingConv::Fast) {
1700      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1701      unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1702
1703      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1704        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1705          .addReg(SPReg).addImm(CallerAllocatedAmt);
1706      } else {
1707        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1708          .addImm(CallerAllocatedAmt >> 16);
1709        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1710          .addReg(ScratchReg, RegState::Kill)
1711          .addImm(CallerAllocatedAmt & 0xFFFF);
1712        BuildMI(MBB, MBBI, dl, AddInst)
1713          .addReg(SPReg)
1714          .addReg(FPReg)
1715          .addReg(ScratchReg);
1716      }
1717    } else {
1718      createTailCallBranchInstr(MBB);
1719    }
1720  }
1721}
1722
1723void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1724  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1725
1726  // If we got this far a first terminator should exist.
1727  assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1728
1729  DebugLoc dl = MBBI->getDebugLoc();
1730  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1731
1732  // Create branch instruction for pseudo tail call return instruction
1733  unsigned RetOpcode = MBBI->getOpcode();
1734  if (RetOpcode == PPC::TCRETURNdi) {
1735    MBBI = MBB.getLastNonDebugInstr();
1736    MachineOperand &JumpTarget = MBBI->getOperand(0);
1737    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1738      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1739  } else if (RetOpcode == PPC::TCRETURNri) {
1740    MBBI = MBB.getLastNonDebugInstr();
1741    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1742    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1743  } else if (RetOpcode == PPC::TCRETURNai) {
1744    MBBI = MBB.getLastNonDebugInstr();
1745    MachineOperand &JumpTarget = MBBI->getOperand(0);
1746    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1747  } else if (RetOpcode == PPC::TCRETURNdi8) {
1748    MBBI = MBB.getLastNonDebugInstr();
1749    MachineOperand &JumpTarget = MBBI->getOperand(0);
1750    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1751      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1752  } else if (RetOpcode == PPC::TCRETURNri8) {
1753    MBBI = MBB.getLastNonDebugInstr();
1754    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1755    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1756  } else if (RetOpcode == PPC::TCRETURNai8) {
1757    MBBI = MBB.getLastNonDebugInstr();
1758    MachineOperand &JumpTarget = MBBI->getOperand(0);
1759    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1760  }
1761}
1762
1763void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1764                                            BitVector &SavedRegs,
1765                                            RegScavenger *RS) const {
1766  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1767
1768  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1769
1770  //  Save and clear the LR state.
1771  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1772  unsigned LR = RegInfo->getRARegister();
1773  FI->setMustSaveLR(MustSaveLR(MF, LR));
1774  SavedRegs.reset(LR);
1775
1776  //  Save R31 if necessary
1777  int FPSI = FI->getFramePointerSaveIndex();
1778  const bool isPPC64 = Subtarget.isPPC64();
1779  const bool IsDarwinABI  = Subtarget.isDarwinABI();
1780  MachineFrameInfo &MFI = MF.getFrameInfo();
1781
1782  // If the frame pointer save index hasn't been defined yet.
1783  if (!FPSI && needsFP(MF)) {
1784    // Find out what the fix offset of the frame pointer save area.
1785    int FPOffset = getFramePointerSaveOffset();
1786    // Allocate the frame index for frame pointer save area.
1787    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1788    // Save the result.
1789    FI->setFramePointerSaveIndex(FPSI);
1790  }
1791
1792  int BPSI = FI->getBasePointerSaveIndex();
1793  if (!BPSI && RegInfo->hasBasePointer(MF)) {
1794    int BPOffset = getBasePointerSaveOffset();
1795    // Allocate the frame index for the base pointer save area.
1796    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1797    // Save the result.
1798    FI->setBasePointerSaveIndex(BPSI);
1799  }
1800
1801  // Reserve stack space for the PIC Base register (R30).
1802  // Only used in SVR4 32-bit.
1803  if (FI->usesPICBase()) {
1804    int PBPSI = MFI.CreateFixedObject(4, -8, true);
1805    FI->setPICBasePointerSaveIndex(PBPSI);
1806  }
1807
1808  // Make sure we don't explicitly spill r31, because, for example, we have
1809  // some inline asm which explicitly clobbers it, when we otherwise have a
1810  // frame pointer and are using r31's spill slot for the prologue/epilogue
1811  // code. Same goes for the base pointer and the PIC base register.
1812  if (needsFP(MF))
1813    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1814  if (RegInfo->hasBasePointer(MF))
1815    SavedRegs.reset(RegInfo->getBaseRegister(MF));
1816  if (FI->usesPICBase())
1817    SavedRegs.reset(PPC::R30);
1818
1819  // Reserve stack space to move the linkage area to in case of a tail call.
1820  int TCSPDelta = 0;
1821  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1822      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1823    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1824  }
1825
1826  // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1827  // function uses CR 2, 3, or 4.
1828  if (!isPPC64 && !IsDarwinABI &&
1829      (SavedRegs.test(PPC::CR2) ||
1830       SavedRegs.test(PPC::CR3) ||
1831       SavedRegs.test(PPC::CR4))) {
1832    int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1833    FI->setCRSpillFrameIndex(FrameIdx);
1834  }
1835}
1836
1837void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1838                                                       RegScavenger *RS) const {
1839  // Early exit if not using the SVR4 ABI.
1840  if (!Subtarget.isSVR4ABI()) {
1841    addScavengingSpillSlot(MF, RS);
1842    return;
1843  }
1844
1845  // Get callee saved register information.
1846  MachineFrameInfo &MFI = MF.getFrameInfo();
1847  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1848
1849  // If the function is shrink-wrapped, and if the function has a tail call, the
1850  // tail call might not be in the new RestoreBlock, so real branch instruction
1851  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1852  // RestoreBlock. So we handle this case here.
1853  if (MFI.getSavePoint() && MFI.hasTailCall()) {
1854    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1855    for (MachineBasicBlock &MBB : MF) {
1856      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1857        createTailCallBranchInstr(MBB);
1858    }
1859  }
1860
1861  // Early exit if no callee saved registers are modified!
1862  if (CSI.empty() && !needsFP(MF)) {
1863    addScavengingSpillSlot(MF, RS);
1864    return;
1865  }
1866
1867  unsigned MinGPR = PPC::R31;
1868  unsigned MinG8R = PPC::X31;
1869  unsigned MinFPR = PPC::F31;
1870  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1871
1872  bool HasGPSaveArea = false;
1873  bool HasG8SaveArea = false;
1874  bool HasFPSaveArea = false;
1875  bool HasVRSAVESaveArea = false;
1876  bool HasVRSaveArea = false;
1877
1878  SmallVector<CalleeSavedInfo, 18> GPRegs;
1879  SmallVector<CalleeSavedInfo, 18> G8Regs;
1880  SmallVector<CalleeSavedInfo, 18> FPRegs;
1881  SmallVector<CalleeSavedInfo, 18> VRegs;
1882
1883  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1884    unsigned Reg = CSI[i].getReg();
1885    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1886            (Reg != PPC::X2 && Reg != PPC::R2)) &&
1887           "Not expecting to try to spill R2 in a function that must save TOC");
1888    if (PPC::GPRCRegClass.contains(Reg)) {
1889      HasGPSaveArea = true;
1890
1891      GPRegs.push_back(CSI[i]);
1892
1893      if (Reg < MinGPR) {
1894        MinGPR = Reg;
1895      }
1896    } else if (PPC::G8RCRegClass.contains(Reg)) {
1897      HasG8SaveArea = true;
1898
1899      G8Regs.push_back(CSI[i]);
1900
1901      if (Reg < MinG8R) {
1902        MinG8R = Reg;
1903      }
1904    } else if (PPC::F8RCRegClass.contains(Reg)) {
1905      HasFPSaveArea = true;
1906
1907      FPRegs.push_back(CSI[i]);
1908
1909      if (Reg < MinFPR) {
1910        MinFPR = Reg;
1911      }
1912    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1913               PPC::CRRCRegClass.contains(Reg)) {
1914      ; // do nothing, as we already know whether CRs are spilled
1915    } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1916      HasVRSAVESaveArea = true;
1917    } else if (PPC::VRRCRegClass.contains(Reg) ||
1918               PPC::SPERCRegClass.contains(Reg)) {
1919      // Altivec and SPE are mutually exclusive, but have the same stack
1920      // alignment requirements, so overload the save area for both cases.
1921      HasVRSaveArea = true;
1922
1923      VRegs.push_back(CSI[i]);
1924
1925      if (Reg < MinVR) {
1926        MinVR = Reg;
1927      }
1928    } else {
1929      llvm_unreachable("Unknown RegisterClass!");
1930    }
1931  }
1932
1933  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1934  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1935
1936  int64_t LowerBound = 0;
1937
1938  // Take into account stack space reserved for tail calls.
1939  int TCSPDelta = 0;
1940  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1941      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1942    LowerBound = TCSPDelta;
1943  }
1944
1945  // The Floating-point register save area is right below the back chain word
1946  // of the previous stack frame.
1947  if (HasFPSaveArea) {
1948    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1949      int FI = FPRegs[i].getFrameIdx();
1950
1951      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1952    }
1953
1954    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1955  }
1956
1957  // Check whether the frame pointer register is allocated. If so, make sure it
1958  // is spilled to the correct offset.
1959  if (needsFP(MF)) {
1960    int FI = PFI->getFramePointerSaveIndex();
1961    assert(FI && "No Frame Pointer Save Slot!");
1962    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1963    // FP is R31/X31, so no need to update MinGPR/MinG8R.
1964    HasGPSaveArea = true;
1965  }
1966
1967  if (PFI->usesPICBase()) {
1968    int FI = PFI->getPICBasePointerSaveIndex();
1969    assert(FI && "No PIC Base Pointer Save Slot!");
1970    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1971
1972    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1973    HasGPSaveArea = true;
1974  }
1975
1976  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1977  if (RegInfo->hasBasePointer(MF)) {
1978    int FI = PFI->getBasePointerSaveIndex();
1979    assert(FI && "No Base Pointer Save Slot!");
1980    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1981
1982    Register BP = RegInfo->getBaseRegister(MF);
1983    if (PPC::G8RCRegClass.contains(BP)) {
1984      MinG8R = std::min<unsigned>(MinG8R, BP);
1985      HasG8SaveArea = true;
1986    } else if (PPC::GPRCRegClass.contains(BP)) {
1987      MinGPR = std::min<unsigned>(MinGPR, BP);
1988      HasGPSaveArea = true;
1989    }
1990  }
1991
1992  // General register save area starts right below the Floating-point
1993  // register save area.
1994  if (HasGPSaveArea || HasG8SaveArea) {
1995    // Move general register save area spill slots down, taking into account
1996    // the size of the Floating-point register save area.
1997    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1998      if (!GPRegs[i].isSpilledToReg()) {
1999        int FI = GPRegs[i].getFrameIdx();
2000        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2001      }
2002    }
2003
2004    // Move general register save area spill slots down, taking into account
2005    // the size of the Floating-point register save area.
2006    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2007      if (!G8Regs[i].isSpilledToReg()) {
2008        int FI = G8Regs[i].getFrameIdx();
2009        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2010      }
2011    }
2012
2013    unsigned MinReg =
2014      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2015                         TRI->getEncodingValue(MinG8R));
2016
2017    if (Subtarget.isPPC64()) {
2018      LowerBound -= (31 - MinReg + 1) * 8;
2019    } else {
2020      LowerBound -= (31 - MinReg + 1) * 4;
2021    }
2022  }
2023
2024  // For 32-bit only, the CR save area is below the general register
2025  // save area.  For 64-bit SVR4, the CR save area is addressed relative
2026  // to the stack pointer and hence does not need an adjustment here.
2027  // Only CR2 (the first nonvolatile spilled) has an associated frame
2028  // index so that we have a single uniform save area.
2029  if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
2030    // Adjust the frame index of the CR spill slot.
2031    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2032      unsigned Reg = CSI[i].getReg();
2033
2034      if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2035          // Leave Darwin logic as-is.
2036          || (!Subtarget.isSVR4ABI() &&
2037              (PPC::CRBITRCRegClass.contains(Reg) ||
2038               PPC::CRRCRegClass.contains(Reg)))) {
2039        int FI = CSI[i].getFrameIdx();
2040
2041        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2042      }
2043    }
2044
2045    LowerBound -= 4; // The CR save area is always 4 bytes long.
2046  }
2047
2048  if (HasVRSAVESaveArea) {
2049    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2050    //             which have the VRSAVE register class?
2051    // Adjust the frame index of the VRSAVE spill slot.
2052    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2053      unsigned Reg = CSI[i].getReg();
2054
2055      if (PPC::VRSAVERCRegClass.contains(Reg)) {
2056        int FI = CSI[i].getFrameIdx();
2057
2058        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2059      }
2060    }
2061
2062    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2063  }
2064
2065  // Both Altivec and SPE have the same alignment and padding requirements
2066  // within the stack frame.
2067  if (HasVRSaveArea) {
2068    // Insert alignment padding, we need 16-byte alignment. Note: for positive
2069    // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2070    // we are using negative number here (the stack grows downward). We should
2071    // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2072    // is the alignment size ( n = 16 here) and y is the size after aligning.
2073    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2074    LowerBound &= ~(15);
2075
2076    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2077      int FI = VRegs[i].getFrameIdx();
2078
2079      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2080    }
2081  }
2082
2083  addScavengingSpillSlot(MF, RS);
2084}
2085
2086void
2087PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2088                                         RegScavenger *RS) const {
2089  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2090  // a large stack, which will require scavenging a register to materialize a
2091  // large offset.
2092
2093  // We need to have a scavenger spill slot for spills if the frame size is
2094  // large. In case there is no free register for large-offset addressing,
2095  // this slot is used for the necessary emergency spill. Also, we need the
2096  // slot for dynamic stack allocations.
2097
2098  // The scavenger might be invoked if the frame offset does not fit into
2099  // the 16-bit immediate. We don't know the complete frame size here
2100  // because we've not yet computed callee-saved register spills or the
2101  // needed alignment padding.
2102  unsigned StackSize = determineFrameLayout(MF, true);
2103  MachineFrameInfo &MFI = MF.getFrameInfo();
2104  if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2105      hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2106    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2107    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2108    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2109    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2110    unsigned Size = TRI.getSpillSize(RC);
2111    unsigned Align = TRI.getSpillAlignment(RC);
2112    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2113
2114    // Might we have over-aligned allocas?
2115    bool HasAlVars = MFI.hasVarSizedObjects() &&
2116                     MFI.getMaxAlignment() > getStackAlignment();
2117
2118    // These kinds of spills might need two registers.
2119    if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2120      RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2121
2122  }
2123}
2124
2125// This function checks if a callee saved gpr can be spilled to a volatile
2126// vector register. This occurs for leaf functions when the option
2127// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2128// which were not spilled to vectors, return false so the target independent
2129// code can handle them by assigning a FrameIdx to a stack slot.
2130bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2131    MachineFunction &MF, const TargetRegisterInfo *TRI,
2132    std::vector<CalleeSavedInfo> &CSI) const {
2133
2134  if (CSI.empty())
2135    return true; // Early exit if no callee saved registers are modified!
2136
2137  // Early exit if cannot spill gprs to volatile vector registers.
2138  MachineFrameInfo &MFI = MF.getFrameInfo();
2139  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2140    return false;
2141
2142  // Build a BitVector of VSRs that can be used for spilling GPRs.
2143  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2144  BitVector BVCalleeSaved(TRI->getNumRegs());
2145  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2146  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2147  for (unsigned i = 0; CSRegs[i]; ++i)
2148    BVCalleeSaved.set(CSRegs[i]);
2149
2150  for (unsigned Reg : BVAllocatable.set_bits()) {
2151    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2152    // used in the function.
2153    if (BVCalleeSaved[Reg] ||
2154        (!PPC::F8RCRegClass.contains(Reg) &&
2155         !PPC::VFRCRegClass.contains(Reg)) ||
2156        (MF.getRegInfo().isPhysRegUsed(Reg)))
2157      BVAllocatable.reset(Reg);
2158  }
2159
2160  bool AllSpilledToReg = true;
2161  for (auto &CS : CSI) {
2162    if (BVAllocatable.none())
2163      return false;
2164
2165    unsigned Reg = CS.getReg();
2166    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2167      AllSpilledToReg = false;
2168      continue;
2169    }
2170
2171    unsigned VolatileVFReg = BVAllocatable.find_first();
2172    if (VolatileVFReg < BVAllocatable.size()) {
2173      CS.setDstReg(VolatileVFReg);
2174      BVAllocatable.reset(VolatileVFReg);
2175    } else {
2176      AllSpilledToReg = false;
2177    }
2178  }
2179  return AllSpilledToReg;
2180}
2181
2182
2183bool
2184PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2185                                     MachineBasicBlock::iterator MI,
2186                                     const std::vector<CalleeSavedInfo> &CSI,
2187                                     const TargetRegisterInfo *TRI) const {
2188
2189  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2190  // Return false otherwise to maintain pre-existing behavior.
2191  if (!Subtarget.isSVR4ABI())
2192    return false;
2193
2194  MachineFunction *MF = MBB.getParent();
2195  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2196  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2197  bool MustSaveTOC = FI->mustSaveTOC();
2198  DebugLoc DL;
2199  bool CRSpilled = false;
2200  MachineInstrBuilder CRMIB;
2201
2202  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2203    unsigned Reg = CSI[i].getReg();
2204    // Only Darwin actually uses the VRSAVE register, but it can still appear
2205    // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2206    // Darwin, ignore it.
2207    if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2208      continue;
2209
2210    // CR2 through CR4 are the nonvolatile CR fields.
2211    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2212
2213    // Add the callee-saved register as live-in; it's killed at the spill.
2214    // Do not do this for callee-saved registers that are live-in to the
2215    // function because they will already be marked live-in and this will be
2216    // adding it for a second time. It is an error to add the same register
2217    // to the set more than once.
2218    const MachineRegisterInfo &MRI = MF->getRegInfo();
2219    bool IsLiveIn = MRI.isLiveIn(Reg);
2220    if (!IsLiveIn)
2221       MBB.addLiveIn(Reg);
2222
2223    if (CRSpilled && IsCRField) {
2224      CRMIB.addReg(Reg, RegState::ImplicitKill);
2225      continue;
2226    }
2227
2228    // The actual spill will happen in the prologue.
2229    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2230      continue;
2231
2232    // Insert the spill to the stack frame.
2233    if (IsCRField) {
2234      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2235      if (Subtarget.isPPC64()) {
2236        // The actual spill will happen at the start of the prologue.
2237        FuncInfo->addMustSaveCR(Reg);
2238      } else {
2239        CRSpilled = true;
2240        FuncInfo->setSpillsCR();
2241
2242        // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2243        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2244        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2245                  .addReg(Reg, RegState::ImplicitKill);
2246
2247        MBB.insert(MI, CRMIB);
2248        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2249                                         .addReg(PPC::R12,
2250                                                 getKillRegState(true)),
2251                                         CSI[i].getFrameIdx()));
2252      }
2253    } else {
2254      if (CSI[i].isSpilledToReg()) {
2255        NumPESpillVSR++;
2256        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2257          .addReg(Reg, getKillRegState(true));
2258      } else {
2259        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2260        // Use !IsLiveIn for the kill flag.
2261        // We do not want to kill registers that are live in this function
2262        // before their use because they will become undefined registers.
2263        TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2264                                CSI[i].getFrameIdx(), RC, TRI);
2265      }
2266    }
2267  }
2268  return true;
2269}
2270
2271static void
2272restoreCRs(bool isPPC64, bool is31,
2273           bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2274           MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2275           const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2276
2277  MachineFunction *MF = MBB.getParent();
2278  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2279  DebugLoc DL;
2280  unsigned RestoreOp, MoveReg;
2281
2282  if (isPPC64)
2283    // This is handled during epilogue generation.
2284    return;
2285  else {
2286    // 32-bit:  FP-relative
2287    MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2288                                             PPC::R12),
2289                                     CSI[CSIIndex].getFrameIdx()));
2290    RestoreOp = PPC::MTOCRF;
2291    MoveReg = PPC::R12;
2292  }
2293
2294  if (CR2Spilled)
2295    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2296               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2297
2298  if (CR3Spilled)
2299    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2300               .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2301
2302  if (CR4Spilled)
2303    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2304               .addReg(MoveReg, getKillRegState(true)));
2305}
2306
2307MachineBasicBlock::iterator PPCFrameLowering::
2308eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2309                              MachineBasicBlock::iterator I) const {
2310  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2311  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2312      I->getOpcode() == PPC::ADJCALLSTACKUP) {
2313    // Add (actually subtract) back the amount the callee popped on return.
2314    if (int CalleeAmt =  I->getOperand(1).getImm()) {
2315      bool is64Bit = Subtarget.isPPC64();
2316      CalleeAmt *= -1;
2317      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2318      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2319      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2320      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2321      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2322      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2323      const DebugLoc &dl = I->getDebugLoc();
2324
2325      if (isInt<16>(CalleeAmt)) {
2326        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2327          .addReg(StackReg, RegState::Kill)
2328          .addImm(CalleeAmt);
2329      } else {
2330        MachineBasicBlock::iterator MBBI = I;
2331        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2332          .addImm(CalleeAmt >> 16);
2333        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2334          .addReg(TmpReg, RegState::Kill)
2335          .addImm(CalleeAmt & 0xFFFF);
2336        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2337          .addReg(StackReg, RegState::Kill)
2338          .addReg(TmpReg);
2339      }
2340    }
2341  }
2342  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2343  return MBB.erase(I);
2344}
2345
/// Insert, before \p MI in \p MBB, the code that restores the callee-saved
/// registers described by \p CSI.
///
/// Each entry of \p CSI is handled as follows:
///  - VRSAVE is skipped unless the subtarget uses the Darwin ABI.
///  - X2/R2 is skipped when the function info reports mustSaveTOC().
///  - CR2, CR3 and CR4 are only recorded; they are restored together through
///    restoreCRs(), either when the next non-CR entry is reached or after the
///    loop.
///  - An entry with isSpilledToReg() is copied back from its destination
///    register with MFVSRD.
///  - Any other entry is reloaded from its frame index with
///    loadRegFromStackSlot().
///
/// \param MBB Block that receives the restore code.
/// \param MI  Original insertion point.
/// \param CSI Callee-saved register descriptions, walked front to back.
/// \param TRI Register info used to pick the register class for stack reloads.
/// \returns false, without inserting anything, when the subtarget does not
/// use the SVR4 ABI; true otherwise.
bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {

  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  // Which of the CR fields have been seen in CSI but not yet restored.
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  // Index into CSI of the CR2 entry; handed to restoreCRs().
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.  BeforeI remembers the instruction preceding the original
  // insertion point (if there is one) so that, after each insertion, I can be
  // moved back to the first instruction emitted so far.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // Only Darwin actually uses the VRSAVE register, but it can still appear
    // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
    // Darwin, ignore it.
    if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
      continue;

    // No reload is emitted here for the TOC register when the function info
    // says the TOC must be saved.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // The CR cases below only record state and `continue`; nothing has been
    // inserted, so the insertion point I is deliberately left untouched.
    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled.  Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // When we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      // NOTE(review): Reg is already known not to equal CR2, CR3 or CR4 in
      // this branch, so the range test looks redundant if those enum values
      // are consecutive -- confirm before simplifying.
      if ((CR2Spilled || CR3Spilled || CR4Spilled)
          && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        // NOTE(review): is31 is simply needsFP(*MF); see restoreCRs() for how
        // it is interpreted.
        bool is31 = needsFP(*MF);
        restoreCRs(Subtarget.isPPC64(), is31,
                   CR2Spilled, CR3Spilled, CR4Spilled,
                   MBB, I, CSI, CSIIndex);
        // The pending CRs have been handled; clear the flags so they are not
        // restored a second time.
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        // The value lives in another register rather than a stack slot: move
        // it back with MFVSRD and mark the source register as killed.
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
        // Default behavior for non-CR saves: reload from the stack slot.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order: move I back to the first instruction emitted so
    // far (the one right after BeforeI, or the block start) so the next
    // restore lands in front of the ones already inserted.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If the recorded CRs have not been restored yet (no non-CR register
  // followed them in CSI), restore them now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    bool is31 = needsFP(*MF);
    restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
               MBB, I, CSI, CSIIndex);
  }

  return true;
}
2442
/// Return the stored TOC save offset (the TOCSaveOffset member), unchanged.
unsigned PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}
2446
/// Return the stored frame-pointer save offset (the FramePointerSaveOffset
/// member).
///
/// Not supported for the AIX ABI: in that case a fatal error is reported
/// instead of returning a value.
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
  // Fail loudly rather than hand back an offset for an unsupported ABI.
  if (Subtarget.isAIXABI())
    report_fatal_error("FramePointer is not implemented on AIX yet.");
  return FramePointerSaveOffset;
}
2452
/// Return the stored base-pointer save offset (the BasePointerSaveOffset
/// member).
///
/// Not supported for the AIX ABI: in that case a fatal error is reported
/// instead of returning a value.
unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
  // Fail loudly rather than hand back an offset for an unsupported ABI.
  if (Subtarget.isAIXABI())
    report_fatal_error("BasePointer is not implemented on AIX yet.");
  return BasePointerSaveOffset;
}
2458
2459bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2460  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2461    return false;
2462  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2463          MF.getSubtarget<PPCSubtarget>().isPPC64());
2464}
2465