//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

/// hasReservedCallFrame - The call frame is considered reserved (so
/// call-frame setup/destroy pseudos can be folded into fixed SP adjustments)
/// exactly when the function has no variable-sized objects on the stack.
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
This is true if the function has variable sized allocas 44218885Sdim/// or if frame pointer elimination is disabled. 45218885Sdimbool X86FrameLowering::hasFP(const MachineFunction &MF) const { 46218885Sdim const MachineFrameInfo *MFI = MF.getFrameInfo(); 47218885Sdim const MachineModuleInfo &MMI = MF.getMMI(); 48239462Sdim const TargetRegisterInfo *RegInfo = TM.getRegisterInfo(); 49218885Sdim 50234353Sdim return (MF.getTarget().Options.DisableFramePointerElim(MF) || 51239462Sdim RegInfo->needsStackRealignment(MF) || 52218885Sdim MFI->hasVarSizedObjects() || 53263765Sdim MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || 54218885Sdim MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 55239462Sdim MMI.callsUnwindInit() || MMI.callsEHReturn()); 56218885Sdim} 57218885Sdim 58249423Sdimstatic unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { 59249423Sdim if (IsLP64) { 60218885Sdim if (isInt<8>(Imm)) 61218885Sdim return X86::SUB64ri8; 62218885Sdim return X86::SUB64ri32; 63218885Sdim } else { 64218885Sdim if (isInt<8>(Imm)) 65218885Sdim return X86::SUB32ri8; 66218885Sdim return X86::SUB32ri; 67218885Sdim } 68218885Sdim} 69218885Sdim 70249423Sdimstatic unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) { 71249423Sdim if (IsLP64) { 72218885Sdim if (isInt<8>(Imm)) 73218885Sdim return X86::ADD64ri8; 74218885Sdim return X86::ADD64ri32; 75218885Sdim } else { 76218885Sdim if (isInt<8>(Imm)) 77218885Sdim return X86::ADD32ri8; 78218885Sdim return X86::ADD32ri; 79218885Sdim } 80218885Sdim} 81218885Sdim 82249423Sdimstatic unsigned getLEArOpcode(unsigned IsLP64) { 83249423Sdim return IsLP64 ? X86::LEA64r : X86::LEA32r; 84234353Sdim} 85234353Sdim 86218885Sdim/// findDeadCallerSavedReg - Return a caller-saved register that isn't live 87218885Sdim/// when it reaches the "return" instruction. We can then pop a stack object 88218885Sdim/// to this register without worry about clobbering it. 
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  // Bail out for EH-return functions: the register state at the return is
  // constrained and nothing here is known-dead.
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  // Candidate pools, zero-terminated, probed in order.
  static const uint16_t CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8, X86::R9, X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;           // Not a recognized return-like instruction.
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Collect every register (and all of its aliases, i.e. sub/super
    // registers) used by the return instruction's operands.
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    // First candidate not used by the return instruction wins.
    const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}


/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes,
                  bool Is64Bit, bool IsLP64, bool UseLEA,
                  const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc;
  if (UseLEA)
    Opc = getLEArOpcode(IsLP64);
  else
    Opc = isSub
      ? getSUBriOpcode(IsLP64, Offset)
      : getADDriOpcode(IsLP64, Offset);

  // Emit in chunks no larger than a signed 32-bit immediate can encode.
  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    // A slot-sized adjustment can be done with a (usually smaller)
    // push / pop instead of arithmetic on SP.
    if (ThisVal == (Is64Bit ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
      if (Reg) {
        Opc = isSub
          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
          : (Is64Bit ? X86::POP64r : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI = NULL;

    if (UseLEA) {
      // LEA form: adjusts SP without touching EFLAGS.
      MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                        StackPtr, false, isSub ? -ThisVal : ThisVal);
    } else {
      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr)
        .addImm(ThisVal);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    }

    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);

    Offset -= ThisVal;
  }
}

/// mergeSPUpdatesUp - If the instruction immediately before \p MBBI is an
/// ADD/SUB/LEA on the stack pointer, fold its adjustment into \p NumBytes
/// (merging in the "upper" direction) and erase it.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = prior(MBBI);
  unsigned Opc = PI->getOpcode();
  // An ADD (or the flag-preserving LEA form) grows the accumulated
  // adjustment; a SUB shrinks it. Either way the merged instruction dies.
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

/// mergeSPUpdatesDown - Like mergeSPUpdatesUp but folds the SP adjustment of
/// the instruction immediately after \p MBBI. NOTE: the body below is
/// intentionally disabled by the early return (see FIXME).
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // FIXME: THIS ISN'T RUN!!!
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = llvm::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  // Signs are inverted relative to mergeSPUpdatesUp: an adjustment *after*
  // MBBI reduces the amount the caller still needs to apply.
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB/LEA instruction on the stack pointer it is
/// deleted, and the stack adjustment is returned as a positive value for
/// ADD/LEA and a negative value for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ?
0 : llvm::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr){
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    // When merging the following instruction, MBBI must be advanced past it.
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}

/// isEAXLiveIn - Return true if EAX (or any of its aliased sub-registers
/// AX/AH/AL) is a live-in of the function.
static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// emitCalleeSavedFrameMoves - Record a CFI offset instruction at \p Label
/// for each callee-saved register spilled in the prologue, so the unwinder
/// can locate the saved values.
void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                 MCSymbol *Label,
                                                 unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  bool HasFP = hasFP(MF);

  // Calculate amount of bytes used for return address storing.
  int stackGrowth = -RegInfo->getSlotSize();

  // FIXME: This is dirty hack. The code itself is pretty mess right now.
  // It should be rewritten from scratch and generalized sometimes.

  // Determine maximum offset (minimum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets.
  // Slots start below the return address (and the saved FP, when present).
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // trying to unwind.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    // another bug. I.e., one where we generate a prolog like this:
    //
    //     pushl  %ebp
    //     movl   %esp, %ebp
    //     pushl  %ebp
    //     pushl  %esi
    //     ...
    //
    // The immediate re-push of EBP is unnecessary. At the least, it's an
    // optimization bug. EBP can be used as a scratch register in certain
    // cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    MMI.addFrameInst(MCCFIInstruction::createOffset(Label, DwarfReg, Offset));
  }
}

/// usesTheStack - This function checks if any of the users of EFLAGS
/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Any COPY touching EFLAGS implies a push/pop pair during lowering.
  for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
       re = MRI.reg_end(); ri != re; ++ri)
    if (ri->isCopy())
      return true;

  return false;
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.
void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // CFI/frame moves are needed for debug info or any unwind table.
  bool needsFrameMoves = MMI.hasDebugInfo() ||
    Fn->needsUnwindTableEntry();
  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();   // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool Is64Bit = STI.is64Bit();
  bool IsLP64 = STI.isTarget64BitLP64();
  bool IsWin64 = STI.isTargetWin64();
  bool UseLEA = STI.useLeaForSP();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();
  unsigned BasePtr = RegInfo->getBaseRegister();
  DebugLoc DL;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out. Otherwise just make sure we have some alignment - we'll
  // go with the minimum SlotSize.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone). We also check that we don't
  // push and pop from the stack.
  if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                                   Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->adjustsStack() &&                      // No calls.
      !IsWin64 &&                                  // Win64 has no Red Zone
      !usesTheStack(MF) &&                         // Don't push and pop.
      !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    // Shrink the allocation by up to 128 bytes: locals live in the Red Zone.
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr. Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the callers.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta)
        .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF)) {
      // Callee-saved registers are pushed on stack before the stack
      // is realigned.
      FrameSize -= X86FI->getCalleeSavedFrameSize();
      // Round the remaining frame up to the realignment boundary.
      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
    } else {
      NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
    }

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (needsFrameMoves) {
      // Mark the place where EBP/RBP was saved.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
        .addSym(FrameLabel);

      // Define the current CFA rule to use the provided offset.
      // Two slots are on the stack here: return address + saved FP.
      assert(StackSize);
      MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, 2 * stackGrowth));

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
      MMI.addFrameInst(MCCFIInstruction::createOffset(FrameLabel, DwarfFramePtr,
                                                      2 * stackGrowth));
    }

    // Update EBP with the new base value.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);

    if (needsFrameMoves) {
      // Mark effective beginning of when frame pointer becomes valid.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL))
        .addSym(FrameLabel);

      // Define the current CFA to use the EBP/RBP register.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
      MMI.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(FrameLabel, DwarfFramePtr));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    MBBI->setFlag(MachineInstr::FrameSetup);
    ++MBBI;

    if (!HasFP && needsFrameMoves) {
      // Mark callee-saved push instruction.
      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

      // Define the current CFA rule to use the provided offset.
      // Without a frame pointer the CFA moves with every push.
      assert(StackSize);
      MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(Label, StackOffset));
      StackOffset += stackGrowth;
    }
  }

  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).

  // NOTE: We push the registers before realigning the stack, so
  // vector callee-saved (xmm) registers may be saved w/o proper
  // alignment in this way. However, currently these regs are saved in
  // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
  // this shouldn't be a problem.
  if (RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
        .addReg(StackPtr)
        .addImm(-MaxAlign)
        .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If there is an SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // If there is an ADD32ri or SUB32ri of ESP immediately after this
  // instruction, merge the two instructions.
  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
  // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go. The 64-bit version of
  // __chkstk is only responsible for probing the stack. The 64-bit prologue is
  // responsible for adjusting the stack pointer. Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetEnvMacho()) {
    const char *StackProbeSymbol;
    bool isSPUpdateNeeded = false;

    // Pick the probe routine per target; only MSVC x64's __chkstk leaves the
    // actual SP adjustment to us.
    if (Is64Bit) {
      if (STI.isTargetCygMing())
        StackProbeSymbol = "___chkstk";
      else {
        StackProbeSymbol = "__chkstk";
        isSPUpdateNeeded = true;
      }
    } else if (STI.isTargetCygMing())
      StackProbeSymbol = "_alloca";
    else
      StackProbeSymbol = "_chkstk";

    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // Sanity check that EAX is not livein for this function.
      // It should not be, so throw an assert.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
      .addExternalSymbol(StackProbeSymbol)
      .addReg(StackPtr, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
      .setMIFlag(MachineInstr::FrameSetup);

    // MSVC x64's __chkstk does not adjust %rsp itself.
    // It also does not clobber %rax so we can reuse it when adjusting %rsp.
    if (isSPUpdateNeeded) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
        .addReg(StackPtr)
        .addReg(X86::RAX)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (isEAXAlive) {
      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes)
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
                 UseLEA, TII, *RegInfo);

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF)) {
    // Update the frame pointer with the current stack pointer.
    unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
    // Mark end of stack pointer adjustment.
695218885Sdim MCSymbol *Label = MMI.getContext().CreateTempSymbol(); 696226633Sdim BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)) 697226633Sdim .addSym(Label); 698218885Sdim 699218885Sdim if (!HasFP && NumBytes) { 700218885Sdim // Define the current CFA rule to use the provided offset. 701263508Sdim assert(StackSize); 702263508Sdim MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset( 703263508Sdim Label, -StackSize + stackGrowth)); 704218885Sdim } 705218885Sdim 706218885Sdim // Emit DWARF info specifying the offsets of the callee-saved registers. 707218885Sdim if (PushedRegs) 708218885Sdim emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr); 709218885Sdim } 710218885Sdim} 711218885Sdim 712218885Sdimvoid X86FrameLowering::emitEpilogue(MachineFunction &MF, 713224145Sdim MachineBasicBlock &MBB) const { 714218885Sdim const MachineFrameInfo *MFI = MF.getFrameInfo(); 715218885Sdim X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 716218885Sdim const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); 717218885Sdim const X86InstrInfo &TII = *TM.getInstrInfo(); 718218885Sdim MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 719218885Sdim assert(MBBI != MBB.end() && "Returning block has no instructions"); 720218885Sdim unsigned RetOpcode = MBBI->getOpcode(); 721218885Sdim DebugLoc DL = MBBI->getDebugLoc(); 722218885Sdim bool Is64Bit = STI.is64Bit(); 723249423Sdim bool IsLP64 = STI.isTarget64BitLP64(); 724234353Sdim bool UseLEA = STI.useLeaForSP(); 725218885Sdim unsigned StackAlign = getStackAlignment(); 726218885Sdim unsigned SlotSize = RegInfo->getSlotSize(); 727218885Sdim unsigned FramePtr = RegInfo->getFrameRegister(MF); 728218885Sdim unsigned StackPtr = RegInfo->getStackRegister(); 729218885Sdim 730218885Sdim switch (RetOpcode) { 731218885Sdim default: 732218885Sdim llvm_unreachable("Can only insert epilog into returning blocks"); 733218885Sdim case X86::RET: 734218885Sdim case X86::RETI: 735218885Sdim case 
X86::TCRETURNdi: 736218885Sdim case X86::TCRETURNri: 737218885Sdim case X86::TCRETURNmi: 738218885Sdim case X86::TCRETURNdi64: 739218885Sdim case X86::TCRETURNri64: 740218885Sdim case X86::TCRETURNmi64: 741218885Sdim case X86::EH_RETURN: 742218885Sdim case X86::EH_RETURN64: 743218885Sdim break; // These are ok 744218885Sdim } 745218885Sdim 746218885Sdim // Get the number of bytes to allocate from the FrameInfo. 747218885Sdim uint64_t StackSize = MFI->getStackSize(); 748218885Sdim uint64_t MaxAlign = MFI->getMaxAlignment(); 749218885Sdim unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 750218885Sdim uint64_t NumBytes = 0; 751218885Sdim 752218885Sdim // If we're forcing a stack realignment we can't rely on just the frame 753218885Sdim // info, we need to know the ABI stack alignment as well in case we 754218885Sdim // have a call out. Otherwise just make sure we have some alignment - we'll 755218885Sdim // go with the minimum. 756218885Sdim if (ForceStackAlign) { 757218885Sdim if (MFI->hasCalls()) 758218885Sdim MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 759218885Sdim else 760218885Sdim MaxAlign = MaxAlign ? MaxAlign : 4; 761218885Sdim } 762218885Sdim 763218885Sdim if (hasFP(MF)) { 764218885Sdim // Calculate required stack adjustment. 765218885Sdim uint64_t FrameSize = StackSize - SlotSize; 766239462Sdim if (RegInfo->needsStackRealignment(MF)) { 767239462Sdim // Callee-saved registers were pushed on stack before the stack 768239462Sdim // was realigned. 769239462Sdim FrameSize -= CSSize; 770239462Sdim NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; 771239462Sdim } else { 772239462Sdim NumBytes = FrameSize - CSSize; 773239462Sdim } 774218885Sdim 775218885Sdim // Pop EBP. 776218885Sdim BuildMI(MBB, MBBI, DL, 777218885Sdim TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); 778218885Sdim } else { 779218885Sdim NumBytes = StackSize - CSSize; 780218885Sdim } 781218885Sdim 782218885Sdim // Skip the callee-saved pop instructions. 
  // Walk backwards past the callee-saved POPs (and debug values) so later
  // stack-pointer adjustments are inserted before them.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned.
  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (RegInfo->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    if (CSSize != 0) {
      // Recompute ESP relative to the frame pointer: ESP = EBP - CSSize.
      unsigned Opc = getLEArOpcode(IsLP64);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, -CSSize);
    } else {
      // No callee-saved registers to skip: ESP = EBP.
      unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
                 TII, *RegInfo);
  }

  // We're returning from function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    // eh_return carries the new stack pointer in its first operand; move it
    // into ESP/RSP before returning.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    // For memory-form TCRETURN the target is a 5-operand memory reference,
    // so the stack-adjust immediate lives at operand index 5.
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj-MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
                   UseLEA, TII, *RegInfo);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                       ? X86::TAILJMPd : X86::TAILJMPd64));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      // Copy the 5-operand memory reference from the pseudo to the jump.
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    // Preserve implicit operands (e.g. register uses) from the pseudo on the
    // real tail jump.
    MachineInstr *NewMI = prior(MBBI);
    NewMI->copyImplicitOps(MF, MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
                 *RegInfo);
  }
}

/// getFrameIndexOffset - Return the offset of frame index FI from the
/// register returned by getFrameIndexReference (base pointer, stack pointer,
/// or frame pointer depending on realignment and dynamic allocas).
int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
  const X86RegisterInfo *RegInfo =
    static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (RegInfo->hasBasePointer(MF)) {
    assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + RegInfo->getSlotSize();
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
  } else if (RegInfo->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + RegInfo->getSlotSize();
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += RegInfo->getSlotSize();

    // Skip the RETADDR move area
    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}

/// getFrameIndexReference - Pick the register that frame index FI should be
/// addressed from and return the corresponding offset (via
/// getFrameIndexOffset).
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
    static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer. The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else if (RegInfo->needsStackRealignment(MF))
    FrameReg = RegInfo->getStackRegister();
  else
    FrameReg = RegInfo->getFrameRegister(MF);
  return getFrameIndexOffset(MF, FI);
}

/// spillCalleeSavedRegisters - Spill the callee-saved registers in CSI:
/// GPRs are PUSHed (recorded in the callee-saved frame size), all other
/// registers (e.g. XMM) are stored to their assigned stack slots.
bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                 MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();

  unsigned SlotSize = STI.is64Bit() ? 8 : 4;
  unsigned FPReg = TRI->getFrameRegister(MF);
  unsigned CalleeFrameSize = 0;

  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Push GPRs. It increases frame size.
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    if (Reg == FPReg)
      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
      continue;
    CalleeFrameSize += SlotSize;
    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);

  // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
  // It can be done by spilling XMMs to stack frame.
  // Note that only Win64 ABI might spill XMMs.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
                            RC, TRI);
  }

  return true;
}

/// restoreCalleeSavedRegisters - Restore the callee-saved registers spilled
/// by spillCalleeSavedRegisters: non-GPRs are reloaded from their stack
/// slots first, then GPRs are POPped (in the reverse order they were pushed).
bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
                             RC, TRI);
  }

  // POP GPRs.
  unsigned FPReg = TRI->getFrameRegister(MF);
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;
    if (Reg == FPReg)
      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
      continue;
    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
  }
  return true;
}

/// processFunctionBeforeCalleeSavedScan - Reserve fixed frame objects needed
/// by the X86 frame layout before callee-saved registers are scanned: the
/// tail-call return-address area, the saved frame pointer slot, and mark the
/// base pointer as used so it gets spilled.
void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           TailCallReturnAddrDelta - SlotSize, true);
  }

  if (hasFP(MF)) {
    assert((TailCallReturnAddrDelta <= 0) &&
           "The Delta should always be zero or negative");
    const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();

    // Create a frame entry for the EBP register that must be saved.
    int FrameIdx = MFI->CreateFixedObject(SlotSize,
                                          -(int)SlotSize +
                                          TFI.getOffsetOfLocalArea() +
                                          TailCallReturnAddrDelta,
                                          true);
    assert(FrameIdx == MFI->getObjectIndexBegin() &&
           "Slot for EBP register must be last in order to be found!");
    (void)FrameIdx;
  }

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}

/// HasNestArgument - Return true if any formal argument of MF's IR function
/// carries the 'nest' attribute.
static bool
HasNestArgument(const MachineFunction *MF) {
  const Function *F = MF->getFunction();
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; I++) {
    if (I->hasNestAttr())
      return true;
  }
  return false;
}

/// GetScratchRegister - Get a temp register for performing work in the
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
/// and the properties of the function either one or two registers will be
/// needed. Set primary to true for the first register, false for the second.
static unsigned
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
  CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();

  // Erlang stuff.
  if (CallingConvention == CallingConv::HiPE) {
    if (Is64Bit)
      return Primary ? X86::R14 : X86::R13;
    else
      return Primary ? X86::EBX : X86::EDI;
  }

  if (Is64Bit)
    return Primary ? X86::R11 : X86::R12;

  bool IsNested = HasNestArgument(&MF);

  // On 32-bit, fastcall/fastcc pass arguments in ECX/EDX, so fall back to
  // EAX/ECX; a nest argument (in ECX) cannot be supported there.
  if (CallingConvention == CallingConv::X86_FastCall ||
      CallingConvention == CallingConv::Fast) {
    if (IsNested)
      report_fatal_error("Segmented stacks does not support fastcall with "
                         "nested function.");
    return Primary ? X86::EAX : X86::ECX;
  }
  if (IsNested)
    return Primary ? X86::EDX : X86::EAX;
  return Primary ? X86::ECX : X86::EAX;
}

// The stack limit in the TCB is set to this many bytes above the actual stack
// limit.
static const uint64_t kSplitStackAvailable = 256;

/// adjustForSegmentedStacks - Insert a split-stack prologue: compare the
/// stack pointer (minus the frame size) against the per-thread stack limit
/// held in TLS, and call __morestack to allocate a new stacklet when the
/// current one is too small.
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
  MachineBasicBlock &prologueMBB = MF.front();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  uint64_t StackSize;
  bool Is64Bit = STI.is64Bit();
  unsigned TlsReg, TlsOffset;
  DebugLoc DL;

  unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
         "Scratch register is live-in");

  if (MF.getFunction()->isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
      !STI.isTargetWin32() && !STI.isTargetFreeBSD())
    report_fatal_error("Segmented stacks not supported on this platform.");

  // checkMBB performs the limit comparison; allocMBB calls __morestack.
  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool IsNested = false;

  // We need to know if the function has a nest argument only in 64 bit mode.
  if (Is64Bit)
    IsNested = HasNestArgument(&MF);

  // The MOV R10, RAX needs to be in a different block, since the RET we emit in
  // allocMBB needs to be last (terminating) instruction.

  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
         e = prologueMBB.livein_end(); i != e; i++) {
    allocMBB->addLiveIn(*i);
    checkMBB->addLiveIn(*i);
  }

  if (IsNested)
    allocMBB->addLiveIn(X86::R10);

  MF.push_front(allocMBB);
  MF.push_front(checkMBB);

  // Eventually StackSize will be calculated by a link-time pass; which will
  // also decide whether checking code needs to be injected into this particular
  // prologue.
  StackSize = MFI->getStackSize();

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = StackSize < kSplitStackAvailable;

  // Read the limit off the current stacklet off the stack_guard location.
  // TLS register and offset are platform-specific.
  if (Is64Bit) {
    if (STI.isTargetLinux()) {
      TlsReg = X86::FS;
      TlsOffset = 0x70;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
    } else if (STI.isTargetFreeBSD()) {
      TlsReg = X86::FS;
      TlsOffset = 0x18;
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    // Compute SP - StackSize into the scratch register (or use RSP directly
    // for small frames) and compare it against the TLS stack limit.
    if (CompareStackPointer)
      ScratchReg = X86::RSP;
    else
      BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
      .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  } else {
    if (STI.isTargetLinux()) {
      TlsReg = X86::GS;
      TlsOffset = 0x30;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x48 + 90*4;
    } else if (STI.isTargetWin32()) {
      TlsReg = X86::FS;
      TlsOffset = 0x14; // pvArbitrary, reserved for application use
    } else if (STI.isTargetFreeBSD()) {
      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    if (CompareStackPointer)
      ScratchReg = X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    if (STI.isTargetLinux() || STI.isTargetWin32()) {
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
        .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
    } else if (STI.isTargetDarwin()) {

      // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
      unsigned ScratchReg2;
      bool SaveScratch2;
      if (CompareStackPointer) {
        // The primary scratch register is available for holding the TLS offset
        ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
        SaveScratch2 = false;
      } else {
        // Need to use a second register to hold the TLS offset
        ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);

        // Unfortunately, with fastcc the second scratch register may hold an arg
        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
      }

      // If Scratch2 is live-in then it needs to be saved
      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
             "Scratch register is live-in and not saved");

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
          .addReg(ScratchReg2, RegState::Kill);

      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
        .addImm(TlsOffset);
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
        .addReg(ScratchReg)
        .addReg(ScratchReg2).addImm(1).addReg(0)
        .addImm(0)
        .addReg(TlsReg);

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
    }
  }

  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  // It jumps to normal execution of the function body.
  BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);

  // On 32 bit we first push the arguments size and then the frame size. On 64
  // bit, we pass the stack frame size in r10 and the argument size in r11.
  if (Is64Bit) {
    // Functions with nested arguments use R10, so it needs to be saved across
    // the call to _morestack

    if (IsNested)
      BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);

    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
      .addImm(StackSize);
    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
      .addImm(X86FI->getArgumentStackSize());
    MF.getRegInfo().setPhysRegUsed(X86::R10);
    MF.getRegInfo().setPhysRegUsed(X86::R11);
  } else {
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(X86FI->getArgumentStackSize());
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(StackSize);
  }

  // __morestack is in libgcc
  if (Is64Bit)
    BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
      .addExternalSymbol("__morestack");
  else
    BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
      .addExternalSymbol("__morestack");

  // __morestack returns directly to the caller's body, so allocMBB ends in a
  // return pseudo (restoring R10 first when it held the nest argument).
  if (IsNested)
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  else
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));

  allocMBB->addSuccessor(&prologueMBB);

  checkMBB->addSuccessor(allocMBB);
  checkMBB->addSuccessor(&prologueMBB);

#ifdef XDEBUG
  MF.verify();
#endif
}

/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom implementation of hybrid stack/heap architecture.
/// (for more information see Eric Stenman's Ph.D. thesis:
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
///       ...
/// IncStack:
///       call inc_stack   # doubles the stack space
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize();
  const bool Is64Bit = STI.is64Bit();
  DebugLoc DL;
  // HiPE-specific values
  const unsigned HipeLeafWords = 24;
  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  const unsigned Guaranteed = HipeLeafWords * SlotSize;
  unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
                            MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
  unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;

  assert(STI.isTargetLinux() &&
         "HiPE prologue is only supported on Linux operating systems.");

  // Compute the largest caller's frame that is needed to fit the callees'
  // frames. This 'MaxStack' is computed from:
  //
  // a) the fixed frame size, which is the space needed for all spilled temps,
  // b) outgoing on-stack parameter areas, and
  // c) the minimum stack space this function needs to make available for the
  //    functions it calls (a tunable ABI property).
  if (MFI->hasCalls()) {
    unsigned MoreStackForCalls = 0;

    for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
         MBBI != MBBE; ++MBBI)
      for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
           MI != ME; ++MI) {
        if (!MI->isCall())
          continue;

        // Get callee operand.
        const MachineOperand &MO = MI->getOperand(0);

        // Only take account of global function calls (no closures etc.).
        if (!MO.isGlobal())
          continue;

        const Function *F = dyn_cast<Function>(MO.getGlobal());
        if (!F)
          continue;

        // Do not update 'MaxStack' for primitive and built-in functions
        // (encoded with names either starting with "erlang."/"bif_" or not
        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
        // "_", such as the BIF "suspend_0") as they are executed on another
        // stack.
        if (F->getName().find("erlang.") != StringRef::npos ||
            F->getName().find("bif_") != StringRef::npos ||
            F->getName().find_first_of("._") == StringRef::npos)
          continue;

        unsigned CalleeStkArity =
          F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
        if (HipeLeafWords - 1 > CalleeStkArity)
          MoreStackForCalls = std::max(MoreStackForCalls,
                               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
      }
    MaxStack += MoreStackForCalls;
  }

  // If the stack frame needed is larger than the guaranteed then runtime checks
  // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
  if (MaxStack > Guaranteed) {
    MachineBasicBlock &prologueMBB = MF.front();
    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();

    for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
           E = prologueMBB.livein_end(); I != E; I++) {
      stackCheckMBB->addLiveIn(*I);
      incStackMBB->addLiveIn(*I);
    }

    MF.push_front(incStackMBB);
    MF.push_front(stackCheckMBB);

    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
    unsigned LEAop, CMPop, CALLop;
    if (Is64Bit) {
      SPReg = X86::RSP;
      PReg = X86::RBP;
      LEAop = X86::LEA64r;
      CMPop = X86::CMP64rm;
      CALLop = X86::CALL64pcrel32;
      SPLimitOffset = 0x90;
    } else {
      SPReg = X86::ESP;
      PReg = X86::EBP;
      LEAop = X86::LEA32r;
      CMPop = X86::CMP32rm;
      CALLop = X86::CALLpcrel32;
      SPLimitOffset = 0x4c;
    }

    ScratchReg = GetScratchRegister(Is64Bit, MF, true);
    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
           "HiPE prologue scratch register is live-in");

    // Create new MBB for StackCheck:
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    // SPLimitOffset is in a fixed heap location (pointed by BP).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);

    // Create new MBB for IncStack: call inc_stack_0 and re-check the limit,
    // looping until enough stack is available.
    BuildMI(incStackMBB, DL, TII.get(CALLop)).
      addExternalSymbol("inc_stack_0");
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);

    stackCheckMBB->addSuccessor(&prologueMBB, 99);
    stackCheckMBB->addSuccessor(incStackMBB, 1);
    incStackMBB->addSuccessor(&prologueMBB, 99);
    incStackMBB->addSuccessor(incStackMBB, 1);
  }
#ifdef XDEBUG
  MF.verify();
#endif
}

void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const X86InstrInfo &TII = *TM.getInstrInfo();
  const X86RegisterInfo &RegInfo = *TM.getRegisterInfo();
  unsigned StackPtr = RegInfo.getStackRegister();
  bool reseveCallFrame = hasReservedCallFrame(MF);
  int Opcode = I->getOpcode();
  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  bool IsLP64 = STI.isTarget64BitLP64();
  DebugLoc DL
= I->getDebugLoc(); 1492249423Sdim uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; 1493249423Sdim uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0; 1494249423Sdim I = MBB.erase(I); 1495249423Sdim 1496249423Sdim if (!reseveCallFrame) { 1497249423Sdim // If the stack pointer can be changed after prologue, turn the 1498249423Sdim // adjcallstackup instruction into a 'sub ESP, <amt>' and the 1499249423Sdim // adjcallstackdown instruction into 'add ESP, <amt>' 1500249423Sdim // TODO: consider using push / pop instead of sub + store / add 1501249423Sdim if (Amount == 0) 1502249423Sdim return; 1503249423Sdim 1504249423Sdim // We need to keep the stack aligned properly. To do this, we round the 1505249423Sdim // amount of space needed for the outgoing arguments up to the next 1506249423Sdim // alignment boundary. 1507249423Sdim unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); 1508249423Sdim Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; 1509249423Sdim 1510249423Sdim MachineInstr *New = 0; 1511249423Sdim if (Opcode == TII.getCallFrameSetupOpcode()) { 1512249423Sdim New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), 1513249423Sdim StackPtr) 1514249423Sdim .addReg(StackPtr) 1515249423Sdim .addImm(Amount); 1516249423Sdim } else { 1517249423Sdim assert(Opcode == TII.getCallFrameDestroyOpcode()); 1518249423Sdim 1519249423Sdim // Factor out the amount the callee already popped. 1520249423Sdim Amount -= CalleeAmt; 1521249423Sdim 1522249423Sdim if (Amount) { 1523249423Sdim unsigned Opc = getADDriOpcode(IsLP64, Amount); 1524249423Sdim New = BuildMI(MF, DL, TII.get(Opc), StackPtr) 1525249423Sdim .addReg(StackPtr).addImm(Amount); 1526249423Sdim } 1527249423Sdim } 1528249423Sdim 1529249423Sdim if (New) { 1530249423Sdim // The EFLAGS implicit def is dead. 1531249423Sdim New->getOperand(3).setIsDead(); 1532249423Sdim 1533249423Sdim // Replace the pseudo instruction with a new instruction. 
1534249423Sdim MBB.insert(I, New); 1535249423Sdim } 1536249423Sdim 1537249423Sdim return; 1538249423Sdim } 1539249423Sdim 1540249423Sdim if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) { 1541249423Sdim // If we are performing frame pointer elimination and if the callee pops 1542249423Sdim // something off the stack pointer, add it back. We do this until we have 1543249423Sdim // more advanced stack pointer tracking ability. 1544249423Sdim unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt); 1545249423Sdim MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr) 1546249423Sdim .addReg(StackPtr).addImm(CalleeAmt); 1547249423Sdim 1548249423Sdim // The EFLAGS implicit def is dead. 1549249423Sdim New->getOperand(3).setIsDead(); 1550249423Sdim 1551249423Sdim // We are not tracking the stack pointer adjustment by the callee, so make 1552249423Sdim // sure we restore the stack pointer immediately after the call, there may 1553249423Sdim // be spill code inserted between the CALL and ADJCALLSTACKUP instructions. 1554249423Sdim MachineBasicBlock::iterator B = MBB.begin(); 1555249423Sdim while (I != B && !llvm::prior(I)->isCall()) 1556249423Sdim --I; 1557249423Sdim MBB.insert(I, New); 1558249423Sdim } 1559249423Sdim} 1560249423Sdim 1561