1263320SdimPull in r202930 from upstream llvm trunk (by Hans Wennborg): 2263320Sdim 3263320Sdim Check for dynamic allocas and inline asm that clobbers sp before building 4263320Sdim selection dag (PR19012) 5263320Sdim 6263320Sdim In X86SelectionDagInfo::EmitTargetCodeForMemcpy we check with MachineFrameInfo 7263320Sdim to make sure that ESI isn't used as a base pointer register before we choose to 8263320Sdim emit rep movs (which clobbers esi). 9263320Sdim 10263320Sdim The problem is that MachineFrameInfo wouldn't know about dynamic allocas or 11263320Sdim inline asm that clobbers the stack pointer until SelectionDAGBuilder has 12263320Sdim encountered them. 13263320Sdim 14263320Sdim This patch fixes the problem by checking for such things when building the 15263320Sdim FunctionLoweringInfo. 16263320Sdim 17263320Sdim Differential Revision: http://llvm-reviews.chandlerc.com/D2954 18263320Sdim 19263320SdimIntroduced here: http://svn.freebsd.org/changeset/base/263312 20263320Sdim 21263320SdimIndex: lib/CodeGen/MachineFunction.cpp 22263320Sdim=================================================================== 23263320Sdim--- lib/CodeGen/MachineFunction.cpp 24263320Sdim+++ lib/CodeGen/MachineFunction.cpp 25263320Sdim@@ -525,13 +525,14 @@ int MachineFrameInfo::CreateSpillStackObject(uint6 26263320Sdim /// variable sized object is created, whether or not the index returned is 27263320Sdim /// actually used. 28263320Sdim /// 29263320Sdim-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { 30263320Sdim+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, 31263320Sdim+ const AllocaInst *Alloca) { 32263320Sdim HasVarSizedObjects = true; 33263320Sdim Alignment = 34263320Sdim clampStackAlignment(!getFrameLowering()->isStackRealignable() || 35263320Sdim !RealignOption, 36263320Sdim Alignment, getFrameLowering()->getStackAlignment()); 37263320Sdim- Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); 38263320Sdim+ Objects.push_back(StackObject(0, Alignment, 0, false, false, true, Alloca)); 39263320Sdim ensureMaxAlignment(Alignment); 40263320Sdim return (int)Objects.size()-NumFixedObjects-1; 41263320Sdim } 42263320SdimIndex: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 43263320Sdim=================================================================== 44263320Sdim--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 45263320Sdim+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 46263320Sdim@@ -419,7 +419,7 @@ bool SelectionDAGISel::runOnMachineFunction(Machin 47263320Sdim SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); 48263320Sdim 49263320Sdim CurDAG->init(*MF, TTI, TLI); 50263320Sdim- FuncInfo->set(Fn, *MF); 51263320Sdim+ FuncInfo->set(Fn, *MF, CurDAG); 52263320Sdim 53263320Sdim if (UseMBPI && OptLevel != CodeGenOpt::None) 54263320Sdim FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>(); 55263320Sdim@@ -429,7 +429,6 @@ bool SelectionDAGISel::runOnMachineFunction(Machin 56263320Sdim SDB->init(GFI, *AA, LibInfo); 57263320Sdim 58263320Sdim MF->setHasInlineAsm(false); 59263320Sdim- MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(false); 60263320Sdim 61263320Sdim SelectAllBasicBlocks(Fn); 62263320Sdim 63263320SdimIndex: lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp 64263320Sdim=================================================================== 65263320Sdim--- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp 66263320Sdim+++ lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp 67263320Sdim@@ -33,6 +33,7 @@ 68263320Sdim #include "llvm/Support/ErrorHandling.h" 69263320Sdim #include "llvm/Support/MathExtras.h" 70263320Sdim #include "llvm/Target/TargetInstrInfo.h" 71263320Sdim+#include "llvm/Target/TargetFrameLowering.h" 72263320Sdim #include "llvm/Target/TargetLowering.h" 73263320Sdim #include "llvm/Target/TargetOptions.h" 74263320Sdim #include "llvm/Target/TargetRegisterInfo.h" 75263320Sdim@@ -55,7 +56,8 @@ static bool isUsedOutsideOfDefiningBlock(const Ins 76263320Sdim return false; 77263320Sdim } 78263320Sdim 79263320Sdim-void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { 80263320Sdim+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, 81263320Sdim+ SelectionDAG *DAG) { 82263320Sdim const TargetLowering *TLI = TM.getTargetLowering(); 83263320Sdim 84263320Sdim Fn = &fn; 85263320Sdim@@ -100,6 +102,43 @@ static bool isUsedOutsideOfDefiningBlock(const Ins 86263320Sdim for (; BB != EB; ++BB) 87263320Sdim for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); 88263320Sdim I != E; ++I) { 89263320Sdim+ // Look for dynamic allocas. 90263320Sdim+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { 91263320Sdim+ if (!AI->isStaticAlloca()) { 92263320Sdim+ unsigned Align = std::max( 93263320Sdim+ (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( 94263320Sdim+ AI->getAllocatedType()), 95263320Sdim+ AI->getAlignment()); 96263320Sdim+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); 97263320Sdim+ if (Align <= StackAlign) 98263320Sdim+ Align = 0; 99263320Sdim+ // Inform the Frame Information that we have variable-sized objects. 100263320Sdim+ MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI); 101263320Sdim+ } 102263320Sdim+ } 103263320Sdim+ 104263320Sdim+ // Look for inline asm that clobbers the SP register. 105263320Sdim+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 106263320Sdim+ ImmutableCallSite CS(I); 107263320Sdim+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(CS.getCalledValue())) { 108263320Sdim+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); 109263320Sdim+ std::vector<TargetLowering::AsmOperandInfo> Ops = 110263320Sdim+ TLI->ParseConstraints(CS); 111263320Sdim+ for (size_t I = 0, E = Ops.size(); I != E; ++I) { 112263320Sdim+ TargetLowering::AsmOperandInfo &Op = Ops[I]; 113263320Sdim+ if (Op.Type == InlineAsm::isClobber) { 114263320Sdim+ // Clobbers don't have SDValue operands, hence SDValue(). 115263320Sdim+ TLI->ComputeConstraintToUse(Op, SDValue(), DAG); 116263320Sdim+ std::pair<unsigned, const TargetRegisterClass*> PhysReg = 117263320Sdim+ TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, 118263320Sdim+ Op.ConstraintVT); 119263320Sdim+ if (PhysReg.first == SP) 120263320Sdim+ MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); 121263320Sdim+ } 122263320Sdim+ } 123263320Sdim+ } 124263320Sdim+ } 125263320Sdim+ 126263320Sdim // Mark values used outside their block as exported, by allocating 127263320Sdim // a virtual register for them. 128263320Sdim if (isUsedOutsideOfDefiningBlock(I)) 129263320SdimIndex: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 130263320Sdim=================================================================== 131263320Sdim--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 132263320Sdim+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 133263320Sdim@@ -860,10 +860,10 @@ void RegsForValue::AddInlineAsmOperands(unsigned C 134263320Sdim unsigned TheReg = Regs[Reg++]; 135263320Sdim Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); 136263320Sdim 137263320Sdim- // Notice if we clobbered the stack pointer. Yes, inline asm can do this. 138263320Sdim if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { 139263320Sdim- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 140263320Sdim- MFI->setHasInlineAsmWithSPAdjust(true); 141263320Sdim+ // If we clobbered the stack pointer, MFI should know about it. 142263320Sdim+ assert(DAG.getMachineFunction().getFrameInfo()-> 143263320Sdim+ hasInlineAsmWithSPAdjust()); 144263320Sdim } 145263320Sdim } 146263320Sdim } 147263320Sdim@@ -3378,9 +3378,7 @@ void SelectionDAGBuilder::visitAlloca(const Alloca 148263320Sdim setValue(&I, DSA); 149263320Sdim DAG.setRoot(DSA.getValue(1)); 150263320Sdim 151263320Sdim- // Inform the Frame Information that we have just allocated a variable-sized 152263320Sdim- // object. 153263320Sdim- FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1); 154263320Sdim+ assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); 155263320Sdim } 156263320Sdim 157263320Sdim void SelectionDAGBuilder::visitLoad(const LoadInst &I) { 158263320SdimIndex: test/CodeGen/X86/stack-align-memcpy.ll 159263320Sdim=================================================================== 160263320Sdim--- test/CodeGen/X86/stack-align-memcpy.ll 161263320Sdim+++ test/CodeGen/X86/stack-align-memcpy.ll 162263320Sdim@@ -2,6 +2,9 @@ 163263320Sdim 164263320Sdim %struct.foo = type { [88 x i8] } 165263320Sdim 166263320Sdim+declare void @bar(i8* nocapture, %struct.foo* align 4 byval) nounwind 167263320Sdim+declare void @baz(i8*) nounwind 168263320Sdim+ 169263320Sdim ; PR15249 170263320Sdim ; We can't use rep;movsl here because it clobbers the base pointer in %esi. 171263320Sdim define void @test1(%struct.foo* nocapture %x, i32 %y) nounwind { 172263320Sdim@@ -15,4 +18,26 @@ define void @test1(%struct.foo* nocapture %x, i32 173263320Sdim ; CHECK-NOT: rep;movsl 174263320Sdim } 175263320Sdim 176263320Sdim-declare void @bar(i8* nocapture, %struct.foo* align 4 byval) nounwind 177263320Sdim+; PR19012 178263320Sdim+; Also don't clobber %esi if the dynamic alloca comes after the memcpy. 179263320Sdim+define void @test2(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { 180263320Sdim+ call void @bar(i8* %z, %struct.foo* align 4 byval %x) 181263320Sdim+ %dynalloc = alloca i8, i32 %y, align 1 182263320Sdim+ call void @baz(i8* %dynalloc) 183263320Sdim+ ret void 184263320Sdim+ 185263320Sdim+; CHECK-LABEL: test2: 186263320Sdim+; CHECK: movl %esp, %esi 187263320Sdim+; CHECK-NOT: rep;movsl 188263320Sdim+} 189263320Sdim+ 190263320Sdim+; Check that we do use rep movs if we make the alloca static. 191263320Sdim+define void @test3(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { 192263320Sdim+ call void @bar(i8* %z, %struct.foo* align 4 byval %x) 193263320Sdim+ %statalloc = alloca i8, i32 8, align 1 194263320Sdim+ call void @baz(i8* %statalloc) 195263320Sdim+ ret void 196263320Sdim+ 197263320Sdim+; CHECK-LABEL: test3: 198263320Sdim+; CHECK: rep;movsl 199263320Sdim+} 200263320SdimIndex: test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll 201263320Sdim=================================================================== 202263320Sdim--- test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll 203263320Sdim+++ test/CodeGen/X86/inline-asm-sp-clobber-memcpy.ll 204263320Sdim@@ -0,0 +1,17 @@ 205263320Sdim+; RUN: llc < %s -force-align-stack -mtriple i386-apple-darwin -mcpu=i486 | FileCheck %s 206263320Sdim+ 207263320Sdim+%struct.foo = type { [88 x i8] } 208263320Sdim+ 209263320Sdim+declare void @bar(i8* nocapture, %struct.foo* align 4 byval) nounwind 210263320Sdim+ 211263320Sdim+; PR19012 212263320Sdim+; Don't clobber %esi if we have inline asm that clobbers %esp. 213263320Sdim+define void @test1(%struct.foo* nocapture %x, i32 %y, i8* %z) nounwind { 214263320Sdim+ call void @bar(i8* %z, %struct.foo* align 4 byval %x) 215263320Sdim+ call void asm sideeffect inteldialect "xor esp, esp", "=*m,~{flags},~{esp},~{esp},~{dirflag},~{fpsr},~{flags}"(i8* %z) 216263320Sdim+ ret void 217263320Sdim+ 218263320Sdim+; CHECK-LABEL: test1: 219263320Sdim+; CHECK: movl %esp, %esi 220263320Sdim+; CHECK-NOT: rep;movsl 221263320Sdim+} 222263320SdimIndex: include/llvm/CodeGen/FunctionLoweringInfo.h 223263320Sdim=================================================================== 224263320Sdim--- include/llvm/CodeGen/FunctionLoweringInfo.h 225263320Sdim+++ include/llvm/CodeGen/FunctionLoweringInfo.h 226263320Sdim@@ -41,6 +41,7 @@ class MachineBasicBlock; 227263320Sdim class MachineFunction; 228263320Sdim class MachineModuleInfo; 229263320Sdim class MachineRegisterInfo; 230263320Sdim+class SelectionDAG; 231263320Sdim class TargetLowering; 232263320Sdim class Value; 233263320Sdim 234263320Sdim@@ -125,7 +126,7 @@ class FunctionLoweringInfo { 235263320Sdim /// set - Initialize this FunctionLoweringInfo with the given Function 236263320Sdim /// and its associated MachineFunction. 237263320Sdim /// 238263320Sdim- void set(const Function &Fn, MachineFunction &MF); 239263320Sdim+ void set(const Function &Fn, MachineFunction &MF, SelectionDAG *DAG); 240263320Sdim 241263320Sdim /// clear - Clear out all the function-specific state. This returns this 242263320Sdim /// FunctionLoweringInfo to an empty state, ready to be used for a 243263320SdimIndex: include/llvm/CodeGen/MachineFrameInfo.h 244263320Sdim=================================================================== 245263320Sdim--- include/llvm/CodeGen/MachineFrameInfo.h 246263320Sdim+++ include/llvm/CodeGen/MachineFrameInfo.h 247263320Sdim@@ -244,6 +244,7 @@ class MachineFrameInfo { 248263320Sdim LocalFrameSize = 0; 249263320Sdim LocalFrameMaxAlign = 0; 250263320Sdim UseLocalStackAllocationBlock = false; 251263320Sdim+ HasInlineAsmWithSPAdjust = false; 252263320Sdim } 253263320Sdim 254263320Sdim /// hasStackObjects - Return true if there are any stack objects in this 255263320Sdim@@ -529,7 +530,7 @@ class MachineFrameInfo { 256263320Sdim /// variable sized object is created, whether or not the index returned is 257263320Sdim /// actually used. 258263320Sdim /// 259263320Sdim- int CreateVariableSizedObject(unsigned Alignment); 260263320Sdim+ int CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca); 261263320Sdim 262263320Sdim /// getCalleeSavedInfo - Returns a reference to call saved info vector for the 263263320Sdim /// current function. 264