1207618Srdivacky//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===// 2207618Srdivacky// 3207618Srdivacky// The LLVM Compiler Infrastructure 4207618Srdivacky// 5207618Srdivacky// This file is distributed under the University of Illinois Open Source 6207618Srdivacky// License. See LICENSE.TXT for details. 7207618Srdivacky// 8207618Srdivacky//===----------------------------------------------------------------------===// 9207618Srdivacky// 10207618Srdivacky// This pass statically checks for common and easily-identified constructs 11207618Srdivacky// which produce undefined or likely unintended behavior in LLVM IR. 12207618Srdivacky// 13207618Srdivacky// It is not a guarantee of correctness, in two ways. First, it isn't 14207618Srdivacky// comprehensive. There are checks which could be done statically which are 15207618Srdivacky// not yet implemented. Some of these are indicated by TODO comments, but 16207618Srdivacky// those aren't comprehensive either. Second, many conditions cannot be 17207618Srdivacky// checked statically. This pass does no dynamic instrumentation, so it 18207618Srdivacky// can't check for all possible problems. 19207618Srdivacky// 20207618Srdivacky// Another limitation is that it assumes all code will be executed. A store 21207618Srdivacky// through a null pointer in a basic block which is never reached is harmless, 22210299Sed// but this pass will warn about it anyway. This is the main reason why most 23210299Sed// of these checks live here instead of in the Verifier pass. 24207618Srdivacky// 25207618Srdivacky// Optimization passes may make conditions that this pass checks for more or 26207618Srdivacky// less obvious. If an optimization pass appears to be introducing a warning, 27207618Srdivacky// it may be that the optimization pass is merely exposing an existing 28207618Srdivacky// condition in the code. 29207618Srdivacky// 30207618Srdivacky// This code may be run before instcombine. In many cases, instcombine checks 31207618Srdivacky// for the same kinds of things and turns instructions with undefined behavior 32207618Srdivacky// into unreachable (or equivalent). Because of this, this pass makes some 33207618Srdivacky// effort to look through bitcasts and so on. 34207618Srdivacky// 35207618Srdivacky//===----------------------------------------------------------------------===// 36207618Srdivacky 37249423Sdim#include "llvm/Analysis/Lint.h" 38249423Sdim#include "llvm/ADT/STLExtras.h" 39207618Srdivacky#include "llvm/Analysis/AliasAnalysis.h" 40210299Sed#include "llvm/Analysis/ConstantFolding.h" 41210299Sed#include "llvm/Analysis/Dominators.h" 42249423Sdim#include "llvm/Analysis/InstructionSimplify.h" 43210299Sed#include "llvm/Analysis/Loads.h" 44249423Sdim#include "llvm/Analysis/Passes.h" 45207618Srdivacky#include "llvm/Analysis/ValueTracking.h" 46207618Srdivacky#include "llvm/Assembly/Writer.h" 47249423Sdim#include "llvm/IR/DataLayout.h" 48249423Sdim#include "llvm/IR/Function.h" 49249423Sdim#include "llvm/IR/IntrinsicInst.h" 50249423Sdim#include "llvm/InstVisitor.h" 51207618Srdivacky#include "llvm/Pass.h" 52207618Srdivacky#include "llvm/PassManager.h" 53207618Srdivacky#include "llvm/Support/CallSite.h" 54207618Srdivacky#include "llvm/Support/Debug.h" 55207618Srdivacky#include "llvm/Support/raw_ostream.h" 56249423Sdim#include "llvm/Target/TargetLibraryInfo.h" 57207618Srdivackyusing namespace llvm; 58207618Srdivacky 59207618Srdivackynamespace { 60207618Srdivacky namespace MemRef { 61207618Srdivacky static unsigned Read = 1; 62207618Srdivacky static unsigned Write = 2; 63207618Srdivacky static unsigned Callee = 4; 64207618Srdivacky static unsigned Branchee = 8; 65207618Srdivacky } 66207618Srdivacky 67207618Srdivacky class Lint : public FunctionPass, public InstVisitor<Lint> { 68207618Srdivacky friend class InstVisitor<Lint>; 69207618Srdivacky 70207618Srdivacky void visitFunction(Function &F); 71207618Srdivacky 72207618Srdivacky void visitCallSite(CallSite CS); 73210299Sed void visitMemoryReference(Instruction &I, Value *Ptr, 74218893Sdim uint64_t Size, unsigned Align, 75226633Sdim Type *Ty, unsigned Flags); 76207618Srdivacky 77207618Srdivacky void visitCallInst(CallInst &I); 78207618Srdivacky void visitInvokeInst(InvokeInst &I); 79207618Srdivacky void visitReturnInst(ReturnInst &I); 80207618Srdivacky void visitLoadInst(LoadInst &I); 81207618Srdivacky void visitStoreInst(StoreInst &I); 82207618Srdivacky void visitXor(BinaryOperator &I); 83207618Srdivacky void visitSub(BinaryOperator &I); 84207618Srdivacky void visitLShr(BinaryOperator &I); 85207618Srdivacky void visitAShr(BinaryOperator &I); 86207618Srdivacky void visitShl(BinaryOperator &I); 87207618Srdivacky void visitSDiv(BinaryOperator &I); 88207618Srdivacky void visitUDiv(BinaryOperator &I); 89207618Srdivacky void visitSRem(BinaryOperator &I); 90207618Srdivacky void visitURem(BinaryOperator &I); 91207618Srdivacky void visitAllocaInst(AllocaInst &I); 92207618Srdivacky void visitVAArgInst(VAArgInst &I); 93207618Srdivacky void visitIndirectBrInst(IndirectBrInst &I); 94207618Srdivacky void visitExtractElementInst(ExtractElementInst &I); 95207618Srdivacky void visitInsertElementInst(InsertElementInst &I); 96207618Srdivacky void visitUnreachableInst(UnreachableInst &I); 97207618Srdivacky 98210299Sed Value *findValue(Value *V, bool OffsetOk) const; 99210299Sed Value *findValueImpl(Value *V, bool OffsetOk, 100210299Sed SmallPtrSet<Value *, 4> &Visited) const; 101210299Sed 102207618Srdivacky public: 103207618Srdivacky Module *Mod; 104207618Srdivacky AliasAnalysis *AA; 105210299Sed DominatorTree *DT; 106243830Sdim DataLayout *TD; 107234353Sdim TargetLibraryInfo *TLI; 108207618Srdivacky 109207618Srdivacky std::string Messages; 110207618Srdivacky raw_string_ostream MessagesStr; 111207618Srdivacky 112207618Srdivacky static char ID; // Pass identification, replacement for typeid 113218893Sdim Lint() : FunctionPass(ID), MessagesStr(Messages) { 114218893Sdim initializeLintPass(*PassRegistry::getPassRegistry()); 115218893Sdim } 116207618Srdivacky 117207618Srdivacky virtual bool runOnFunction(Function &F); 118207618Srdivacky 119207618Srdivacky virtual void getAnalysisUsage(AnalysisUsage &AU) const { 120207618Srdivacky AU.setPreservesAll(); 121207618Srdivacky AU.addRequired<AliasAnalysis>(); 122234353Sdim AU.addRequired<TargetLibraryInfo>(); 123210299Sed AU.addRequired<DominatorTree>(); 124207618Srdivacky } 125207618Srdivacky virtual void print(raw_ostream &O, const Module *M) const {} 126207618Srdivacky 127207618Srdivacky void WriteValue(const Value *V) { 128207618Srdivacky if (!V) return; 129207618Srdivacky if (isa<Instruction>(V)) { 130207618Srdivacky MessagesStr << *V << '\n'; 131207618Srdivacky } else { 132207618Srdivacky WriteAsOperand(MessagesStr, V, true, Mod); 133207618Srdivacky MessagesStr << '\n'; 134207618Srdivacky } 135207618Srdivacky } 136207618Srdivacky 137207618Srdivacky // CheckFailed - A check failed, so print out the condition and the message 138207618Srdivacky // that failed. This provides a nice place to put a breakpoint if you want 139207618Srdivacky // to see why something is not correct. 140207618Srdivacky void CheckFailed(const Twine &Message, 141207618Srdivacky const Value *V1 = 0, const Value *V2 = 0, 142207618Srdivacky const Value *V3 = 0, const Value *V4 = 0) { 143207618Srdivacky MessagesStr << Message.str() << "\n"; 144207618Srdivacky WriteValue(V1); 145207618Srdivacky WriteValue(V2); 146207618Srdivacky WriteValue(V3); 147207618Srdivacky WriteValue(V4); 148207618Srdivacky } 149207618Srdivacky }; 150207618Srdivacky} 151207618Srdivacky 152207618Srdivackychar Lint::ID = 0; 153218893SdimINITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", 154218893Sdim false, true) 155234353SdimINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 156218893SdimINITIALIZE_PASS_DEPENDENCY(DominatorTree) 157218893SdimINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 158218893SdimINITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", 159218893Sdim false, true) 160207618Srdivacky 161207618Srdivacky// Assert - We know that cond should be true, if not print an error message. 162207618Srdivacky#define Assert(C, M) \ 163207618Srdivacky do { if (!(C)) { CheckFailed(M); return; } } while (0) 164207618Srdivacky#define Assert1(C, M, V1) \ 165207618Srdivacky do { if (!(C)) { CheckFailed(M, V1); return; } } while (0) 166207618Srdivacky#define Assert2(C, M, V1, V2) \ 167207618Srdivacky do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0) 168207618Srdivacky#define Assert3(C, M, V1, V2, V3) \ 169207618Srdivacky do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0) 170207618Srdivacky#define Assert4(C, M, V1, V2, V3, V4) \ 171207618Srdivacky do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) 172207618Srdivacky 173207618Srdivacky// Lint::run - This is the main Analysis entry point for a 174207618Srdivacky// function. 175207618Srdivacky// 176207618Srdivackybool Lint::runOnFunction(Function &F) { 177207618Srdivacky Mod = F.getParent(); 178207618Srdivacky AA = &getAnalysis<AliasAnalysis>(); 179210299Sed DT = &getAnalysis<DominatorTree>(); 180243830Sdim TD = getAnalysisIfAvailable<DataLayout>(); 181234353Sdim TLI = &getAnalysis<TargetLibraryInfo>(); 182207618Srdivacky visit(F); 183207618Srdivacky dbgs() << MessagesStr.str(); 184208599Srdivacky Messages.clear(); 185207618Srdivacky return false; 186207618Srdivacky} 187207618Srdivacky 188207618Srdivackyvoid Lint::visitFunction(Function &F) { 189207618Srdivacky // This isn't undefined behavior, it's just a little unusual, and it's a 190207618Srdivacky // fairly common mistake to neglect to name a function. 191207618Srdivacky Assert1(F.hasName() || F.hasLocalLinkage(), 192207618Srdivacky "Unusual: Unnamed function with non-local linkage", &F); 193210299Sed 194210299Sed // TODO: Check for irreducible control flow. 195207618Srdivacky} 196207618Srdivacky 197207618Srdivackyvoid Lint::visitCallSite(CallSite CS) { 198207618Srdivacky Instruction &I = *CS.getInstruction(); 199207618Srdivacky Value *Callee = CS.getCalledValue(); 200207618Srdivacky 201218893Sdim visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, 202218893Sdim 0, 0, MemRef::Callee); 203207618Srdivacky 204210299Sed if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { 205207618Srdivacky Assert1(CS.getCallingConv() == F->getCallingConv(), 206207618Srdivacky "Undefined behavior: Caller and callee calling convention differ", 207207618Srdivacky &I); 208207618Srdivacky 209226633Sdim FunctionType *FT = F->getFunctionType(); 210263508Sdim unsigned NumActualArgs = CS.arg_size(); 211207618Srdivacky 212207618Srdivacky Assert1(FT->isVarArg() ? 213207618Srdivacky FT->getNumParams() <= NumActualArgs : 214207618Srdivacky FT->getNumParams() == NumActualArgs, 215207618Srdivacky "Undefined behavior: Call argument count mismatches callee " 216207618Srdivacky "argument count", &I); 217207618Srdivacky 218210299Sed Assert1(FT->getReturnType() == I.getType(), 219210299Sed "Undefined behavior: Call return type mismatches " 220210299Sed "callee return type", &I); 221207618Srdivacky 222210299Sed // Check argument types (in case the callee was casted) and attributes. 223210299Sed // TODO: Verify that caller and callee attributes are compatible. 224210299Sed Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); 225210299Sed CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); 226210299Sed for (; AI != AE; ++AI) { 227210299Sed Value *Actual = *AI; 228210299Sed if (PI != PE) { 229210299Sed Argument *Formal = PI++; 230210299Sed Assert1(Formal->getType() == Actual->getType(), 231210299Sed "Undefined behavior: Call argument type mismatches " 232210299Sed "callee parameter type", &I); 233207618Srdivacky 234218893Sdim // Check that noalias arguments don't alias other arguments. This is 235218893Sdim // not fully precise because we don't know the sizes of the dereferenced 236218893Sdim // memory regions. 237210299Sed if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) 238218893Sdim for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) 239218893Sdim if (AI != BI && (*BI)->getType()->isPointerTy()) { 240218893Sdim AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI); 241218893Sdim Assert1(Result != AliasAnalysis::MustAlias && 242218893Sdim Result != AliasAnalysis::PartialAlias, 243218893Sdim "Unusual: noalias argument aliases another argument", &I); 244218893Sdim } 245210299Sed 246210299Sed // Check that an sret argument points to valid memory. 247210299Sed if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { 248226633Sdim Type *Ty = 249210299Sed cast<PointerType>(Formal->getType())->getElementType(); 250210299Sed visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), 251210299Sed TD ? TD->getABITypeAlignment(Ty) : 0, 252210299Sed Ty, MemRef::Read | MemRef::Write); 253210299Sed } 254210299Sed } 255210299Sed } 256207618Srdivacky } 257207618Srdivacky 258208599Srdivacky if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) 259208599Srdivacky for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); 260208599Srdivacky AI != AE; ++AI) { 261210299Sed Value *Obj = findValue(*AI, /*OffsetOk=*/true); 262210299Sed Assert1(!isa<AllocaInst>(Obj), 263208599Srdivacky "Undefined behavior: Call with \"tail\" keyword references " 264210299Sed "alloca", &I); 265208599Srdivacky } 266207618Srdivacky 267208599Srdivacky 268207618Srdivacky if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) 269207618Srdivacky switch (II->getIntrinsicID()) { 270207618Srdivacky default: break; 271207618Srdivacky 272207618Srdivacky // TODO: Check more intrinsics 273207618Srdivacky 274207618Srdivacky case Intrinsic::memcpy: { 275207618Srdivacky MemCpyInst *MCI = cast<MemCpyInst>(&I); 276210299Sed // TODO: If the size is known, use it. 277218893Sdim visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, 278218893Sdim MCI->getAlignment(), 0, 279207618Srdivacky MemRef::Write); 280218893Sdim visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, 281218893Sdim MCI->getAlignment(), 0, 282207618Srdivacky MemRef::Read); 283207618Srdivacky 284207618Srdivacky // Check that the memcpy arguments don't overlap. The AliasAnalysis API 285207618Srdivacky // isn't expressive enough for what we really want to do. Known partial 286207618Srdivacky // overlap is not distinguished from the case where nothing is known. 287218893Sdim uint64_t Size = 0; 288207618Srdivacky if (const ConstantInt *Len = 289210299Sed dyn_cast<ConstantInt>(findValue(MCI->getLength(), 290210299Sed /*OffsetOk=*/false))) 291207618Srdivacky if (Len->getValue().isIntN(32)) 292207618Srdivacky Size = Len->getValue().getZExtValue(); 293207618Srdivacky Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != 294207618Srdivacky AliasAnalysis::MustAlias, 295207618Srdivacky "Undefined behavior: memcpy source and destination overlap", &I); 296207618Srdivacky break; 297207618Srdivacky } 298207618Srdivacky case Intrinsic::memmove: { 299207618Srdivacky MemMoveInst *MMI = cast<MemMoveInst>(&I); 300210299Sed // TODO: If the size is known, use it. 301218893Sdim visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, 302218893Sdim MMI->getAlignment(), 0, 303207618Srdivacky MemRef::Write); 304218893Sdim visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, 305218893Sdim MMI->getAlignment(), 0, 306207618Srdivacky MemRef::Read); 307207618Srdivacky break; 308207618Srdivacky } 309207618Srdivacky case Intrinsic::memset: { 310207618Srdivacky MemSetInst *MSI = cast<MemSetInst>(&I); 311210299Sed // TODO: If the size is known, use it. 312218893Sdim visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, 313218893Sdim MSI->getAlignment(), 0, 314207618Srdivacky MemRef::Write); 315207618Srdivacky break; 316207618Srdivacky } 317207618Srdivacky 318207618Srdivacky case Intrinsic::vastart: 319207618Srdivacky Assert1(I.getParent()->getParent()->isVarArg(), 320207618Srdivacky "Undefined behavior: va_start called in a non-varargs function", 321207618Srdivacky &I); 322207618Srdivacky 323218893Sdim visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 324218893Sdim 0, 0, MemRef::Read | MemRef::Write); 325207618Srdivacky break; 326207618Srdivacky case Intrinsic::vacopy: 327218893Sdim visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 328218893Sdim 0, 0, MemRef::Write); 329218893Sdim visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, 330218893Sdim 0, 0, MemRef::Read); 331207618Srdivacky break; 332207618Srdivacky case Intrinsic::vaend: 333218893Sdim visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 334218893Sdim 0, 0, MemRef::Read | MemRef::Write); 335207618Srdivacky break; 336207618Srdivacky 337207618Srdivacky case Intrinsic::stackrestore: 338208599Srdivacky // Stackrestore doesn't read or write memory, but it sets the 339208599Srdivacky // stack pointer, which the compiler may read from or write to 340208599Srdivacky // at any time, so check it for both readability and writeability. 341218893Sdim visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, 342218893Sdim 0, 0, MemRef::Read | MemRef::Write); 343207618Srdivacky break; 344207618Srdivacky } 345207618Srdivacky} 346207618Srdivacky 347207618Srdivackyvoid Lint::visitCallInst(CallInst &I) { 348207618Srdivacky return visitCallSite(&I); 349207618Srdivacky} 350207618Srdivacky 351207618Srdivackyvoid Lint::visitInvokeInst(InvokeInst &I) { 352207618Srdivacky return visitCallSite(&I); 353207618Srdivacky} 354207618Srdivacky 355207618Srdivackyvoid Lint::visitReturnInst(ReturnInst &I) { 356207618Srdivacky Function *F = I.getParent()->getParent(); 357207618Srdivacky Assert1(!F->doesNotReturn(), 358207618Srdivacky "Unusual: Return statement in function with noreturn attribute", 359207618Srdivacky &I); 360210299Sed 361210299Sed if (Value *V = I.getReturnValue()) { 362210299Sed Value *Obj = findValue(V, /*OffsetOk=*/true); 363210299Sed Assert1(!isa<AllocaInst>(Obj), 364210299Sed "Unusual: Returning alloca value", &I); 365210299Sed } 366207618Srdivacky} 367207618Srdivacky 368210299Sed// TODO: Check that the reference is in bounds. 369210299Sed// TODO: Check readnone/readonly function attributes. 370207618Srdivackyvoid Lint::visitMemoryReference(Instruction &I, 371218893Sdim Value *Ptr, uint64_t Size, unsigned Align, 372226633Sdim Type *Ty, unsigned Flags) { 373210299Sed // If no memory is being referenced, it doesn't matter if the pointer 374210299Sed // is valid. 375210299Sed if (Size == 0) 376210299Sed return; 377210299Sed 378210299Sed Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); 379207618Srdivacky Assert1(!isa<ConstantPointerNull>(UnderlyingObject), 380207618Srdivacky "Undefined behavior: Null pointer dereference", &I); 381207618Srdivacky Assert1(!isa<UndefValue>(UnderlyingObject), 382207618Srdivacky "Undefined behavior: Undef pointer dereference", &I); 383210299Sed Assert1(!isa<ConstantInt>(UnderlyingObject) || 384210299Sed !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), 385210299Sed "Unusual: All-ones pointer dereference", &I); 386210299Sed Assert1(!isa<ConstantInt>(UnderlyingObject) || 387210299Sed !cast<ConstantInt>(UnderlyingObject)->isOne(), 388210299Sed "Unusual: Address one pointer dereference", &I); 389207618Srdivacky 390207618Srdivacky if (Flags & MemRef::Write) { 391207618Srdivacky if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject)) 392207618Srdivacky Assert1(!GV->isConstant(), 393207618Srdivacky "Undefined behavior: Write to read-only memory", &I); 394207618Srdivacky Assert1(!isa<Function>(UnderlyingObject) && 395207618Srdivacky !isa<BlockAddress>(UnderlyingObject), 396207618Srdivacky "Undefined behavior: Write to text section", &I); 397207618Srdivacky } 398207618Srdivacky if (Flags & MemRef::Read) { 399207618Srdivacky Assert1(!isa<Function>(UnderlyingObject), 400207618Srdivacky "Unusual: Load from function body", &I); 401207618Srdivacky Assert1(!isa<BlockAddress>(UnderlyingObject), 402207618Srdivacky "Undefined behavior: Load from block address", &I); 403207618Srdivacky } 404207618Srdivacky if (Flags & MemRef::Callee) { 405207618Srdivacky Assert1(!isa<BlockAddress>(UnderlyingObject), 406207618Srdivacky "Undefined behavior: Call to block address", &I); 407207618Srdivacky } 408207618Srdivacky if (Flags & MemRef::Branchee) { 409207618Srdivacky Assert1(!isa<Constant>(UnderlyingObject) || 410207618Srdivacky isa<BlockAddress>(UnderlyingObject), 411207618Srdivacky "Undefined behavior: Branch to non-blockaddress", &I); 412207618Srdivacky } 413207618Srdivacky 414243830Sdim // Check for buffer overflows and misalignment. 415249423Sdim // Only handles memory references that read/write something simple like an 416249423Sdim // alloca instruction or a global variable. 417249423Sdim int64_t Offset = 0; 418249423Sdim if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { 419249423Sdim // OK, so the access is to a constant offset from Ptr. Check that Ptr is 420249423Sdim // something we can handle and if so extract the size of this base object 421249423Sdim // along with its alignment. 422249423Sdim uint64_t BaseSize = AliasAnalysis::UnknownSize; 423249423Sdim unsigned BaseAlign = 0; 424207618Srdivacky 425249423Sdim if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { 426249423Sdim Type *ATy = AI->getAllocatedType(); 427249423Sdim if (TD && !AI->isArrayAllocation() && ATy->isSized()) 428249423Sdim BaseSize = TD->getTypeAllocSize(ATy); 429249423Sdim BaseAlign = AI->getAlignment(); 430249423Sdim if (TD && BaseAlign == 0 && ATy->isSized()) 431249423Sdim BaseAlign = TD->getABITypeAlignment(ATy); 432249423Sdim } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { 433249423Sdim // If the global may be defined differently in another compilation unit 434249423Sdim // then don't warn about funky memory accesses. 435249423Sdim if (GV->hasDefinitiveInitializer()) { 436249423Sdim Type *GTy = GV->getType()->getElementType(); 437249423Sdim if (TD && GTy->isSized()) 438249423Sdim BaseSize = TD->getTypeAllocSize(GTy); 439249423Sdim BaseAlign = GV->getAlignment(); 440249423Sdim if (TD && BaseAlign == 0 && GTy->isSized()) 441249423Sdim BaseAlign = TD->getABITypeAlignment(GTy); 442243830Sdim } 443249423Sdim } 444243830Sdim 445249423Sdim // Accesses from before the start or after the end of the object are not 446249423Sdim // defined. 447249423Sdim Assert1(Size == AliasAnalysis::UnknownSize || 448249423Sdim BaseSize == AliasAnalysis::UnknownSize || 449249423Sdim (Offset >= 0 && Offset + Size <= BaseSize), 450249423Sdim "Undefined behavior: Buffer overflow", &I); 451243830Sdim 452249423Sdim // Accesses that say that the memory is more aligned than it is are not 453249423Sdim // defined. 454249423Sdim if (TD && Align == 0 && Ty && Ty->isSized()) 455249423Sdim Align = TD->getABITypeAlignment(Ty); 456249423Sdim Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), 457249423Sdim "Undefined behavior: Memory reference address is misaligned", &I); 458207618Srdivacky } 459207618Srdivacky} 460207618Srdivacky 461207618Srdivackyvoid Lint::visitLoadInst(LoadInst &I) { 462210299Sed visitMemoryReference(I, I.getPointerOperand(), 463210299Sed AA->getTypeStoreSize(I.getType()), I.getAlignment(), 464210299Sed I.getType(), MemRef::Read); 465207618Srdivacky} 466207618Srdivacky 467207618Srdivackyvoid Lint::visitStoreInst(StoreInst &I) { 468210299Sed visitMemoryReference(I, I.getPointerOperand(), 469210299Sed AA->getTypeStoreSize(I.getOperand(0)->getType()), 470210299Sed I.getAlignment(), 471210299Sed I.getOperand(0)->getType(), MemRef::Write); 472207618Srdivacky} 473207618Srdivacky 474207618Srdivackyvoid Lint::visitXor(BinaryOperator &I) { 475207618Srdivacky Assert1(!isa<UndefValue>(I.getOperand(0)) || 476207618Srdivacky !isa<UndefValue>(I.getOperand(1)), 477207618Srdivacky "Undefined result: xor(undef, undef)", &I); 478207618Srdivacky} 479207618Srdivacky 480207618Srdivackyvoid Lint::visitSub(BinaryOperator &I) { 481207618Srdivacky Assert1(!isa<UndefValue>(I.getOperand(0)) || 482207618Srdivacky !isa<UndefValue>(I.getOperand(1)), 483207618Srdivacky "Undefined result: sub(undef, undef)", &I); 484207618Srdivacky} 485207618Srdivacky 486207618Srdivackyvoid Lint::visitLShr(BinaryOperator &I) { 487207618Srdivacky if (ConstantInt *CI = 488210299Sed dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 489207618Srdivacky Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 490207618Srdivacky "Undefined result: Shift count out of range", &I); 491207618Srdivacky} 492207618Srdivacky 493207618Srdivackyvoid Lint::visitAShr(BinaryOperator &I) { 494207618Srdivacky if (ConstantInt *CI = 495210299Sed dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 496207618Srdivacky Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 497207618Srdivacky "Undefined result: Shift count out of range", &I); 498207618Srdivacky} 499207618Srdivacky 500207618Srdivackyvoid Lint::visitShl(BinaryOperator &I) { 501207618Srdivacky if (ConstantInt *CI = 502210299Sed dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false))) 503207618Srdivacky Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()), 504207618Srdivacky "Undefined result: Shift count out of range", &I); 505207618Srdivacky} 506207618Srdivacky 507263508Sdimstatic bool isZero(Value *V, DataLayout *DL) { 508207618Srdivacky // Assume undef could be zero. 509263508Sdim if (isa<UndefValue>(V)) 510263508Sdim return true; 511207618Srdivacky 512263508Sdim VectorType *VecTy = dyn_cast<VectorType>(V->getType()); 513263508Sdim if (!VecTy) { 514263508Sdim unsigned BitWidth = V->getType()->getIntegerBitWidth(); 515263508Sdim APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 516263508Sdim ComputeMaskedBits(V, KnownZero, KnownOne, DL); 517263508Sdim return KnownZero.isAllOnesValue(); 518263508Sdim } 519263508Sdim 520263508Sdim // Per-component check doesn't work with zeroinitializer 521263508Sdim Constant *C = dyn_cast<Constant>(V); 522263508Sdim if (!C) 523263508Sdim return false; 524263508Sdim 525263508Sdim if (C->isZeroValue()) 526263508Sdim return true; 527263508Sdim 528263508Sdim // For a vector, KnownZero will only be true if all values are zero, so check 529263508Sdim // this per component 530263508Sdim unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); 531263508Sdim for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { 532263508Sdim Constant *Elem = C->getAggregateElement(I); 533263508Sdim if (isa<UndefValue>(Elem)) 534263508Sdim return true; 535263508Sdim 536263508Sdim APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); 537263508Sdim ComputeMaskedBits(Elem, KnownZero, KnownOne, DL); 538263508Sdim if (KnownZero.isAllOnesValue()) 539263508Sdim return true; 540263508Sdim } 541263508Sdim 542263508Sdim return false; 543207618Srdivacky} 544207618Srdivacky 545207618Srdivackyvoid Lint::visitSDiv(BinaryOperator &I) { 546207618Srdivacky Assert1(!isZero(I.getOperand(1), TD), 547207618Srdivacky "Undefined behavior: Division by zero", &I); 548207618Srdivacky} 549207618Srdivacky 550207618Srdivackyvoid Lint::visitUDiv(BinaryOperator &I) { 551207618Srdivacky Assert1(!isZero(I.getOperand(1), TD), 552207618Srdivacky "Undefined behavior: Division by zero", &I); 553207618Srdivacky} 554207618Srdivacky 555207618Srdivackyvoid Lint::visitSRem(BinaryOperator &I) { 556207618Srdivacky Assert1(!isZero(I.getOperand(1), TD), 557207618Srdivacky "Undefined behavior: Division by zero", &I); 558207618Srdivacky} 559207618Srdivacky 560207618Srdivackyvoid Lint::visitURem(BinaryOperator &I) { 561207618Srdivacky Assert1(!isZero(I.getOperand(1), TD), 562207618Srdivacky "Undefined behavior: Division by zero", &I); 563207618Srdivacky} 564207618Srdivacky 565207618Srdivackyvoid Lint::visitAllocaInst(AllocaInst &I) { 566207618Srdivacky if (isa<ConstantInt>(I.getArraySize())) 567207618Srdivacky // This isn't undefined behavior, it's just an obvious pessimization. 568207618Srdivacky Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), 569207618Srdivacky "Pessimization: Static alloca outside of entry block", &I); 570210299Sed 571210299Sed // TODO: Check for an unusual size (MSB set?) 572207618Srdivacky} 573207618Srdivacky 574207618Srdivackyvoid Lint::visitVAArgInst(VAArgInst &I) { 575218893Sdim visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0, 576207618Srdivacky MemRef::Read | MemRef::Write); 577207618Srdivacky} 578207618Srdivacky 579207618Srdivackyvoid Lint::visitIndirectBrInst(IndirectBrInst &I) { 580218893Sdim visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0, 581218893Sdim MemRef::Branchee); 582212904Sdim 583212904Sdim Assert1(I.getNumDestinations() != 0, 584212904Sdim "Undefined behavior: indirectbr with no destinations", &I); 585207618Srdivacky} 586207618Srdivacky 587207618Srdivackyvoid Lint::visitExtractElementInst(ExtractElementInst &I) { 588207618Srdivacky if (ConstantInt *CI = 589210299Sed dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), 590210299Sed /*OffsetOk=*/false))) 591207618Srdivacky Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), 592207618Srdivacky "Undefined result: extractelement index out of range", &I); 593207618Srdivacky} 594207618Srdivacky 595207618Srdivackyvoid Lint::visitInsertElementInst(InsertElementInst &I) { 596207618Srdivacky if (ConstantInt *CI = 597210299Sed dyn_cast<ConstantInt>(findValue(I.getOperand(2), 598210299Sed /*OffsetOk=*/false))) 599207618Srdivacky Assert1(CI->getValue().ult(I.getType()->getNumElements()), 600207618Srdivacky "Undefined result: insertelement index out of range", &I); 601207618Srdivacky} 602207618Srdivacky 603207618Srdivackyvoid Lint::visitUnreachableInst(UnreachableInst &I) { 604207618Srdivacky // This isn't undefined behavior, it's merely suspicious. 605207618Srdivacky Assert1(&I == I.getParent()->begin() || 606207618Srdivacky prior(BasicBlock::iterator(&I))->mayHaveSideEffects(), 607207618Srdivacky "Unusual: unreachable immediately preceded by instruction without " 608207618Srdivacky "side effects", &I); 609207618Srdivacky} 610207618Srdivacky 611210299Sed/// findValue - Look through bitcasts and simple memory reference patterns 612210299Sed/// to identify an equivalent, but more informative, value. If OffsetOk 613210299Sed/// is true, look through getelementptrs with non-zero offsets too. 614210299Sed/// 615210299Sed/// Most analysis passes don't require this logic, because instcombine 616210299Sed/// will simplify most of these kinds of things away. But it's a goal of 617210299Sed/// this Lint pass to be useful even on non-optimized IR. 618210299SedValue *Lint::findValue(Value *V, bool OffsetOk) const { 619210299Sed SmallPtrSet<Value *, 4> Visited; 620210299Sed return findValueImpl(V, OffsetOk, Visited); 621210299Sed} 622210299Sed 623210299Sed/// findValueImpl - Implementation helper for findValue. 624210299SedValue *Lint::findValueImpl(Value *V, bool OffsetOk, 625210299Sed SmallPtrSet<Value *, 4> &Visited) const { 626210299Sed // Detect self-referential values. 627210299Sed if (!Visited.insert(V)) 628210299Sed return UndefValue::get(V->getType()); 629210299Sed 630210299Sed // TODO: Look through sext or zext cast, when the result is known to 631210299Sed // be interpreted as signed or unsigned, respectively. 632210299Sed // TODO: Look through eliminable cast pairs. 633210299Sed // TODO: Look through calls with unique return values. 634210299Sed // TODO: Look through vector insert/extract/shuffle. 635218893Sdim V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts(); 636210299Sed if (LoadInst *L = dyn_cast<LoadInst>(V)) { 637210299Sed BasicBlock::iterator BBI = L; 638210299Sed BasicBlock *BB = L->getParent(); 639210299Sed SmallPtrSet<BasicBlock *, 4> VisitedBlocks; 640210299Sed for (;;) { 641210299Sed if (!VisitedBlocks.insert(BB)) break; 642210299Sed if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), 643210299Sed BB, BBI, 6, AA)) 644210299Sed return findValueImpl(U, OffsetOk, Visited); 645210299Sed if (BBI != BB->begin()) break; 646210299Sed BB = BB->getUniquePredecessor(); 647210299Sed if (!BB) break; 648210299Sed BBI = BB->end(); 649210299Sed } 650210299Sed } else if (PHINode *PN = dyn_cast<PHINode>(V)) { 651218893Sdim if (Value *W = PN->hasConstantValue()) 652218893Sdim if (W != V) 653218893Sdim return findValueImpl(W, OffsetOk, Visited); 654210299Sed } else if (CastInst *CI = dyn_cast<CastInst>(V)) { 655210299Sed if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : 656210299Sed Type::getInt64Ty(V->getContext()))) 657210299Sed return findValueImpl(CI->getOperand(0), OffsetOk, Visited); 658210299Sed } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) { 659210299Sed if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), 660224145Sdim Ex->getIndices())) 661210299Sed if (W != V) 662210299Sed return findValueImpl(W, OffsetOk, Visited); 663210299Sed } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 664210299Sed // Same as above, but for ConstantExpr instead of Instruction. 665210299Sed if (Instruction::isCast(CE->getOpcode())) { 666210299Sed if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), 667210299Sed CE->getOperand(0)->getType(), 668210299Sed CE->getType(), 669210299Sed TD ? TD->getIntPtrType(V->getContext()) : 670210299Sed Type::getInt64Ty(V->getContext()))) 671210299Sed return findValueImpl(CE->getOperand(0), OffsetOk, Visited); 672210299Sed } else if (CE->getOpcode() == Instruction::ExtractValue) { 673221345Sdim ArrayRef<unsigned> Indices = CE->getIndices(); 674224145Sdim if (Value *W = FindInsertedValue(CE->getOperand(0), Indices)) 675210299Sed if (W != V) 676210299Sed return findValueImpl(W, OffsetOk, Visited); 677210299Sed } 678210299Sed } 679210299Sed 680210299Sed // As a last resort, try SimplifyInstruction or constant folding. 681210299Sed if (Instruction *Inst = dyn_cast<Instruction>(V)) { 682234353Sdim if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT)) 683218893Sdim return findValueImpl(W, OffsetOk, Visited); 684210299Sed } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { 685234353Sdim if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI)) 686210299Sed if (W != V) 687210299Sed return findValueImpl(W, OffsetOk, Visited); 688210299Sed } 689210299Sed 690210299Sed return V; 691210299Sed} 692210299Sed 693207618Srdivacky//===----------------------------------------------------------------------===// 694207618Srdivacky// Implement the public interfaces to this file... 695207618Srdivacky//===----------------------------------------------------------------------===// 696207618Srdivacky 697207618SrdivackyFunctionPass *llvm::createLintPass() { 698207618Srdivacky return new Lint(); 699207618Srdivacky} 700207618Srdivacky 701207618Srdivacky/// lintFunction - Check a function for errors, printing messages on stderr. 702207618Srdivacky/// 703207618Srdivackyvoid llvm::lintFunction(const Function &f) { 704207618Srdivacky Function &F = const_cast<Function&>(f); 705207618Srdivacky assert(!F.isDeclaration() && "Cannot lint external functions"); 706207618Srdivacky 707207618Srdivacky FunctionPassManager FPM(F.getParent()); 708207618Srdivacky Lint *V = new Lint(); 709207618Srdivacky FPM.add(V); 710207618Srdivacky FPM.run(F); 711207618Srdivacky} 712207618Srdivacky 713207618Srdivacky/// lintModule - Check a module for errors, printing messages on stderr. 714207618Srdivacky/// 715208599Srdivackyvoid llvm::lintModule(const Module &M) { 716207618Srdivacky PassManager PM; 717207618Srdivacky Lint *V = new Lint(); 718207618Srdivacky PM.add(V); 719207618Srdivacky PM.run(const_cast<Module&>(M)); 720207618Srdivacky} 721