1207618Srdivacky//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
2207618Srdivacky//
3207618Srdivacky//                     The LLVM Compiler Infrastructure
4207618Srdivacky//
5207618Srdivacky// This file is distributed under the University of Illinois Open Source
6207618Srdivacky// License. See LICENSE.TXT for details.
7207618Srdivacky//
8207618Srdivacky//===----------------------------------------------------------------------===//
9207618Srdivacky//
10207618Srdivacky// This pass statically checks for common and easily-identified constructs
11207618Srdivacky// which produce undefined or likely unintended behavior in LLVM IR.
12207618Srdivacky//
13207618Srdivacky// It is not a guarantee of correctness, in two ways. First, it isn't
14207618Srdivacky// comprehensive. There are checks which could be done statically which are
15207618Srdivacky// not yet implemented. Some of these are indicated by TODO comments, but
16207618Srdivacky// those aren't comprehensive either. Second, many conditions cannot be
17207618Srdivacky// checked statically. This pass does no dynamic instrumentation, so it
18207618Srdivacky// can't check for all possible problems.
19207618Srdivacky//
20207618Srdivacky// Another limitation is that it assumes all code will be executed. A store
21207618Srdivacky// through a null pointer in a basic block which is never reached is harmless,
22210299Sed// but this pass will warn about it anyway. This is the main reason why most
23210299Sed// of these checks live here instead of in the Verifier pass.
24207618Srdivacky//
25207618Srdivacky// Optimization passes may make conditions that this pass checks for more or
26207618Srdivacky// less obvious. If an optimization pass appears to be introducing a warning,
27207618Srdivacky// it may be that the optimization pass is merely exposing an existing
28207618Srdivacky// condition in the code.
29207618Srdivacky//
30207618Srdivacky// This code may be run before instcombine. In many cases, instcombine checks
31207618Srdivacky// for the same kinds of things and turns instructions with undefined behavior
32207618Srdivacky// into unreachable (or equivalent). Because of this, this pass makes some
33207618Srdivacky// effort to look through bitcasts and so on.
34207618Srdivacky//
35207618Srdivacky//===----------------------------------------------------------------------===//
36207618Srdivacky
37249423Sdim#include "llvm/Analysis/Lint.h"
38249423Sdim#include "llvm/ADT/STLExtras.h"
39207618Srdivacky#include "llvm/Analysis/AliasAnalysis.h"
40210299Sed#include "llvm/Analysis/ConstantFolding.h"
41210299Sed#include "llvm/Analysis/Dominators.h"
42249423Sdim#include "llvm/Analysis/InstructionSimplify.h"
43210299Sed#include "llvm/Analysis/Loads.h"
44249423Sdim#include "llvm/Analysis/Passes.h"
45207618Srdivacky#include "llvm/Analysis/ValueTracking.h"
46207618Srdivacky#include "llvm/Assembly/Writer.h"
47249423Sdim#include "llvm/IR/DataLayout.h"
48249423Sdim#include "llvm/IR/Function.h"
49249423Sdim#include "llvm/IR/IntrinsicInst.h"
50249423Sdim#include "llvm/InstVisitor.h"
51207618Srdivacky#include "llvm/Pass.h"
52207618Srdivacky#include "llvm/PassManager.h"
53207618Srdivacky#include "llvm/Support/CallSite.h"
54207618Srdivacky#include "llvm/Support/Debug.h"
55207618Srdivacky#include "llvm/Support/raw_ostream.h"
56249423Sdim#include "llvm/Target/TargetLibraryInfo.h"
57207618Srdivackyusing namespace llvm;
58207618Srdivacky
59207618Srdivackynamespace {
60207618Srdivacky  namespace MemRef {
61207618Srdivacky    static unsigned Read     = 1;
62207618Srdivacky    static unsigned Write    = 2;
63207618Srdivacky    static unsigned Callee   = 4;
64207618Srdivacky    static unsigned Branchee = 8;
65207618Srdivacky  }
66207618Srdivacky
67207618Srdivacky  class Lint : public FunctionPass, public InstVisitor<Lint> {
68207618Srdivacky    friend class InstVisitor<Lint>;
69207618Srdivacky
70207618Srdivacky    void visitFunction(Function &F);
71207618Srdivacky
72207618Srdivacky    void visitCallSite(CallSite CS);
73210299Sed    void visitMemoryReference(Instruction &I, Value *Ptr,
74218893Sdim                              uint64_t Size, unsigned Align,
75226633Sdim                              Type *Ty, unsigned Flags);
76207618Srdivacky
77207618Srdivacky    void visitCallInst(CallInst &I);
78207618Srdivacky    void visitInvokeInst(InvokeInst &I);
79207618Srdivacky    void visitReturnInst(ReturnInst &I);
80207618Srdivacky    void visitLoadInst(LoadInst &I);
81207618Srdivacky    void visitStoreInst(StoreInst &I);
82207618Srdivacky    void visitXor(BinaryOperator &I);
83207618Srdivacky    void visitSub(BinaryOperator &I);
84207618Srdivacky    void visitLShr(BinaryOperator &I);
85207618Srdivacky    void visitAShr(BinaryOperator &I);
86207618Srdivacky    void visitShl(BinaryOperator &I);
87207618Srdivacky    void visitSDiv(BinaryOperator &I);
88207618Srdivacky    void visitUDiv(BinaryOperator &I);
89207618Srdivacky    void visitSRem(BinaryOperator &I);
90207618Srdivacky    void visitURem(BinaryOperator &I);
91207618Srdivacky    void visitAllocaInst(AllocaInst &I);
92207618Srdivacky    void visitVAArgInst(VAArgInst &I);
93207618Srdivacky    void visitIndirectBrInst(IndirectBrInst &I);
94207618Srdivacky    void visitExtractElementInst(ExtractElementInst &I);
95207618Srdivacky    void visitInsertElementInst(InsertElementInst &I);
96207618Srdivacky    void visitUnreachableInst(UnreachableInst &I);
97207618Srdivacky
98210299Sed    Value *findValue(Value *V, bool OffsetOk) const;
99210299Sed    Value *findValueImpl(Value *V, bool OffsetOk,
100210299Sed                         SmallPtrSet<Value *, 4> &Visited) const;
101210299Sed
102207618Srdivacky  public:
103207618Srdivacky    Module *Mod;
104207618Srdivacky    AliasAnalysis *AA;
105210299Sed    DominatorTree *DT;
106243830Sdim    DataLayout *TD;
107234353Sdim    TargetLibraryInfo *TLI;
108207618Srdivacky
109207618Srdivacky    std::string Messages;
110207618Srdivacky    raw_string_ostream MessagesStr;
111207618Srdivacky
112207618Srdivacky    static char ID; // Pass identification, replacement for typeid
113218893Sdim    Lint() : FunctionPass(ID), MessagesStr(Messages) {
114218893Sdim      initializeLintPass(*PassRegistry::getPassRegistry());
115218893Sdim    }
116207618Srdivacky
117207618Srdivacky    virtual bool runOnFunction(Function &F);
118207618Srdivacky
119207618Srdivacky    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
120207618Srdivacky      AU.setPreservesAll();
121207618Srdivacky      AU.addRequired<AliasAnalysis>();
122234353Sdim      AU.addRequired<TargetLibraryInfo>();
123210299Sed      AU.addRequired<DominatorTree>();
124207618Srdivacky    }
125207618Srdivacky    virtual void print(raw_ostream &O, const Module *M) const {}
126207618Srdivacky
127207618Srdivacky    void WriteValue(const Value *V) {
128207618Srdivacky      if (!V) return;
129207618Srdivacky      if (isa<Instruction>(V)) {
130207618Srdivacky        MessagesStr << *V << '\n';
131207618Srdivacky      } else {
132207618Srdivacky        WriteAsOperand(MessagesStr, V, true, Mod);
133207618Srdivacky        MessagesStr << '\n';
134207618Srdivacky      }
135207618Srdivacky    }
136207618Srdivacky
137207618Srdivacky    // CheckFailed - A check failed, so print out the condition and the message
138207618Srdivacky    // that failed.  This provides a nice place to put a breakpoint if you want
139207618Srdivacky    // to see why something is not correct.
140207618Srdivacky    void CheckFailed(const Twine &Message,
141207618Srdivacky                     const Value *V1 = 0, const Value *V2 = 0,
142207618Srdivacky                     const Value *V3 = 0, const Value *V4 = 0) {
143207618Srdivacky      MessagesStr << Message.str() << "\n";
144207618Srdivacky      WriteValue(V1);
145207618Srdivacky      WriteValue(V2);
146207618Srdivacky      WriteValue(V3);
147207618Srdivacky      WriteValue(V4);
148207618Srdivacky    }
149207618Srdivacky  };
150207618Srdivacky}
151207618Srdivacky
152207618Srdivackychar Lint::ID = 0;
153218893SdimINITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
154218893Sdim                      false, true)
155234353SdimINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
156218893SdimINITIALIZE_PASS_DEPENDENCY(DominatorTree)
157218893SdimINITIALIZE_AG_DEPENDENCY(AliasAnalysis)
158218893SdimINITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
159218893Sdim                    false, true)
160207618Srdivacky
161207618Srdivacky// Assert - We know that cond should be true, if not print an error message.
162207618Srdivacky#define Assert(C, M) \
163207618Srdivacky    do { if (!(C)) { CheckFailed(M); return; } } while (0)
164207618Srdivacky#define Assert1(C, M, V1) \
165207618Srdivacky    do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
166207618Srdivacky#define Assert2(C, M, V1, V2) \
167207618Srdivacky    do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
168207618Srdivacky#define Assert3(C, M, V1, V2, V3) \
169207618Srdivacky    do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
170207618Srdivacky#define Assert4(C, M, V1, V2, V3, V4) \
171207618Srdivacky    do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
172207618Srdivacky
173207618Srdivacky// Lint::run - This is the main Analysis entry point for a
174207618Srdivacky// function.
175207618Srdivacky//
176207618Srdivackybool Lint::runOnFunction(Function &F) {
177207618Srdivacky  Mod = F.getParent();
178207618Srdivacky  AA = &getAnalysis<AliasAnalysis>();
179210299Sed  DT = &getAnalysis<DominatorTree>();
180243830Sdim  TD = getAnalysisIfAvailable<DataLayout>();
181234353Sdim  TLI = &getAnalysis<TargetLibraryInfo>();
182207618Srdivacky  visit(F);
183207618Srdivacky  dbgs() << MessagesStr.str();
184208599Srdivacky  Messages.clear();
185207618Srdivacky  return false;
186207618Srdivacky}
187207618Srdivacky
188207618Srdivackyvoid Lint::visitFunction(Function &F) {
189207618Srdivacky  // This isn't undefined behavior, it's just a little unusual, and it's a
190207618Srdivacky  // fairly common mistake to neglect to name a function.
191207618Srdivacky  Assert1(F.hasName() || F.hasLocalLinkage(),
192207618Srdivacky          "Unusual: Unnamed function with non-local linkage", &F);
193210299Sed
194210299Sed  // TODO: Check for irreducible control flow.
195207618Srdivacky}
196207618Srdivacky
197207618Srdivackyvoid Lint::visitCallSite(CallSite CS) {
198207618Srdivacky  Instruction &I = *CS.getInstruction();
199207618Srdivacky  Value *Callee = CS.getCalledValue();
200207618Srdivacky
201218893Sdim  visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
202218893Sdim                       0, 0, MemRef::Callee);
203207618Srdivacky
204210299Sed  if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
205207618Srdivacky    Assert1(CS.getCallingConv() == F->getCallingConv(),
206207618Srdivacky            "Undefined behavior: Caller and callee calling convention differ",
207207618Srdivacky            &I);
208207618Srdivacky
209226633Sdim    FunctionType *FT = F->getFunctionType();
210263508Sdim    unsigned NumActualArgs = CS.arg_size();
211207618Srdivacky
212207618Srdivacky    Assert1(FT->isVarArg() ?
213207618Srdivacky              FT->getNumParams() <= NumActualArgs :
214207618Srdivacky              FT->getNumParams() == NumActualArgs,
215207618Srdivacky            "Undefined behavior: Call argument count mismatches callee "
216207618Srdivacky            "argument count", &I);
217207618Srdivacky
218210299Sed    Assert1(FT->getReturnType() == I.getType(),
219210299Sed            "Undefined behavior: Call return type mismatches "
220210299Sed            "callee return type", &I);
221207618Srdivacky
222210299Sed    // Check argument types (in case the callee was casted) and attributes.
223210299Sed    // TODO: Verify that caller and callee attributes are compatible.
224210299Sed    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
225210299Sed    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
226210299Sed    for (; AI != AE; ++AI) {
227210299Sed      Value *Actual = *AI;
228210299Sed      if (PI != PE) {
229210299Sed        Argument *Formal = PI++;
230210299Sed        Assert1(Formal->getType() == Actual->getType(),
231210299Sed                "Undefined behavior: Call argument type mismatches "
232210299Sed                "callee parameter type", &I);
233207618Srdivacky
234218893Sdim        // Check that noalias arguments don't alias other arguments. This is
235218893Sdim        // not fully precise because we don't know the sizes of the dereferenced
236218893Sdim        // memory regions.
237210299Sed        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
238218893Sdim          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
239218893Sdim            if (AI != BI && (*BI)->getType()->isPointerTy()) {
240218893Sdim              AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
241218893Sdim              Assert1(Result != AliasAnalysis::MustAlias &&
242218893Sdim                      Result != AliasAnalysis::PartialAlias,
243218893Sdim                      "Unusual: noalias argument aliases another argument", &I);
244218893Sdim            }
245210299Sed
246210299Sed        // Check that an sret argument points to valid memory.
247210299Sed        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
248226633Sdim          Type *Ty =
249210299Sed            cast<PointerType>(Formal->getType())->getElementType();
250210299Sed          visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
251210299Sed                               TD ? TD->getABITypeAlignment(Ty) : 0,
252210299Sed                               Ty, MemRef::Read | MemRef::Write);
253210299Sed        }
254210299Sed      }
255210299Sed    }
256207618Srdivacky  }
257207618Srdivacky
258208599Srdivacky  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
259208599Srdivacky    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
260208599Srdivacky         AI != AE; ++AI) {
261210299Sed      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
262210299Sed      Assert1(!isa<AllocaInst>(Obj),
263208599Srdivacky              "Undefined behavior: Call with \"tail\" keyword references "
264210299Sed              "alloca", &I);
265208599Srdivacky    }
266207618Srdivacky
267208599Srdivacky
268207618Srdivacky  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
269207618Srdivacky    switch (II->getIntrinsicID()) {
270207618Srdivacky    default: break;
271207618Srdivacky
272207618Srdivacky    // TODO: Check more intrinsics
273207618Srdivacky
274207618Srdivacky    case Intrinsic::memcpy: {
275207618Srdivacky      MemCpyInst *MCI = cast<MemCpyInst>(&I);
276210299Sed      // TODO: If the size is known, use it.
277218893Sdim      visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
278218893Sdim                           MCI->getAlignment(), 0,
279207618Srdivacky                           MemRef::Write);
280218893Sdim      visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
281218893Sdim                           MCI->getAlignment(), 0,
282207618Srdivacky                           MemRef::Read);
283207618Srdivacky
284207618Srdivacky      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
285207618Srdivacky      // isn't expressive enough for what we really want to do. Known partial
286207618Srdivacky      // overlap is not distinguished from the case where nothing is known.
287218893Sdim      uint64_t Size = 0;
288207618Srdivacky      if (const ConstantInt *Len =
289210299Sed            dyn_cast<ConstantInt>(findValue(MCI->getLength(),
290210299Sed                                            /*OffsetOk=*/false)))
291207618Srdivacky        if (Len->getValue().isIntN(32))
292207618Srdivacky          Size = Len->getValue().getZExtValue();
293207618Srdivacky      Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
294207618Srdivacky              AliasAnalysis::MustAlias,
295207618Srdivacky              "Undefined behavior: memcpy source and destination overlap", &I);
296207618Srdivacky      break;
297207618Srdivacky    }
298207618Srdivacky    case Intrinsic::memmove: {
299207618Srdivacky      MemMoveInst *MMI = cast<MemMoveInst>(&I);
300210299Sed      // TODO: If the size is known, use it.
301218893Sdim      visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
302218893Sdim                           MMI->getAlignment(), 0,
303207618Srdivacky                           MemRef::Write);
304218893Sdim      visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
305218893Sdim                           MMI->getAlignment(), 0,
306207618Srdivacky                           MemRef::Read);
307207618Srdivacky      break;
308207618Srdivacky    }
309207618Srdivacky    case Intrinsic::memset: {
310207618Srdivacky      MemSetInst *MSI = cast<MemSetInst>(&I);
311210299Sed      // TODO: If the size is known, use it.
312218893Sdim      visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
313218893Sdim                           MSI->getAlignment(), 0,
314207618Srdivacky                           MemRef::Write);
315207618Srdivacky      break;
316207618Srdivacky    }
317207618Srdivacky
318207618Srdivacky    case Intrinsic::vastart:
319207618Srdivacky      Assert1(I.getParent()->getParent()->isVarArg(),
320207618Srdivacky              "Undefined behavior: va_start called in a non-varargs function",
321207618Srdivacky              &I);
322207618Srdivacky
323218893Sdim      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
324218893Sdim                           0, 0, MemRef::Read | MemRef::Write);
325207618Srdivacky      break;
326207618Srdivacky    case Intrinsic::vacopy:
327218893Sdim      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
328218893Sdim                           0, 0, MemRef::Write);
329218893Sdim      visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
330218893Sdim                           0, 0, MemRef::Read);
331207618Srdivacky      break;
332207618Srdivacky    case Intrinsic::vaend:
333218893Sdim      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
334218893Sdim                           0, 0, MemRef::Read | MemRef::Write);
335207618Srdivacky      break;
336207618Srdivacky
337207618Srdivacky    case Intrinsic::stackrestore:
338208599Srdivacky      // Stackrestore doesn't read or write memory, but it sets the
339208599Srdivacky      // stack pointer, which the compiler may read from or write to
340208599Srdivacky      // at any time, so check it for both readability and writeability.
341218893Sdim      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
342218893Sdim                           0, 0, MemRef::Read | MemRef::Write);
343207618Srdivacky      break;
344207618Srdivacky    }
345207618Srdivacky}
346207618Srdivacky
347207618Srdivackyvoid Lint::visitCallInst(CallInst &I) {
348207618Srdivacky  return visitCallSite(&I);
349207618Srdivacky}
350207618Srdivacky
351207618Srdivackyvoid Lint::visitInvokeInst(InvokeInst &I) {
352207618Srdivacky  return visitCallSite(&I);
353207618Srdivacky}
354207618Srdivacky
355207618Srdivackyvoid Lint::visitReturnInst(ReturnInst &I) {
356207618Srdivacky  Function *F = I.getParent()->getParent();
357207618Srdivacky  Assert1(!F->doesNotReturn(),
358207618Srdivacky          "Unusual: Return statement in function with noreturn attribute",
359207618Srdivacky          &I);
360210299Sed
361210299Sed  if (Value *V = I.getReturnValue()) {
362210299Sed    Value *Obj = findValue(V, /*OffsetOk=*/true);
363210299Sed    Assert1(!isa<AllocaInst>(Obj),
364210299Sed            "Unusual: Returning alloca value", &I);
365210299Sed  }
366207618Srdivacky}
367207618Srdivacky
368210299Sed// TODO: Check that the reference is in bounds.
369210299Sed// TODO: Check readnone/readonly function attributes.
370207618Srdivackyvoid Lint::visitMemoryReference(Instruction &I,
371218893Sdim                                Value *Ptr, uint64_t Size, unsigned Align,
372226633Sdim                                Type *Ty, unsigned Flags) {
373210299Sed  // If no memory is being referenced, it doesn't matter if the pointer
374210299Sed  // is valid.
375210299Sed  if (Size == 0)
376210299Sed    return;
377210299Sed
378210299Sed  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
379207618Srdivacky  Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
380207618Srdivacky          "Undefined behavior: Null pointer dereference", &I);
381207618Srdivacky  Assert1(!isa<UndefValue>(UnderlyingObject),
382207618Srdivacky          "Undefined behavior: Undef pointer dereference", &I);
383210299Sed  Assert1(!isa<ConstantInt>(UnderlyingObject) ||
384210299Sed          !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
385210299Sed          "Unusual: All-ones pointer dereference", &I);
386210299Sed  Assert1(!isa<ConstantInt>(UnderlyingObject) ||
387210299Sed          !cast<ConstantInt>(UnderlyingObject)->isOne(),
388210299Sed          "Unusual: Address one pointer dereference", &I);
389207618Srdivacky
390207618Srdivacky  if (Flags & MemRef::Write) {
391207618Srdivacky    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
392207618Srdivacky      Assert1(!GV->isConstant(),
393207618Srdivacky              "Undefined behavior: Write to read-only memory", &I);
394207618Srdivacky    Assert1(!isa<Function>(UnderlyingObject) &&
395207618Srdivacky            !isa<BlockAddress>(UnderlyingObject),
396207618Srdivacky            "Undefined behavior: Write to text section", &I);
397207618Srdivacky  }
398207618Srdivacky  if (Flags & MemRef::Read) {
399207618Srdivacky    Assert1(!isa<Function>(UnderlyingObject),
400207618Srdivacky            "Unusual: Load from function body", &I);
401207618Srdivacky    Assert1(!isa<BlockAddress>(UnderlyingObject),
402207618Srdivacky            "Undefined behavior: Load from block address", &I);
403207618Srdivacky  }
404207618Srdivacky  if (Flags & MemRef::Callee) {
405207618Srdivacky    Assert1(!isa<BlockAddress>(UnderlyingObject),
406207618Srdivacky            "Undefined behavior: Call to block address", &I);
407207618Srdivacky  }
408207618Srdivacky  if (Flags & MemRef::Branchee) {
409207618Srdivacky    Assert1(!isa<Constant>(UnderlyingObject) ||
410207618Srdivacky            isa<BlockAddress>(UnderlyingObject),
411207618Srdivacky            "Undefined behavior: Branch to non-blockaddress", &I);
412207618Srdivacky  }
413207618Srdivacky
414243830Sdim  // Check for buffer overflows and misalignment.
415249423Sdim  // Only handles memory references that read/write something simple like an
416249423Sdim  // alloca instruction or a global variable.
417249423Sdim  int64_t Offset = 0;
418249423Sdim  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) {
419249423Sdim    // OK, so the access is to a constant offset from Ptr.  Check that Ptr is
420249423Sdim    // something we can handle and if so extract the size of this base object
421249423Sdim    // along with its alignment.
422249423Sdim    uint64_t BaseSize = AliasAnalysis::UnknownSize;
423249423Sdim    unsigned BaseAlign = 0;
424207618Srdivacky
425249423Sdim    if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
426249423Sdim      Type *ATy = AI->getAllocatedType();
427249423Sdim      if (TD && !AI->isArrayAllocation() && ATy->isSized())
428249423Sdim        BaseSize = TD->getTypeAllocSize(ATy);
429249423Sdim      BaseAlign = AI->getAlignment();
430249423Sdim      if (TD && BaseAlign == 0 && ATy->isSized())
431249423Sdim        BaseAlign = TD->getABITypeAlignment(ATy);
432249423Sdim    } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
433249423Sdim      // If the global may be defined differently in another compilation unit
434249423Sdim      // then don't warn about funky memory accesses.
435249423Sdim      if (GV->hasDefinitiveInitializer()) {
436249423Sdim        Type *GTy = GV->getType()->getElementType();
437249423Sdim        if (TD && GTy->isSized())
438249423Sdim          BaseSize = TD->getTypeAllocSize(GTy);
439249423Sdim        BaseAlign = GV->getAlignment();
440249423Sdim        if (TD && BaseAlign == 0 && GTy->isSized())
441249423Sdim          BaseAlign = TD->getABITypeAlignment(GTy);
442243830Sdim      }
443249423Sdim    }
444243830Sdim
445249423Sdim    // Accesses from before the start or after the end of the object are not
446249423Sdim    // defined.
447249423Sdim    Assert1(Size == AliasAnalysis::UnknownSize ||
448249423Sdim            BaseSize == AliasAnalysis::UnknownSize ||
449249423Sdim            (Offset >= 0 && Offset + Size <= BaseSize),
450249423Sdim            "Undefined behavior: Buffer overflow", &I);
451243830Sdim
452249423Sdim    // Accesses that say that the memory is more aligned than it is are not
453249423Sdim    // defined.
454249423Sdim    if (TD && Align == 0 && Ty && Ty->isSized())
455249423Sdim      Align = TD->getABITypeAlignment(Ty);
456249423Sdim    Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
457249423Sdim            "Undefined behavior: Memory reference address is misaligned", &I);
458207618Srdivacky  }
459207618Srdivacky}
460207618Srdivacky
461207618Srdivackyvoid Lint::visitLoadInst(LoadInst &I) {
462210299Sed  visitMemoryReference(I, I.getPointerOperand(),
463210299Sed                       AA->getTypeStoreSize(I.getType()), I.getAlignment(),
464210299Sed                       I.getType(), MemRef::Read);
465207618Srdivacky}
466207618Srdivacky
467207618Srdivackyvoid Lint::visitStoreInst(StoreInst &I) {
468210299Sed  visitMemoryReference(I, I.getPointerOperand(),
469210299Sed                       AA->getTypeStoreSize(I.getOperand(0)->getType()),
470210299Sed                       I.getAlignment(),
471210299Sed                       I.getOperand(0)->getType(), MemRef::Write);
472207618Srdivacky}
473207618Srdivacky
474207618Srdivackyvoid Lint::visitXor(BinaryOperator &I) {
475207618Srdivacky  Assert1(!isa<UndefValue>(I.getOperand(0)) ||
476207618Srdivacky          !isa<UndefValue>(I.getOperand(1)),
477207618Srdivacky          "Undefined result: xor(undef, undef)", &I);
478207618Srdivacky}
479207618Srdivacky
480207618Srdivackyvoid Lint::visitSub(BinaryOperator &I) {
481207618Srdivacky  Assert1(!isa<UndefValue>(I.getOperand(0)) ||
482207618Srdivacky          !isa<UndefValue>(I.getOperand(1)),
483207618Srdivacky          "Undefined result: sub(undef, undef)", &I);
484207618Srdivacky}
485207618Srdivacky
486207618Srdivackyvoid Lint::visitLShr(BinaryOperator &I) {
487207618Srdivacky  if (ConstantInt *CI =
488210299Sed        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
489207618Srdivacky    Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
490207618Srdivacky            "Undefined result: Shift count out of range", &I);
491207618Srdivacky}
492207618Srdivacky
493207618Srdivackyvoid Lint::visitAShr(BinaryOperator &I) {
494207618Srdivacky  if (ConstantInt *CI =
495210299Sed        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
496207618Srdivacky    Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
497207618Srdivacky            "Undefined result: Shift count out of range", &I);
498207618Srdivacky}
499207618Srdivacky
500207618Srdivackyvoid Lint::visitShl(BinaryOperator &I) {
501207618Srdivacky  if (ConstantInt *CI =
502210299Sed        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
503207618Srdivacky    Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
504207618Srdivacky            "Undefined result: Shift count out of range", &I);
505207618Srdivacky}
506207618Srdivacky
507263508Sdimstatic bool isZero(Value *V, DataLayout *DL) {
508207618Srdivacky  // Assume undef could be zero.
509263508Sdim  if (isa<UndefValue>(V))
510263508Sdim    return true;
511207618Srdivacky
512263508Sdim  VectorType *VecTy = dyn_cast<VectorType>(V->getType());
513263508Sdim  if (!VecTy) {
514263508Sdim    unsigned BitWidth = V->getType()->getIntegerBitWidth();
515263508Sdim    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
516263508Sdim    ComputeMaskedBits(V, KnownZero, KnownOne, DL);
517263508Sdim    return KnownZero.isAllOnesValue();
518263508Sdim  }
519263508Sdim
520263508Sdim  // Per-component check doesn't work with zeroinitializer
521263508Sdim  Constant *C = dyn_cast<Constant>(V);
522263508Sdim  if (!C)
523263508Sdim    return false;
524263508Sdim
525263508Sdim  if (C->isZeroValue())
526263508Sdim    return true;
527263508Sdim
528263508Sdim  // For a vector, KnownZero will only be true if all values are zero, so check
529263508Sdim  // this per component
530263508Sdim  unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
531263508Sdim  for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
532263508Sdim    Constant *Elem = C->getAggregateElement(I);
533263508Sdim    if (isa<UndefValue>(Elem))
534263508Sdim      return true;
535263508Sdim
536263508Sdim    APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
537263508Sdim    ComputeMaskedBits(Elem, KnownZero, KnownOne, DL);
538263508Sdim    if (KnownZero.isAllOnesValue())
539263508Sdim      return true;
540263508Sdim  }
541263508Sdim
542263508Sdim  return false;
543207618Srdivacky}
544207618Srdivacky
545207618Srdivackyvoid Lint::visitSDiv(BinaryOperator &I) {
546207618Srdivacky  Assert1(!isZero(I.getOperand(1), TD),
547207618Srdivacky          "Undefined behavior: Division by zero", &I);
548207618Srdivacky}
549207618Srdivacky
550207618Srdivackyvoid Lint::visitUDiv(BinaryOperator &I) {
551207618Srdivacky  Assert1(!isZero(I.getOperand(1), TD),
552207618Srdivacky          "Undefined behavior: Division by zero", &I);
553207618Srdivacky}
554207618Srdivacky
555207618Srdivackyvoid Lint::visitSRem(BinaryOperator &I) {
556207618Srdivacky  Assert1(!isZero(I.getOperand(1), TD),
557207618Srdivacky          "Undefined behavior: Division by zero", &I);
558207618Srdivacky}
559207618Srdivacky
560207618Srdivackyvoid Lint::visitURem(BinaryOperator &I) {
561207618Srdivacky  Assert1(!isZero(I.getOperand(1), TD),
562207618Srdivacky          "Undefined behavior: Division by zero", &I);
563207618Srdivacky}
564207618Srdivacky
565207618Srdivackyvoid Lint::visitAllocaInst(AllocaInst &I) {
566207618Srdivacky  if (isa<ConstantInt>(I.getArraySize()))
567207618Srdivacky    // This isn't undefined behavior, it's just an obvious pessimization.
568207618Srdivacky    Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
569207618Srdivacky            "Pessimization: Static alloca outside of entry block", &I);
570210299Sed
571210299Sed  // TODO: Check for an unusual size (MSB set?)
572207618Srdivacky}
573207618Srdivacky
574207618Srdivackyvoid Lint::visitVAArgInst(VAArgInst &I) {
575218893Sdim  visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
576207618Srdivacky                       MemRef::Read | MemRef::Write);
577207618Srdivacky}
578207618Srdivacky
579207618Srdivackyvoid Lint::visitIndirectBrInst(IndirectBrInst &I) {
580218893Sdim  visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
581218893Sdim                       MemRef::Branchee);
582212904Sdim
583212904Sdim  Assert1(I.getNumDestinations() != 0,
584212904Sdim          "Undefined behavior: indirectbr with no destinations", &I);
585207618Srdivacky}
586207618Srdivacky
587207618Srdivackyvoid Lint::visitExtractElementInst(ExtractElementInst &I) {
588207618Srdivacky  if (ConstantInt *CI =
589210299Sed        dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
590210299Sed                                        /*OffsetOk=*/false)))
591207618Srdivacky    Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
592207618Srdivacky            "Undefined result: extractelement index out of range", &I);
593207618Srdivacky}
594207618Srdivacky
595207618Srdivackyvoid Lint::visitInsertElementInst(InsertElementInst &I) {
596207618Srdivacky  if (ConstantInt *CI =
597210299Sed        dyn_cast<ConstantInt>(findValue(I.getOperand(2),
598210299Sed                                        /*OffsetOk=*/false)))
599207618Srdivacky    Assert1(CI->getValue().ult(I.getType()->getNumElements()),
600207618Srdivacky            "Undefined result: insertelement index out of range", &I);
601207618Srdivacky}
602207618Srdivacky
603207618Srdivackyvoid Lint::visitUnreachableInst(UnreachableInst &I) {
604207618Srdivacky  // This isn't undefined behavior, it's merely suspicious.
605207618Srdivacky  Assert1(&I == I.getParent()->begin() ||
606207618Srdivacky          prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
607207618Srdivacky          "Unusual: unreachable immediately preceded by instruction without "
608207618Srdivacky          "side effects", &I);
609207618Srdivacky}
610207618Srdivacky
611210299Sed/// findValue - Look through bitcasts and simple memory reference patterns
612210299Sed/// to identify an equivalent, but more informative, value.  If OffsetOk
613210299Sed/// is true, look through getelementptrs with non-zero offsets too.
614210299Sed///
615210299Sed/// Most analysis passes don't require this logic, because instcombine
616210299Sed/// will simplify most of these kinds of things away. But it's a goal of
617210299Sed/// this Lint pass to be useful even on non-optimized IR.
618210299SedValue *Lint::findValue(Value *V, bool OffsetOk) const {
619210299Sed  SmallPtrSet<Value *, 4> Visited;
620210299Sed  return findValueImpl(V, OffsetOk, Visited);
621210299Sed}
622210299Sed
623210299Sed/// findValueImpl - Implementation helper for findValue.
624210299SedValue *Lint::findValueImpl(Value *V, bool OffsetOk,
625210299Sed                           SmallPtrSet<Value *, 4> &Visited) const {
626210299Sed  // Detect self-referential values.
627210299Sed  if (!Visited.insert(V))
628210299Sed    return UndefValue::get(V->getType());
629210299Sed
630210299Sed  // TODO: Look through sext or zext cast, when the result is known to
631210299Sed  // be interpreted as signed or unsigned, respectively.
632210299Sed  // TODO: Look through eliminable cast pairs.
633210299Sed  // TODO: Look through calls with unique return values.
634210299Sed  // TODO: Look through vector insert/extract/shuffle.
635218893Sdim  V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
636210299Sed  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
637210299Sed    BasicBlock::iterator BBI = L;
638210299Sed    BasicBlock *BB = L->getParent();
639210299Sed    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
640210299Sed    for (;;) {
641210299Sed      if (!VisitedBlocks.insert(BB)) break;
642210299Sed      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
643210299Sed                                              BB, BBI, 6, AA))
644210299Sed        return findValueImpl(U, OffsetOk, Visited);
645210299Sed      if (BBI != BB->begin()) break;
646210299Sed      BB = BB->getUniquePredecessor();
647210299Sed      if (!BB) break;
648210299Sed      BBI = BB->end();
649210299Sed    }
650210299Sed  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
651218893Sdim    if (Value *W = PN->hasConstantValue())
652218893Sdim      if (W != V)
653218893Sdim        return findValueImpl(W, OffsetOk, Visited);
654210299Sed  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
655210299Sed    if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
656210299Sed                            Type::getInt64Ty(V->getContext())))
657210299Sed      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
658210299Sed  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
659210299Sed    if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
660224145Sdim                                     Ex->getIndices()))
661210299Sed      if (W != V)
662210299Sed        return findValueImpl(W, OffsetOk, Visited);
663210299Sed  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
664210299Sed    // Same as above, but for ConstantExpr instead of Instruction.
665210299Sed    if (Instruction::isCast(CE->getOpcode())) {
666210299Sed      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
667210299Sed                               CE->getOperand(0)->getType(),
668210299Sed                               CE->getType(),
669210299Sed                               TD ? TD->getIntPtrType(V->getContext()) :
670210299Sed                                    Type::getInt64Ty(V->getContext())))
671210299Sed        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
672210299Sed    } else if (CE->getOpcode() == Instruction::ExtractValue) {
673221345Sdim      ArrayRef<unsigned> Indices = CE->getIndices();
674224145Sdim      if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
675210299Sed        if (W != V)
676210299Sed          return findValueImpl(W, OffsetOk, Visited);
677210299Sed    }
678210299Sed  }
679210299Sed
680210299Sed  // As a last resort, try SimplifyInstruction or constant folding.
681210299Sed  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
682234353Sdim    if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT))
683218893Sdim      return findValueImpl(W, OffsetOk, Visited);
684210299Sed  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
685234353Sdim    if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI))
686210299Sed      if (W != V)
687210299Sed        return findValueImpl(W, OffsetOk, Visited);
688210299Sed  }
689210299Sed
690210299Sed  return V;
691210299Sed}
692210299Sed
693207618Srdivacky//===----------------------------------------------------------------------===//
694207618Srdivacky//  Implement the public interfaces to this file...
695207618Srdivacky//===----------------------------------------------------------------------===//
696207618Srdivacky
697207618SrdivackyFunctionPass *llvm::createLintPass() {
698207618Srdivacky  return new Lint();
699207618Srdivacky}
700207618Srdivacky
701207618Srdivacky/// lintFunction - Check a function for errors, printing messages on stderr.
702207618Srdivacky///
703207618Srdivackyvoid llvm::lintFunction(const Function &f) {
704207618Srdivacky  Function &F = const_cast<Function&>(f);
705207618Srdivacky  assert(!F.isDeclaration() && "Cannot lint external functions");
706207618Srdivacky
707207618Srdivacky  FunctionPassManager FPM(F.getParent());
708207618Srdivacky  Lint *V = new Lint();
709207618Srdivacky  FPM.add(V);
710207618Srdivacky  FPM.run(F);
711207618Srdivacky}
712207618Srdivacky
713207618Srdivacky/// lintModule - Check a module for errors, printing messages on stderr.
714207618Srdivacky///
715208599Srdivackyvoid llvm::lintModule(const Module &M) {
716207618Srdivacky  PassManager PM;
717207618Srdivacky  Lint *V = new Lint();
718207618Srdivacky  PM.add(V);
719207618Srdivacky  PM.run(const_cast<Module&>(M));
720207618Srdivacky}
721