PoisonChecking.cpp revision 360784
1//===- PoisonChecking.cpp - -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implements a transform pass which instruments IR such that poison semantics
10// are made explicit.  That is, it provides a (possibly partial) executable
11// semantics for every instruction w.r.t. poison as specified in the LLVM
12// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
13// is focused purely on the semantics of LLVM IR, not any particular source
14// language.   If you're looking for something to see if your C/C++ contains
15// UB, this is not it.
16//
17// The rewritten semantics of each instruction will include the following
18// components:
19//
20// 1) The original instruction, unmodified.
21// 2) A propagation rule which translates dynamic information about the poison
22//    state of each input to whether the dynamic output of the instruction
23//    produces poison.
24// 3) A flag validation rule which validates any poison producing flags on the
25//    instruction itself (e.g. checks for overflow on nsw).
26// 4) A check rule which traps (to a handler function) if this instruction must
27//    execute undefined behavior given the poison state of it's inputs.
28//
29// At the moment, the UB detection is done in a best effort manner; that is,
30// the resulting code may produce a false negative result (not report UB when
31// it actually exists according to the LangRef spec), but should never produce
32// a false positive (report UB where it doesn't exist).  The intention is to
33// eventually support a "strict" mode which never dynamically reports a false
34// negative at the cost of rejecting some valid inputs to translation.
35//
36// Use cases for this pass include:
37// - Understanding (and testing!) the implications of the definition of poison
38//   from the LangRef.
39// - Validating the output of a IR fuzzer to ensure that all programs produced
40//   are well defined on the specific input used.
41// - Finding/confirming poison specific miscompiles by checking the poison
42//   status of an input/IR pair is the same before and after an optimization
43//   transform.
44// - Checking that a bugpoint reduction does not introduce UB which didn't
45//   exist in the original program being reduced.
46//
47// The major sources of inaccuracy are currently:
48// - Most validation rules not yet implemented for instructions with poison
49//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
50// - UB which is control dependent on a branch on poison is not yet
51//   reported. Currently, only data flow dependence is modeled.
52// - Poison which is propagated through memory is not modeled.  As such,
53//   storing poison to memory and then reloading it will cause a false negative
54//   as we consider the reloaded value to not be poisoned.
55// - Poison propagation across function boundaries is not modeled.  At the
56//   moment, all arguments and return values are assumed not to be poison.
57// - Undef is not modeled.  In particular, the optimizer's freedom to pick
58//   concrete values for undef bits so as to maximize potential for producing
59//   poison is not modeled.
60//
61//===----------------------------------------------------------------------===//
62
63#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
64#include "llvm/ADT/DenseMap.h"
65#include "llvm/ADT/Statistic.h"
66#include "llvm/Analysis/MemoryBuiltins.h"
67#include "llvm/Analysis/ValueTracking.h"
68#include "llvm/IR/IRBuilder.h"
69#include "llvm/IR/InstVisitor.h"
70#include "llvm/IR/IntrinsicInst.h"
71#include "llvm/IR/PatternMatch.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Debug.h"
74
75using namespace llvm;
76
77#define DEBUG_TYPE "poison-checking"
78
79static cl::opt<bool>
80LocalCheck("poison-checking-function-local",
81           cl::init(false),
82           cl::desc("Check that returns are non-poison (for testing)"));
83
84
85static bool isConstantFalse(Value* V) {
86  assert(V->getType()->isIntegerTy(1));
87  if (auto *CI = dyn_cast<ConstantInt>(V))
88    return CI->isZero();
89  return false;
90}
91
92static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
93  if (Ops.size() == 0)
94    return B.getFalse();
95  unsigned i = 0;
96  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
97  if (i == Ops.size())
98    return B.getFalse();
99  Value *Accum = Ops[i++];
100  for (; i < Ops.size(); i++)
101    if (!isConstantFalse(Ops[i]))
102      Accum = B.CreateOr(Accum, Ops[i]);
103  return Accum;
104}
105
106static void generatePoisonChecksForBinOp(Instruction &I,
107                                         SmallVector<Value*, 2> &Checks) {
108  assert(isa<BinaryOperator>(I));
109
110  IRBuilder<> B(&I);
111  Value *LHS = I.getOperand(0);
112  Value *RHS = I.getOperand(1);
113  switch (I.getOpcode()) {
114  default:
115    return;
116  case Instruction::Add: {
117    if (I.hasNoSignedWrap()) {
118      auto *OverflowOp =
119        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
120      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
121    }
122    if (I.hasNoUnsignedWrap()) {
123      auto *OverflowOp =
124        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
125      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
126    }
127    break;
128  }
129  case Instruction::Sub: {
130    if (I.hasNoSignedWrap()) {
131      auto *OverflowOp =
132        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
133      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
134    }
135    if (I.hasNoUnsignedWrap()) {
136      auto *OverflowOp =
137        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
138      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
139    }
140    break;
141  }
142  case Instruction::Mul: {
143    if (I.hasNoSignedWrap()) {
144      auto *OverflowOp =
145        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
146      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
147    }
148    if (I.hasNoUnsignedWrap()) {
149      auto *OverflowOp =
150        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
151      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
152    }
153    break;
154  }
155  case Instruction::UDiv: {
156    if (I.isExact()) {
157      auto *Check =
158        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
159                     ConstantInt::get(LHS->getType(), 0));
160      Checks.push_back(Check);
161    }
162    break;
163  }
164  case Instruction::SDiv: {
165    if (I.isExact()) {
166      auto *Check =
167        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
168                     ConstantInt::get(LHS->getType(), 0));
169      Checks.push_back(Check);
170    }
171    break;
172  }
173  case Instruction::AShr:
174  case Instruction::LShr:
175  case Instruction::Shl: {
176    Value *ShiftCheck =
177      B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
178                   ConstantInt::get(RHS->getType(),
179                                    LHS->getType()->getScalarSizeInBits()));
180    Checks.push_back(ShiftCheck);
181    break;
182  }
183  };
184}
185
186static Value* generatePoisonChecks(Instruction &I) {
187  IRBuilder<> B(&I);
188  SmallVector<Value*, 2> Checks;
189  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
190    generatePoisonChecksForBinOp(I, Checks);
191
192  // Handle non-binops seperately
193  switch (I.getOpcode()) {
194  default:
195    break;
196  case Instruction::ExtractElement: {
197    Value *Vec = I.getOperand(0);
198    if (Vec->getType()->getVectorIsScalable())
199      break;
200    Value *Idx = I.getOperand(1);
201    unsigned NumElts = Vec->getType()->getVectorNumElements();
202    Value *Check =
203      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
204                   ConstantInt::get(Idx->getType(), NumElts));
205    Checks.push_back(Check);
206    break;
207  }
208  case Instruction::InsertElement: {
209    Value *Vec = I.getOperand(0);
210    if (Vec->getType()->getVectorIsScalable())
211      break;
212    Value *Idx = I.getOperand(2);
213    unsigned NumElts = Vec->getType()->getVectorNumElements();
214    Value *Check =
215      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
216                   ConstantInt::get(Idx->getType(), NumElts));
217    Checks.push_back(Check);
218    break;
219  }
220  };
221  return buildOrChain(B, Checks);
222}
223
224static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
225  auto Itr = ValToPoison.find(V);
226  if (Itr != ValToPoison.end())
227    return Itr->second;
228  if (isa<Constant>(V)) {
229    return ConstantInt::getFalse(V->getContext());
230  }
231  // Return false for unknwon values - this implements a non-strict mode where
232  // unhandled IR constructs are simply considered to never produce poison.  At
233  // some point in the future, we probably want a "strict mode" for testing if
234  // nothing else.
235  return ConstantInt::getFalse(V->getContext());
236}
237
238static void CreateAssert(IRBuilder<> &B, Value *Cond) {
239  assert(Cond->getType()->isIntegerTy(1));
240  if (auto *CI = dyn_cast<ConstantInt>(Cond))
241    if (CI->isAllOnesValue())
242      return;
243
244  Module *M = B.GetInsertBlock()->getModule();
245  M->getOrInsertFunction("__poison_checker_assert",
246                         Type::getVoidTy(M->getContext()),
247                         Type::getInt1Ty(M->getContext()));
248  Function *TrapFunc = M->getFunction("__poison_checker_assert");
249  B.CreateCall(TrapFunc, Cond);
250}
251
252static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
253  assert(Cond->getType()->isIntegerTy(1));
254  CreateAssert(B, B.CreateNot(Cond));
255}
256
257static bool rewrite(Function &F) {
258  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
259
260  DenseMap<Value *, Value *> ValToPoison;
261
262  for (BasicBlock &BB : F)
263    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
264      auto *OldPHI = cast<PHINode>(&*I);
265      auto *NewPHI = PHINode::Create(Int1Ty,
266                                     OldPHI->getNumIncomingValues());
267      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
268        NewPHI->addIncoming(UndefValue::get(Int1Ty),
269                            OldPHI->getIncomingBlock(i));
270      NewPHI->insertBefore(OldPHI);
271      ValToPoison[OldPHI] = NewPHI;
272    }
273
274  for (BasicBlock &BB : F)
275    for (Instruction &I : BB) {
276      if (isa<PHINode>(I)) continue;
277
278      IRBuilder<> B(cast<Instruction>(&I));
279
280      // Note: There are many more sources of documented UB, but this pass only
281      // attempts to find UB triggered by propagation of poison.
282      if (Value *Op = const_cast<Value*>(getGuaranteedNonFullPoisonOp(&I)))
283        CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
284
285      if (LocalCheck)
286        if (auto *RI = dyn_cast<ReturnInst>(&I))
287          if (RI->getNumOperands() != 0) {
288            Value *Op = RI->getOperand(0);
289            CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
290          }
291
292      SmallVector<Value*, 4> Checks;
293      if (propagatesFullPoison(&I))
294        for (Value *V : I.operands())
295          Checks.push_back(getPoisonFor(ValToPoison, V));
296
297      if (auto *Check = generatePoisonChecks(I))
298        Checks.push_back(Check);
299      ValToPoison[&I] = buildOrChain(B, Checks);
300    }
301
302  for (BasicBlock &BB : F)
303    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
304      auto *OldPHI = cast<PHINode>(&*I);
305      if (!ValToPoison.count(OldPHI))
306        continue; // skip the newly inserted phis
307      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
308      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
309        auto *OldVal = OldPHI->getIncomingValue(i);
310        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
311      }
312    }
313  return true;
314}
315
316
317PreservedAnalyses PoisonCheckingPass::run(Module &M,
318                                          ModuleAnalysisManager &AM) {
319  bool Changed = false;
320  for (auto &F : M)
321    Changed |= rewrite(F);
322
323  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
324}
325
326PreservedAnalyses PoisonCheckingPass::run(Function &F,
327                                          FunctionAnalysisManager &AM) {
328  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
329}
330
331
332/* Major TODO Items:
333   - Control dependent poison UB
334   - Strict mode - (i.e. must analyze every operand)
335     - Poison through memory
336     - Function ABIs
337     - Full coverage of intrinsics, etc.. (ouch)
338
339   Instructions w/Unclear Semantics:
340   - shufflevector - It would seem reasonable for an out of bounds mask element
341     to produce poison, but the LangRef does not state.
342   - and/or - It would seem reasonable for poison to propagate from both
343     arguments, but LangRef doesn't state and propagatesFullPoison doesn't
344     include these two.
345   - all binary ops w/vector operands - The likely interpretation would be that
346     any element overflowing should produce poison for the entire result, but
347     the LangRef does not state.
348   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
349     strange that only certian flags should be documented as producing poison.
350
351   Cases of clear poison semantics not yet implemented:
352   - Exact flags on ashr/lshr produce poison
353   - NSW/NUW flags on shl produce poison
354   - Inbounds flag on getelementptr produce poison
355   - fptosi/fptoui (out of bounds input) produce poison
356   - Scalable vector types for insertelement/extractelement
357   - Floating point binary ops w/fmf nnan/noinfs flags produce poison
358 */
359