1//===- PoisonChecking.cpp - -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implements a transform pass which instruments IR such that poison semantics
10// are made explicit.  That is, it provides a (possibly partial) executable
11// semantics for every instruction w.r.t. poison as specified in the LLVM
12// LangRef.  There are obvious parallels to the sanitizer tools, but this pass
13// is focused purely on the semantics of LLVM IR, not any particular source
14// language.   If you're looking for something to see if your C/C++ contains
15// UB, this is not it.
16//
17// The rewritten semantics of each instruction will include the following
18// components:
19//
20// 1) The original instruction, unmodified.
21// 2) A propagation rule which translates dynamic information about the poison
22//    state of each input to whether the dynamic output of the instruction
23//    produces poison.
24// 3) A creation rule which validates any poison producing flags on the
25//    instruction itself (e.g. checks for overflow on nsw).
26// 4) A check rule which traps (to a handler function) if this instruction must
27//    execute undefined behavior given the poison state of it's inputs.
28//
29// This is a must analysis based transform; that is, the resulting code may
30// produce a false negative result (not report UB when actually exists
31// according to the LangRef spec), but should never produce a false positive
32// (report UB where it doesn't exist).
33//
34// Use cases for this pass include:
35// - Understanding (and testing!) the implications of the definition of poison
36//   from the LangRef.
37// - Validating the output of a IR fuzzer to ensure that all programs produced
38//   are well defined on the specific input used.
39// - Finding/confirming poison specific miscompiles by checking the poison
40//   status of an input/IR pair is the same before and after an optimization
41//   transform.
42// - Checking that a bugpoint reduction does not introduce UB which didn't
43//   exist in the original program being reduced.
44//
45// The major sources of inaccuracy are currently:
46// - Most validation rules not yet implemented for instructions with poison
47//   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
48// - UB which is control dependent on a branch on poison is not yet
49//   reported. Currently, only data flow dependence is modeled.
50// - Poison which is propagated through memory is not modeled.  As such,
51//   storing poison to memory and then reloading it will cause a false negative
52//   as we consider the reloaded value to not be poisoned.
53// - Poison propagation across function boundaries is not modeled.  At the
54//   moment, all arguments and return values are assumed not to be poison.
55// - Undef is not modeled.  In particular, the optimizer's freedom to pick
56//   concrete values for undef bits so as to maximize potential for producing
57//   poison is not modeled.
58//
59//===----------------------------------------------------------------------===//
60
61#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
62#include "llvm/ADT/DenseMap.h"
63#include "llvm/Analysis/ValueTracking.h"
64#include "llvm/IR/IRBuilder.h"
65#include "llvm/Support/CommandLine.h"
66
67using namespace llvm;
68
69#define DEBUG_TYPE "poison-checking"
70
71static cl::opt<bool>
72LocalCheck("poison-checking-function-local",
73           cl::init(false),
74           cl::desc("Check that returns are non-poison (for testing)"));
75
76
77static bool isConstantFalse(Value* V) {
78  assert(V->getType()->isIntegerTy(1));
79  if (auto *CI = dyn_cast<ConstantInt>(V))
80    return CI->isZero();
81  return false;
82}
83
84static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
85  if (Ops.size() == 0)
86    return B.getFalse();
87  unsigned i = 0;
88  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
89  if (i == Ops.size())
90    return B.getFalse();
91  Value *Accum = Ops[i++];
92  for (Value *Op : llvm::drop_begin(Ops, i))
93    if (!isConstantFalse(Op))
94      Accum = B.CreateOr(Accum, Op);
95  return Accum;
96}
97
98static void generateCreationChecksForBinOp(Instruction &I,
99                                           SmallVectorImpl<Value*> &Checks) {
100  assert(isa<BinaryOperator>(I));
101
102  IRBuilder<> B(&I);
103  Value *LHS = I.getOperand(0);
104  Value *RHS = I.getOperand(1);
105  switch (I.getOpcode()) {
106  default:
107    return;
108  case Instruction::Add: {
109    if (I.hasNoSignedWrap()) {
110      auto *OverflowOp =
111        B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
112      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
113    }
114    if (I.hasNoUnsignedWrap()) {
115      auto *OverflowOp =
116        B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
117      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
118    }
119    break;
120  }
121  case Instruction::Sub: {
122    if (I.hasNoSignedWrap()) {
123      auto *OverflowOp =
124        B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
125      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
126    }
127    if (I.hasNoUnsignedWrap()) {
128      auto *OverflowOp =
129        B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
130      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
131    }
132    break;
133  }
134  case Instruction::Mul: {
135    if (I.hasNoSignedWrap()) {
136      auto *OverflowOp =
137        B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
138      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
139    }
140    if (I.hasNoUnsignedWrap()) {
141      auto *OverflowOp =
142        B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
143      Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
144    }
145    break;
146  }
147  case Instruction::UDiv: {
148    if (I.isExact()) {
149      auto *Check =
150        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
151                     ConstantInt::get(LHS->getType(), 0));
152      Checks.push_back(Check);
153    }
154    break;
155  }
156  case Instruction::SDiv: {
157    if (I.isExact()) {
158      auto *Check =
159        B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
160                     ConstantInt::get(LHS->getType(), 0));
161      Checks.push_back(Check);
162    }
163    break;
164  }
165  case Instruction::AShr:
166  case Instruction::LShr:
167  case Instruction::Shl: {
168    Value *ShiftCheck =
169      B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
170                   ConstantInt::get(RHS->getType(),
171                                    LHS->getType()->getScalarSizeInBits()));
172    Checks.push_back(ShiftCheck);
173    break;
174  }
175  };
176}
177
178/// Given an instruction which can produce poison on non-poison inputs
179/// (i.e. canCreatePoison returns true), generate runtime checks to produce
180/// boolean indicators of when poison would result.
181static void generateCreationChecks(Instruction &I,
182                                   SmallVectorImpl<Value*> &Checks) {
183  IRBuilder<> B(&I);
184  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
185    generateCreationChecksForBinOp(I, Checks);
186
187  // Handle non-binops separately
188  switch (I.getOpcode()) {
189  default:
190    // Note there are a couple of missing cases here, once implemented, this
191    // should become an llvm_unreachable.
192    break;
193  case Instruction::ExtractElement: {
194    Value *Vec = I.getOperand(0);
195    auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
196    if (!VecVTy)
197      break;
198    Value *Idx = I.getOperand(1);
199    unsigned NumElts = VecVTy->getNumElements();
200    Value *Check =
201      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
202                   ConstantInt::get(Idx->getType(), NumElts));
203    Checks.push_back(Check);
204    break;
205  }
206  case Instruction::InsertElement: {
207    Value *Vec = I.getOperand(0);
208    auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
209    if (!VecVTy)
210      break;
211    Value *Idx = I.getOperand(2);
212    unsigned NumElts = VecVTy->getNumElements();
213    Value *Check =
214      B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
215                   ConstantInt::get(Idx->getType(), NumElts));
216    Checks.push_back(Check);
217    break;
218  }
219  };
220}
221
222static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
223  auto Itr = ValToPoison.find(V);
224  if (Itr != ValToPoison.end())
225    return Itr->second;
226  if (isa<Constant>(V)) {
227    return ConstantInt::getFalse(V->getContext());
228  }
229  // Return false for unknwon values - this implements a non-strict mode where
230  // unhandled IR constructs are simply considered to never produce poison.  At
231  // some point in the future, we probably want a "strict mode" for testing if
232  // nothing else.
233  return ConstantInt::getFalse(V->getContext());
234}
235
236static void CreateAssert(IRBuilder<> &B, Value *Cond) {
237  assert(Cond->getType()->isIntegerTy(1));
238  if (auto *CI = dyn_cast<ConstantInt>(Cond))
239    if (CI->isAllOnesValue())
240      return;
241
242  Module *M = B.GetInsertBlock()->getModule();
243  M->getOrInsertFunction("__poison_checker_assert",
244                         Type::getVoidTy(M->getContext()),
245                         Type::getInt1Ty(M->getContext()));
246  Function *TrapFunc = M->getFunction("__poison_checker_assert");
247  B.CreateCall(TrapFunc, Cond);
248}
249
250static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
251  assert(Cond->getType()->isIntegerTy(1));
252  CreateAssert(B, B.CreateNot(Cond));
253}
254
255static bool rewrite(Function &F) {
256  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
257
258  DenseMap<Value *, Value *> ValToPoison;
259
260  for (BasicBlock &BB : F)
261    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
262      auto *OldPHI = cast<PHINode>(&*I);
263      auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
264      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
265        NewPHI->addIncoming(UndefValue::get(Int1Ty),
266                            OldPHI->getIncomingBlock(i));
267      NewPHI->insertBefore(OldPHI);
268      ValToPoison[OldPHI] = NewPHI;
269    }
270
271  for (BasicBlock &BB : F)
272    for (Instruction &I : BB) {
273      if (isa<PHINode>(I)) continue;
274
275      IRBuilder<> B(cast<Instruction>(&I));
276
277      // Note: There are many more sources of documented UB, but this pass only
278      // attempts to find UB triggered by propagation of poison.
279      SmallVector<const Value *, 4> NonPoisonOps;
280      SmallPtrSet<const Value *, 4> SeenNonPoisonOps;
281      getGuaranteedNonPoisonOps(&I, NonPoisonOps);
282      for (const Value *Op : NonPoisonOps)
283        if (SeenNonPoisonOps.insert(Op).second)
284          CreateAssertNot(B,
285                          getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
286
287      if (LocalCheck)
288        if (auto *RI = dyn_cast<ReturnInst>(&I))
289          if (RI->getNumOperands() != 0) {
290            Value *Op = RI->getOperand(0);
291            CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
292          }
293
294      SmallVector<Value*, 4> Checks;
295      for (const Use &U : I.operands()) {
296        if (ValToPoison.count(U) && propagatesPoison(U))
297          Checks.push_back(getPoisonFor(ValToPoison, U));
298      }
299
300      if (canCreatePoison(cast<Operator>(&I)))
301        generateCreationChecks(I, Checks);
302      ValToPoison[&I] = buildOrChain(B, Checks);
303    }
304
305  for (BasicBlock &BB : F)
306    for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
307      auto *OldPHI = cast<PHINode>(&*I);
308      if (!ValToPoison.count(OldPHI))
309        continue; // skip the newly inserted phis
310      auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
311      for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
312        auto *OldVal = OldPHI->getIncomingValue(i);
313        NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
314      }
315    }
316  return true;
317}
318
319
320PreservedAnalyses PoisonCheckingPass::run(Module &M,
321                                          ModuleAnalysisManager &AM) {
322  bool Changed = false;
323  for (auto &F : M)
324    Changed |= rewrite(F);
325
326  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
327}
328
329PreservedAnalyses PoisonCheckingPass::run(Function &F,
330                                          FunctionAnalysisManager &AM) {
331  return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
332}
333
334/* Major TODO Items:
335   - Control dependent poison UB
336   - Strict mode - (i.e. must analyze every operand)
337     - Poison through memory
338     - Function ABIs
339     - Full coverage of intrinsics, etc.. (ouch)
340
341   Instructions w/Unclear Semantics:
342   - shufflevector - It would seem reasonable for an out of bounds mask element
343     to produce poison, but the LangRef does not state.
344   - all binary ops w/vector operands - The likely interpretation would be that
345     any element overflowing should produce poison for the entire result, but
346     the LangRef does not state.
347   - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
348     strange that only certian flags should be documented as producing poison.
349
350   Cases of clear poison semantics not yet implemented:
351   - Exact flags on ashr/lshr produce poison
352   - NSW/NUW flags on shl produce poison
353   - Inbounds flag on getelementptr produce poison
354   - fptosi/fptoui (out of bounds input) produce poison
355   - Scalable vector types for insertelement/extractelement
356   - Floating point binary ops w/fmf nnan/noinfs flags produce poison
357 */
358