1//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11/// analysis.
12///
13/// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14/// class of bugs on its own.  Instead, it provides a generic dynamic data flow
15/// analysis framework to be used by clients to help detect application-specific
16/// issues within their own code.
17///
18/// The analysis is based on automatic propagation of data flow labels (also
19/// known as taint labels) through a program as it performs computation.  Each
20/// byte of application memory is backed by two bytes of shadow memory which
21/// hold the label.  On Linux/x86_64, memory is laid out as follows:
22///
23/// +--------------------+ 0x800000000000 (top of memory)
24/// | application memory |
25/// +--------------------+ 0x700000008000 (kAppAddr)
26/// |                    |
27/// |       unused       |
28/// |                    |
29/// +--------------------+ 0x200200000000 (kUnusedAddr)
30/// |    union table     |
31/// +--------------------+ 0x200000000000 (kUnionTableAddr)
32/// |   shadow memory    |
33/// +--------------------+ 0x000000010000 (kShadowAddr)
34/// | reserved by kernel |
35/// +--------------------+ 0x000000000000
36///
37/// To derive a shadow memory address from an application memory address,
38/// bits 44-46 are cleared to bring the address into the range
39/// [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
40/// account for the double byte representation of shadow labels and move the
41/// address into the shadow memory range.  See the function
42/// DataFlowSanitizer::getShadowAddress below.
43///
44/// For more information, please refer to the design document:
45/// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
46//
47//===----------------------------------------------------------------------===//
48
49#include "llvm/ADT/DenseMap.h"
50#include "llvm/ADT/DenseSet.h"
51#include "llvm/ADT/DepthFirstIterator.h"
52#include "llvm/ADT/None.h"
53#include "llvm/ADT/SmallPtrSet.h"
54#include "llvm/ADT/SmallVector.h"
55#include "llvm/ADT/StringExtras.h"
56#include "llvm/ADT/StringRef.h"
57#include "llvm/ADT/Triple.h"
58#include "llvm/Analysis/ValueTracking.h"
59#include "llvm/IR/Argument.h"
60#include "llvm/IR/Attributes.h"
61#include "llvm/IR/BasicBlock.h"
62#include "llvm/IR/Constant.h"
63#include "llvm/IR/Constants.h"
64#include "llvm/IR/DataLayout.h"
65#include "llvm/IR/DerivedTypes.h"
66#include "llvm/IR/Dominators.h"
67#include "llvm/IR/Function.h"
68#include "llvm/IR/GlobalAlias.h"
69#include "llvm/IR/GlobalValue.h"
70#include "llvm/IR/GlobalVariable.h"
71#include "llvm/IR/IRBuilder.h"
72#include "llvm/IR/InlineAsm.h"
73#include "llvm/IR/InstVisitor.h"
74#include "llvm/IR/InstrTypes.h"
75#include "llvm/IR/Instruction.h"
76#include "llvm/IR/Instructions.h"
77#include "llvm/IR/IntrinsicInst.h"
78#include "llvm/IR/LLVMContext.h"
79#include "llvm/IR/MDBuilder.h"
80#include "llvm/IR/Module.h"
81#include "llvm/IR/Type.h"
82#include "llvm/IR/User.h"
83#include "llvm/IR/Value.h"
84#include "llvm/InitializePasses.h"
85#include "llvm/Pass.h"
86#include "llvm/Support/Casting.h"
87#include "llvm/Support/CommandLine.h"
88#include "llvm/Support/ErrorHandling.h"
89#include "llvm/Support/SpecialCaseList.h"
90#include "llvm/Support/VirtualFileSystem.h"
91#include "llvm/Transforms/Instrumentation.h"
92#include "llvm/Transforms/Utils/BasicBlockUtils.h"
93#include "llvm/Transforms/Utils/Local.h"
94#include <algorithm>
95#include <cassert>
96#include <cstddef>
97#include <cstdint>
98#include <iterator>
99#include <memory>
100#include <set>
101#include <string>
102#include <utility>
103#include <vector>
104
105using namespace llvm;
106
// Name of the external symbol consulted when generating shadow addresses on
// architectures that support more than one VMA size.  On those targets the
// runtime sets the mask from the actual VMA range, so the pass cannot use a
// constant integer.
static constexpr const char *kDFSanExternShadowPtrMask =
    "__dfsan_shadow_ptr_mask";
111
112// The -dfsan-preserve-alignment flag controls whether this pass assumes that
113// alignment requirements provided by the input IR are correct.  For example,
114// if the input IR contains a load with alignment 8, this flag will cause
115// the shadow load to have alignment 16.  This flag is disabled by default as
116// we have unfortunately encountered too much code (including Clang itself;
117// see PR14291) which performs misaligned access.
118static cl::opt<bool> ClPreserveAlignment(
119    "dfsan-preserve-alignment",
120    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
121    cl::init(false));
122
123// The ABI list files control how shadow parameters are passed. The pass treats
124// every function labelled "uninstrumented" in the ABI list file as conforming
125// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
126// additional annotations for those functions, a call to one of those functions
127// will produce a warning message, as the labelling behaviour of the function is
128// unknown.  The other supported annotations are "functional" and "discard",
129// which are described below under DataFlowSanitizer::WrapperKind.
130static cl::list<std::string> ClABIListFiles(
131    "dfsan-abilist",
132    cl::desc("File listing native ABI functions and how the pass treats them"),
133    cl::Hidden);
134
135// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
136// functions (see DataFlowSanitizer::InstrumentedABI below).
137static cl::opt<bool> ClArgsABI(
138    "dfsan-args-abi",
139    cl::desc("Use the argument ABI rather than the TLS ABI"),
140    cl::Hidden);
141
142// Controls whether the pass includes or ignores the labels of pointers in load
143// instructions.
144static cl::opt<bool> ClCombinePointerLabelsOnLoad(
145    "dfsan-combine-pointer-labels-on-load",
146    cl::desc("Combine the label of the pointer with the label of the data when "
147             "loading from memory."),
148    cl::Hidden, cl::init(true));
149
150// Controls whether the pass includes or ignores the labels of pointers in
151// stores instructions.
152static cl::opt<bool> ClCombinePointerLabelsOnStore(
153    "dfsan-combine-pointer-labels-on-store",
154    cl::desc("Combine the label of the pointer with the label of the data when "
155             "storing in memory."),
156    cl::Hidden, cl::init(false));
157
158static cl::opt<bool> ClDebugNonzeroLabels(
159    "dfsan-debug-nonzero-labels",
160    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
161             "load or return with a nonzero label"),
162    cl::Hidden);
163
164// Experimental feature that inserts callbacks for certain data events.
165// Currently callbacks are only inserted for loads, stores, memory transfers
166// (i.e. memcpy and memmove), and comparisons.
167//
168// If this flag is set to true, the user must provide definitions for the
169// following callback functions:
170//   void __dfsan_load_callback(dfsan_label Label);
171//   void __dfsan_store_callback(dfsan_label Label);
172//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
173//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
174static cl::opt<bool> ClEventCallbacks(
175    "dfsan-event-callbacks",
176    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
177    cl::Hidden, cl::init(false));
178
179static StringRef GetGlobalTypeString(const GlobalValue &G) {
180  // Types of GlobalVariables are always pointer types.
181  Type *GType = G.getValueType();
182  // For now we support excluding struct types only.
183  if (StructType *SGType = dyn_cast<StructType>(GType)) {
184    if (!SGType->isLiteral())
185      return SGType->getName();
186  }
187  return "<unknown type>";
188}
189
190namespace {
191
192class DFSanABIList {
193  std::unique_ptr<SpecialCaseList> SCL;
194
195 public:
196  DFSanABIList() = default;
197
198  void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
199
200  /// Returns whether either this function or its source file are listed in the
201  /// given category.
202  bool isIn(const Function &F, StringRef Category) const {
203    return isIn(*F.getParent(), Category) ||
204           SCL->inSection("dataflow", "fun", F.getName(), Category);
205  }
206
207  /// Returns whether this global alias is listed in the given category.
208  ///
209  /// If GA aliases a function, the alias's name is matched as a function name
210  /// would be.  Similarly, aliases of globals are matched like globals.
211  bool isIn(const GlobalAlias &GA, StringRef Category) const {
212    if (isIn(*GA.getParent(), Category))
213      return true;
214
215    if (isa<FunctionType>(GA.getValueType()))
216      return SCL->inSection("dataflow", "fun", GA.getName(), Category);
217
218    return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
219           SCL->inSection("dataflow", "type", GetGlobalTypeString(GA),
220                          Category);
221  }
222
223  /// Returns whether this module is listed in the given category.
224  bool isIn(const Module &M, StringRef Category) const {
225    return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
226  }
227};
228
229/// TransformedFunction is used to express the result of transforming one
230/// function type into another.  This struct is immutable.  It holds metadata
231/// useful for updating calls of the old function to the new type.
232struct TransformedFunction {
233  TransformedFunction(FunctionType* OriginalType,
234                      FunctionType* TransformedType,
235                      std::vector<unsigned> ArgumentIndexMapping)
236      : OriginalType(OriginalType),
237        TransformedType(TransformedType),
238        ArgumentIndexMapping(ArgumentIndexMapping) {}
239
240  // Disallow copies.
241  TransformedFunction(const TransformedFunction&) = delete;
242  TransformedFunction& operator=(const TransformedFunction&) = delete;
243
244  // Allow moves.
245  TransformedFunction(TransformedFunction&&) = default;
246  TransformedFunction& operator=(TransformedFunction&&) = default;
247
248  /// Type of the function before the transformation.
249  FunctionType *OriginalType;
250
251  /// Type of the function after the transformation.
252  FunctionType *TransformedType;
253
254  /// Transforming a function may change the position of arguments.  This
255  /// member records the mapping from each argument's old position to its new
256  /// position.  Argument positions are zero-indexed.  If the transformation
257  /// from F to F' made the first argument of F into the third argument of F',
258  /// then ArgumentIndexMapping[0] will equal 2.
259  std::vector<unsigned> ArgumentIndexMapping;
260};
261
262/// Given function attributes from a call site for the original function,
263/// return function attributes appropriate for a call to the transformed
264/// function.
265AttributeList TransformFunctionAttributes(
266    const TransformedFunction& TransformedFunction,
267    LLVMContext& Ctx, AttributeList CallSiteAttrs) {
268
269  // Construct a vector of AttributeSet for each function argument.
270  std::vector<llvm::AttributeSet> ArgumentAttributes(
271      TransformedFunction.TransformedType->getNumParams());
272
273  // Copy attributes from the parameter of the original function to the
274  // transformed version.  'ArgumentIndexMapping' holds the mapping from
275  // old argument position to new.
276  for (unsigned i=0, ie = TransformedFunction.ArgumentIndexMapping.size();
277       i < ie; ++i) {
278    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[i];
279    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(i);
280  }
281
282  // Copy annotations on varargs arguments.
283  for (unsigned i = TransformedFunction.OriginalType->getNumParams(),
284       ie = CallSiteAttrs.getNumAttrSets(); i<ie; ++i) {
285    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(i));
286  }
287
288  return AttributeList::get(
289      Ctx,
290      CallSiteAttrs.getFnAttributes(),
291      CallSiteAttrs.getRetAttributes(),
292      llvm::makeArrayRef(ArgumentAttributes));
293}
294
295class DataFlowSanitizer : public ModulePass {
296  friend struct DFSanFunction;
297  friend class DFSanVisitor;
298
299  enum { ShadowWidthBits = 16, ShadowWidthBytes = ShadowWidthBits / 8 };
300
301  /// Which ABI should be used for instrumented functions?
302  enum InstrumentedABI {
303    /// Argument and return value labels are passed through additional
304    /// arguments and by modifying the return type.
305    IA_Args,
306
307    /// Argument and return value labels are passed through TLS variables
308    /// __dfsan_arg_tls and __dfsan_retval_tls.
309    IA_TLS
310  };
311
312  /// How should calls to uninstrumented functions be handled?
313  enum WrapperKind {
314    /// This function is present in an uninstrumented form but we don't know
315    /// how it should be handled.  Print a warning and call the function anyway.
316    /// Don't label the return value.
317    WK_Warning,
318
319    /// This function does not write to (user-accessible) memory, and its return
320    /// value is unlabelled.
321    WK_Discard,
322
323    /// This function does not write to (user-accessible) memory, and the label
324    /// of its return value is the union of the label of its arguments.
325    WK_Functional,
326
327    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
328    /// where F is the name of the function.  This function may wrap the
329    /// original function or provide its own implementation.  This is similar to
330    /// the IA_Args ABI, except that IA_Args uses a struct return type to
331    /// pass the return value shadow in a register, while WK_Custom uses an
332    /// extra pointer argument to return the shadow.  This allows the wrapped
333    /// form of the function type to be expressed in C.
334    WK_Custom
335  };
336
337  Module *Mod;
338  LLVMContext *Ctx;
339  IntegerType *ShadowTy;
340  PointerType *ShadowPtrTy;
341  IntegerType *IntptrTy;
342  ConstantInt *ZeroShadow;
343  ConstantInt *ShadowPtrMask;
344  ConstantInt *ShadowPtrMul;
345  Constant *ArgTLS;
346  Constant *RetvalTLS;
347  void *(*GetArgTLSPtr)();
348  void *(*GetRetvalTLSPtr)();
349  FunctionType *GetArgTLSTy;
350  FunctionType *GetRetvalTLSTy;
351  Constant *GetArgTLS;
352  Constant *GetRetvalTLS;
353  Constant *ExternalShadowMask;
354  FunctionType *DFSanUnionFnTy;
355  FunctionType *DFSanUnionLoadFnTy;
356  FunctionType *DFSanUnimplementedFnTy;
357  FunctionType *DFSanSetLabelFnTy;
358  FunctionType *DFSanNonzeroLabelFnTy;
359  FunctionType *DFSanVarargWrapperFnTy;
360  FunctionType *DFSanLoadStoreCmpCallbackFnTy;
361  FunctionType *DFSanMemTransferCallbackFnTy;
362  FunctionCallee DFSanUnionFn;
363  FunctionCallee DFSanCheckedUnionFn;
364  FunctionCallee DFSanUnionLoadFn;
365  FunctionCallee DFSanUnimplementedFn;
366  FunctionCallee DFSanSetLabelFn;
367  FunctionCallee DFSanNonzeroLabelFn;
368  FunctionCallee DFSanVarargWrapperFn;
369  FunctionCallee DFSanLoadCallbackFn;
370  FunctionCallee DFSanStoreCallbackFn;
371  FunctionCallee DFSanMemTransferCallbackFn;
372  FunctionCallee DFSanCmpCallbackFn;
373  MDNode *ColdCallWeights;
374  DFSanABIList ABIList;
375  DenseMap<Value *, Function *> UnwrappedFnMap;
376  AttrBuilder ReadOnlyNoneAttrs;
377  bool DFSanRuntimeShadowMask = false;
378
379  Value *getShadowAddress(Value *Addr, Instruction *Pos);
380  bool isInstrumented(const Function *F);
381  bool isInstrumented(const GlobalAlias *GA);
382  FunctionType *getArgsFunctionType(FunctionType *T);
383  FunctionType *getTrampolineFunctionType(FunctionType *T);
384  TransformedFunction getCustomFunctionType(FunctionType *T);
385  InstrumentedABI getInstrumentedABI();
386  WrapperKind getWrapperKind(Function *F);
387  void addGlobalNamePrefix(GlobalValue *GV);
388  Function *buildWrapperFunction(Function *F, StringRef NewFName,
389                                 GlobalValue::LinkageTypes NewFLink,
390                                 FunctionType *NewFT);
391  Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName);
392  void initializeCallbackFunctions(Module &M);
393  void initializeRuntimeFunctions(Module &M);
394
395public:
396  static char ID;
397
398  DataFlowSanitizer(
399      const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
400      void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
401
402  bool doInitialization(Module &M) override;
403  bool runOnModule(Module &M) override;
404};
405
406struct DFSanFunction {
407  DataFlowSanitizer &DFS;
408  Function *F;
409  DominatorTree DT;
410  DataFlowSanitizer::InstrumentedABI IA;
411  bool IsNativeABI;
412  Value *ArgTLSPtr = nullptr;
413  Value *RetvalTLSPtr = nullptr;
414  AllocaInst *LabelReturnAlloca = nullptr;
415  DenseMap<Value *, Value *> ValShadowMap;
416  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
417  std::vector<std::pair<PHINode *, PHINode *>> PHIFixups;
418  DenseSet<Instruction *> SkipInsts;
419  std::vector<Value *> NonZeroChecks;
420  bool AvoidNewBlocks;
421
422  struct CachedCombinedShadow {
423    BasicBlock *Block;
424    Value *Shadow;
425  };
426  DenseMap<std::pair<Value *, Value *>, CachedCombinedShadow>
427      CachedCombinedShadows;
428  DenseMap<Value *, std::set<Value *>> ShadowElements;
429
430  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
431      : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
432    DT.recalculate(*F);
433    // FIXME: Need to track down the register allocator issue which causes poor
434    // performance in pathological cases with large numbers of basic blocks.
435    AvoidNewBlocks = F->size() > 1000;
436  }
437
438  Value *getArgTLSPtr();
439  Value *getArgTLS(unsigned Index, Instruction *Pos);
440  Value *getRetvalTLS();
441  Value *getShadow(Value *V);
442  void setShadow(Instruction *I, Value *Shadow);
443  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
444  Value *combineOperandShadows(Instruction *Inst);
445  Value *loadShadow(Value *ShadowAddr, uint64_t Size, uint64_t Align,
446                    Instruction *Pos);
447  void storeShadow(Value *Addr, uint64_t Size, Align Alignment, Value *Shadow,
448                   Instruction *Pos);
449};
450
451class DFSanVisitor : public InstVisitor<DFSanVisitor> {
452public:
453  DFSanFunction &DFSF;
454
455  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
456
457  const DataLayout &getDataLayout() const {
458    return DFSF.F->getParent()->getDataLayout();
459  }
460
461  // Combines shadow values for all of I's operands. Returns the combined shadow
462  // value.
463  Value *visitOperandShadowInst(Instruction &I);
464
465  void visitUnaryOperator(UnaryOperator &UO);
466  void visitBinaryOperator(BinaryOperator &BO);
467  void visitCastInst(CastInst &CI);
468  void visitCmpInst(CmpInst &CI);
469  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
470  void visitLoadInst(LoadInst &LI);
471  void visitStoreInst(StoreInst &SI);
472  void visitReturnInst(ReturnInst &RI);
473  void visitCallBase(CallBase &CB);
474  void visitPHINode(PHINode &PN);
475  void visitExtractElementInst(ExtractElementInst &I);
476  void visitInsertElementInst(InsertElementInst &I);
477  void visitShuffleVectorInst(ShuffleVectorInst &I);
478  void visitExtractValueInst(ExtractValueInst &I);
479  void visitInsertValueInst(InsertValueInst &I);
480  void visitAllocaInst(AllocaInst &I);
481  void visitSelectInst(SelectInst &I);
482  void visitMemSetInst(MemSetInst &I);
483  void visitMemTransferInst(MemTransferInst &I);
484};
485
486} // end anonymous namespace
487
488char DataFlowSanitizer::ID;
489
490INITIALIZE_PASS(DataFlowSanitizer, "dfsan",
491                "DataFlowSanitizer: dynamic data flow analysis.", false, false)
492
493ModulePass *
494llvm::createDataFlowSanitizerPass(const std::vector<std::string> &ABIListFiles,
495                                  void *(*getArgTLS)(),
496                                  void *(*getRetValTLS)()) {
497  return new DataFlowSanitizer(ABIListFiles, getArgTLS, getRetValTLS);
498}
499
500DataFlowSanitizer::DataFlowSanitizer(
501    const std::vector<std::string> &ABIListFiles, void *(*getArgTLS)(),
502    void *(*getRetValTLS)())
503    : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) {
504  std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
505  AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(),
506                         ClABIListFiles.end());
507  // FIXME: should we propagate vfs::FileSystem to this constructor?
508  ABIList.set(
509      SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
510}
511
512FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
513  SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
514  ArgTypes.append(T->getNumParams(), ShadowTy);
515  if (T->isVarArg())
516    ArgTypes.push_back(ShadowPtrTy);
517  Type *RetType = T->getReturnType();
518  if (!RetType->isVoidTy())
519    RetType = StructType::get(RetType, ShadowTy);
520  return FunctionType::get(RetType, ArgTypes, T->isVarArg());
521}
522
523FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
524  assert(!T->isVarArg());
525  SmallVector<Type *, 4> ArgTypes;
526  ArgTypes.push_back(T->getPointerTo());
527  ArgTypes.append(T->param_begin(), T->param_end());
528  ArgTypes.append(T->getNumParams(), ShadowTy);
529  Type *RetType = T->getReturnType();
530  if (!RetType->isVoidTy())
531    ArgTypes.push_back(ShadowPtrTy);
532  return FunctionType::get(T->getReturnType(), ArgTypes, false);
533}
534
535TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
536  SmallVector<Type *, 4> ArgTypes;
537
538  // Some parameters of the custom function being constructed are
539  // parameters of T.  Record the mapping from parameters of T to
540  // parameters of the custom function, so that parameter attributes
541  // at call sites can be updated.
542  std::vector<unsigned> ArgumentIndexMapping;
543  for (unsigned i = 0, ie = T->getNumParams(); i != ie; ++i) {
544    Type* param_type = T->getParamType(i);
545    FunctionType *FT;
546    if (isa<PointerType>(param_type) && (FT = dyn_cast<FunctionType>(
547            cast<PointerType>(param_type)->getElementType()))) {
548      ArgumentIndexMapping.push_back(ArgTypes.size());
549      ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo());
550      ArgTypes.push_back(Type::getInt8PtrTy(*Ctx));
551    } else {
552      ArgumentIndexMapping.push_back(ArgTypes.size());
553      ArgTypes.push_back(param_type);
554    }
555  }
556  for (unsigned i = 0, e = T->getNumParams(); i != e; ++i)
557    ArgTypes.push_back(ShadowTy);
558  if (T->isVarArg())
559    ArgTypes.push_back(ShadowPtrTy);
560  Type *RetType = T->getReturnType();
561  if (!RetType->isVoidTy())
562    ArgTypes.push_back(ShadowPtrTy);
563  return TransformedFunction(
564      T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
565      ArgumentIndexMapping);
566}
567
568bool DataFlowSanitizer::doInitialization(Module &M) {
569  Triple TargetTriple(M.getTargetTriple());
570  bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
571  bool IsMIPS64 = TargetTriple.isMIPS64();
572  bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 ||
573                   TargetTriple.getArch() == Triple::aarch64_be;
574
575  const DataLayout &DL = M.getDataLayout();
576
577  Mod = &M;
578  Ctx = &M.getContext();
579  ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
580  ShadowPtrTy = PointerType::getUnqual(ShadowTy);
581  IntptrTy = DL.getIntPtrType(*Ctx);
582  ZeroShadow = ConstantInt::getSigned(ShadowTy, 0);
583  ShadowPtrMul = ConstantInt::getSigned(IntptrTy, ShadowWidthBytes);
584  if (IsX86_64)
585    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL);
586  else if (IsMIPS64)
587    ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL);
588  // AArch64 supports multiple VMAs and the shadow mask is set at runtime.
589  else if (IsAArch64)
590    DFSanRuntimeShadowMask = true;
591  else
592    report_fatal_error("unsupported triple");
593
594  Type *DFSanUnionArgs[2] = { ShadowTy, ShadowTy };
595  DFSanUnionFnTy =
596      FunctionType::get(ShadowTy, DFSanUnionArgs, /*isVarArg=*/ false);
597  Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy };
598  DFSanUnionLoadFnTy =
599      FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false);
600  DFSanUnimplementedFnTy = FunctionType::get(
601      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
602  Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy };
603  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
604                                        DFSanSetLabelArgs, /*isVarArg=*/false);
605  DFSanNonzeroLabelFnTy = FunctionType::get(
606      Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
607  DFSanVarargWrapperFnTy = FunctionType::get(
608      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
609  DFSanLoadStoreCmpCallbackFnTy =
610      FunctionType::get(Type::getVoidTy(*Ctx), ShadowTy, /*isVarArg=*/false);
611  Type *DFSanMemTransferCallbackArgs[2] = {ShadowPtrTy, IntptrTy};
612  DFSanMemTransferCallbackFnTy =
613      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
614                        /*isVarArg=*/false);
615
616  if (GetArgTLSPtr) {
617    Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
618    ArgTLS = nullptr;
619    GetArgTLSTy = FunctionType::get(PointerType::getUnqual(ArgTLSTy), false);
620    GetArgTLS = ConstantExpr::getIntToPtr(
621        ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
622        PointerType::getUnqual(GetArgTLSTy));
623  }
624  if (GetRetvalTLSPtr) {
625    RetvalTLS = nullptr;
626    GetRetvalTLSTy = FunctionType::get(PointerType::getUnqual(ShadowTy), false);
627    GetRetvalTLS = ConstantExpr::getIntToPtr(
628        ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
629        PointerType::getUnqual(GetRetvalTLSTy));
630  }
631
632  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
633  return true;
634}
635
636bool DataFlowSanitizer::isInstrumented(const Function *F) {
637  return !ABIList.isIn(*F, "uninstrumented");
638}
639
640bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
641  return !ABIList.isIn(*GA, "uninstrumented");
642}
643
644DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
645  return ClArgsABI ? IA_Args : IA_TLS;
646}
647
648DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
649  if (ABIList.isIn(*F, "functional"))
650    return WK_Functional;
651  if (ABIList.isIn(*F, "discard"))
652    return WK_Discard;
653  if (ABIList.isIn(*F, "custom"))
654    return WK_Custom;
655
656  return WK_Warning;
657}
658
659void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) {
660  std::string GVName = std::string(GV->getName()), Prefix = "dfs$";
661  GV->setName(Prefix + GVName);
662
663  // Try to change the name of the function in module inline asm.  We only do
664  // this for specific asm directives, currently only ".symver", to try to avoid
665  // corrupting asm which happens to contain the symbol name as a substring.
666  // Note that the substitution for .symver assumes that the versioned symbol
667  // also has an instrumented name.
668  std::string Asm = GV->getParent()->getModuleInlineAsm();
669  std::string SearchStr = ".symver " + GVName + ",";
670  size_t Pos = Asm.find(SearchStr);
671  if (Pos != std::string::npos) {
672    Asm.replace(Pos, SearchStr.size(),
673                ".symver " + Prefix + GVName + "," + Prefix);
674    GV->getParent()->setModuleInlineAsm(Asm);
675  }
676}
677
678Function *
679DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
680                                        GlobalValue::LinkageTypes NewFLink,
681                                        FunctionType *NewFT) {
682  FunctionType *FT = F->getFunctionType();
683  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
684                                    NewFName, F->getParent());
685  NewF->copyAttributesFrom(F);
686  NewF->removeAttributes(
687      AttributeList::ReturnIndex,
688      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
689
690  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
691  if (F->isVarArg()) {
692    NewF->removeAttributes(AttributeList::FunctionIndex,
693                           AttrBuilder().addAttribute("split-stack"));
694    CallInst::Create(DFSanVarargWrapperFn,
695                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
696                     BB);
697    new UnreachableInst(*Ctx, BB);
698  } else {
699    std::vector<Value *> Args;
700    unsigned n = FT->getNumParams();
701    for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n)
702      Args.push_back(&*ai);
703    CallInst *CI = CallInst::Create(F, Args, "", BB);
704    if (FT->getReturnType()->isVoidTy())
705      ReturnInst::Create(*Ctx, BB);
706    else
707      ReturnInst::Create(*Ctx, CI, BB);
708  }
709
710  return NewF;
711}
712
713Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
714                                                          StringRef FName) {
715  FunctionType *FTT = getTrampolineFunctionType(FT);
716  FunctionCallee C = Mod->getOrInsertFunction(FName, FTT);
717  Function *F = dyn_cast<Function>(C.getCallee());
718  if (F && F->isDeclaration()) {
719    F->setLinkage(GlobalValue::LinkOnceODRLinkage);
720    BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F);
721    std::vector<Value *> Args;
722    Function::arg_iterator AI = F->arg_begin(); ++AI;
723    for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
724      Args.push_back(&*AI);
725    CallInst *CI = CallInst::Create(FT, &*F->arg_begin(), Args, "", BB);
726    ReturnInst *RI;
727    if (FT->getReturnType()->isVoidTy())
728      RI = ReturnInst::Create(*Ctx, BB);
729    else
730      RI = ReturnInst::Create(*Ctx, CI, BB);
731
732    DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
733    Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI;
734    for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N)
735      DFSF.ValShadowMap[&*ValAI] = &*ShadowAI;
736    DFSanVisitor(DFSF).visitCallInst(*CI);
737    if (!FT->getReturnType()->isVoidTy())
738      new StoreInst(DFSF.getShadow(RI->getReturnValue()),
739                    &*std::prev(F->arg_end()), RI);
740  }
741
742  return cast<Constant>(C.getCallee());
743}
744
745// Initialize DataFlowSanitizer runtime functions and declare them in the module
746void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
747  {
748    AttributeList AL;
749    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
750                         Attribute::NoUnwind);
751    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
752                         Attribute::ReadNone);
753    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
754                         Attribute::ZExt);
755    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
756    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
757    DFSanUnionFn =
758        Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy, AL);
759  }
760  {
761    AttributeList AL;
762    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
763                         Attribute::NoUnwind);
764    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
765                         Attribute::ReadNone);
766    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
767                         Attribute::ZExt);
768    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
769    AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
770    DFSanCheckedUnionFn =
771        Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy, AL);
772  }
773  {
774    AttributeList AL;
775    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
776                         Attribute::NoUnwind);
777    AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
778                         Attribute::ReadOnly);
779    AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
780                         Attribute::ZExt);
781    DFSanUnionLoadFn =
782        Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
783  }
784  DFSanUnimplementedFn =
785      Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
786  {
787    AttributeList AL;
788    AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
789    DFSanSetLabelFn =
790        Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
791  }
792  DFSanNonzeroLabelFn =
793      Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
794  DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
795                                                  DFSanVarargWrapperFnTy);
796}
797
798// Initializes event callback functions and declare them in the module
799void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
800  DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
801                                                 DFSanLoadStoreCmpCallbackFnTy);
802  DFSanStoreCallbackFn = Mod->getOrInsertFunction(
803      "__dfsan_store_callback", DFSanLoadStoreCmpCallbackFnTy);
804  DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
805      "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
806  DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback",
807                                                DFSanLoadStoreCmpCallbackFnTy);
808}
809
810bool DataFlowSanitizer::runOnModule(Module &M) {
811  if (ABIList.isIn(M, "skip"))
812    return false;
813
814  const unsigned InitialGlobalSize = M.global_size();
815  const unsigned InitialModuleSize = M.size();
816
817  bool Changed = false;
818
819  if (!GetArgTLSPtr) {
820    Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
821    ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy);
822    if (GlobalVariable *G = dyn_cast<GlobalVariable>(ArgTLS)) {
823      Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
824      G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
825    }
826  }
827  if (!GetRetvalTLSPtr) {
828    RetvalTLS = Mod->getOrInsertGlobal("__dfsan_retval_tls", ShadowTy);
829    if (GlobalVariable *G = dyn_cast<GlobalVariable>(RetvalTLS)) {
830      Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
831      G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
832    }
833  }
834
835  ExternalShadowMask =
836      Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy);
837
838  initializeCallbackFunctions(M);
839  initializeRuntimeFunctions(M);
840
841  std::vector<Function *> FnsToInstrument;
842  SmallPtrSet<Function *, 2> FnsWithNativeABI;
843  for (Function &i : M) {
844    if (!i.isIntrinsic() &&
845        &i != DFSanUnionFn.getCallee()->stripPointerCasts() &&
846        &i != DFSanCheckedUnionFn.getCallee()->stripPointerCasts() &&
847        &i != DFSanUnionLoadFn.getCallee()->stripPointerCasts() &&
848        &i != DFSanUnimplementedFn.getCallee()->stripPointerCasts() &&
849        &i != DFSanSetLabelFn.getCallee()->stripPointerCasts() &&
850        &i != DFSanNonzeroLabelFn.getCallee()->stripPointerCasts() &&
851        &i != DFSanVarargWrapperFn.getCallee()->stripPointerCasts() &&
852        &i != DFSanLoadCallbackFn.getCallee()->stripPointerCasts() &&
853        &i != DFSanStoreCallbackFn.getCallee()->stripPointerCasts() &&
854        &i != DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts() &&
855        &i != DFSanCmpCallbackFn.getCallee()->stripPointerCasts())
856      FnsToInstrument.push_back(&i);
857  }
858
859  // Give function aliases prefixes when necessary, and build wrappers where the
860  // instrumentedness is inconsistent.
861  for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) {
862    GlobalAlias *GA = &*i;
863    ++i;
864    // Don't stop on weak.  We assume people aren't playing games with the
865    // instrumentedness of overridden weak aliases.
866    if (auto F = dyn_cast<Function>(GA->getBaseObject())) {
867      bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
868      if (GAInst && FInst) {
869        addGlobalNamePrefix(GA);
870      } else if (GAInst != FInst) {
871        // Non-instrumented alias of an instrumented function, or vice versa.
872        // Replace the alias with a native-ABI wrapper of the aliasee.  The pass
873        // below will take care of instrumenting it.
874        Function *NewF =
875            buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
876        GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
877        NewF->takeName(GA);
878        GA->eraseFromParent();
879        FnsToInstrument.push_back(NewF);
880      }
881    }
882  }
883
884  ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
885      .addAttribute(Attribute::ReadNone);
886
887  // First, change the ABI of every function in the module.  ABI-listed
888  // functions keep their original ABI and get a wrapper function.
889  for (std::vector<Function *>::iterator i = FnsToInstrument.begin(),
890                                         e = FnsToInstrument.end();
891       i != e; ++i) {
892    Function &F = **i;
893    FunctionType *FT = F.getFunctionType();
894
895    bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
896                              FT->getReturnType()->isVoidTy());
897
898    if (isInstrumented(&F)) {
899      // Instrumented functions get a 'dfs$' prefix.  This allows us to more
900      // easily identify cases of mismatching ABIs.
901      if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
902        FunctionType *NewFT = getArgsFunctionType(FT);
903        Function *NewF = Function::Create(NewFT, F.getLinkage(),
904                                          F.getAddressSpace(), "", &M);
905        NewF->copyAttributesFrom(&F);
906        NewF->removeAttributes(
907            AttributeList::ReturnIndex,
908            AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
909        for (Function::arg_iterator FArg = F.arg_begin(),
910                                    NewFArg = NewF->arg_begin(),
911                                    FArgEnd = F.arg_end();
912             FArg != FArgEnd; ++FArg, ++NewFArg) {
913          FArg->replaceAllUsesWith(&*NewFArg);
914        }
915        NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
916
917        for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
918             UI != UE;) {
919          BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
920          ++UI;
921          if (BA) {
922            BA->replaceAllUsesWith(
923                BlockAddress::get(NewF, BA->getBasicBlock()));
924            delete BA;
925          }
926        }
927        F.replaceAllUsesWith(
928            ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
929        NewF->takeName(&F);
930        F.eraseFromParent();
931        *i = NewF;
932        addGlobalNamePrefix(NewF);
933      } else {
934        addGlobalNamePrefix(&F);
935      }
936    } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
937      // Build a wrapper function for F.  The wrapper simply calls F, and is
938      // added to FnsToInstrument so that any instrumentation according to its
939      // WrapperKind is done in the second pass below.
940      FunctionType *NewFT = getInstrumentedABI() == IA_Args
941                                ? getArgsFunctionType(FT)
942                                : FT;
943
944      // If the function being wrapped has local linkage, then preserve the
945      // function's linkage in the wrapper function.
946      GlobalValue::LinkageTypes wrapperLinkage =
947          F.hasLocalLinkage()
948              ? F.getLinkage()
949              : GlobalValue::LinkOnceODRLinkage;
950
951      Function *NewF = buildWrapperFunction(
952          &F, std::string("dfsw$") + std::string(F.getName()),
953          wrapperLinkage, NewFT);
954      if (getInstrumentedABI() == IA_TLS)
955        NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
956
957      Value *WrappedFnCst =
958          ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
959      F.replaceAllUsesWith(WrappedFnCst);
960
961      UnwrappedFnMap[WrappedFnCst] = &F;
962      *i = NewF;
963
964      if (!F.isDeclaration()) {
965        // This function is probably defining an interposition of an
966        // uninstrumented function and hence needs to keep the original ABI.
967        // But any functions it may call need to use the instrumented ABI, so
968        // we instrument it in a mode which preserves the original ABI.
969        FnsWithNativeABI.insert(&F);
970
971        // This code needs to rebuild the iterators, as they may be invalidated
972        // by the push_back, taking care that the new range does not include
973        // any functions added by this code.
974        size_t N = i - FnsToInstrument.begin(),
975               Count = e - FnsToInstrument.begin();
976        FnsToInstrument.push_back(&F);
977        i = FnsToInstrument.begin() + N;
978        e = FnsToInstrument.begin() + Count;
979      }
980               // Hopefully, nobody will try to indirectly call a vararg
981               // function... yet.
982    } else if (FT->isVarArg()) {
983      UnwrappedFnMap[&F] = &F;
984      *i = nullptr;
985    }
986  }
987
988  for (Function *i : FnsToInstrument) {
989    if (!i || i->isDeclaration())
990      continue;
991
992    removeUnreachableBlocks(*i);
993
994    DFSanFunction DFSF(*this, i, FnsWithNativeABI.count(i));
995
996    // DFSanVisitor may create new basic blocks, which confuses df_iterator.
997    // Build a copy of the list before iterating over it.
998    SmallVector<BasicBlock *, 4> BBList(depth_first(&i->getEntryBlock()));
999
1000    for (BasicBlock *i : BBList) {
1001      Instruction *Inst = &i->front();
1002      while (true) {
1003        // DFSanVisitor may split the current basic block, changing the current
1004        // instruction's next pointer and moving the next instruction to the
1005        // tail block from which we should continue.
1006        Instruction *Next = Inst->getNextNode();
1007        // DFSanVisitor may delete Inst, so keep track of whether it was a
1008        // terminator.
1009        bool IsTerminator = Inst->isTerminator();
1010        if (!DFSF.SkipInsts.count(Inst))
1011          DFSanVisitor(DFSF).visit(Inst);
1012        if (IsTerminator)
1013          break;
1014        Inst = Next;
1015      }
1016    }
1017
1018    // We will not necessarily be able to compute the shadow for every phi node
1019    // until we have visited every block.  Therefore, the code that handles phi
1020    // nodes adds them to the PHIFixups list so that they can be properly
1021    // handled here.
1022    for (std::vector<std::pair<PHINode *, PHINode *>>::iterator
1023             i = DFSF.PHIFixups.begin(),
1024             e = DFSF.PHIFixups.end();
1025         i != e; ++i) {
1026      for (unsigned val = 0, n = i->first->getNumIncomingValues(); val != n;
1027           ++val) {
1028        i->second->setIncomingValue(
1029            val, DFSF.getShadow(i->first->getIncomingValue(val)));
1030      }
1031    }
1032
1033    // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1034    // places (i.e. instructions in basic blocks we haven't even begun visiting
1035    // yet).  To make our life easier, do this work in a pass after the main
1036    // instrumentation.
1037    if (ClDebugNonzeroLabels) {
1038      for (Value *V : DFSF.NonZeroChecks) {
1039        Instruction *Pos;
1040        if (Instruction *I = dyn_cast<Instruction>(V))
1041          Pos = I->getNextNode();
1042        else
1043          Pos = &DFSF.F->getEntryBlock().front();
1044        while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1045          Pos = Pos->getNextNode();
1046        IRBuilder<> IRB(Pos);
1047        Value *Ne = IRB.CreateICmpNE(V, DFSF.DFS.ZeroShadow);
1048        BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1049            Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1050        IRBuilder<> ThenIRB(BI);
1051        ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1052      }
1053    }
1054  }
1055
1056  return Changed || !FnsToInstrument.empty() ||
1057         M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1058}
1059
1060Value *DFSanFunction::getArgTLSPtr() {
1061  if (ArgTLSPtr)
1062    return ArgTLSPtr;
1063  if (DFS.ArgTLS)
1064    return ArgTLSPtr = DFS.ArgTLS;
1065
1066  IRBuilder<> IRB(&F->getEntryBlock().front());
1067  return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLSTy, DFS.GetArgTLS, {});
1068}
1069
1070Value *DFSanFunction::getRetvalTLS() {
1071  if (RetvalTLSPtr)
1072    return RetvalTLSPtr;
1073  if (DFS.RetvalTLS)
1074    return RetvalTLSPtr = DFS.RetvalTLS;
1075
1076  IRBuilder<> IRB(&F->getEntryBlock().front());
1077  return RetvalTLSPtr =
1078             IRB.CreateCall(DFS.GetRetvalTLSTy, DFS.GetRetvalTLS, {});
1079}
1080
1081Value *DFSanFunction::getArgTLS(unsigned Idx, Instruction *Pos) {
1082  IRBuilder<> IRB(Pos);
1083  return IRB.CreateConstGEP2_64(ArrayType::get(DFS.ShadowTy, 64),
1084                                getArgTLSPtr(), 0, Idx);
1085}
1086
1087Value *DFSanFunction::getShadow(Value *V) {
1088  if (!isa<Argument>(V) && !isa<Instruction>(V))
1089    return DFS.ZeroShadow;
1090  Value *&Shadow = ValShadowMap[V];
1091  if (!Shadow) {
1092    if (Argument *A = dyn_cast<Argument>(V)) {
1093      if (IsNativeABI)
1094        return DFS.ZeroShadow;
1095      switch (IA) {
1096      case DataFlowSanitizer::IA_TLS: {
1097        Value *ArgTLSPtr = getArgTLSPtr();
1098        Instruction *ArgTLSPos =
1099            DFS.ArgTLS ? &*F->getEntryBlock().begin()
1100                       : cast<Instruction>(ArgTLSPtr)->getNextNode();
1101        IRBuilder<> IRB(ArgTLSPos);
1102        Shadow =
1103            IRB.CreateLoad(DFS.ShadowTy, getArgTLS(A->getArgNo(), ArgTLSPos));
1104        break;
1105      }
1106      case DataFlowSanitizer::IA_Args: {
1107        unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
1108        Function::arg_iterator i = F->arg_begin();
1109        while (ArgIdx--)
1110          ++i;
1111        Shadow = &*i;
1112        assert(Shadow->getType() == DFS.ShadowTy);
1113        break;
1114      }
1115      }
1116      NonZeroChecks.push_back(Shadow);
1117    } else {
1118      Shadow = DFS.ZeroShadow;
1119    }
1120  }
1121  return Shadow;
1122}
1123
1124void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1125  assert(!ValShadowMap.count(I));
1126  assert(Shadow->getType() == DFS.ShadowTy);
1127  ValShadowMap[I] = Shadow;
1128}
1129
1130Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
1131  assert(Addr != RetvalTLS && "Reinstrumenting?");
1132  IRBuilder<> IRB(Pos);
1133  Value *ShadowPtrMaskValue;
1134  if (DFSanRuntimeShadowMask)
1135    ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask);
1136  else
1137    ShadowPtrMaskValue = ShadowPtrMask;
1138  return IRB.CreateIntToPtr(
1139      IRB.CreateMul(
1140          IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy),
1141                        IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)),
1142          ShadowPtrMul),
1143      ShadowPtrTy);
1144}
1145
1146// Generates IR to compute the union of the two given shadows, inserting it
1147// before Pos.  Returns the computed union Value.
1148Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
1149  if (V1 == DFS.ZeroShadow)
1150    return V2;
1151  if (V2 == DFS.ZeroShadow)
1152    return V1;
1153  if (V1 == V2)
1154    return V1;
1155
1156  auto V1Elems = ShadowElements.find(V1);
1157  auto V2Elems = ShadowElements.find(V2);
1158  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
1159    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
1160                      V2Elems->second.begin(), V2Elems->second.end())) {
1161      return V1;
1162    } else if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
1163                             V1Elems->second.begin(), V1Elems->second.end())) {
1164      return V2;
1165    }
1166  } else if (V1Elems != ShadowElements.end()) {
1167    if (V1Elems->second.count(V2))
1168      return V1;
1169  } else if (V2Elems != ShadowElements.end()) {
1170    if (V2Elems->second.count(V1))
1171      return V2;
1172  }
1173
1174  auto Key = std::make_pair(V1, V2);
1175  if (V1 > V2)
1176    std::swap(Key.first, Key.second);
1177  CachedCombinedShadow &CCS = CachedCombinedShadows[Key];
1178  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
1179    return CCS.Shadow;
1180
1181  IRBuilder<> IRB(Pos);
1182  if (AvoidNewBlocks) {
1183    CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
1184    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1185    Call->addParamAttr(0, Attribute::ZExt);
1186    Call->addParamAttr(1, Attribute::ZExt);
1187
1188    CCS.Block = Pos->getParent();
1189    CCS.Shadow = Call;
1190  } else {
1191    BasicBlock *Head = Pos->getParent();
1192    Value *Ne = IRB.CreateICmpNE(V1, V2);
1193    BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1194        Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
1195    IRBuilder<> ThenIRB(BI);
1196    CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
1197    Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1198    Call->addParamAttr(0, Attribute::ZExt);
1199    Call->addParamAttr(1, Attribute::ZExt);
1200
1201    BasicBlock *Tail = BI->getSuccessor(0);
1202    PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
1203    Phi->addIncoming(Call, Call->getParent());
1204    Phi->addIncoming(V1, Head);
1205
1206    CCS.Block = Tail;
1207    CCS.Shadow = Phi;
1208  }
1209
1210  std::set<Value *> UnionElems;
1211  if (V1Elems != ShadowElements.end()) {
1212    UnionElems = V1Elems->second;
1213  } else {
1214    UnionElems.insert(V1);
1215  }
1216  if (V2Elems != ShadowElements.end()) {
1217    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
1218  } else {
1219    UnionElems.insert(V2);
1220  }
1221  ShadowElements[CCS.Shadow] = std::move(UnionElems);
1222
1223  return CCS.Shadow;
1224}
1225
1226// A convenience function which folds the shadows of each of the operands
1227// of the provided instruction Inst, inserting the IR before Inst.  Returns
1228// the computed union Value.
1229Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
1230  if (Inst->getNumOperands() == 0)
1231    return DFS.ZeroShadow;
1232
1233  Value *Shadow = getShadow(Inst->getOperand(0));
1234  for (unsigned i = 1, n = Inst->getNumOperands(); i != n; ++i) {
1235    Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(i)), Inst);
1236  }
1237  return Shadow;
1238}
1239
1240Value *DFSanVisitor::visitOperandShadowInst(Instruction &I) {
1241  Value *CombinedShadow = DFSF.combineOperandShadows(&I);
1242  DFSF.setShadow(&I, CombinedShadow);
1243  return CombinedShadow;
1244}
1245
1246// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where
1247// Addr has alignment Align, and take the union of each of those shadows.
1248Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
1249                                 Instruction *Pos) {
1250  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
1251    const auto i = AllocaShadowMap.find(AI);
1252    if (i != AllocaShadowMap.end()) {
1253      IRBuilder<> IRB(Pos);
1254      return IRB.CreateLoad(DFS.ShadowTy, i->second);
1255    }
1256  }
1257
1258  const llvm::Align ShadowAlign(Align * DFS.ShadowWidthBytes);
1259  SmallVector<const Value *, 2> Objs;
1260  GetUnderlyingObjects(Addr, Objs, Pos->getModule()->getDataLayout());
1261  bool AllConstants = true;
1262  for (const Value *Obj : Objs) {
1263    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
1264      continue;
1265    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
1266      continue;
1267
1268    AllConstants = false;
1269    break;
1270  }
1271  if (AllConstants)
1272    return DFS.ZeroShadow;
1273
1274  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
1275  switch (Size) {
1276  case 0:
1277    return DFS.ZeroShadow;
1278  case 1: {
1279    LoadInst *LI = new LoadInst(DFS.ShadowTy, ShadowAddr, "", Pos);
1280    LI->setAlignment(ShadowAlign);
1281    return LI;
1282  }
1283  case 2: {
1284    IRBuilder<> IRB(Pos);
1285    Value *ShadowAddr1 = IRB.CreateGEP(DFS.ShadowTy, ShadowAddr,
1286                                       ConstantInt::get(DFS.IntptrTy, 1));
1287    return combineShadows(
1288        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr, ShadowAlign),
1289        IRB.CreateAlignedLoad(DFS.ShadowTy, ShadowAddr1, ShadowAlign), Pos);
1290  }
1291  }
1292  if (!AvoidNewBlocks && Size % (64 / DFS.ShadowWidthBits) == 0) {
1293    // Fast path for the common case where each byte has identical shadow: load
1294    // shadow 64 bits at a time, fall out to a __dfsan_union_load call if any
1295    // shadow is non-equal.
1296    BasicBlock *FallbackBB = BasicBlock::Create(*DFS.Ctx, "", F);
1297    IRBuilder<> FallbackIRB(FallbackBB);
1298    CallInst *FallbackCall = FallbackIRB.CreateCall(
1299        DFS.DFSanUnionLoadFn,
1300        {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
1301    FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1302
1303    // Compare each of the shadows stored in the loaded 64 bits to each other,
1304    // by computing (WideShadow rotl ShadowWidthBits) == WideShadow.
1305    IRBuilder<> IRB(Pos);
1306    Value *WideAddr =
1307        IRB.CreateBitCast(ShadowAddr, Type::getInt64PtrTy(*DFS.Ctx));
1308    Value *WideShadow =
1309        IRB.CreateAlignedLoad(IRB.getInt64Ty(), WideAddr, ShadowAlign);
1310    Value *TruncShadow = IRB.CreateTrunc(WideShadow, DFS.ShadowTy);
1311    Value *ShlShadow = IRB.CreateShl(WideShadow, DFS.ShadowWidthBits);
1312    Value *ShrShadow = IRB.CreateLShr(WideShadow, 64 - DFS.ShadowWidthBits);
1313    Value *RotShadow = IRB.CreateOr(ShlShadow, ShrShadow);
1314    Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow);
1315
1316    BasicBlock *Head = Pos->getParent();
1317    BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator());
1318
1319    if (DomTreeNode *OldNode = DT.getNode(Head)) {
1320      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
1321
1322      DomTreeNode *NewNode = DT.addNewBlock(Tail, Head);
1323      for (auto Child : Children)
1324        DT.changeImmediateDominator(Child, NewNode);
1325    }
1326
1327    // In the following code LastBr will refer to the previous basic block's
1328    // conditional branch instruction, whose true successor is fixed up to point
1329    // to the next block during the loop below or to the tail after the final
1330    // iteration.
1331    BranchInst *LastBr = BranchInst::Create(FallbackBB, FallbackBB, ShadowsEq);
1332    ReplaceInstWithInst(Head->getTerminator(), LastBr);
1333    DT.addNewBlock(FallbackBB, Head);
1334
1335    for (uint64_t Ofs = 64 / DFS.ShadowWidthBits; Ofs != Size;
1336         Ofs += 64 / DFS.ShadowWidthBits) {
1337      BasicBlock *NextBB = BasicBlock::Create(*DFS.Ctx, "", F);
1338      DT.addNewBlock(NextBB, LastBr->getParent());
1339      IRBuilder<> NextIRB(NextBB);
1340      WideAddr = NextIRB.CreateGEP(Type::getInt64Ty(*DFS.Ctx), WideAddr,
1341                                   ConstantInt::get(DFS.IntptrTy, 1));
1342      Value *NextWideShadow = NextIRB.CreateAlignedLoad(NextIRB.getInt64Ty(),
1343                                                        WideAddr, ShadowAlign);
1344      ShadowsEq = NextIRB.CreateICmpEQ(WideShadow, NextWideShadow);
1345      LastBr->setSuccessor(0, NextBB);
1346      LastBr = NextIRB.CreateCondBr(ShadowsEq, FallbackBB, FallbackBB);
1347    }
1348
1349    LastBr->setSuccessor(0, Tail);
1350    FallbackIRB.CreateBr(Tail);
1351    PHINode *Shadow = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front());
1352    Shadow->addIncoming(FallbackCall, FallbackBB);
1353    Shadow->addIncoming(TruncShadow, LastBr->getParent());
1354    return Shadow;
1355  }
1356
1357  IRBuilder<> IRB(Pos);
1358  CallInst *FallbackCall = IRB.CreateCall(
1359      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
1360  FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
1361  return FallbackCall;
1362}
1363
1364void DFSanVisitor::visitLoadInst(LoadInst &LI) {
1365  auto &DL = LI.getModule()->getDataLayout();
1366  uint64_t Size = DL.getTypeStoreSize(LI.getType());
1367  if (Size == 0) {
1368    DFSF.setShadow(&LI, DFSF.DFS.ZeroShadow);
1369    return;
1370  }
1371
1372  Align Alignment = ClPreserveAlignment ? LI.getAlign() : Align(1);
1373  Value *Shadow =
1374      DFSF.loadShadow(LI.getPointerOperand(), Size, Alignment.value(), &LI);
1375  if (ClCombinePointerLabelsOnLoad) {
1376    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
1377    Shadow = DFSF.combineShadows(Shadow, PtrShadow, &LI);
1378  }
1379  if (Shadow != DFSF.DFS.ZeroShadow)
1380    DFSF.NonZeroChecks.push_back(Shadow);
1381
1382  DFSF.setShadow(&LI, Shadow);
1383  if (ClEventCallbacks) {
1384    IRBuilder<> IRB(&LI);
1385    IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, Shadow);
1386  }
1387}
1388
1389void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, Align Alignment,
1390                                Value *Shadow, Instruction *Pos) {
1391  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
1392    const auto i = AllocaShadowMap.find(AI);
1393    if (i != AllocaShadowMap.end()) {
1394      IRBuilder<> IRB(Pos);
1395      IRB.CreateStore(Shadow, i->second);
1396      return;
1397    }
1398  }
1399
1400  const Align ShadowAlign(Alignment.value() * DFS.ShadowWidthBytes);
1401  IRBuilder<> IRB(Pos);
1402  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
1403  if (Shadow == DFS.ZeroShadow) {
1404    IntegerType *ShadowTy =
1405        IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
1406    Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
1407    Value *ExtShadowAddr =
1408        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
1409    IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
1410    return;
1411  }
1412
1413  const unsigned ShadowVecSize = 128 / DFS.ShadowWidthBits;
1414  uint64_t Offset = 0;
1415  if (Size >= ShadowVecSize) {
1416    auto *ShadowVecTy = FixedVectorType::get(DFS.ShadowTy, ShadowVecSize);
1417    Value *ShadowVec = UndefValue::get(ShadowVecTy);
1418    for (unsigned i = 0; i != ShadowVecSize; ++i) {
1419      ShadowVec = IRB.CreateInsertElement(
1420          ShadowVec, Shadow, ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), i));
1421    }
1422    Value *ShadowVecAddr =
1423        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
1424    do {
1425      Value *CurShadowVecAddr =
1426          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
1427      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
1428      Size -= ShadowVecSize;
1429      ++Offset;
1430    } while (Size >= ShadowVecSize);
1431    Offset *= ShadowVecSize;
1432  }
1433  while (Size > 0) {
1434    Value *CurShadowAddr =
1435        IRB.CreateConstGEP1_32(DFS.ShadowTy, ShadowAddr, Offset);
1436    IRB.CreateAlignedStore(Shadow, CurShadowAddr, ShadowAlign);
1437    --Size;
1438    ++Offset;
1439  }
1440}
1441
1442void DFSanVisitor::visitStoreInst(StoreInst &SI) {
1443  auto &DL = SI.getModule()->getDataLayout();
1444  uint64_t Size = DL.getTypeStoreSize(SI.getValueOperand()->getType());
1445  if (Size == 0)
1446    return;
1447
1448  const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1);
1449
1450  Value* Shadow = DFSF.getShadow(SI.getValueOperand());
1451  if (ClCombinePointerLabelsOnStore) {
1452    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
1453    Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
1454  }
1455  DFSF.storeShadow(SI.getPointerOperand(), Size, Alignment, Shadow, &SI);
1456  if (ClEventCallbacks) {
1457    IRBuilder<> IRB(&SI);
1458    IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, Shadow);
1459  }
1460}
1461
1462void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
1463  visitOperandShadowInst(UO);
1464}
1465
1466void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
1467  visitOperandShadowInst(BO);
1468}
1469
1470void DFSanVisitor::visitCastInst(CastInst &CI) { visitOperandShadowInst(CI); }
1471
1472void DFSanVisitor::visitCmpInst(CmpInst &CI) {
1473  Value *CombinedShadow = visitOperandShadowInst(CI);
1474  if (ClEventCallbacks) {
1475    IRBuilder<> IRB(&CI);
1476    IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
1477  }
1478}
1479
1480void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1481  visitOperandShadowInst(GEPI);
1482}
1483
1484void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
1485  visitOperandShadowInst(I);
1486}
1487
1488void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
1489  visitOperandShadowInst(I);
1490}
1491
1492void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
1493  visitOperandShadowInst(I);
1494}
1495
1496void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
1497  visitOperandShadowInst(I);
1498}
1499
1500void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
1501  visitOperandShadowInst(I);
1502}
1503
1504void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
1505  bool AllLoadsStores = true;
1506  for (User *U : I.users()) {
1507    if (isa<LoadInst>(U))
1508      continue;
1509
1510    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1511      if (SI->getPointerOperand() == &I)
1512        continue;
1513    }
1514
1515    AllLoadsStores = false;
1516    break;
1517  }
1518  if (AllLoadsStores) {
1519    IRBuilder<> IRB(&I);
1520    DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.ShadowTy);
1521  }
1522  DFSF.setShadow(&I, DFSF.DFS.ZeroShadow);
1523}
1524
1525void DFSanVisitor::visitSelectInst(SelectInst &I) {
1526  Value *CondShadow = DFSF.getShadow(I.getCondition());
1527  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
1528  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
1529
1530  if (isa<VectorType>(I.getCondition()->getType())) {
1531    DFSF.setShadow(
1532        &I,
1533        DFSF.combineShadows(
1534            CondShadow, DFSF.combineShadows(TrueShadow, FalseShadow, &I), &I));
1535  } else {
1536    Value *ShadowSel;
1537    if (TrueShadow == FalseShadow) {
1538      ShadowSel = TrueShadow;
1539    } else {
1540      ShadowSel =
1541          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
1542    }
1543    DFSF.setShadow(&I, DFSF.combineShadows(CondShadow, ShadowSel, &I));
1544  }
1545}
1546
1547void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
1548  IRBuilder<> IRB(&I);
1549  Value *ValShadow = DFSF.getShadow(I.getValue());
1550  IRB.CreateCall(DFSF.DFS.DFSanSetLabelFn,
1551                 {ValShadow, IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(
1552                                                                *DFSF.DFS.Ctx)),
1553                  IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
1554}
1555
1556void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
1557  IRBuilder<> IRB(&I);
1558  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
1559  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
1560  Value *LenShadow =
1561      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
1562                                                    DFSF.DFS.ShadowWidthBytes));
1563  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
1564  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
1565  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
1566  auto *MTI = cast<MemTransferInst>(
1567      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
1568                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
1569  if (ClPreserveAlignment) {
1570    MTI->setDestAlignment(I.getDestAlign() * DFSF.DFS.ShadowWidthBytes);
1571    MTI->setSourceAlignment(I.getSourceAlign() * DFSF.DFS.ShadowWidthBytes);
1572  } else {
1573    MTI->setDestAlignment(Align(DFSF.DFS.ShadowWidthBytes));
1574    MTI->setSourceAlignment(Align(DFSF.DFS.ShadowWidthBytes));
1575  }
1576  if (ClEventCallbacks) {
1577    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
1578                   {RawDestShadow, I.getLength()});
1579  }
1580}
1581
1582void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
1583  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
1584    switch (DFSF.IA) {
1585    case DataFlowSanitizer::IA_TLS: {
1586      Value *S = DFSF.getShadow(RI.getReturnValue());
1587      IRBuilder<> IRB(&RI);
1588      IRB.CreateStore(S, DFSF.getRetvalTLS());
1589      break;
1590    }
1591    case DataFlowSanitizer::IA_Args: {
1592      IRBuilder<> IRB(&RI);
1593      Type *RT = DFSF.F->getFunctionType()->getReturnType();
1594      Value *InsVal =
1595          IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
1596      Value *InsShadow =
1597          IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
1598      RI.setOperand(0, InsShadow);
1599      break;
1600    }
1601    }
1602  }
1603}
1604
// Instruments a call or invoke instruction.
//
// Dispatch order, as implemented below:
//  1. Intrinsic callees and inline asm are handed to visitOperandShadowInst.
//  2. Calls whose callee is DFSanVarargWrapperFn are left untouched.
//  3. If the called operand is a key in UnwrappedFnMap, the mapped function's
//     wrapper kind (getWrapperKind) selects warning / discard / functional /
//     custom handling. Every kind returns early except WK_Custom on something
//     that is not a CallInst, which falls through to step 4.
//  4. Otherwise the call is instrumented according to getInstrumentedABI():
//     IA_TLS stores argument shadows through getArgTLS and loads the result
//     shadow from getRetvalTLS; IA_Args replaces the call with one of type
//     getArgsFunctionType(FT) that carries the shadows as extra arguments.
void DFSanVisitor::visitCallBase(CallBase &CB) {
  Function *F = CB.getCalledFunction();
  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
    visitOperandShadowInst(CB);
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  // Everything emitted through IRB lands immediately before the call.
  IRBuilder<> IRB(&CB);

  DenseMap<Value *, Function *>::iterator i =
      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
  if (i != DFSF.DFS.UnwrappedFnMap.end()) {
    // NOTE: this F shadows the outer F for the rest of this scope; it is the
    // function the map associates with the called operand.
    Function *F = i->second;
    switch (DFSF.DFS.getWrapperKind(F)) {
    // Redirect the call to F, emit a call to DFSanUnimplementedFn with F's
    // name, and give the result a zero shadow.
    case DataFlowSanitizer::WK_Warning:
      CB.setCalledFunction(F);
      IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
                     IRB.CreateGlobalStringPtr(F->getName()));
      DFSF.setShadow(&CB, DFSF.DFS.ZeroShadow);
      return;
    // Redirect the call to F and give the result a zero shadow.
    case DataFlowSanitizer::WK_Discard:
      CB.setCalledFunction(F);
      DFSF.setShadow(&CB, DFSF.DFS.ZeroShadow);
      return;
    // Redirect the call to F and let visitOperandShadowInst handle the shadow.
    case DataFlowSanitizer::WK_Custom:
      // Don't try to handle invokes of custom functions, it's too complicated.
      // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
      // wrapper.
      if (CallInst *CI = dyn_cast<CallInst>(&CB)) {
        FunctionType *FT = F->getFunctionType();
        TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
        // The custom wrapper is looked up (or declared) as "__dfsw_" + name.
        std::string CustomFName = "__dfsw_";
        CustomFName += F->getName();
        FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
            CustomFName, CustomFn.TransformedType);
        // NOTE: inside this if, CustomFn names the Function* and shadows the
        // TransformedFunction declared above; the later use in
        // TransformFunctionAttributes refers to the TransformedFunction again.
        if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
          CustomFn->copyAttributesFrom(F);

          // Custom functions returning non-void will write to the return label.
          if (!FT->getReturnType()->isVoidTy()) {
            CustomFn->removeAttributes(AttributeList::FunctionIndex,
                                       DFSF.DFS.ReadOnlyNoneAttrs);
          }
        }

        // Argument list of the custom call, built in this order:
        //   1. the fixed parameters (a function-pointer parameter becomes a
        //      trampoline followed by the original pointer cast to i8*),
        //   2. one shadow per fixed parameter,
        //   3. if variadic: a pointer to an array of the variadic shadows,
        //   4. if non-void: a pointer to the return-label slot,
        //   5. the variadic arguments themselves.
        std::vector<Value *> Args;

        // NOTE: this i (an argument iterator) shadows the map iterator above.
        auto i = CB.arg_begin();
        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) {
          Type *T = (*i)->getType();
          FunctionType *ParamFT;
          if (isa<PointerType>(T) &&
              (ParamFT = dyn_cast<FunctionType>(
                   cast<PointerType>(T)->getElementType()))) {
            // Trampoline name: "dfst" + zero-based parameter index + "$" +
            // callee name.
            std::string TName = "dfst";
            TName += utostr(FT->getNumParams() - n);
            TName += "$";
            TName += F->getName();
            // NOTE: this T (the trampoline) shadows the Type *T above.
            Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName);
            Args.push_back(T);
            Args.push_back(
                IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx)));
          } else {
            Args.push_back(*i);
          }
        }

        // Rewind and append the shadows of the fixed parameters. When this
        // loop finishes, i points at the first variadic argument, which the
        // variadic loop below relies on.
        i = CB.arg_begin();
        const unsigned ShadowArgStart = Args.size();
        for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
          Args.push_back(DFSF.getShadow(*i));

        if (FT->isVarArg()) {
          // One ShadowTy slot per variadic argument; the alloca is placed at
          // the front of the function's entry block.
          auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
                                           CB.arg_size() - FT->getNumParams());
          auto *LabelVAAlloca = new AllocaInst(
              LabelVATy, getDataLayout().getAllocaAddrSpace(),
              "labelva", &DFSF.F->getEntryBlock().front());

          for (unsigned n = 0; i != CB.arg_end(); ++i, ++n) {
            auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
            IRB.CreateStore(DFSF.getShadow(*i), LabelVAPtr);
          }

          // Pass a pointer to the first element of the shadow array.
          Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
        }

        if (!FT->getReturnType()->isVoidTy()) {
          // The return-label slot is created on first use and then reused via
          // DFSF.LabelReturnAlloca.
          if (!DFSF.LabelReturnAlloca) {
            DFSF.LabelReturnAlloca =
              new AllocaInst(DFSF.DFS.ShadowTy,
                             getDataLayout().getAllocaAddrSpace(),
                             "labelreturn", &DFSF.F->getEntryBlock().front());
          }
          Args.push_back(DFSF.LabelReturnAlloca);
        }

        // Finally, forward the variadic arguments unchanged.
        for (i = CB.arg_begin() + FT->getNumParams(); i != CB.arg_end(); ++i)
          Args.push_back(*i);

        CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
        CustomCI->setCallingConv(CI->getCallingConv());
        CustomCI->setAttributes(TransformFunctionAttributes(CustomFn,
            CI->getContext(), CI->getAttributes()));

        // Update the parameter attributes of the custom call instruction to
        // zero extend the shadow parameters. This is required for targets
        // which consider ShadowTy an illegal type.
        for (unsigned n = 0; n < FT->getNumParams(); n++) {
          const unsigned ArgNo = ShadowArgStart + n;
          if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy)
            CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
        }

        // The result's shadow is loaded back from the return-label slot right
        // after the custom call.
        if (!FT->getReturnType()->isVoidTy()) {
          LoadInst *LabelLoad =
              IRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.LabelReturnAlloca);
          DFSF.setShadow(CustomCI, LabelLoad);
        }

        // The original call is fully replaced by the custom call.
        CI->replaceAllUsesWith(CustomCI);
        CI->eraseFromParent();
        return;
      }
      // Not a CallInst: fall out of the switch into the generic path below.
      break;
    }
  }

  // Generic instrumentation, using the call site's own function type.
  FunctionType *FT = CB.getFunctionType();
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
    // Store the shadow of each fixed parameter into its argument TLS slot
    // before the call.
    for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) {
      IRB.CreateStore(DFSF.getShadow(CB.getArgOperand(i)),
                      DFSF.getArgTLS(i, &CB));
    }
  }

  // Next is the instruction before which the result's shadow is materialized;
  // it stays null for calls returning void.
  Instruction *Next = nullptr;
  if (!CB.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      // For an invoke, the insertion point is in the normal destination. If
      // that block has other predecessors, the edge is split so the inserted
      // code only runs on the path coming from this invoke.
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      assert(CB.getIterator() != CB.getParent()->end());
      Next = CB.getNextNode();
    }

    if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
      // Load the result shadow from the return-value TLS slot. The load is
      // added to SkipInsts (presumably so the visitor does not instrument it;
      // confirm where SkipInsts is consumed) and to NonZeroChecks.
      IRBuilder<> NextIRB(Next);
      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.ShadowTy, DFSF.getRetvalTLS());
      DFSF.SkipInsts.insert(LI);
      DFSF.setShadow(&CB, LI);
      DFSF.NonZeroChecks.push_back(LI);
    }
  }

  // Do all instrumentation for IA_Args down here to defer tampering with the
  // CFG in a way that SplitEdge may be able to detect.
  if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
    // The callee is cast to a pointer to the shadow-carrying function type.
    FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
    Value *Func =
        IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT));
    // New argument list: the fixed parameters, then their shadows, then (for
    // variadic callees) a pointer to the variadic shadow array followed by the
    // variadic arguments.
    std::vector<Value *> Args;

    auto i = CB.arg_begin(), E = CB.arg_end();
    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
      Args.push_back(*i);

    // Rewind; after this loop i points at the first variadic argument.
    i = CB.arg_begin();
    for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
      Args.push_back(DFSF.getShadow(*i));

    if (FT->isVarArg()) {
      unsigned VarArgSize = CB.arg_size() - FT->getNumParams();
      ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
      // The shadow array lives in an alloca at the front of the entry block.
      AllocaInst *VarArgShadow =
        new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
                       "", &DFSF.F->getEntryBlock().front());
      Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
      for (unsigned n = 0; i != E; ++i, ++n) {
        IRB.CreateStore(
            DFSF.getShadow(*i),
            IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, n));
        Args.push_back(*i);
      }
    }

    // Recreate the call as the same kind of instruction (invoke or call).
    CallBase *NewCB;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    } else {
      NewCB = IRB.CreateCall(NewFT, Func, Args);
    }
    NewCB->setCallingConv(CB.getCallingConv());
    // Copy the attributes, minus return attributes that are incompatible with
    // the new return type.
    NewCB->setAttributes(CB.getAttributes().removeAttributes(
        *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
        AttributeFuncs::typeIncompatible(NewCB->getType())));

    if (Next) {
      // Non-void result: index 0 of the returned aggregate is the value and
      // index 1 is its shadow. Both extracts are added to SkipInsts.
      ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next);
      DFSF.SkipInsts.insert(ExVal);
      ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next);
      DFSF.SkipInsts.insert(ExShadow);
      DFSF.setShadow(ExVal, ExShadow);
      DFSF.NonZeroChecks.push_back(ExShadow);

      CB.replaceAllUsesWith(ExVal);
    }

    // The original call has been superseded by NewCB.
    CB.eraseFromParent();
  }
}
1831
1832void DFSanVisitor::visitPHINode(PHINode &PN) {
1833  PHINode *ShadowPN =
1834      PHINode::Create(DFSF.DFS.ShadowTy, PN.getNumIncomingValues(), "", &PN);
1835
1836  // Give the shadow phi node valid predecessors to fool SplitEdge into working.
1837  Value *UndefShadow = UndefValue::get(DFSF.DFS.ShadowTy);
1838  for (PHINode::block_iterator i = PN.block_begin(), e = PN.block_end(); i != e;
1839       ++i) {
1840    ShadowPN->addIncoming(UndefShadow, *i);
1841  }
1842
1843  DFSF.PHIFixups.push_back(std::make_pair(&PN, ShadowPN));
1844  DFSF.setShadow(&PN, ShadowPN);
1845}
1846