IRDynamicChecks.cpp revision 360784
1//===-- IRDynamicChecks.cpp -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/IR/Constants.h"
10#include "llvm/IR/DataLayout.h"
11#include "llvm/IR/Function.h"
12#include "llvm/IR/Instructions.h"
13#include "llvm/IR/Module.h"
14#include "llvm/IR/Value.h"
15#include "llvm/Support/raw_ostream.h"
16
17#include "IRDynamicChecks.h"
18
19#include "lldb/Expression/UtilityFunction.h"
20#include "lldb/Target/ExecutionContext.h"
21#include "lldb/Target/Process.h"
22#include "lldb/Target/StackFrame.h"
23#include "lldb/Target/Target.h"
24#include "lldb/Utility/ConstString.h"
25#include "lldb/Utility/Log.h"
26
27#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
28
29using namespace llvm;
30using namespace lldb_private;
31
// Pass identifier; handed to the ModulePass base-class constructor by
// IRDynamicChecks.
static char ID;

// Names under which the checker utility functions are created.
#define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
#define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"

// Source text of the pointer checker: it reads one byte through its argument.
// The function name is spliced in from VALID_POINTER_CHECK_NAME so the
// definition and the name used to create the utility function cannot drift
// apart.
static const char g_valid_pointer_check_text[] =
    "extern \"C\" void\n" VALID_POINTER_CHECK_NAME
    " (unsigned char *$__lldb_arg_ptr)\n"
    "{\n"
    "    unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
    "}";
43
44ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions()
45    : DynamicCheckerFunctions(DCF_Clang) {}
46
47ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default;
48
49bool ClangDynamicCheckerFunctions::Install(
50    DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) {
51  Status error;
52  m_valid_pointer_check.reset(
53      exe_ctx.GetTargetRef().GetUtilityFunctionForLanguage(
54          g_valid_pointer_check_text, lldb::eLanguageTypeC,
55          VALID_POINTER_CHECK_NAME, error));
56  if (error.Fail())
57    return false;
58
59  if (!m_valid_pointer_check->Install(diagnostic_manager, exe_ctx))
60    return false;
61
62  Process *process = exe_ctx.GetProcessPtr();
63
64  if (process) {
65    ObjCLanguageRuntime *objc_language_runtime =
66        ObjCLanguageRuntime::Get(*process);
67
68    if (objc_language_runtime) {
69      m_objc_object_check.reset(objc_language_runtime->CreateObjectChecker(
70          VALID_OBJC_OBJECT_CHECK_NAME));
71
72      if (!m_objc_object_check->Install(diagnostic_manager, exe_ctx))
73        return false;
74    }
75  }
76
77  return true;
78}
79
80bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr,
81                                                         Stream &message) {
82  // FIXME: We have to get the checkers to know why they scotched the call in
83  // more detail,
84  // so we can print a better message here.
85  if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) {
86    message.Printf("Attempted to dereference an invalid pointer.");
87    return true;
88  } else if (m_objc_object_check &&
89             m_objc_object_check->ContainsAddress(addr)) {
90    message.Printf("Attempted to dereference an invalid ObjC Object or send it "
91                   "an unrecognized selector");
92    return true;
93  }
94  return false;
95}
96
97static std::string PrintValue(llvm::Value *V, bool truncate = false) {
98  std::string s;
99  raw_string_ostream rso(s);
100  V->print(rso);
101  rso.flush();
102  if (truncate)
103    s.resize(s.length() - 1);
104  return s;
105}
106
107/// \class Instrumenter IRDynamicChecks.cpp
108/// Finds and instruments individual LLVM IR instructions
109///
110/// When instrumenting LLVM IR, it is frequently desirable to first search for
111/// instructions, and then later modify them.  This way iterators remain
112/// intact, and multiple passes can look at the same code base without
113/// treading on each other's toes.
114///
115/// The Instrumenter class implements this functionality.  A client first
116/// calls Inspect on a function, which populates a list of instructions to be
117/// instrumented.  Then, later, when all passes' Inspect functions have been
118/// called, the client calls Instrument, which adds the desired
119/// instrumentation.
120///
121/// A subclass of Instrumenter must override InstrumentInstruction, which
122/// is responsible for adding whatever instrumentation is necessary.
123///
124/// A subclass of Instrumenter may override:
125///
126/// - InspectInstruction [default: does nothing]
127///
128/// - InspectBasicBlock [default: iterates through the instructions in a
129///   basic block calling InspectInstruction]
130///
131/// - InspectFunction [default: iterates through the basic blocks in a
132///   function calling InspectBasicBlock]
133class Instrumenter {
134public:
135  /// Constructor
136  ///
137  /// \param[in] module
138  ///     The module being instrumented.
139  Instrumenter(llvm::Module &module,
140               std::shared_ptr<UtilityFunction> checker_function)
141      : m_module(module), m_checker_function(checker_function),
142        m_i8ptr_ty(nullptr), m_intptr_ty(nullptr) {}
143
144  virtual ~Instrumenter() = default;
145
146  /// Inspect a function to find instructions to instrument
147  ///
148  /// \param[in] function
149  ///     The function to inspect.
150  ///
151  /// \return
152  ///     True on success; false on error.
153  bool Inspect(llvm::Function &function) { return InspectFunction(function); }
154
155  /// Instrument all the instructions found by Inspect()
156  ///
157  /// \return
158  ///     True on success; false on error.
159  bool Instrument() {
160    for (InstIterator ii = m_to_instrument.begin(),
161                      last_ii = m_to_instrument.end();
162         ii != last_ii; ++ii) {
163      if (!InstrumentInstruction(*ii))
164        return false;
165    }
166
167    return true;
168  }
169
170protected:
171  /// Add instrumentation to a single instruction
172  ///
173  /// \param[in] inst
174  ///     The instruction to be instrumented.
175  ///
176  /// \return
177  ///     True on success; false otherwise.
178  virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0;
179
180  /// Register a single instruction to be instrumented
181  ///
182  /// \param[in] inst
183  ///     The instruction to be instrumented.
184  void RegisterInstruction(llvm::Instruction &i) {
185    m_to_instrument.push_back(&i);
186  }
187
188  /// Determine whether a single instruction is interesting to instrument,
189  /// and, if so, call RegisterInstruction
190  ///
191  /// \param[in] i
192  ///     The instruction to be inspected.
193  ///
194  /// \return
195  ///     False if there was an error scanning; true otherwise.
196  virtual bool InspectInstruction(llvm::Instruction &i) { return true; }
197
198  /// Scan a basic block to see if any instructions are interesting
199  ///
200  /// \param[in] bb
201  ///     The basic block to be inspected.
202  ///
203  /// \return
204  ///     False if there was an error scanning; true otherwise.
205  virtual bool InspectBasicBlock(llvm::BasicBlock &bb) {
206    for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end();
207         ii != last_ii; ++ii) {
208      if (!InspectInstruction(*ii))
209        return false;
210    }
211
212    return true;
213  }
214
215  /// Scan a function to see if any instructions are interesting
216  ///
217  /// \param[in] f
218  ///     The function to be inspected.
219  ///
220  /// \return
221  ///     False if there was an error scanning; true otherwise.
222  virtual bool InspectFunction(llvm::Function &f) {
223    for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end();
224         bbi != last_bbi; ++bbi) {
225      if (!InspectBasicBlock(*bbi))
226        return false;
227    }
228
229    return true;
230  }
231
232  /// Build a function pointer for a function with signature void
233  /// (*)(uint8_t*) with a given address
234  ///
235  /// \param[in] start_address
236  ///     The address of the function.
237  ///
238  /// \return
239  ///     The function pointer, for use in a CallInst.
240  llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) {
241    llvm::Type *param_array[1];
242
243    param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
244
245    ArrayRef<llvm::Type *> params(param_array, 1);
246
247    FunctionType *fun_ty = FunctionType::get(
248        llvm::Type::getVoidTy(m_module.getContext()), params, true);
249    PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
250    Constant *fun_addr_int =
251        ConstantInt::get(GetIntptrTy(), start_address, false);
252    return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
253  }
254
255  /// Build a function pointer for a function with signature void
256  /// (*)(uint8_t*, uint8_t*) with a given address
257  ///
258  /// \param[in] start_address
259  ///     The address of the function.
260  ///
261  /// \return
262  ///     The function pointer, for use in a CallInst.
263  llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) {
264    llvm::Type *param_array[2];
265
266    param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
267    param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy());
268
269    ArrayRef<llvm::Type *> params(param_array, 2);
270
271    FunctionType *fun_ty = FunctionType::get(
272        llvm::Type::getVoidTy(m_module.getContext()), params, true);
273    PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
274    Constant *fun_addr_int =
275        ConstantInt::get(GetIntptrTy(), start_address, false);
276    return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
277  }
278
279  PointerType *GetI8PtrTy() {
280    if (!m_i8ptr_ty)
281      m_i8ptr_ty = llvm::Type::getInt8PtrTy(m_module.getContext());
282
283    return m_i8ptr_ty;
284  }
285
286  IntegerType *GetIntptrTy() {
287    if (!m_intptr_ty) {
288      llvm::DataLayout data_layout(&m_module);
289
290      m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(),
291                                          data_layout.getPointerSizeInBits());
292    }
293
294    return m_intptr_ty;
295  }
296
297  typedef std::vector<llvm::Instruction *> InstVector;
298  typedef InstVector::iterator InstIterator;
299
300  InstVector m_to_instrument; ///< List of instructions the inspector found
301  llvm::Module &m_module;     ///< The module which is being instrumented
302  std::shared_ptr<UtilityFunction>
303      m_checker_function; ///< The dynamic checker function for the process
304
305private:
306  PointerType *m_i8ptr_ty;
307  IntegerType *m_intptr_ty;
308};
309
310class ValidPointerChecker : public Instrumenter {
311public:
312  ValidPointerChecker(llvm::Module &module,
313                      std::shared_ptr<UtilityFunction> checker_function)
314      : Instrumenter(module, checker_function),
315        m_valid_pointer_check_func(nullptr) {}
316
317  ~ValidPointerChecker() override = default;
318
319protected:
320  bool InstrumentInstruction(llvm::Instruction *inst) override {
321    Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
322
323    LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n",
324              PrintValue(inst).c_str());
325
326    if (!m_valid_pointer_check_func)
327      m_valid_pointer_check_func =
328          BuildPointerValidatorFunc(m_checker_function->StartAddress());
329
330    llvm::Value *dereferenced_ptr = nullptr;
331
332    if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst))
333      dereferenced_ptr = li->getPointerOperand();
334    else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst))
335      dereferenced_ptr = si->getPointerOperand();
336    else
337      return false;
338
339    // Insert an instruction to cast the loaded value to int8_t*
340
341    BitCastInst *bit_cast =
342        new BitCastInst(dereferenced_ptr, GetI8PtrTy(), "", inst);
343
344    // Insert an instruction to call the helper with the result
345
346    llvm::Value *arg_array[1];
347
348    arg_array[0] = bit_cast;
349
350    llvm::ArrayRef<llvm::Value *> args(arg_array, 1);
351
352    CallInst::Create(m_valid_pointer_check_func, args, "", inst);
353
354    return true;
355  }
356
357  bool InspectInstruction(llvm::Instruction &i) override {
358    if (dyn_cast<llvm::LoadInst>(&i) || dyn_cast<llvm::StoreInst>(&i))
359      RegisterInstruction(i);
360
361    return true;
362  }
363
364private:
365  llvm::FunctionCallee m_valid_pointer_check_func;
366};
367
368class ObjcObjectChecker : public Instrumenter {
369public:
370  ObjcObjectChecker(llvm::Module &module,
371                    std::shared_ptr<UtilityFunction> checker_function)
372      : Instrumenter(module, checker_function),
373        m_objc_object_check_func(nullptr) {}
374
375  ~ObjcObjectChecker() override = default;
376
377  enum msgSend_type {
378    eMsgSend = 0,
379    eMsgSendSuper,
380    eMsgSendSuper_stret,
381    eMsgSend_fpret,
382    eMsgSend_stret
383  };
384
385  std::map<llvm::Instruction *, msgSend_type> msgSend_types;
386
387protected:
388  bool InstrumentInstruction(llvm::Instruction *inst) override {
389    CallInst *call_inst = dyn_cast<CallInst>(inst);
390
391    if (!call_inst)
392      return false; // call_inst really shouldn't be nullptr, because otherwise
393                    // InspectInstruction wouldn't have registered it
394
395    if (!m_objc_object_check_func)
396      m_objc_object_check_func =
397          BuildObjectCheckerFunc(m_checker_function->StartAddress());
398
399    // id objc_msgSend(id theReceiver, SEL theSelector, ...)
400
401    llvm::Value *target_object;
402    llvm::Value *selector;
403
404    switch (msgSend_types[inst]) {
405    case eMsgSend:
406    case eMsgSend_fpret:
407      // On arm64, clang uses objc_msgSend for scalar and struct return
408      // calls.  The call instruction will record which was used.
409      if (call_inst->hasStructRetAttr()) {
410        target_object = call_inst->getArgOperand(1);
411        selector = call_inst->getArgOperand(2);
412      } else {
413        target_object = call_inst->getArgOperand(0);
414        selector = call_inst->getArgOperand(1);
415      }
416      break;
417    case eMsgSend_stret:
418      target_object = call_inst->getArgOperand(1);
419      selector = call_inst->getArgOperand(2);
420      break;
421    case eMsgSendSuper:
422    case eMsgSendSuper_stret:
423      return true;
424    }
425
426    // These objects should always be valid according to Sean Calannan
427    assert(target_object);
428    assert(selector);
429
430    // Insert an instruction to cast the receiver id to int8_t*
431
432    BitCastInst *bit_cast =
433        new BitCastInst(target_object, GetI8PtrTy(), "", inst);
434
435    // Insert an instruction to call the helper with the result
436
437    llvm::Value *arg_array[2];
438
439    arg_array[0] = bit_cast;
440    arg_array[1] = selector;
441
442    ArrayRef<llvm::Value *> args(arg_array, 2);
443
444    CallInst::Create(m_objc_object_check_func, args, "", inst);
445
446    return true;
447  }
448
449  static llvm::Function *GetFunction(llvm::Value *value) {
450    if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) {
451      return function;
452    }
453
454    if (llvm::ConstantExpr *const_expr =
455            llvm::dyn_cast<llvm::ConstantExpr>(value)) {
456      switch (const_expr->getOpcode()) {
457      default:
458        return nullptr;
459      case llvm::Instruction::BitCast:
460        return GetFunction(const_expr->getOperand(0));
461      }
462    }
463
464    return nullptr;
465  }
466
467  static llvm::Function *GetCalledFunction(llvm::CallInst *inst) {
468    return GetFunction(inst->getCalledValue());
469  }
470
471  bool InspectInstruction(llvm::Instruction &i) override {
472    Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
473
474    CallInst *call_inst = dyn_cast<CallInst>(&i);
475
476    if (call_inst) {
477      const llvm::Function *called_function = GetCalledFunction(call_inst);
478
479      if (!called_function)
480        return true;
481
482      std::string name_str = called_function->getName().str();
483      const char *name_cstr = name_str.c_str();
484
485      LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr,
486                PrintValue(call_inst).c_str());
487
488      if (name_str.find("objc_msgSend") == std::string::npos)
489        return true;
490
491      if (!strcmp(name_cstr, "objc_msgSend")) {
492        RegisterInstruction(i);
493        msgSend_types[&i] = eMsgSend;
494        return true;
495      }
496
497      if (!strcmp(name_cstr, "objc_msgSend_stret")) {
498        RegisterInstruction(i);
499        msgSend_types[&i] = eMsgSend_stret;
500        return true;
501      }
502
503      if (!strcmp(name_cstr, "objc_msgSend_fpret")) {
504        RegisterInstruction(i);
505        msgSend_types[&i] = eMsgSend_fpret;
506        return true;
507      }
508
509      if (!strcmp(name_cstr, "objc_msgSendSuper")) {
510        RegisterInstruction(i);
511        msgSend_types[&i] = eMsgSendSuper;
512        return true;
513      }
514
515      if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) {
516        RegisterInstruction(i);
517        msgSend_types[&i] = eMsgSendSuper_stret;
518        return true;
519      }
520
521      LLDB_LOGF(log,
522                "Function name '%s' contains 'objc_msgSend' but is not handled",
523                name_str.c_str());
524
525      return true;
526    }
527
528    return true;
529  }
530
531private:
532  llvm::FunctionCallee m_objc_object_check_func;
533};
534
535IRDynamicChecks::IRDynamicChecks(
536    ClangDynamicCheckerFunctions &checker_functions, const char *func_name)
537    : ModulePass(ID), m_func_name(func_name),
538      m_checker_functions(checker_functions) {}
539
540IRDynamicChecks::~IRDynamicChecks() = default;
541
542bool IRDynamicChecks::runOnModule(llvm::Module &M) {
543  Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
544
545  llvm::Function *function = M.getFunction(StringRef(m_func_name));
546
547  if (!function) {
548    LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str());
549
550    return false;
551  }
552
553  if (m_checker_functions.m_valid_pointer_check) {
554    ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check);
555
556    if (!vpc.Inspect(*function))
557      return false;
558
559    if (!vpc.Instrument())
560      return false;
561  }
562
563  if (m_checker_functions.m_objc_object_check) {
564    ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check);
565
566    if (!ooc.Inspect(*function))
567      return false;
568
569    if (!ooc.Instrument())
570      return false;
571  }
572
573  if (log && log->GetVerbose()) {
574    std::string s;
575    raw_string_ostream oss(s);
576
577    M.print(oss, nullptr);
578
579    oss.flush();
580
581    LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str());
582  }
583
584  return true;
585}
586
587void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {}
588
589PassManagerType IRDynamicChecks::getPotentialPassManagerType() const {
590  return PMT_ModulePassManager;
591}
592