1//===-- IRDynamicChecks.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/IR/Constants.h"
10#include "llvm/IR/DataLayout.h"
11#include "llvm/IR/Function.h"
12#include "llvm/IR/Instructions.h"
13#include "llvm/IR/Module.h"
14#include "llvm/IR/Value.h"
15#include "llvm/Support/raw_ostream.h"
16
17#include "IRDynamicChecks.h"
18
19#include "lldb/Expression/UtilityFunction.h"
20#include "lldb/Target/ExecutionContext.h"
21#include "lldb/Target/Process.h"
22#include "lldb/Target/StackFrame.h"
23#include "lldb/Target/Target.h"
24#include "lldb/Utility/ConstString.h"
25#include "lldb/Utility/LLDBLog.h"
26#include "lldb/Utility/Log.h"
27
28#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
29
30using namespace llvm;
31using namespace lldb_private;
32
// Pass identifier object; it is handed to the ModulePass constructor by the
// IRDynamicChecks constructor in this file.
static char ID;

// Names under which the checker utility functions are created in
// ClangDynamicCheckerFunctions::Install().
#define VALID_POINTER_CHECK_NAME "_$__lldb_valid_pointer_check"
#define VALID_OBJC_OBJECT_CHECK_NAME "$__lldb_objc_object_check"

// Source text of the pointer-validity checker. The function reads a single
// byte through the pointer it is given and does nothing with the result.
static const char g_valid_pointer_check_text[] =
    "extern \"C\" void\n"
    "_$__lldb_valid_pointer_check (unsigned char *$__lldb_arg_ptr)\n"
    "{\n"
    "    unsigned char $__lldb_local_val = *$__lldb_arg_ptr;\n"
    "}";
44
/// Construct an empty checker set; the base class is tagged with DCF_Clang.
/// The checker functions themselves are created later by Install().
ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions()
    : DynamicCheckerFunctions(DCF_Clang) {}

/// Defaulted destructor, defined out of line.
ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default;
49
50llvm::Error ClangDynamicCheckerFunctions::Install(
51    DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) {
52  Expected<std::unique_ptr<UtilityFunction>> utility_fn =
53      exe_ctx.GetTargetRef().CreateUtilityFunction(
54          g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME,
55          lldb::eLanguageTypeC, exe_ctx);
56  if (!utility_fn)
57    return utility_fn.takeError();
58  m_valid_pointer_check = std::move(*utility_fn);
59
60  if (Process *process = exe_ctx.GetProcessPtr()) {
61    ObjCLanguageRuntime *objc_language_runtime =
62        ObjCLanguageRuntime::Get(*process);
63
64    if (objc_language_runtime) {
65      Expected<std::unique_ptr<UtilityFunction>> checker_fn =
66          objc_language_runtime->CreateObjectChecker(VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx);
67      if (!checker_fn)
68        return checker_fn.takeError();
69      m_objc_object_check = std::move(*checker_fn);
70    }
71  }
72
73  return Error::success();
74}
75
76bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t addr,
77                                                         Stream &message) {
78  // FIXME: We have to get the checkers to know why they scotched the call in
79  // more detail,
80  // so we can print a better message here.
81  if (m_valid_pointer_check && m_valid_pointer_check->ContainsAddress(addr)) {
82    message.Printf("Attempted to dereference an invalid pointer.");
83    return true;
84  } else if (m_objc_object_check &&
85             m_objc_object_check->ContainsAddress(addr)) {
86    message.Printf("Attempted to dereference an invalid ObjC Object or send it "
87                   "an unrecognized selector");
88    return true;
89  }
90  return false;
91}
92
93static std::string PrintValue(llvm::Value *V, bool truncate = false) {
94  std::string s;
95  raw_string_ostream rso(s);
96  V->print(rso);
97  rso.flush();
98  if (truncate)
99    s.resize(s.length() - 1);
100  return s;
101}
102
103/// \class Instrumenter IRDynamicChecks.cpp
104/// Finds and instruments individual LLVM IR instructions
105///
106/// When instrumenting LLVM IR, it is frequently desirable to first search for
107/// instructions, and then later modify them.  This way iterators remain
108/// intact, and multiple passes can look at the same code base without
109/// treading on each other's toes.
110///
111/// The Instrumenter class implements this functionality.  A client first
112/// calls Inspect on a function, which populates a list of instructions to be
113/// instrumented.  Then, later, when all passes' Inspect functions have been
114/// called, the client calls Instrument, which adds the desired
115/// instrumentation.
116///
117/// A subclass of Instrumenter must override InstrumentInstruction, which
118/// is responsible for adding whatever instrumentation is necessary.
119///
120/// A subclass of Instrumenter may override:
121///
122/// - InspectInstruction [default: does nothing]
123///
124/// - InspectBasicBlock [default: iterates through the instructions in a
125///   basic block calling InspectInstruction]
126///
127/// - InspectFunction [default: iterates through the basic blocks in a
128///   function calling InspectBasicBlock]
129class Instrumenter {
130public:
131  /// Constructor
132  ///
133  /// \param[in] module
134  ///     The module being instrumented.
135  Instrumenter(llvm::Module &module,
136               std::shared_ptr<UtilityFunction> checker_function)
137      : m_module(module), m_checker_function(checker_function) {}
138
139  virtual ~Instrumenter() = default;
140
141  /// Inspect a function to find instructions to instrument
142  ///
143  /// \param[in] function
144  ///     The function to inspect.
145  ///
146  /// \return
147  ///     True on success; false on error.
148  bool Inspect(llvm::Function &function) { return InspectFunction(function); }
149
150  /// Instrument all the instructions found by Inspect()
151  ///
152  /// \return
153  ///     True on success; false on error.
154  bool Instrument() {
155    for (InstIterator ii = m_to_instrument.begin(),
156                      last_ii = m_to_instrument.end();
157         ii != last_ii; ++ii) {
158      if (!InstrumentInstruction(*ii))
159        return false;
160    }
161
162    return true;
163  }
164
165protected:
166  /// Add instrumentation to a single instruction
167  ///
168  /// \param[in] inst
169  ///     The instruction to be instrumented.
170  ///
171  /// \return
172  ///     True on success; false otherwise.
173  virtual bool InstrumentInstruction(llvm::Instruction *inst) = 0;
174
175  /// Register a single instruction to be instrumented
176  ///
177  /// \param[in] inst
178  ///     The instruction to be instrumented.
179  void RegisterInstruction(llvm::Instruction &inst) {
180    m_to_instrument.push_back(&inst);
181  }
182
183  /// Determine whether a single instruction is interesting to instrument,
184  /// and, if so, call RegisterInstruction
185  ///
186  /// \param[in] i
187  ///     The instruction to be inspected.
188  ///
189  /// \return
190  ///     False if there was an error scanning; true otherwise.
191  virtual bool InspectInstruction(llvm::Instruction &i) { return true; }
192
193  /// Scan a basic block to see if any instructions are interesting
194  ///
195  /// \param[in] bb
196  ///     The basic block to be inspected.
197  ///
198  /// \return
199  ///     False if there was an error scanning; true otherwise.
200  virtual bool InspectBasicBlock(llvm::BasicBlock &bb) {
201    for (llvm::BasicBlock::iterator ii = bb.begin(), last_ii = bb.end();
202         ii != last_ii; ++ii) {
203      if (!InspectInstruction(*ii))
204        return false;
205    }
206
207    return true;
208  }
209
210  /// Scan a function to see if any instructions are interesting
211  ///
212  /// \param[in] f
213  ///     The function to be inspected.
214  ///
215  /// \return
216  ///     False if there was an error scanning; true otherwise.
217  virtual bool InspectFunction(llvm::Function &f) {
218    for (llvm::Function::iterator bbi = f.begin(), last_bbi = f.end();
219         bbi != last_bbi; ++bbi) {
220      if (!InspectBasicBlock(*bbi))
221        return false;
222    }
223
224    return true;
225  }
226
227  /// Build a function pointer for a function with signature void
228  /// (*)(uint8_t*) with a given address
229  ///
230  /// \param[in] start_address
231  ///     The address of the function.
232  ///
233  /// \return
234  ///     The function pointer, for use in a CallInst.
235  llvm::FunctionCallee BuildPointerValidatorFunc(lldb::addr_t start_address) {
236    llvm::Type *param_array[1];
237
238    param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
239
240    ArrayRef<llvm::Type *> params(param_array, 1);
241
242    FunctionType *fun_ty = FunctionType::get(
243        llvm::Type::getVoidTy(m_module.getContext()), params, true);
244    PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
245    Constant *fun_addr_int =
246        ConstantInt::get(GetIntptrTy(), start_address, false);
247    return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
248  }
249
250  /// Build a function pointer for a function with signature void
251  /// (*)(uint8_t*, uint8_t*) with a given address
252  ///
253  /// \param[in] start_address
254  ///     The address of the function.
255  ///
256  /// \return
257  ///     The function pointer, for use in a CallInst.
258  llvm::FunctionCallee BuildObjectCheckerFunc(lldb::addr_t start_address) {
259    llvm::Type *param_array[2];
260
261    param_array[0] = const_cast<llvm::PointerType *>(GetI8PtrTy());
262    param_array[1] = const_cast<llvm::PointerType *>(GetI8PtrTy());
263
264    ArrayRef<llvm::Type *> params(param_array, 2);
265
266    FunctionType *fun_ty = FunctionType::get(
267        llvm::Type::getVoidTy(m_module.getContext()), params, true);
268    PointerType *fun_ptr_ty = PointerType::getUnqual(fun_ty);
269    Constant *fun_addr_int =
270        ConstantInt::get(GetIntptrTy(), start_address, false);
271    return {fun_ty, ConstantExpr::getIntToPtr(fun_addr_int, fun_ptr_ty)};
272  }
273
274  PointerType *GetI8PtrTy() {
275    if (!m_i8ptr_ty)
276      m_i8ptr_ty = llvm::PointerType::getUnqual(m_module.getContext());
277
278    return m_i8ptr_ty;
279  }
280
281  IntegerType *GetIntptrTy() {
282    if (!m_intptr_ty) {
283      llvm::DataLayout data_layout(&m_module);
284
285      m_intptr_ty = llvm::Type::getIntNTy(m_module.getContext(),
286                                          data_layout.getPointerSizeInBits());
287    }
288
289    return m_intptr_ty;
290  }
291
292  typedef std::vector<llvm::Instruction *> InstVector;
293  typedef InstVector::iterator InstIterator;
294
295  InstVector m_to_instrument; ///< List of instructions the inspector found
296  llvm::Module &m_module;     ///< The module which is being instrumented
297  std::shared_ptr<UtilityFunction>
298      m_checker_function; ///< The dynamic checker function for the process
299
300private:
301  PointerType *m_i8ptr_ty = nullptr;
302  IntegerType *m_intptr_ty = nullptr;
303};
304
305class ValidPointerChecker : public Instrumenter {
306public:
307  ValidPointerChecker(llvm::Module &module,
308                      std::shared_ptr<UtilityFunction> checker_function)
309      : Instrumenter(module, checker_function),
310        m_valid_pointer_check_func(nullptr) {}
311
312  ~ValidPointerChecker() override = default;
313
314protected:
315  bool InstrumentInstruction(llvm::Instruction *inst) override {
316    Log *log = GetLog(LLDBLog::Expressions);
317
318    LLDB_LOGF(log, "Instrumenting load/store instruction: %s\n",
319              PrintValue(inst).c_str());
320
321    if (!m_valid_pointer_check_func)
322      m_valid_pointer_check_func =
323          BuildPointerValidatorFunc(m_checker_function->StartAddress());
324
325    llvm::Value *dereferenced_ptr = nullptr;
326
327    if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(inst))
328      dereferenced_ptr = li->getPointerOperand();
329    else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(inst))
330      dereferenced_ptr = si->getPointerOperand();
331    else
332      return false;
333
334    // Insert an instruction to call the helper with the result
335    CallInst::Create(m_valid_pointer_check_func, dereferenced_ptr, "", inst);
336
337    return true;
338  }
339
340  bool InspectInstruction(llvm::Instruction &i) override {
341    if (isa<llvm::LoadInst>(&i) || isa<llvm::StoreInst>(&i))
342      RegisterInstruction(i);
343
344    return true;
345  }
346
347private:
348  llvm::FunctionCallee m_valid_pointer_check_func;
349};
350
351class ObjcObjectChecker : public Instrumenter {
352public:
353  ObjcObjectChecker(llvm::Module &module,
354                    std::shared_ptr<UtilityFunction> checker_function)
355      : Instrumenter(module, checker_function),
356        m_objc_object_check_func(nullptr) {}
357
358  ~ObjcObjectChecker() override = default;
359
360  enum msgSend_type {
361    eMsgSend = 0,
362    eMsgSendSuper,
363    eMsgSendSuper_stret,
364    eMsgSend_fpret,
365    eMsgSend_stret
366  };
367
368  std::map<llvm::Instruction *, msgSend_type> msgSend_types;
369
370protected:
371  bool InstrumentInstruction(llvm::Instruction *inst) override {
372    CallInst *call_inst = dyn_cast<CallInst>(inst);
373
374    if (!call_inst)
375      return false; // call_inst really shouldn't be nullptr, because otherwise
376                    // InspectInstruction wouldn't have registered it
377
378    if (!m_objc_object_check_func)
379      m_objc_object_check_func =
380          BuildObjectCheckerFunc(m_checker_function->StartAddress());
381
382    // id objc_msgSend(id theReceiver, SEL theSelector, ...)
383
384    llvm::Value *target_object;
385    llvm::Value *selector;
386
387    switch (msgSend_types[inst]) {
388    case eMsgSend:
389    case eMsgSend_fpret:
390      // On arm64, clang uses objc_msgSend for scalar and struct return
391      // calls.  The call instruction will record which was used.
392      if (call_inst->hasStructRetAttr()) {
393        target_object = call_inst->getArgOperand(1);
394        selector = call_inst->getArgOperand(2);
395      } else {
396        target_object = call_inst->getArgOperand(0);
397        selector = call_inst->getArgOperand(1);
398      }
399      break;
400    case eMsgSend_stret:
401      target_object = call_inst->getArgOperand(1);
402      selector = call_inst->getArgOperand(2);
403      break;
404    case eMsgSendSuper:
405    case eMsgSendSuper_stret:
406      return true;
407    }
408
409    // These objects should always be valid according to Sean Calannan
410    assert(target_object);
411    assert(selector);
412
413    // Insert an instruction to call the helper with the result
414
415    llvm::Value *arg_array[2];
416
417    arg_array[0] = target_object;
418    arg_array[1] = selector;
419
420    ArrayRef<llvm::Value *> args(arg_array, 2);
421
422    CallInst::Create(m_objc_object_check_func, args, "", inst);
423
424    return true;
425  }
426
427  static llvm::Function *GetFunction(llvm::Value *value) {
428    if (llvm::Function *function = llvm::dyn_cast<llvm::Function>(value)) {
429      return function;
430    }
431
432    if (llvm::ConstantExpr *const_expr =
433            llvm::dyn_cast<llvm::ConstantExpr>(value)) {
434      switch (const_expr->getOpcode()) {
435      default:
436        return nullptr;
437      case llvm::Instruction::BitCast:
438        return GetFunction(const_expr->getOperand(0));
439      }
440    }
441
442    return nullptr;
443  }
444
445  static llvm::Function *GetCalledFunction(llvm::CallInst *inst) {
446    return GetFunction(inst->getCalledOperand());
447  }
448
449  bool InspectInstruction(llvm::Instruction &i) override {
450    Log *log = GetLog(LLDBLog::Expressions);
451
452    CallInst *call_inst = dyn_cast<CallInst>(&i);
453
454    if (call_inst) {
455      const llvm::Function *called_function = GetCalledFunction(call_inst);
456
457      if (!called_function)
458        return true;
459
460      std::string name_str = called_function->getName().str();
461      const char *name_cstr = name_str.c_str();
462
463      LLDB_LOGF(log, "Found call to %s: %s\n", name_cstr,
464                PrintValue(call_inst).c_str());
465
466      if (name_str.find("objc_msgSend") == std::string::npos)
467        return true;
468
469      if (!strcmp(name_cstr, "objc_msgSend")) {
470        RegisterInstruction(i);
471        msgSend_types[&i] = eMsgSend;
472        return true;
473      }
474
475      if (!strcmp(name_cstr, "objc_msgSend_stret")) {
476        RegisterInstruction(i);
477        msgSend_types[&i] = eMsgSend_stret;
478        return true;
479      }
480
481      if (!strcmp(name_cstr, "objc_msgSend_fpret")) {
482        RegisterInstruction(i);
483        msgSend_types[&i] = eMsgSend_fpret;
484        return true;
485      }
486
487      if (!strcmp(name_cstr, "objc_msgSendSuper")) {
488        RegisterInstruction(i);
489        msgSend_types[&i] = eMsgSendSuper;
490        return true;
491      }
492
493      if (!strcmp(name_cstr, "objc_msgSendSuper_stret")) {
494        RegisterInstruction(i);
495        msgSend_types[&i] = eMsgSendSuper_stret;
496        return true;
497      }
498
499      LLDB_LOGF(log,
500                "Function name '%s' contains 'objc_msgSend' but is not handled",
501                name_str.c_str());
502
503      return true;
504    }
505
506    return true;
507  }
508
509private:
510  llvm::FunctionCallee m_objc_object_check_func;
511};
512
513IRDynamicChecks::IRDynamicChecks(
514    ClangDynamicCheckerFunctions &checker_functions, const char *func_name)
515    : ModulePass(ID), m_func_name(func_name),
516      m_checker_functions(checker_functions) {}
517
/// Defaulted destructor, defined out of line.
IRDynamicChecks::~IRDynamicChecks() = default;
519
520bool IRDynamicChecks::runOnModule(llvm::Module &M) {
521  Log *log = GetLog(LLDBLog::Expressions);
522
523  llvm::Function *function = M.getFunction(StringRef(m_func_name));
524
525  if (!function) {
526    LLDB_LOGF(log, "Couldn't find %s() in the module", m_func_name.c_str());
527
528    return false;
529  }
530
531  if (m_checker_functions.m_valid_pointer_check) {
532    ValidPointerChecker vpc(M, m_checker_functions.m_valid_pointer_check);
533
534    if (!vpc.Inspect(*function))
535      return false;
536
537    if (!vpc.Instrument())
538      return false;
539  }
540
541  if (m_checker_functions.m_objc_object_check) {
542    ObjcObjectChecker ooc(M, m_checker_functions.m_objc_object_check);
543
544    if (!ooc.Inspect(*function))
545      return false;
546
547    if (!ooc.Instrument())
548      return false;
549  }
550
551  if (log && log->GetVerbose()) {
552    std::string s;
553    raw_string_ostream oss(s);
554
555    M.print(oss, nullptr);
556
557    oss.flush();
558
559    LLDB_LOGF(log, "Module after dynamic checks: \n%s", s.c_str());
560  }
561
562  return true;
563}
564
/// Intentionally empty: both arguments are ignored and nothing is assigned.
void IRDynamicChecks::assignPassManager(PMStack &PMS, PassManagerType T) {}
566
/// Report the kind of pass manager this pass fits into.
///
/// \return
///     Always PMT_ModulePassManager.
PassManagerType IRDynamicChecks::getPotentialPassManagerType() const {
  return PMT_ModulePassManager;
}
570