1//===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Symbolizer is used by sanitizers to map an instruction address to a location
// in source code at run-time. Symbolizer either uses the
// __sanitizer_symbolize_* functions defined in the program, or (if they are
// missing) tries to find and launch the "llvm-symbolizer" command-line tool in
// a separate process and communicate with it.
14//
15// Generally we should try to avoid calling system library functions during
16// symbolization (and use their replacements from sanitizer_libc.h instead).
17//===----------------------------------------------------------------------===//
18#ifndef SANITIZER_SYMBOLIZER_H
19#define SANITIZER_SYMBOLIZER_H
20
21#include "sanitizer_common.h"
22#include "sanitizer_mutex.h"
23#include "sanitizer_vector.h"
24
25namespace __sanitizer {
26
27struct AddressInfo {
28  // Owns all the string members. Storage for them is
29  // (de)allocated using sanitizer internal allocator.
30  uptr address;
31
32  char *module;
33  uptr module_offset;
34  ModuleArch module_arch;
35
36  static const uptr kUnknown = ~(uptr)0;
37  char *function;
38  uptr function_offset;
39
40  char *file;
41  int line;
42  int column;
43
44  AddressInfo();
45  // Deletes all strings and resets all fields.
46  void Clear();
47  void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
48};
49
50// Linked list of symbolized frames (each frame is described by AddressInfo).
51struct SymbolizedStack {
52  SymbolizedStack *next;
53  AddressInfo info;
54  static SymbolizedStack *New(uptr addr);
55  // Deletes current, and all subsequent frames in the linked list.
56  // The object cannot be accessed after the call to this function.
57  void ClearAll();
58
59 private:
60  SymbolizedStack();
61};
62
63// For now, DataInfo is used to describe global variable.
64struct DataInfo {
65  // Owns all the string members. Storage for them is
66  // (de)allocated using sanitizer internal allocator.
67  char *module;
68  uptr module_offset;
69  ModuleArch module_arch;
70
71  char *file;
72  uptr line;
73  char *name;
74  uptr start;
75  uptr size;
76
77  DataInfo();
78  void Clear();
79};
80
81struct LocalInfo {
82  char *function_name = nullptr;
83  char *name = nullptr;
84  char *decl_file = nullptr;
85  unsigned decl_line = 0;
86
87  bool has_frame_offset = false;
88  bool has_size = false;
89  bool has_tag_offset = false;
90
91  sptr frame_offset;
92  uptr size;
93  uptr tag_offset;
94
95  void Clear();
96};
97
98struct FrameInfo {
99  char *module;
100  uptr module_offset;
101  ModuleArch module_arch;
102
103  InternalMmapVector<LocalInfo> locals;
104  void Clear();
105};
106
107class SymbolizerTool;
108
109class Symbolizer final {
110 public:
111  /// Initialize and return platform-specific implementation of symbolizer
112  /// (if it wasn't already initialized).
113  static Symbolizer *GetOrInit();
114  static void LateInitialize();
115  // Returns a list of symbolized frames for a given address (containing
116  // all inlined functions, if necessary).
117  SymbolizedStack *SymbolizePC(uptr address);
118  bool SymbolizeData(uptr address, DataInfo *info);
119  bool SymbolizeFrame(uptr address, FrameInfo *info);
120
121  // The module names Symbolizer returns are stable and unique for every given
122  // module.  It is safe to store and compare them as pointers.
123  bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
124                                   uptr *module_address);
125  const char *GetModuleNameForPc(uptr pc) {
126    const char *module_name = nullptr;
127    uptr unused;
128    if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
129      return module_name;
130    return nullptr;
131  }
132
133  // Release internal caches (if any).
134  void Flush();
135  // Attempts to demangle the provided C++ mangled name.
136  const char *Demangle(const char *name);
137
138  // Allow user to install hooks that would be called before/after Symbolizer
139  // does the actual file/line info fetching. Specific sanitizers may need this
140  // to distinguish system library calls made in user code from calls made
141  // during in-process symbolization.
142  typedef void (*StartSymbolizationHook)();
143  typedef void (*EndSymbolizationHook)();
144  // May be called at most once.
145  void AddHooks(StartSymbolizationHook start_hook,
146                EndSymbolizationHook end_hook);
147
148  void RefreshModules();
149  const LoadedModule *FindModuleForAddress(uptr address);
150
151  void InvalidateModuleList();
152
153 private:
154  // GetModuleNameAndOffsetForPC has to return a string to the caller.
155  // Since the corresponding module might get unloaded later, we should create
156  // our owned copies of the strings that we can safely return.
157  // ModuleNameOwner does not provide any synchronization, thus calls to
158  // its method should be protected by |mu_|.
159  class ModuleNameOwner {
160   public:
161    explicit ModuleNameOwner(BlockingMutex *synchronized_by)
162        : last_match_(nullptr), mu_(synchronized_by) {
163      storage_.reserve(kInitialCapacity);
164    }
165    const char *GetOwnedCopy(const char *str);
166
167   private:
168    static const uptr kInitialCapacity = 1000;
169    InternalMmapVector<const char*> storage_;
170    const char *last_match_;
171
172    BlockingMutex *mu_;
173  } module_names_;
174
175  /// Platform-specific function for creating a Symbolizer object.
176  static Symbolizer *PlatformInit();
177
178  bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
179                                         uptr *module_offset,
180                                         ModuleArch *module_arch);
181  ListOfModules modules_;
182  ListOfModules fallback_modules_;
183  // If stale, need to reload the modules before looking up addresses.
184  bool modules_fresh_;
185
186  // Platform-specific default demangler, must not return nullptr.
187  const char *PlatformDemangle(const char *name);
188
189  static Symbolizer *symbolizer_;
190  static StaticSpinMutex init_mu_;
191
192  // Mutex locked from public methods of |Symbolizer|, so that the internals
193  // (including individual symbolizer tools and platform-specific methods) are
194  // always synchronized.
195  BlockingMutex mu_;
196
197  IntrusiveList<SymbolizerTool> tools_;
198
199  explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
200
201  static LowLevelAllocator symbolizer_allocator_;
202
203  StartSymbolizationHook start_hook_;
204  EndSymbolizationHook end_hook_;
205  class SymbolizerScope {
206   public:
207    explicit SymbolizerScope(const Symbolizer *sym);
208    ~SymbolizerScope();
209   private:
210    const Symbolizer *sym_;
211  };
212};
213
// Declared for Windows builds only. The guard tests the macro's value rather
// than its mere existence: a macro that is defined as 0 still satisfies
// #ifdef, so #ifdef would not restrict this declaration to Windows.
#if SANITIZER_WINDOWS
void InitializeDbgHelpIfNeeded();
#endif
217
218}  // namespace __sanitizer
219
220#endif  // SANITIZER_SYMBOLIZER_H
221