//===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Symbolizer is used by sanitizers to map an instruction address to a location
// in source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
// defined in the program, or (if they are missing) tries to find and
// launch the "llvm-symbolizer" command-line tool in a separate process and
// communicate with it.
//
// Generally we should try to avoid calling system library functions during
// symbolization (and use their replacements from sanitizer_libc.h instead).
//===----------------------------------------------------------------------===//
18238901Sandrew#ifndef SANITIZER_SYMBOLIZER_H
19238901Sandrew#define SANITIZER_SYMBOLIZER_H
20238901Sandrew
21276789Sdim#include "sanitizer_common.h"
22276789Sdim#include "sanitizer_mutex.h"
23353358Sdim#include "sanitizer_vector.h"
24238901Sandrew
25238901Sandrewnamespace __sanitizer {
26238901Sandrew
27238901Sandrewstruct AddressInfo {
28276789Sdim  // Owns all the string members. Storage for them is
29276789Sdim  // (de)allocated using sanitizer internal allocator.
30238901Sandrew  uptr address;
31276789Sdim
32238901Sandrew  char *module;
33238901Sandrew  uptr module_offset;
34314564Sdim  ModuleArch module_arch;
35276789Sdim
36276789Sdim  static const uptr kUnknown = ~(uptr)0;
37238901Sandrew  char *function;
38276789Sdim  uptr function_offset;
39276789Sdim
40238901Sandrew  char *file;
41238901Sandrew  int line;
42238901Sandrew  int column;
43238901Sandrew
44276789Sdim  AddressInfo();
45276789Sdim  // Deletes all strings and resets all fields.
46276789Sdim  void Clear();
47314564Sdim  void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
48276789Sdim};
49274201Sdim
50276789Sdim// Linked list of symbolized frames (each frame is described by AddressInfo).
51276789Sdimstruct SymbolizedStack {
52276789Sdim  SymbolizedStack *next;
53276789Sdim  AddressInfo info;
54276789Sdim  static SymbolizedStack *New(uptr addr);
55276789Sdim  // Deletes current, and all subsequent frames in the linked list.
56276789Sdim  // The object cannot be accessed after the call to this function.
57276789Sdim  void ClearAll();
58245614Sandrew
59276789Sdim private:
60276789Sdim  SymbolizedStack();
61238901Sandrew};
62238901Sandrew
63276789Sdim// For now, DataInfo is used to describe global variable.
64245614Sandrewstruct DataInfo {
65276789Sdim  // Owns all the string members. Storage for them is
66276789Sdim  // (de)allocated using sanitizer internal allocator.
67245614Sandrew  char *module;
68245614Sandrew  uptr module_offset;
69314564Sdim  ModuleArch module_arch;
70314564Sdim
71309124Sdim  char *file;
72309124Sdim  uptr line;
73245614Sandrew  char *name;
74245614Sandrew  uptr start;
75245614Sandrew  uptr size;
76276789Sdim
77276789Sdim  DataInfo();
78276789Sdim  void Clear();
79245614Sandrew};
80245614Sandrew
81353358Sdimstruct LocalInfo {
82353358Sdim  char *function_name = nullptr;
83353358Sdim  char *name = nullptr;
84353358Sdim  char *decl_file = nullptr;
85353358Sdim  unsigned decl_line = 0;
86353358Sdim
87353358Sdim  bool has_frame_offset = false;
88353358Sdim  bool has_size = false;
89353358Sdim  bool has_tag_offset = false;
90353358Sdim
91353358Sdim  sptr frame_offset;
92353358Sdim  uptr size;
93353358Sdim  uptr tag_offset;
94353358Sdim
95353358Sdim  void Clear();
96353358Sdim};
97353358Sdim
98353358Sdimstruct FrameInfo {
99353358Sdim  char *module;
100353358Sdim  uptr module_offset;
101353358Sdim  ModuleArch module_arch;
102353358Sdim
103353358Sdim  InternalMmapVector<LocalInfo> locals;
104353358Sdim  void Clear();
105353358Sdim};
106353358Sdim
107288943Sdimclass SymbolizerTool;
108288943Sdim
109288943Sdimclass Symbolizer final {
110274201Sdim public:
111276789Sdim  /// Initialize and return platform-specific implementation of symbolizer
112276789Sdim  /// (if it wasn't already initialized).
113274201Sdim  static Symbolizer *GetOrInit();
114309124Sdim  static void LateInitialize();
115276789Sdim  // Returns a list of symbolized frames for a given address (containing
116276789Sdim  // all inlined functions, if necessary).
117288943Sdim  SymbolizedStack *SymbolizePC(uptr address);
118288943Sdim  bool SymbolizeData(uptr address, DataInfo *info);
119353358Sdim  bool SymbolizeFrame(uptr address, FrameInfo *info);
120288943Sdim
121288943Sdim  // The module names Symbolizer returns are stable and unique for every given
122288943Sdim  // module.  It is safe to store and compare them as pointers.
123288943Sdim  bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
124288943Sdim                                   uptr *module_address);
125288943Sdim  const char *GetModuleNameForPc(uptr pc) {
126288943Sdim    const char *module_name = nullptr;
127288943Sdim    uptr unused;
128288943Sdim    if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
129288943Sdim      return module_name;
130288943Sdim    return nullptr;
131274201Sdim  }
132288943Sdim
133274201Sdim  // Release internal caches (if any).
134288943Sdim  void Flush();
135274201Sdim  // Attempts to demangle the provided C++ mangled name.
136288943Sdim  const char *Demangle(const char *name);
137238901Sandrew
138274201Sdim  // Allow user to install hooks that would be called before/after Symbolizer
139274201Sdim  // does the actual file/line info fetching. Specific sanitizers may need this
140274201Sdim  // to distinguish system library calls made in user code from calls made
141274201Sdim  // during in-process symbolization.
142274201Sdim  typedef void (*StartSymbolizationHook)();
143274201Sdim  typedef void (*EndSymbolizationHook)();
144274201Sdim  // May be called at most once.
145274201Sdim  void AddHooks(StartSymbolizationHook start_hook,
146274201Sdim                EndSymbolizationHook end_hook);
147251034Sed
148327952Sdim  void RefreshModules();
149309124Sdim  const LoadedModule *FindModuleForAddress(uptr address);
150309124Sdim
151327952Sdim  void InvalidateModuleList();
152327952Sdim
153274201Sdim private:
154288943Sdim  // GetModuleNameAndOffsetForPC has to return a string to the caller.
155288943Sdim  // Since the corresponding module might get unloaded later, we should create
156288943Sdim  // our owned copies of the strings that we can safely return.
157288943Sdim  // ModuleNameOwner does not provide any synchronization, thus calls to
158288943Sdim  // its method should be protected by |mu_|.
159288943Sdim  class ModuleNameOwner {
160288943Sdim   public:
161288943Sdim    explicit ModuleNameOwner(BlockingMutex *synchronized_by)
162341825Sdim        : last_match_(nullptr), mu_(synchronized_by) {
163341825Sdim      storage_.reserve(kInitialCapacity);
164341825Sdim    }
165288943Sdim    const char *GetOwnedCopy(const char *str);
166288943Sdim
167288943Sdim   private:
168288943Sdim    static const uptr kInitialCapacity = 1000;
169288943Sdim    InternalMmapVector<const char*> storage_;
170288943Sdim    const char *last_match_;
171288943Sdim
172288943Sdim    BlockingMutex *mu_;
173288943Sdim  } module_names_;
174288943Sdim
175274201Sdim  /// Platform-specific function for creating a Symbolizer object.
176276789Sdim  static Symbolizer *PlatformInit();
177238901Sandrew
178288943Sdim  bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
179314564Sdim                                         uptr *module_offset,
180314564Sdim                                         ModuleArch *module_arch);
181309124Sdim  ListOfModules modules_;
182327952Sdim  ListOfModules fallback_modules_;
183288943Sdim  // If stale, need to reload the modules before looking up addresses.
184288943Sdim  bool modules_fresh_;
185288943Sdim
186288943Sdim  // Platform-specific default demangler, must not return nullptr.
187288943Sdim  const char *PlatformDemangle(const char *name);
188288943Sdim
189274201Sdim  static Symbolizer *symbolizer_;
190274201Sdim  static StaticSpinMutex init_mu_;
191245614Sandrew
192288943Sdim  // Mutex locked from public methods of |Symbolizer|, so that the internals
193288943Sdim  // (including individual symbolizer tools and platform-specific methods) are
194288943Sdim  // always synchronized.
195288943Sdim  BlockingMutex mu_;
196238901Sandrew
197288943Sdim  IntrusiveList<SymbolizerTool> tools_;
198288943Sdim
199288943Sdim  explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
200288943Sdim
201274201Sdim  static LowLevelAllocator symbolizer_allocator_;
202238901Sandrew
203274201Sdim  StartSymbolizationHook start_hook_;
204274201Sdim  EndSymbolizationHook end_hook_;
205274201Sdim  class SymbolizerScope {
206274201Sdim   public:
207274201Sdim    explicit SymbolizerScope(const Symbolizer *sym);
208274201Sdim    ~SymbolizerScope();
209274201Sdim   private:
210274201Sdim    const Symbolizer *sym_;
211238901Sandrew  };
212238901Sandrew};
213238901Sandrew
// Windows-only helper, declared only when SANITIZER_WINDOWS is defined.
// NOTE(review): if SANITIZER_WINDOWS is always defined (as 0 or 1) by the
// platform header, this #ifdef is always true and `#if SANITIZER_WINDOWS`
// would be the intended guard; confirm before changing.
#ifdef SANITIZER_WINDOWS
void InitializeDbgHelpIfNeeded();
#endif

218238901Sandrew}  // namespace __sanitizer
219238901Sandrew
220238901Sandrew#endif  // SANITIZER_SYMBOLIZER_H
221