1238901Sandrew//===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===// 2238901Sandrew// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6238901Sandrew// 7238901Sandrew//===----------------------------------------------------------------------===// 8238901Sandrew// 9274201Sdim// Symbolizer is used by sanitizers to map instruction address to a location in 10274201Sdim// source code at run-time. Symbolizer either uses __sanitizer_symbolize_* 11274201Sdim// defined in the program, or (if they are missing) tries to find and 12274201Sdim// launch "llvm-symbolizer" commandline tool in a separate process and 13274201Sdim// communicate with it. 14238901Sandrew// 15274201Sdim// Generally we should try to avoid calling system library functions during 16274201Sdim// symbolization (and use their replacements from sanitizer_libc.h instead). 17238901Sandrew//===----------------------------------------------------------------------===// 18238901Sandrew#ifndef SANITIZER_SYMBOLIZER_H 19238901Sandrew#define SANITIZER_SYMBOLIZER_H 20238901Sandrew 21276789Sdim#include "sanitizer_common.h" 22276789Sdim#include "sanitizer_mutex.h" 23353358Sdim#include "sanitizer_vector.h" 24238901Sandrew 25238901Sandrewnamespace __sanitizer { 26238901Sandrew 27238901Sandrewstruct AddressInfo { 28276789Sdim // Owns all the string members. Storage for them is 29276789Sdim // (de)allocated using sanitizer internal allocator. 30238901Sandrew uptr address; 31276789Sdim 32238901Sandrew char *module; 33238901Sandrew uptr module_offset; 34314564Sdim ModuleArch module_arch; 35276789Sdim 36276789Sdim static const uptr kUnknown = ~(uptr)0; 37238901Sandrew char *function; 38276789Sdim uptr function_offset; 39276789Sdim 40238901Sandrew char *file; 41238901Sandrew int line; 42238901Sandrew int column; 43238901Sandrew 44276789Sdim AddressInfo(); 45276789Sdim // Deletes all strings and resets all fields. 46276789Sdim void Clear(); 47314564Sdim void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch); 48276789Sdim}; 49274201Sdim 50276789Sdim// Linked list of symbolized frames (each frame is described by AddressInfo). 51276789Sdimstruct SymbolizedStack { 52276789Sdim SymbolizedStack *next; 53276789Sdim AddressInfo info; 54276789Sdim static SymbolizedStack *New(uptr addr); 55276789Sdim // Deletes current, and all subsequent frames in the linked list. 56276789Sdim // The object cannot be accessed after the call to this function. 57276789Sdim void ClearAll(); 58245614Sandrew 59276789Sdim private: 60276789Sdim SymbolizedStack(); 61238901Sandrew}; 62238901Sandrew 63276789Sdim// For now, DataInfo is used to describe global variable. 64245614Sandrewstruct DataInfo { 65276789Sdim // Owns all the string members. Storage for them is 66276789Sdim // (de)allocated using sanitizer internal allocator. 67245614Sandrew char *module; 68245614Sandrew uptr module_offset; 69314564Sdim ModuleArch module_arch; 70314564Sdim 71309124Sdim char *file; 72309124Sdim uptr line; 73245614Sandrew char *name; 74245614Sandrew uptr start; 75245614Sandrew uptr size; 76276789Sdim 77276789Sdim DataInfo(); 78276789Sdim void Clear(); 79245614Sandrew}; 80245614Sandrew 81353358Sdimstruct LocalInfo { 82353358Sdim char *function_name = nullptr; 83353358Sdim char *name = nullptr; 84353358Sdim char *decl_file = nullptr; 85353358Sdim unsigned decl_line = 0; 86353358Sdim 87353358Sdim bool has_frame_offset = false; 88353358Sdim bool has_size = false; 89353358Sdim bool has_tag_offset = false; 90353358Sdim 91353358Sdim sptr frame_offset; 92353358Sdim uptr size; 93353358Sdim uptr tag_offset; 94353358Sdim 95353358Sdim void Clear(); 96353358Sdim}; 97353358Sdim 98353358Sdimstruct FrameInfo { 99353358Sdim char *module; 100353358Sdim uptr module_offset; 101353358Sdim ModuleArch module_arch; 102353358Sdim 103353358Sdim InternalMmapVector<LocalInfo> locals; 104353358Sdim void Clear(); 105353358Sdim}; 106353358Sdim 107288943Sdimclass SymbolizerTool; 108288943Sdim 109288943Sdimclass Symbolizer final { 110274201Sdim public: 111276789Sdim /// Initialize and return platform-specific implementation of symbolizer 112276789Sdim /// (if it wasn't already initialized). 113274201Sdim static Symbolizer *GetOrInit(); 114309124Sdim static void LateInitialize(); 115276789Sdim // Returns a list of symbolized frames for a given address (containing 116276789Sdim // all inlined functions, if necessary). 117288943Sdim SymbolizedStack *SymbolizePC(uptr address); 118288943Sdim bool SymbolizeData(uptr address, DataInfo *info); 119353358Sdim bool SymbolizeFrame(uptr address, FrameInfo *info); 120288943Sdim 121288943Sdim // The module names Symbolizer returns are stable and unique for every given 122288943Sdim // module. It is safe to store and compare them as pointers. 123288943Sdim bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 124288943Sdim uptr *module_address); 125288943Sdim const char *GetModuleNameForPc(uptr pc) { 126288943Sdim const char *module_name = nullptr; 127288943Sdim uptr unused; 128288943Sdim if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused)) 129288943Sdim return module_name; 130288943Sdim return nullptr; 131274201Sdim } 132288943Sdim 133274201Sdim // Release internal caches (if any). 134288943Sdim void Flush(); 135274201Sdim // Attempts to demangle the provided C++ mangled name. 136288943Sdim const char *Demangle(const char *name); 137238901Sandrew 138274201Sdim // Allow user to install hooks that would be called before/after Symbolizer 139274201Sdim // does the actual file/line info fetching. Specific sanitizers may need this 140274201Sdim // to distinguish system library calls made in user code from calls made 141274201Sdim // during in-process symbolization. 142274201Sdim typedef void (*StartSymbolizationHook)(); 143274201Sdim typedef void (*EndSymbolizationHook)(); 144274201Sdim // May be called at most once. 145274201Sdim void AddHooks(StartSymbolizationHook start_hook, 146274201Sdim EndSymbolizationHook end_hook); 147251034Sed 148327952Sdim void RefreshModules(); 149309124Sdim const LoadedModule *FindModuleForAddress(uptr address); 150309124Sdim 151327952Sdim void InvalidateModuleList(); 152327952Sdim 153274201Sdim private: 154288943Sdim // GetModuleNameAndOffsetForPC has to return a string to the caller. 155288943Sdim // Since the corresponding module might get unloaded later, we should create 156288943Sdim // our owned copies of the strings that we can safely return. 157288943Sdim // ModuleNameOwner does not provide any synchronization, thus calls to 158288943Sdim // its method should be protected by |mu_|. 159288943Sdim class ModuleNameOwner { 160288943Sdim public: 161288943Sdim explicit ModuleNameOwner(BlockingMutex *synchronized_by) 162341825Sdim : last_match_(nullptr), mu_(synchronized_by) { 163341825Sdim storage_.reserve(kInitialCapacity); 164341825Sdim } 165288943Sdim const char *GetOwnedCopy(const char *str); 166288943Sdim 167288943Sdim private: 168288943Sdim static const uptr kInitialCapacity = 1000; 169288943Sdim InternalMmapVector<const char*> storage_; 170288943Sdim const char *last_match_; 171288943Sdim 172288943Sdim BlockingMutex *mu_; 173288943Sdim } module_names_; 174288943Sdim 175274201Sdim /// Platform-specific function for creating a Symbolizer object. 176276789Sdim static Symbolizer *PlatformInit(); 177238901Sandrew 178288943Sdim bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name, 179314564Sdim uptr *module_offset, 180314564Sdim ModuleArch *module_arch); 181309124Sdim ListOfModules modules_; 182327952Sdim ListOfModules fallback_modules_; 183288943Sdim // If stale, need to reload the modules before looking up addresses. 184288943Sdim bool modules_fresh_; 185288943Sdim 186288943Sdim // Platform-specific default demangler, must not return nullptr. 187288943Sdim const char *PlatformDemangle(const char *name); 188288943Sdim 189274201Sdim static Symbolizer *symbolizer_; 190274201Sdim static StaticSpinMutex init_mu_; 191245614Sandrew 192288943Sdim // Mutex locked from public methods of |Symbolizer|, so that the internals 193288943Sdim // (including individual symbolizer tools and platform-specific methods) are 194288943Sdim // always synchronized. 195288943Sdim BlockingMutex mu_; 196238901Sandrew 197288943Sdim IntrusiveList<SymbolizerTool> tools_; 198288943Sdim 199288943Sdim explicit Symbolizer(IntrusiveList<SymbolizerTool> tools); 200288943Sdim 201274201Sdim static LowLevelAllocator symbolizer_allocator_; 202238901Sandrew 203274201Sdim StartSymbolizationHook start_hook_; 204274201Sdim EndSymbolizationHook end_hook_; 205274201Sdim class SymbolizerScope { 206274201Sdim public: 207274201Sdim explicit SymbolizerScope(const Symbolizer *sym); 208274201Sdim ~SymbolizerScope(); 209274201Sdim private: 210274201Sdim const Symbolizer *sym_; 211238901Sandrew }; 212238901Sandrew}; 213238901Sandrew 214309124Sdim#ifdef SANITIZER_WINDOWS 215309124Sdimvoid InitializeDbgHelpIfNeeded(); 216309124Sdim#endif 217309124Sdim 218238901Sandrew} // namespace __sanitizer 219238901Sandrew 220238901Sandrew#endif // SANITIZER_SYMBOLIZER_H 221