sanitizer_symbolizer_libcdep.cc revision 1.3
1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2// 3// This file is distributed under the University of Illinois Open Source 4// License. See LICENSE.TXT for details. 5// 6//===----------------------------------------------------------------------===// 7// 8// This file is shared between AddressSanitizer and ThreadSanitizer 9// run-time libraries. 10//===----------------------------------------------------------------------===// 11 12#include "sanitizer_allocator_internal.h" 13#include "sanitizer_internal_defs.h" 14#include "sanitizer_symbolizer_internal.h" 15 16namespace __sanitizer { 17 18const char *ExtractToken(const char *str, const char *delims, char **result) { 19 uptr prefix_len = internal_strcspn(str, delims); 20 *result = (char*)InternalAlloc(prefix_len + 1); 21 internal_memcpy(*result, str, prefix_len); 22 (*result)[prefix_len] = '\0'; 23 const char *prefix_end = str + prefix_len; 24 if (*prefix_end != '\0') prefix_end++; 25 return prefix_end; 26} 27 28const char *ExtractInt(const char *str, const char *delims, int *result) { 29 char *buff; 30 const char *ret = ExtractToken(str, delims, &buff); 31 if (buff != 0) { 32 *result = (int)internal_atoll(buff); 33 } 34 InternalFree(buff); 35 return ret; 36} 37 38const char *ExtractUptr(const char *str, const char *delims, uptr *result) { 39 char *buff; 40 const char *ret = ExtractToken(str, delims, &buff); 41 if (buff != 0) { 42 *result = (uptr)internal_atoll(buff); 43 } 44 InternalFree(buff); 45 return ret; 46} 47 48const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 49 char **result) { 50 const char *found_delimiter = internal_strstr(str, delimiter); 51 uptr prefix_len = 52 found_delimiter ? found_delimiter - str : internal_strlen(str); 53 *result = (char *)InternalAlloc(prefix_len + 1); 54 internal_memcpy(*result, str, prefix_len); 55 (*result)[prefix_len] = '\0'; 56 const char *prefix_end = str + prefix_len; 57 if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter); 58 return prefix_end; 59} 60 61SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) { 62 BlockingMutexLock l(&mu_); 63 const char *module_name; 64 uptr module_offset; 65 SymbolizedStack *res = SymbolizedStack::New(addr); 66 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset)) 67 return res; 68 // Always fill data about module name and offset. 69 res->info.FillModuleInfo(module_name, module_offset); 70 for (auto &tool : tools_) { 71 SymbolizerScope sym_scope(this); 72 if (tool.SymbolizePC(addr, res)) { 73 return res; 74 } 75 } 76 return res; 77} 78 79bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) { 80 BlockingMutexLock l(&mu_); 81 const char *module_name; 82 uptr module_offset; 83 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset)) 84 return false; 85 info->Clear(); 86 info->module = internal_strdup(module_name); 87 info->module_offset = module_offset; 88 for (auto &tool : tools_) { 89 SymbolizerScope sym_scope(this); 90 if (tool.SymbolizeData(addr, info)) { 91 return true; 92 } 93 } 94 return true; 95} 96 97bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 98 uptr *module_address) { 99 BlockingMutexLock l(&mu_); 100 const char *internal_module_name = nullptr; 101 if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name, 102 module_address)) 103 return false; 104 105 if (module_name) 106 *module_name = module_names_.GetOwnedCopy(internal_module_name); 107 return true; 108} 109 110void Symbolizer::Flush() { 111 BlockingMutexLock l(&mu_); 112 for (auto &tool : tools_) { 113 SymbolizerScope sym_scope(this); 114 tool.Flush(); 115 } 116} 117 118const char *Symbolizer::Demangle(const char *name) { 119 BlockingMutexLock l(&mu_); 120 for (auto &tool : tools_) { 121 SymbolizerScope sym_scope(this); 122 if (const char *demangled = tool.Demangle(name)) 123 return demangled; 124 } 125 return PlatformDemangle(name); 126} 127 128void Symbolizer::PrepareForSandboxing() { 129 BlockingMutexLock l(&mu_); 130 PlatformPrepareForSandboxing(); 131} 132 133bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address, 134 const char **module_name, 135 uptr *module_offset) { 136 const LoadedModule *module = FindModuleForAddress(address); 137 if (module == nullptr) 138 return false; 139 *module_name = module->full_name(); 140 *module_offset = address - module->base_address(); 141 return true; 142} 143 144const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) { 145 bool modules_were_reloaded = false; 146 if (!modules_fresh_) { 147 modules_.init(); 148 RAW_CHECK(modules_.size() > 0); 149 modules_fresh_ = true; 150 modules_were_reloaded = true; 151 } 152 for (uptr i = 0; i < modules_.size(); i++) { 153 if (modules_[i].containsAddress(address)) { 154 return &modules_[i]; 155 } 156 } 157 // Reload the modules and look up again, if we haven't tried it yet. 158 if (!modules_were_reloaded) { 159 // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors. 160 // It's too aggressive to reload the list of modules each time we fail 161 // to find a module for a given address. 162 modules_fresh_ = false; 163 return FindModuleForAddress(address); 164 } 165 return 0; 166} 167 168Symbolizer *Symbolizer::GetOrInit() { 169 SpinMutexLock l(&init_mu_); 170 if (symbolizer_) 171 return symbolizer_; 172 symbolizer_ = PlatformInit(); 173 CHECK(symbolizer_); 174 return symbolizer_; 175} 176 177// For now we assume the following protocol: 178// For each request of the form 179// <module_name> <module_offset> 180// passed to STDIN, external symbolizer prints to STDOUT response: 181// <function_name> 182// <file_name>:<line_number>:<column_number> 183// <function_name> 184// <file_name>:<line_number>:<column_number> 185// ... 186// <empty line> 187class LLVMSymbolizerProcess : public SymbolizerProcess { 188 public: 189 explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} 190 191 private: 192 bool ReachedEndOfOutput(const char *buffer, uptr length) const override { 193 // Empty line marks the end of llvm-symbolizer output. 194 return length >= 2 && buffer[length - 1] == '\n' && 195 buffer[length - 2] == '\n'; 196 } 197 198 void GetArgV(const char *path_to_binary, 199 const char *(&argv)[kArgVMax]) const override { 200#if defined(__x86_64h__) 201 const char* const kSymbolizerArch = "--default-arch=x86_64h"; 202#elif defined(__x86_64__) 203 const char* const kSymbolizerArch = "--default-arch=x86_64"; 204#elif defined(__i386__) 205 const char* const kSymbolizerArch = "--default-arch=i386"; 206#elif defined(__aarch64__) 207 const char* const kSymbolizerArch = "--default-arch=arm64"; 208#elif defined(__arm__) 209 const char* const kSymbolizerArch = "--default-arch=arm"; 210#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 211 const char* const kSymbolizerArch = "--default-arch=powerpc64"; 212#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 213 const char* const kSymbolizerArch = "--default-arch=powerpc64le"; 214#elif defined(__s390x__) 215 const char* const kSymbolizerArch = "--default-arch=s390x"; 216#elif defined(__s390__) 217 const char* const kSymbolizerArch = "--default-arch=s390"; 218#else 219 const char* const kSymbolizerArch = "--default-arch=unknown"; 220#endif 221 222 const char *const inline_flag = common_flags()->symbolize_inline_frames 223 ? "--inlining=true" 224 : "--inlining=false"; 225 int i = 0; 226 argv[i++] = path_to_binary; 227 argv[i++] = inline_flag; 228 argv[i++] = kSymbolizerArch; 229 argv[i++] = nullptr; 230 } 231}; 232 233LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) 234 : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} 235 236// Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on 237// Windows, so extract tokens from the right hand side first. The column info is 238// also optional. 239static const char *ParseFileLineInfo(AddressInfo *info, const char *str) { 240 char *file_line_info = 0; 241 str = ExtractToken(str, "\n", &file_line_info); 242 CHECK(file_line_info); 243 // Parse the last :<int>, which must be there. 244 char *last_colon = internal_strrchr(file_line_info, ':'); 245 CHECK(last_colon); 246 int line_or_column = internal_atoll(last_colon + 1); 247 // Truncate the string at the last colon and find the next-to-last colon. 248 *last_colon = '\0'; 249 last_colon = internal_strrchr(file_line_info, ':'); 250 if (last_colon && IsDigit(last_colon[1])) { 251 // If the second-to-last colon is followed by a digit, it must be the line 252 // number, and the previous parsed number was a column. 253 info->line = internal_atoll(last_colon + 1); 254 info->column = line_or_column; 255 *last_colon = '\0'; 256 } else { 257 // Otherwise, we have line info but no column info. 258 info->line = line_or_column; 259 info->column = 0; 260 } 261 ExtractToken(file_line_info, "", &info->file); 262 InternalFree(file_line_info); 263 return str; 264} 265 266// Parses one or more two-line strings in the following format: 267// <function_name> 268// <file_name>:<line_number>[:<column_number>] 269// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 270// them use the same output format. 271void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { 272 bool top_frame = true; 273 SymbolizedStack *last = res; 274 while (true) { 275 char *function_name = 0; 276 str = ExtractToken(str, "\n", &function_name); 277 CHECK(function_name); 278 if (function_name[0] == '\0') { 279 // There are no more frames. 280 InternalFree(function_name); 281 break; 282 } 283 SymbolizedStack *cur; 284 if (top_frame) { 285 cur = res; 286 top_frame = false; 287 } else { 288 cur = SymbolizedStack::New(res->info.address); 289 cur->info.FillModuleInfo(res->info.module, res->info.module_offset); 290 last->next = cur; 291 last = cur; 292 } 293 294 AddressInfo *info = &cur->info; 295 info->function = function_name; 296 str = ParseFileLineInfo(info, str); 297 298 // Functions and filenames can be "??", in which case we write 0 299 // to address info to mark that names are unknown. 300 if (0 == internal_strcmp(info->function, "??")) { 301 InternalFree(info->function); 302 info->function = 0; 303 } 304 if (0 == internal_strcmp(info->file, "??")) { 305 InternalFree(info->file); 306 info->file = 0; 307 } 308 } 309} 310 311// Parses a two-line string in the following format: 312// <symbol_name> 313// <start_address> <size> 314// Used by LLVMSymbolizer and InternalSymbolizer. 315void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { 316 str = ExtractToken(str, "\n", &info->name); 317 str = ExtractUptr(str, " ", &info->start); 318 str = ExtractUptr(str, "\n", &info->size); 319} 320 321bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { 322 if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module, 323 stack->info.module_offset)) { 324 ParseSymbolizePCOutput(buf, stack); 325 return true; 326 } 327 return false; 328} 329 330bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { 331 if (const char *buf = 332 SendCommand(/*is_data*/ true, info->module, info->module_offset)) { 333 ParseSymbolizeDataOutput(buf, info); 334 info->start += (addr - info->module_offset); // Add the base address. 335 return true; 336 } 337 return false; 338} 339 340const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name, 341 uptr module_offset) { 342 CHECK(module_name); 343 internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", 344 is_data ? "DATA " : "", module_name, module_offset); 345 return symbolizer_process_->SendCommand(buffer_); 346} 347 348SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty) 349 : path_(path), 350 input_fd_(kInvalidFd), 351 output_fd_(kInvalidFd), 352 times_restarted_(0), 353 failed_to_start_(false), 354 reported_invalid_path_(false), 355 use_forkpty_(use_forkpty) { 356 CHECK(path_); 357 CHECK_NE(path_[0], '\0'); 358} 359 360const char *SymbolizerProcess::SendCommand(const char *command) { 361 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { 362 // Start or restart symbolizer if we failed to send command to it. 363 if (const char *res = SendCommandImpl(command)) 364 return res; 365 Restart(); 366 } 367 if (!failed_to_start_) { 368 Report("WARNING: Failed to use and restart external symbolizer!\n"); 369 failed_to_start_ = true; 370 } 371 return 0; 372} 373 374const char *SymbolizerProcess::SendCommandImpl(const char *command) { 375 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) 376 return 0; 377 if (!WriteToSymbolizer(command, internal_strlen(command))) 378 return 0; 379 if (!ReadFromSymbolizer(buffer_, kBufferSize)) 380 return 0; 381 return buffer_; 382} 383 384bool SymbolizerProcess::Restart() { 385 if (input_fd_ != kInvalidFd) 386 CloseFile(input_fd_); 387 if (output_fd_ != kInvalidFd) 388 CloseFile(output_fd_); 389 return StartSymbolizerSubprocess(); 390} 391 392bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { 393 if (max_length == 0) 394 return true; 395 uptr read_len = 0; 396 while (true) { 397 uptr just_read = 0; 398 bool success = ReadFromFile(input_fd_, buffer + read_len, 399 max_length - read_len - 1, &just_read); 400 // We can't read 0 bytes, as we don't expect external symbolizer to close 401 // its stdout. 402 if (!success || just_read == 0) { 403 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 404 return false; 405 } 406 read_len += just_read; 407 if (ReachedEndOfOutput(buffer, read_len)) 408 break; 409 } 410 buffer[read_len] = '\0'; 411 return true; 412} 413 414bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { 415 if (length == 0) 416 return true; 417 uptr write_len = 0; 418 bool success = WriteToFile(output_fd_, buffer, length, &write_len); 419 if (!success || write_len != length) { 420 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 421 return false; 422 } 423 return true; 424} 425 426} // namespace __sanitizer 427