1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===// 2// 3// This file is distributed under the University of Illinois Open Source 4// License. See LICENSE.TXT for details. 5// 6//===----------------------------------------------------------------------===// 7// 8// This file is shared between AddressSanitizer and ThreadSanitizer 9// run-time libraries. 10//===----------------------------------------------------------------------===// 11 12#include "sanitizer_allocator_internal.h" 13#include "sanitizer_internal_defs.h" 14#include "sanitizer_symbolizer_internal.h" 15 16namespace __sanitizer { 17 18Symbolizer *Symbolizer::GetOrInit() { 19 SpinMutexLock l(&init_mu_); 20 if (symbolizer_) 21 return symbolizer_; 22 symbolizer_ = PlatformInit(); 23 CHECK(symbolizer_); 24 return symbolizer_; 25} 26 27// See sanitizer_symbolizer_markup.cc. 28#if !SANITIZER_SYMBOLIZER_MARKUP 29 30const char *ExtractToken(const char *str, const char *delims, char **result) { 31 uptr prefix_len = internal_strcspn(str, delims); 32 *result = (char*)InternalAlloc(prefix_len + 1); 33 internal_memcpy(*result, str, prefix_len); 34 (*result)[prefix_len] = '\0'; 35 const char *prefix_end = str + prefix_len; 36 if (*prefix_end != '\0') prefix_end++; 37 return prefix_end; 38} 39 40const char *ExtractInt(const char *str, const char *delims, int *result) { 41 char *buff; 42 const char *ret = ExtractToken(str, delims, &buff); 43 if (buff != 0) { 44 *result = (int)internal_atoll(buff); 45 } 46 InternalFree(buff); 47 return ret; 48} 49 50const char *ExtractUptr(const char *str, const char *delims, uptr *result) { 51 char *buff; 52 const char *ret = ExtractToken(str, delims, &buff); 53 if (buff != 0) { 54 *result = (uptr)internal_atoll(buff); 55 } 56 InternalFree(buff); 57 return ret; 58} 59 60const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 61 char **result) { 62 const char *found_delimiter = internal_strstr(str, delimiter); 63 uptr prefix_len = 64 found_delimiter ? found_delimiter - str : internal_strlen(str); 65 *result = (char *)InternalAlloc(prefix_len + 1); 66 internal_memcpy(*result, str, prefix_len); 67 (*result)[prefix_len] = '\0'; 68 const char *prefix_end = str + prefix_len; 69 if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter); 70 return prefix_end; 71} 72 73SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) { 74 BlockingMutexLock l(&mu_); 75 const char *module_name; 76 uptr module_offset; 77 ModuleArch arch; 78 SymbolizedStack *res = SymbolizedStack::New(addr); 79 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset, 80 &arch)) 81 return res; 82 // Always fill data about module name and offset. 83 res->info.FillModuleInfo(module_name, module_offset, arch); 84 for (auto &tool : tools_) { 85 SymbolizerScope sym_scope(this); 86 if (tool.SymbolizePC(addr, res)) { 87 return res; 88 } 89 } 90 return res; 91} 92 93bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) { 94 BlockingMutexLock l(&mu_); 95 const char *module_name; 96 uptr module_offset; 97 ModuleArch arch; 98 if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset, 99 &arch)) 100 return false; 101 info->Clear(); 102 info->module = internal_strdup(module_name); 103 info->module_offset = module_offset; 104 info->module_arch = arch; 105 for (auto &tool : tools_) { 106 SymbolizerScope sym_scope(this); 107 if (tool.SymbolizeData(addr, info)) { 108 return true; 109 } 110 } 111 return true; 112} 113 114bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 115 uptr *module_address) { 116 BlockingMutexLock l(&mu_); 117 const char *internal_module_name = nullptr; 118 ModuleArch arch; 119 if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name, 120 module_address, &arch)) 121 return false; 122 123 if (module_name) 124 *module_name = module_names_.GetOwnedCopy(internal_module_name); 125 return true; 126} 127 128void Symbolizer::Flush() { 129 BlockingMutexLock l(&mu_); 130 for (auto &tool : tools_) { 131 SymbolizerScope sym_scope(this); 132 tool.Flush(); 133 } 134} 135 136const char *Symbolizer::Demangle(const char *name) { 137 BlockingMutexLock l(&mu_); 138 for (auto &tool : tools_) { 139 SymbolizerScope sym_scope(this); 140 if (const char *demangled = tool.Demangle(name)) 141 return demangled; 142 } 143 return PlatformDemangle(name); 144} 145 146bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address, 147 const char **module_name, 148 uptr *module_offset, 149 ModuleArch *module_arch) { 150 const LoadedModule *module = FindModuleForAddress(address); 151 if (module == nullptr) 152 return false; 153 *module_name = module->full_name(); 154 *module_offset = address - module->base_address(); 155 *module_arch = module->arch(); 156 return true; 157} 158 159void Symbolizer::RefreshModules() { 160 modules_.init(); 161 fallback_modules_.fallbackInit(); 162 RAW_CHECK(modules_.size() > 0); 163 modules_fresh_ = true; 164} 165 166static const LoadedModule *SearchForModule(const ListOfModules &modules, 167 uptr address) { 168 for (uptr i = 0; i < modules.size(); i++) { 169 if (modules[i].containsAddress(address)) { 170 return &modules[i]; 171 } 172 } 173 return nullptr; 174} 175 176const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) { 177 bool modules_were_reloaded = false; 178 if (!modules_fresh_) { 179 RefreshModules(); 180 modules_were_reloaded = true; 181 } 182 const LoadedModule *module = SearchForModule(modules_, address); 183 if (module) return module; 184 185 // dlopen/dlclose interceptors invalidate the module list, but when 186 // interception is disabled, we need to retry if the lookup fails in 187 // case the module list changed. 188#if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE 189 if (!modules_were_reloaded) { 190 RefreshModules(); 191 module = SearchForModule(modules_, address); 192 if (module) return module; 193 } 194#endif 195 196 if (fallback_modules_.size()) { 197 module = SearchForModule(fallback_modules_, address); 198 } 199 return module; 200} 201 202// For now we assume the following protocol: 203// For each request of the form 204// <module_name> <module_offset> 205// passed to STDIN, external symbolizer prints to STDOUT response: 206// <function_name> 207// <file_name>:<line_number>:<column_number> 208// <function_name> 209// <file_name>:<line_number>:<column_number> 210// ... 211// <empty line> 212class LLVMSymbolizerProcess : public SymbolizerProcess { 213 public: 214 explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {} 215 216 private: 217 bool ReachedEndOfOutput(const char *buffer, uptr length) const override { 218 // Empty line marks the end of llvm-symbolizer output. 219 return length >= 2 && buffer[length - 1] == '\n' && 220 buffer[length - 2] == '\n'; 221 } 222 223 // When adding a new architecture, don't forget to also update 224 // script/asan_symbolize.py and sanitizer_common.h. 225 void GetArgV(const char *path_to_binary, 226 const char *(&argv)[kArgVMax]) const override { 227#if defined(__x86_64h__) 228 const char* const kSymbolizerArch = "--default-arch=x86_64h"; 229#elif defined(__x86_64__) 230 const char* const kSymbolizerArch = "--default-arch=x86_64"; 231#elif defined(__i386__) 232 const char* const kSymbolizerArch = "--default-arch=i386"; 233#elif defined(__aarch64__) 234 const char* const kSymbolizerArch = "--default-arch=arm64"; 235#elif defined(__arm__) 236 const char* const kSymbolizerArch = "--default-arch=arm"; 237#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 238 const char* const kSymbolizerArch = "--default-arch=powerpc64"; 239#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 240 const char* const kSymbolizerArch = "--default-arch=powerpc64le"; 241#elif defined(__s390x__) 242 const char* const kSymbolizerArch = "--default-arch=s390x"; 243#elif defined(__s390__) 244 const char* const kSymbolizerArch = "--default-arch=s390"; 245#else 246 const char* const kSymbolizerArch = "--default-arch=unknown"; 247#endif 248 249 const char *const inline_flag = common_flags()->symbolize_inline_frames 250 ? "--inlining=true" 251 : "--inlining=false"; 252 int i = 0; 253 argv[i++] = path_to_binary; 254 argv[i++] = inline_flag; 255 argv[i++] = kSymbolizerArch; 256 argv[i++] = nullptr; 257 } 258}; 259 260LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) 261 : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} 262 263// Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on 264// Windows, so extract tokens from the right hand side first. The column info is 265// also optional. 266static const char *ParseFileLineInfo(AddressInfo *info, const char *str) { 267 char *file_line_info = 0; 268 str = ExtractToken(str, "\n", &file_line_info); 269 CHECK(file_line_info); 270 271 if (uptr size = internal_strlen(file_line_info)) { 272 char *back = file_line_info + size - 1; 273 for (int i = 0; i < 2; ++i) { 274 while (back > file_line_info && IsDigit(*back)) --back; 275 if (*back != ':' || !IsDigit(back[1])) break; 276 info->column = info->line; 277 info->line = internal_atoll(back + 1); 278 // Truncate the string at the colon to keep only filename. 279 *back = '\0'; 280 --back; 281 } 282 ExtractToken(file_line_info, "", &info->file); 283 } 284 285 InternalFree(file_line_info); 286 return str; 287} 288 289// Parses one or more two-line strings in the following format: 290// <function_name> 291// <file_name>:<line_number>[:<column_number>] 292// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 293// them use the same output format. 294void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { 295 bool top_frame = true; 296 SymbolizedStack *last = res; 297 while (true) { 298 char *function_name = 0; 299 str = ExtractToken(str, "\n", &function_name); 300 CHECK(function_name); 301 if (function_name[0] == '\0') { 302 // There are no more frames. 303 InternalFree(function_name); 304 break; 305 } 306 SymbolizedStack *cur; 307 if (top_frame) { 308 cur = res; 309 top_frame = false; 310 } else { 311 cur = SymbolizedStack::New(res->info.address); 312 cur->info.FillModuleInfo(res->info.module, res->info.module_offset, 313 res->info.module_arch); 314 last->next = cur; 315 last = cur; 316 } 317 318 AddressInfo *info = &cur->info; 319 info->function = function_name; 320 str = ParseFileLineInfo(info, str); 321 322 // Functions and filenames can be "??", in which case we write 0 323 // to address info to mark that names are unknown. 324 if (0 == internal_strcmp(info->function, "??")) { 325 InternalFree(info->function); 326 info->function = 0; 327 } 328 if (0 == internal_strcmp(info->file, "??")) { 329 InternalFree(info->file); 330 info->file = 0; 331 } 332 } 333} 334 335// Parses a two-line string in the following format: 336// <symbol_name> 337// <start_address> <size> 338// Used by LLVMSymbolizer and InternalSymbolizer. 339void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { 340 str = ExtractToken(str, "\n", &info->name); 341 str = ExtractUptr(str, " ", &info->start); 342 str = ExtractUptr(str, "\n", &info->size); 343} 344 345bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { 346 AddressInfo *info = &stack->info; 347 const char *buf = FormatAndSendCommand( 348 /*is_data*/ false, info->module, info->module_offset, info->module_arch); 349 if (buf) { 350 ParseSymbolizePCOutput(buf, stack); 351 return true; 352 } 353 return false; 354} 355 356bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { 357 const char *buf = FormatAndSendCommand( 358 /*is_data*/ true, info->module, info->module_offset, info->module_arch); 359 if (buf) { 360 ParseSymbolizeDataOutput(buf, info); 361 info->start += (addr - info->module_offset); // Add the base address. 362 return true; 363 } 364 return false; 365} 366 367const char *LLVMSymbolizer::FormatAndSendCommand(bool is_data, 368 const char *module_name, 369 uptr module_offset, 370 ModuleArch arch) { 371 CHECK(module_name); 372 const char *is_data_str = is_data ? "DATA " : ""; 373 if (arch == kModuleArchUnknown) { 374 if (internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", is_data_str, 375 module_name, 376 module_offset) >= static_cast<int>(kBufferSize)) { 377 Report("WARNING: Command buffer too small"); 378 return nullptr; 379 } 380 } else { 381 if (internal_snprintf(buffer_, kBufferSize, "%s\"%s:%s\" 0x%zx\n", 382 is_data_str, module_name, ModuleArchToString(arch), 383 module_offset) >= static_cast<int>(kBufferSize)) { 384 Report("WARNING: Command buffer too small"); 385 return nullptr; 386 } 387 } 388 return symbolizer_process_->SendCommand(buffer_); 389} 390 391SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty) 392 : path_(path), 393 input_fd_(kInvalidFd), 394 output_fd_(kInvalidFd), 395 times_restarted_(0), 396 failed_to_start_(false), 397 reported_invalid_path_(false), 398 use_forkpty_(use_forkpty) { 399 CHECK(path_); 400 CHECK_NE(path_[0], '\0'); 401} 402 403static bool IsSameModule(const char* path) { 404 if (const char* ProcessName = GetProcessName()) { 405 if (const char* SymbolizerName = StripModuleName(path)) { 406 return !internal_strcmp(ProcessName, SymbolizerName); 407 } 408 } 409 return false; 410} 411 412const char *SymbolizerProcess::SendCommand(const char *command) { 413 if (failed_to_start_) 414 return nullptr; 415 if (IsSameModule(path_)) { 416 Report("WARNING: Symbolizer was blocked from starting itself!\n"); 417 failed_to_start_ = true; 418 return nullptr; 419 } 420 for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { 421 // Start or restart symbolizer if we failed to send command to it. 422 if (const char *res = SendCommandImpl(command)) 423 return res; 424 Restart(); 425 } 426 if (!failed_to_start_) { 427 Report("WARNING: Failed to use and restart external symbolizer!\n"); 428 failed_to_start_ = true; 429 } 430 return 0; 431} 432 433const char *SymbolizerProcess::SendCommandImpl(const char *command) { 434 if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) 435 return 0; 436 if (!WriteToSymbolizer(command, internal_strlen(command))) 437 return 0; 438 if (!ReadFromSymbolizer(buffer_, kBufferSize)) 439 return 0; 440 return buffer_; 441} 442 443bool SymbolizerProcess::Restart() { 444 if (input_fd_ != kInvalidFd) 445 CloseFile(input_fd_); 446 if (output_fd_ != kInvalidFd) 447 CloseFile(output_fd_); 448 return StartSymbolizerSubprocess(); 449} 450 451bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) { 452 if (max_length == 0) 453 return true; 454 uptr read_len = 0; 455 while (true) { 456 uptr just_read = 0; 457 bool success = ReadFromFile(input_fd_, buffer + read_len, 458 max_length - read_len - 1, &just_read); 459 // We can't read 0 bytes, as we don't expect external symbolizer to close 460 // its stdout. 461 if (!success || just_read == 0) { 462 Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 463 return false; 464 } 465 read_len += just_read; 466 if (ReachedEndOfOutput(buffer, read_len)) 467 break; 468 if (read_len + 1 == max_length) { 469 Report("WARNING: Symbolizer buffer too small\n"); 470 read_len = 0; 471 break; 472 } 473 } 474 buffer[read_len] = '\0'; 475 return true; 476} 477 478bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { 479 if (length == 0) 480 return true; 481 uptr write_len = 0; 482 bool success = WriteToFile(output_fd_, buffer, length, &write_len); 483 if (!success || write_len != length) { 484 Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 485 return false; 486 } 487 return true; 488} 489 490#endif // !SANITIZER_SYMBOLIZER_MARKUP 491 492} // namespace __sanitizer 493