sanitizer_symbolizer_libcdep.cc revision 1.3
1//===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
2//
3// This file is distributed under the University of Illinois Open Source
4// License. See LICENSE.TXT for details.
5//
6//===----------------------------------------------------------------------===//
7//
8// This file is shared between AddressSanitizer and ThreadSanitizer
9// run-time libraries.
10//===----------------------------------------------------------------------===//
11
12#include "sanitizer_allocator_internal.h"
13#include "sanitizer_internal_defs.h"
14#include "sanitizer_symbolizer_internal.h"
15
16namespace __sanitizer {
17
18const char *ExtractToken(const char *str, const char *delims, char **result) {
19  uptr prefix_len = internal_strcspn(str, delims);
20  *result = (char*)InternalAlloc(prefix_len + 1);
21  internal_memcpy(*result, str, prefix_len);
22  (*result)[prefix_len] = '\0';
23  const char *prefix_end = str + prefix_len;
24  if (*prefix_end != '\0') prefix_end++;
25  return prefix_end;
26}
27
28const char *ExtractInt(const char *str, const char *delims, int *result) {
29  char *buff;
30  const char *ret = ExtractToken(str, delims, &buff);
31  if (buff != 0) {
32    *result = (int)internal_atoll(buff);
33  }
34  InternalFree(buff);
35  return ret;
36}
37
38const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
39  char *buff;
40  const char *ret = ExtractToken(str, delims, &buff);
41  if (buff != 0) {
42    *result = (uptr)internal_atoll(buff);
43  }
44  InternalFree(buff);
45  return ret;
46}
47
48const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
49                                      char **result) {
50  const char *found_delimiter = internal_strstr(str, delimiter);
51  uptr prefix_len =
52      found_delimiter ? found_delimiter - str : internal_strlen(str);
53  *result = (char *)InternalAlloc(prefix_len + 1);
54  internal_memcpy(*result, str, prefix_len);
55  (*result)[prefix_len] = '\0';
56  const char *prefix_end = str + prefix_len;
57  if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
58  return prefix_end;
59}
60
61SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
62  BlockingMutexLock l(&mu_);
63  const char *module_name;
64  uptr module_offset;
65  SymbolizedStack *res = SymbolizedStack::New(addr);
66  if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
67    return res;
68  // Always fill data about module name and offset.
69  res->info.FillModuleInfo(module_name, module_offset);
70  for (auto &tool : tools_) {
71    SymbolizerScope sym_scope(this);
72    if (tool.SymbolizePC(addr, res)) {
73      return res;
74    }
75  }
76  return res;
77}
78
79bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
80  BlockingMutexLock l(&mu_);
81  const char *module_name;
82  uptr module_offset;
83  if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset))
84    return false;
85  info->Clear();
86  info->module = internal_strdup(module_name);
87  info->module_offset = module_offset;
88  for (auto &tool : tools_) {
89    SymbolizerScope sym_scope(this);
90    if (tool.SymbolizeData(addr, info)) {
91      return true;
92    }
93  }
94  return true;
95}
96
97bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
98                                             uptr *module_address) {
99  BlockingMutexLock l(&mu_);
100  const char *internal_module_name = nullptr;
101  if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
102                                         module_address))
103    return false;
104
105  if (module_name)
106    *module_name = module_names_.GetOwnedCopy(internal_module_name);
107  return true;
108}
109
110void Symbolizer::Flush() {
111  BlockingMutexLock l(&mu_);
112  for (auto &tool : tools_) {
113    SymbolizerScope sym_scope(this);
114    tool.Flush();
115  }
116}
117
118const char *Symbolizer::Demangle(const char *name) {
119  BlockingMutexLock l(&mu_);
120  for (auto &tool : tools_) {
121    SymbolizerScope sym_scope(this);
122    if (const char *demangled = tool.Demangle(name))
123      return demangled;
124  }
125  return PlatformDemangle(name);
126}
127
128void Symbolizer::PrepareForSandboxing() {
129  BlockingMutexLock l(&mu_);
130  PlatformPrepareForSandboxing();
131}
132
133bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
134                                                   const char **module_name,
135                                                   uptr *module_offset) {
136  const LoadedModule *module = FindModuleForAddress(address);
137  if (module == nullptr)
138    return false;
139  *module_name = module->full_name();
140  *module_offset = address - module->base_address();
141  return true;
142}
143
144const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
145  bool modules_were_reloaded = false;
146  if (!modules_fresh_) {
147    modules_.init();
148    RAW_CHECK(modules_.size() > 0);
149    modules_fresh_ = true;
150    modules_were_reloaded = true;
151  }
152  for (uptr i = 0; i < modules_.size(); i++) {
153    if (modules_[i].containsAddress(address)) {
154      return &modules_[i];
155    }
156  }
157  // Reload the modules and look up again, if we haven't tried it yet.
158  if (!modules_were_reloaded) {
159    // FIXME: set modules_fresh_ from dlopen()/dlclose() interceptors.
160    // It's too aggressive to reload the list of modules each time we fail
161    // to find a module for a given address.
162    modules_fresh_ = false;
163    return FindModuleForAddress(address);
164  }
165  return 0;
166}
167
168Symbolizer *Symbolizer::GetOrInit() {
169  SpinMutexLock l(&init_mu_);
170  if (symbolizer_)
171    return symbolizer_;
172  symbolizer_ = PlatformInit();
173  CHECK(symbolizer_);
174  return symbolizer_;
175}
176
177// For now we assume the following protocol:
178// For each request of the form
179//   <module_name> <module_offset>
180// passed to STDIN, external symbolizer prints to STDOUT response:
181//   <function_name>
182//   <file_name>:<line_number>:<column_number>
183//   <function_name>
184//   <file_name>:<line_number>:<column_number>
185//   ...
186//   <empty line>
187class LLVMSymbolizerProcess : public SymbolizerProcess {
188 public:
189  explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {}
190
191 private:
192  bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
193    // Empty line marks the end of llvm-symbolizer output.
194    return length >= 2 && buffer[length - 1] == '\n' &&
195           buffer[length - 2] == '\n';
196  }
197
198  void GetArgV(const char *path_to_binary,
199               const char *(&argv)[kArgVMax]) const override {
200#if defined(__x86_64h__)
201    const char* const kSymbolizerArch = "--default-arch=x86_64h";
202#elif defined(__x86_64__)
203    const char* const kSymbolizerArch = "--default-arch=x86_64";
204#elif defined(__i386__)
205    const char* const kSymbolizerArch = "--default-arch=i386";
206#elif defined(__aarch64__)
207    const char* const kSymbolizerArch = "--default-arch=arm64";
208#elif defined(__arm__)
209    const char* const kSymbolizerArch = "--default-arch=arm";
210#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
211    const char* const kSymbolizerArch = "--default-arch=powerpc64";
212#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
213    const char* const kSymbolizerArch = "--default-arch=powerpc64le";
214#elif defined(__s390x__)
215    const char* const kSymbolizerArch = "--default-arch=s390x";
216#elif defined(__s390__)
217    const char* const kSymbolizerArch = "--default-arch=s390";
218#else
219    const char* const kSymbolizerArch = "--default-arch=unknown";
220#endif
221
222    const char *const inline_flag = common_flags()->symbolize_inline_frames
223                                        ? "--inlining=true"
224                                        : "--inlining=false";
225    int i = 0;
226    argv[i++] = path_to_binary;
227    argv[i++] = inline_flag;
228    argv[i++] = kSymbolizerArch;
229    argv[i++] = nullptr;
230  }
231};
232
233LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
234    : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
235
236// Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
237// Windows, so extract tokens from the right hand side first. The column info is
238// also optional.
239static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
240  char *file_line_info = 0;
241  str = ExtractToken(str, "\n", &file_line_info);
242  CHECK(file_line_info);
243  // Parse the last :<int>, which must be there.
244  char *last_colon = internal_strrchr(file_line_info, ':');
245  CHECK(last_colon);
246  int line_or_column = internal_atoll(last_colon + 1);
247  // Truncate the string at the last colon and find the next-to-last colon.
248  *last_colon = '\0';
249  last_colon = internal_strrchr(file_line_info, ':');
250  if (last_colon && IsDigit(last_colon[1])) {
251    // If the second-to-last colon is followed by a digit, it must be the line
252    // number, and the previous parsed number was a column.
253    info->line = internal_atoll(last_colon + 1);
254    info->column = line_or_column;
255    *last_colon = '\0';
256  } else {
257    // Otherwise, we have line info but no column info.
258    info->line = line_or_column;
259    info->column = 0;
260  }
261  ExtractToken(file_line_info, "", &info->file);
262  InternalFree(file_line_info);
263  return str;
264}
265
266// Parses one or more two-line strings in the following format:
267//   <function_name>
268//   <file_name>:<line_number>[:<column_number>]
269// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
270// them use the same output format.
271void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
272  bool top_frame = true;
273  SymbolizedStack *last = res;
274  while (true) {
275    char *function_name = 0;
276    str = ExtractToken(str, "\n", &function_name);
277    CHECK(function_name);
278    if (function_name[0] == '\0') {
279      // There are no more frames.
280      InternalFree(function_name);
281      break;
282    }
283    SymbolizedStack *cur;
284    if (top_frame) {
285      cur = res;
286      top_frame = false;
287    } else {
288      cur = SymbolizedStack::New(res->info.address);
289      cur->info.FillModuleInfo(res->info.module, res->info.module_offset);
290      last->next = cur;
291      last = cur;
292    }
293
294    AddressInfo *info = &cur->info;
295    info->function = function_name;
296    str = ParseFileLineInfo(info, str);
297
298    // Functions and filenames can be "??", in which case we write 0
299    // to address info to mark that names are unknown.
300    if (0 == internal_strcmp(info->function, "??")) {
301      InternalFree(info->function);
302      info->function = 0;
303    }
304    if (0 == internal_strcmp(info->file, "??")) {
305      InternalFree(info->file);
306      info->file = 0;
307    }
308  }
309}
310
311// Parses a two-line string in the following format:
312//   <symbol_name>
313//   <start_address> <size>
314// Used by LLVMSymbolizer and InternalSymbolizer.
315void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
316  str = ExtractToken(str, "\n", &info->name);
317  str = ExtractUptr(str, " ", &info->start);
318  str = ExtractUptr(str, "\n", &info->size);
319}
320
321bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
322  if (const char *buf = SendCommand(/*is_data*/ false, stack->info.module,
323                                    stack->info.module_offset)) {
324    ParseSymbolizePCOutput(buf, stack);
325    return true;
326  }
327  return false;
328}
329
330bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
331  if (const char *buf =
332          SendCommand(/*is_data*/ true, info->module, info->module_offset)) {
333    ParseSymbolizeDataOutput(buf, info);
334    info->start += (addr - info->module_offset); // Add the base address.
335    return true;
336  }
337  return false;
338}
339
340const char *LLVMSymbolizer::SendCommand(bool is_data, const char *module_name,
341                                        uptr module_offset) {
342  CHECK(module_name);
343  internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n",
344                    is_data ? "DATA " : "", module_name, module_offset);
345  return symbolizer_process_->SendCommand(buffer_);
346}
347
348SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty)
349    : path_(path),
350      input_fd_(kInvalidFd),
351      output_fd_(kInvalidFd),
352      times_restarted_(0),
353      failed_to_start_(false),
354      reported_invalid_path_(false),
355      use_forkpty_(use_forkpty) {
356  CHECK(path_);
357  CHECK_NE(path_[0], '\0');
358}
359
360const char *SymbolizerProcess::SendCommand(const char *command) {
361  for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
362    // Start or restart symbolizer if we failed to send command to it.
363    if (const char *res = SendCommandImpl(command))
364      return res;
365    Restart();
366  }
367  if (!failed_to_start_) {
368    Report("WARNING: Failed to use and restart external symbolizer!\n");
369    failed_to_start_ = true;
370  }
371  return 0;
372}
373
374const char *SymbolizerProcess::SendCommandImpl(const char *command) {
375  if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
376      return 0;
377  if (!WriteToSymbolizer(command, internal_strlen(command)))
378      return 0;
379  if (!ReadFromSymbolizer(buffer_, kBufferSize))
380      return 0;
381  return buffer_;
382}
383
384bool SymbolizerProcess::Restart() {
385  if (input_fd_ != kInvalidFd)
386    CloseFile(input_fd_);
387  if (output_fd_ != kInvalidFd)
388    CloseFile(output_fd_);
389  return StartSymbolizerSubprocess();
390}
391
392bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) {
393  if (max_length == 0)
394    return true;
395  uptr read_len = 0;
396  while (true) {
397    uptr just_read = 0;
398    bool success = ReadFromFile(input_fd_, buffer + read_len,
399                                max_length - read_len - 1, &just_read);
400    // We can't read 0 bytes, as we don't expect external symbolizer to close
401    // its stdout.
402    if (!success || just_read == 0) {
403      Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
404      return false;
405    }
406    read_len += just_read;
407    if (ReachedEndOfOutput(buffer, read_len))
408      break;
409  }
410  buffer[read_len] = '\0';
411  return true;
412}
413
414bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
415  if (length == 0)
416    return true;
417  uptr write_len = 0;
418  bool success = WriteToFile(output_fd_, buffer, length, &write_len);
419  if (!success || write_len != length) {
420    Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
421    return false;
422  }
423  return true;
424}
425
426}  // namespace __sanitizer
427