1//===-- sanitizer_symbolizer_internal.h -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Header for internal classes and functions to be used by implementations of
10// symbolizers.
11//
12//===----------------------------------------------------------------------===//
13#ifndef SANITIZER_SYMBOLIZER_INTERNAL_H
14#define SANITIZER_SYMBOLIZER_INTERNAL_H
15
16#include "sanitizer_symbolizer.h"
17#include "sanitizer_file.h"
18#include "sanitizer_vector.h"
19
20namespace __sanitizer {
21
// Parsing helpers. 'str' is searched for delimiter(s) and a string, int or
// uptr is extracted into '*result'. When extracting a string, a newly
// allocated (using InternalAlloc) and null-terminated buffer is returned.
// Each helper returns a pointer to the next character after the found
// delimiter.
// NOTE(review): string results are presumably owned by the caller and
// released with InternalFree -- confirm against the definitions.
const char *ExtractToken(const char *str, const char *delims, char **result);
const char *ExtractInt(const char *str, const char *delims, int *result);
const char *ExtractUptr(const char *str, const char *delims, uptr *result);
// NOTE(review): takes a single 'delimiter' string where the helpers above take
// 'delims'; presumably the whole string is matched as one delimiter -- confirm.
const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
                                      char **result);
31
// Demangles the symbol |name|.
// NOTE(review): judging by the function name, both the Swift and the C++
// demanglers are involved; what is returned when |name| cannot be demangled,
// and who owns the returned string, is not visible in this header -- confirm
// against the definition.
const char *DemangleSwiftAndCXX(const char *name);
33
34// SymbolizerTool is an interface that is implemented by individual "tools"
35// that can perform symbolication (external llvm-symbolizer, libbacktrace,
36// Windows DbgHelp symbolizer, etc.).
37class SymbolizerTool {
38 public:
39  // The main |Symbolizer| class implements a "fallback chain" of symbolizer
40  // tools. In a request to symbolize an address, if one tool returns false,
41  // the next tool in the chain will be tried.
42  SymbolizerTool *next;
43
44  SymbolizerTool() : next(nullptr) { }
45
46  // Can't declare pure virtual functions in sanitizer runtimes:
47  // __cxa_pure_virtual might be unavailable.
48
49  // The |stack| parameter is inout. It is pre-filled with the address,
50  // module base and module offset values and is to be used to construct
51  // other stack frames.
52  virtual bool SymbolizePC(uptr addr, SymbolizedStack *stack) {
53    UNIMPLEMENTED();
54  }
55
56  // The |info| parameter is inout. It is pre-filled with the module base
57  // and module offset values.
58  virtual bool SymbolizeData(uptr addr, DataInfo *info) {
59    UNIMPLEMENTED();
60  }
61
62  virtual bool SymbolizeFrame(uptr addr, FrameInfo *info) {
63    return false;
64  }
65
66  virtual void Flush() {}
67
68  // Return nullptr to fallback to the default platform-specific demangler.
69  virtual const char *Demangle(const char *name) {
70    return nullptr;
71  }
72};
73
74// SymbolizerProcess encapsulates communication between the tool and
75// external symbolizer program, running in a different subprocess.
76// SymbolizerProcess may not be used from two threads simultaneously.
77class SymbolizerProcess {
78 public:
79  explicit SymbolizerProcess(const char *path, bool use_posix_spawn = false);
80  const char *SendCommand(const char *command);
81
82 protected:
83  /// The maximum number of arguments required to invoke a tool process.
84  static const unsigned kArgVMax = 6;
85
86  // Customizable by subclasses.
87  virtual bool StartSymbolizerSubprocess();
88  virtual bool ReadFromSymbolizer(char *buffer, uptr max_length);
89
90 private:
91  virtual bool ReachedEndOfOutput(const char *buffer, uptr length) const {
92    UNIMPLEMENTED();
93  }
94
95  /// Fill in an argv array to invoke the child process.
96  virtual void GetArgV(const char *path_to_binary,
97                       const char *(&argv)[kArgVMax]) const {
98    UNIMPLEMENTED();
99  }
100
101  bool Restart();
102  const char *SendCommandImpl(const char *command);
103  bool WriteToSymbolizer(const char *buffer, uptr length);
104
105  const char *path_;
106  fd_t input_fd_;
107  fd_t output_fd_;
108
109  static const uptr kBufferSize = 16 * 1024;
110  char buffer_[kBufferSize];
111
112  static const uptr kMaxTimesRestarted = 5;
113  static const int kSymbolizerStartupTimeMillis = 10;
114  uptr times_restarted_;
115  bool failed_to_start_;
116  bool reported_invalid_path_;
117  bool use_posix_spawn_;
118};
119
// Forward declaration only: LLVMSymbolizer below stores a pointer to this
// type, and the full definition is not part of this header.
class LLVMSymbolizerProcess;

// This tool invokes llvm-symbolizer in a subprocess. It should be as portable
// as the llvm-symbolizer tool is.
class LLVMSymbolizer : public SymbolizerTool {
 public:
  // |path| and |allocator| are consumed by the out-of-line definition.
  // NOTE(review): presumably |path| locates the llvm-symbolizer binary and
  // |allocator| is used to create the LLVMSymbolizerProcess -- confirm.
  explicit LLVMSymbolizer(const char *path, LowLevelAllocator *allocator);

  // SymbolizerTool overrides; see the base class for the inout contract of
  // |stack| and |info|.
  bool SymbolizePC(uptr addr, SymbolizedStack *stack) override;
  bool SymbolizeData(uptr addr, DataInfo *info) override;
  bool SymbolizeFrame(uptr addr, FrameInfo *info) override;

 private:
  // NOTE(review): judging by the name and parameters, builds a command from
  // |command_prefix|, |module_name|, |module_offset| and |arch|, sends it to
  // the symbolizer process and returns its reply -- confirm in the definition.
  const char *FormatAndSendCommand(const char *command_prefix,
                                   const char *module_name, uptr module_offset,
                                   ModuleArch arch);

  LLVMSymbolizerProcess *symbolizer_process_;
  // 16 KiB character buffer embedded in the object.
  // NOTE(review): presumably scratch space for formatting commands -- confirm.
  static const uptr kBufferSize = 16 * 1024;
  char buffer_[kBufferSize];
};
141
// Parses one or more two-line strings in the following format:
//   <function_name>
//   <file_name>:<line_number>[:<column_number>]
// Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
// them use the same output format.
// The function returns void: whatever is parsed out of |str| is reported
// through |res| only, so there is no success/failure return value to check.
void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res);
149
// Parses a two-line string in the following format:
//   <symbol_name>
//   <start_address> <size>
// Used by LLVMSymbolizer and InternalSymbolizer.
// The function returns void; the parsed values are reported through |info|.
void ParseSymbolizeDataOutput(const char *str, DataInfo *info);
155
156}  // namespace __sanitizer
157
158#endif  // SANITIZER_SYMBOLIZER_INTERNAL_H
159