llvm-symbolizer.cpp revision 353358
//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This utility works much like "addr2line". It is able to transform
// tuples (module name, module offset) to code locations (function name,
// file, line number, column number). It is targeted for compiler-rt tools
// (especially AddressSanitizer and ThreadSanitizer) that can use it
// to symbolize stack traces in their error reports.
//
//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/StringRef.h"
18#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
19#include "llvm/DebugInfo/Symbolize/Symbolize.h"
20#include "llvm/Support/COM.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/InitLLVM.h"
25#include "llvm/Support/Path.h"
26#include "llvm/Support/raw_ostream.h"
27#include <cstdio>
28#include <cstring>
29#include <string>
30
31using namespace llvm;
32using namespace symbolize;
33
34static cl::opt<bool>
35ClUseSymbolTable("use-symbol-table", cl::init(true),
36                 cl::desc("Prefer names in symbol table to names "
37                          "in debug info"));
38
39static cl::opt<FunctionNameKind> ClPrintFunctions(
40    "functions", cl::init(FunctionNameKind::LinkageName),
41    cl::desc("Print function name for a given address"), cl::ValueOptional,
42    cl::values(clEnumValN(FunctionNameKind::None, "none", "omit function name"),
43               clEnumValN(FunctionNameKind::ShortName, "short",
44                          "print short function name"),
45               clEnumValN(FunctionNameKind::LinkageName, "linkage",
46                          "print function linkage name"),
47               // Sentinel value for unspecified value.
48               clEnumValN(FunctionNameKind::LinkageName, "", "")));
49static cl::alias ClPrintFunctionsShort("f", cl::desc("Alias for -functions"),
50                                       cl::NotHidden, cl::Grouping,
51                                       cl::aliasopt(ClPrintFunctions));
52
53static cl::opt<bool>
54    ClUseRelativeAddress("relative-address", cl::init(false),
55                         cl::desc("Interpret addresses as relative addresses"),
56                         cl::ReallyHidden);
57
58static cl::opt<bool>
59    ClPrintInlining("inlining", cl::init(true),
60                    cl::desc("Print all inlined frames for a given address"));
61static cl::alias
62    ClPrintInliningAliasI("i", cl::desc("Alias for -inlining"),
63                          cl::NotHidden, cl::aliasopt(ClPrintInlining),
64                          cl::Grouping);
65static cl::alias
66    ClPrintInliningAliasInlines("inlines", cl::desc("Alias for -inlining"),
67                                cl::NotHidden, cl::aliasopt(ClPrintInlining));
68
69// -basenames, -s
70static cl::opt<bool> ClBasenames("basenames", cl::init(false),
71                                 cl::desc("Strip directory names from paths"));
72static cl::alias ClBasenamesShort("s", cl::desc("Alias for -basenames"),
73                                  cl::NotHidden, cl::aliasopt(ClBasenames));
74
75// -demangle, -C, -no-demangle
76static cl::opt<bool>
77ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
78static cl::alias
79ClDemangleShort("C", cl::desc("Alias for -demangle"),
80                cl::NotHidden, cl::aliasopt(ClDemangle), cl::Grouping);
81static cl::opt<bool>
82ClNoDemangle("no-demangle", cl::init(false),
83             cl::desc("Don't demangle function names"));
84
85static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""),
86                                          cl::desc("Default architecture "
87                                                   "(for multi-arch objects)"));
88
89// -obj, -exe, -e
90static cl::opt<std::string>
91ClBinaryName("obj", cl::init(""),
92             cl::desc("Path to object file to be symbolized (if not provided, "
93                      "object file should be specified for each input line)"));
94static cl::alias
95ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"),
96                     cl::NotHidden, cl::aliasopt(ClBinaryName));
97static cl::alias ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
98                                    cl::NotHidden, cl::Grouping, cl::Prefix,
99                                    cl::aliasopt(ClBinaryName));
100
101static cl::opt<std::string>
102    ClDwpName("dwp", cl::init(""),
103              cl::desc("Path to DWP file to be use for any split CUs"));
104
105static cl::list<std::string>
106ClDsymHint("dsym-hint", cl::ZeroOrMore,
107           cl::desc("Path to .dSYM bundles to search for debug info for the "
108                    "object files"));
109
110// -print-address, -addresses, -a
111static cl::opt<bool>
112ClPrintAddress("print-address", cl::init(false),
113               cl::desc("Show address before line information"));
114static cl::alias
115ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"),
116                             cl::NotHidden, cl::aliasopt(ClPrintAddress));
117static cl::alias
118ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"),
119                     cl::NotHidden, cl::aliasopt(ClPrintAddress), cl::Grouping);
120
121// -pretty-print, -p
122static cl::opt<bool>
123    ClPrettyPrint("pretty-print", cl::init(false),
124                  cl::desc("Make the output more human friendly"));
125static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"),
126                                    cl::NotHidden,
127                                    cl::aliasopt(ClPrettyPrint), cl::Grouping);
128
129static cl::opt<int> ClPrintSourceContextLines(
130    "print-source-context-lines", cl::init(0),
131    cl::desc("Print N number of source file context"));
132
133static cl::opt<bool> ClVerbose("verbose", cl::init(false),
134                               cl::desc("Print verbose line info"));
135
136// -adjust-vma
137static cl::opt<uint64_t>
138    ClAdjustVMA("adjust-vma", cl::init(0), cl::value_desc("offset"),
139                cl::desc("Add specified offset to object file addresses"));
140
141static cl::list<std::string> ClInputAddresses(cl::Positional,
142                                              cl::desc("<input addresses>..."),
143                                              cl::ZeroOrMore);
144
145static cl::opt<std::string>
146    ClFallbackDebugPath("fallback-debug-path", cl::init(""),
147                        cl::desc("Fallback path for debug binaries."));
148
149static cl::opt<DIPrinter::OutputStyle>
150    ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM),
151                  cl::desc("Specify print style"),
152                  cl::values(clEnumValN(DIPrinter::OutputStyle::LLVM, "LLVM",
153                                        "LLVM default style"),
154                             clEnumValN(DIPrinter::OutputStyle::GNU, "GNU",
155                                        "GNU addr2line style")));
156
157static cl::extrahelp
158    HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
159
160template<typename T>
161static bool error(Expected<T> &ResOrErr) {
162  if (ResOrErr)
163    return false;
164  logAllUnhandledErrors(ResOrErr.takeError(), errs(),
165                        "LLVMSymbolizer: error reading file: ");
166  return true;
167}
168
/// Kind of symbolization request, selected by an optional keyword at the
/// start of an input line.
enum class Command {
  /// "CODE " prefix, or no prefix at all.
  Code,
  /// "DATA " prefix.
  Data,
  /// "FRAME " prefix.
  Frame,
};
174
175static bool parseCommand(StringRef InputString, Command &Cmd,
176                         std::string &ModuleName, uint64_t &ModuleOffset) {
177  const char kDelimiters[] = " \n\r";
178  ModuleName = "";
179  if (InputString.consume_front("CODE ")) {
180    Cmd = Command::Code;
181  } else if (InputString.consume_front("DATA ")) {
182    Cmd = Command::Data;
183  } else if (InputString.consume_front("FRAME ")) {
184    Cmd = Command::Frame;
185  } else {
186    // If no cmd, assume it's CODE.
187    Cmd = Command::Code;
188  }
189  const char *pos = InputString.data();
190  // Skip delimiters and parse input filename (if needed).
191  if (ClBinaryName.empty()) {
192    pos += strspn(pos, kDelimiters);
193    if (*pos == '"' || *pos == '\'') {
194      char quote = *pos;
195      pos++;
196      const char *end = strchr(pos, quote);
197      if (!end)
198        return false;
199      ModuleName = std::string(pos, end - pos);
200      pos = end + 1;
201    } else {
202      int name_length = strcspn(pos, kDelimiters);
203      ModuleName = std::string(pos, name_length);
204      pos += name_length;
205    }
206  } else {
207    ModuleName = ClBinaryName;
208  }
209  // Skip delimiters and parse module offset.
210  pos += strspn(pos, kDelimiters);
211  int offset_length = strcspn(pos, kDelimiters);
212  return !StringRef(pos, offset_length).getAsInteger(0, ModuleOffset);
213}
214
215static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
216                           DIPrinter &Printer) {
217  Command Cmd;
218  std::string ModuleName;
219  uint64_t Offset = 0;
220  if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) {
221    outs() << InputString;
222    return;
223  }
224
225  if (ClPrintAddress) {
226    outs() << "0x";
227    outs().write_hex(Offset);
228    StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
229    outs() << Delimiter;
230  }
231  Offset -= ClAdjustVMA;
232  if (Cmd == Command::Data) {
233    auto ResOrErr = Symbolizer.symbolizeData(
234        ModuleName, {Offset, object::SectionedAddress::UndefSection});
235    Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
236  } else if (Cmd == Command::Frame) {
237    auto ResOrErr = Symbolizer.symbolizeFrame(
238        ModuleName, {Offset, object::SectionedAddress::UndefSection});
239    if (!error(ResOrErr)) {
240      for (DILocal Local : *ResOrErr)
241        Printer << Local;
242      if (ResOrErr->empty())
243        outs() << "??\n";
244    }
245  } else if (ClPrintInlining) {
246    auto ResOrErr = Symbolizer.symbolizeInlinedCode(
247        ModuleName, {Offset, object::SectionedAddress::UndefSection});
248    Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
249  } else if (ClOutputStyle == DIPrinter::OutputStyle::GNU) {
250    // With ClPrintFunctions == FunctionNameKind::LinkageName (default)
251    // and ClUseSymbolTable == true (also default), Symbolizer.symbolizeCode()
252    // may override the name of an inlined function with the name of the topmost
253    // caller function in the inlining chain. This contradicts the existing
254    // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
255    // the topmost function, which suits our needs better.
256    auto ResOrErr = Symbolizer.symbolizeInlinedCode(
257        ModuleName, {Offset, object::SectionedAddress::UndefSection});
258    Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
259  } else {
260    auto ResOrErr = Symbolizer.symbolizeCode(
261        ModuleName, {Offset, object::SectionedAddress::UndefSection});
262    Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
263  }
264  if (ClOutputStyle == DIPrinter::OutputStyle::LLVM)
265    outs() << "\n";
266}
267
268int main(int argc, char **argv) {
269  InitLLVM X(argc, argv);
270
271  bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
272
273  if (IsAddr2Line) {
274    ClDemangle.setInitialValue(false);
275    ClPrintFunctions.setInitialValue(FunctionNameKind::None);
276    ClPrintInlining.setInitialValue(false);
277    ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU);
278  }
279
280  llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
281  cl::ParseCommandLineOptions(argc, argv, IsAddr2Line ? "llvm-addr2line\n"
282                                                      : "llvm-symbolizer\n");
283
284  // If both --demangle and --no-demangle are specified then pick the last one.
285  if (ClNoDemangle.getPosition() > ClDemangle.getPosition())
286    ClDemangle = !ClNoDemangle;
287
288  LLVMSymbolizer::Options Opts;
289  Opts.PrintFunctions = ClPrintFunctions;
290  Opts.UseSymbolTable = ClUseSymbolTable;
291  Opts.Demangle = ClDemangle;
292  Opts.RelativeAddresses = ClUseRelativeAddress;
293  Opts.DefaultArch = ClDefaultArch;
294  Opts.FallbackDebugPath = ClFallbackDebugPath;
295  Opts.DWPName = ClDwpName;
296
297  for (const auto &hint : ClDsymHint) {
298    if (sys::path::extension(hint) == ".dSYM") {
299      Opts.DsymHints.push_back(hint);
300    } else {
301      errs() << "Warning: invalid dSYM hint: \"" << hint <<
302                "\" (must have the '.dSYM' extension).\n";
303    }
304  }
305  LLVMSymbolizer Symbolizer(Opts);
306
307  DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
308                    ClPrettyPrint, ClPrintSourceContextLines, ClVerbose,
309                    ClBasenames, ClOutputStyle);
310
311  if (ClInputAddresses.empty()) {
312    const int kMaxInputStringLength = 1024;
313    char InputString[kMaxInputStringLength];
314
315    while (fgets(InputString, sizeof(InputString), stdin)) {
316      symbolizeInput(InputString, Symbolizer, Printer);
317      outs().flush();
318    }
319  } else {
320    for (StringRef Address : ClInputAddresses)
321      symbolizeInput(Address, Symbolizer, Printer);
322  }
323
324  return 0;
325}
326