1//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// This utility works much like "addr2line". It is able to transform
10// tuples (module name, module offset) to code locations (function name,
11// file, line number, column number). It is targeted for compiler-rt tools
12// (especially AddressSanitizer and ThreadSanitizer) that can use it
13// to symbolize stack traces in their error reports.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/StringRef.h"
18#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
19#include "llvm/DebugInfo/Symbolize/Symbolize.h"
20#include "llvm/Support/COM.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Support/Debug.h"
23#include "llvm/Support/FileSystem.h"
24#include "llvm/Support/InitLLVM.h"
25#include "llvm/Support/Path.h"
26#include "llvm/Support/raw_ostream.h"
27#include <algorithm>
28#include <cstdio>
29#include <cstring>
30#include <string>
31
32using namespace llvm;
33using namespace symbolize;
34
35static cl::opt<bool>
36ClUseSymbolTable("use-symbol-table", cl::init(true),
37                 cl::desc("Prefer names in symbol table to names "
38                          "in debug info"));
39
40static cl::opt<FunctionNameKind> ClPrintFunctions(
41    "functions", cl::init(FunctionNameKind::LinkageName),
42    cl::desc("Print function name for a given address"), cl::ValueOptional,
43    cl::values(clEnumValN(FunctionNameKind::None, "none", "omit function name"),
44               clEnumValN(FunctionNameKind::ShortName, "short",
45                          "print short function name"),
46               clEnumValN(FunctionNameKind::LinkageName, "linkage",
47                          "print function linkage name"),
48               // Sentinel value for unspecified value.
49               clEnumValN(FunctionNameKind::LinkageName, "", "")));
50static cl::alias ClPrintFunctionsShort("f", cl::desc("Alias for -functions"),
51                                       cl::NotHidden, cl::Grouping,
52                                       cl::aliasopt(ClPrintFunctions));
53
54static cl::opt<bool>
55    ClUseRelativeAddress("relative-address", cl::init(false),
56                         cl::desc("Interpret addresses as relative addresses"),
57                         cl::ReallyHidden);
58
59static cl::opt<bool> ClUntagAddresses(
60    "untag-addresses", cl::init(true),
61    cl::desc("Remove memory tags from addresses before symbolization"));
62
63static cl::opt<bool>
64    ClPrintInlining("inlining", cl::init(true),
65                    cl::desc("Print all inlined frames for a given address"));
66static cl::alias
67    ClPrintInliningAliasI("i", cl::desc("Alias for -inlining"),
68                          cl::NotHidden, cl::aliasopt(ClPrintInlining),
69                          cl::Grouping);
70static cl::alias
71    ClPrintInliningAliasInlines("inlines", cl::desc("Alias for -inlining"),
72                                cl::NotHidden, cl::aliasopt(ClPrintInlining));
73
74// -basenames, -s
75static cl::opt<bool> ClBasenames("basenames", cl::init(false),
76                                 cl::desc("Strip directory names from paths"));
77static cl::alias ClBasenamesShort("s", cl::desc("Alias for -basenames"),
78                                  cl::NotHidden, cl::aliasopt(ClBasenames));
79
80// -demangle, -C, -no-demangle
81static cl::opt<bool>
82ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names"));
83static cl::alias
84ClDemangleShort("C", cl::desc("Alias for -demangle"),
85                cl::NotHidden, cl::aliasopt(ClDemangle), cl::Grouping);
86static cl::opt<bool>
87ClNoDemangle("no-demangle", cl::init(false),
88             cl::desc("Don't demangle function names"));
89
90static cl::opt<std::string> ClDefaultArch("default-arch", cl::init(""),
91                                          cl::desc("Default architecture "
92                                                   "(for multi-arch objects)"));
93
94// -obj, -exe, -e
95static cl::opt<std::string>
96ClBinaryName("obj", cl::init(""),
97             cl::desc("Path to object file to be symbolized (if not provided, "
98                      "object file should be specified for each input line)"));
99static cl::alias
100ClBinaryNameAliasExe("exe", cl::desc("Alias for -obj"),
101                     cl::NotHidden, cl::aliasopt(ClBinaryName));
102static cl::alias ClBinaryNameAliasE("e", cl::desc("Alias for -obj"),
103                                    cl::NotHidden, cl::Grouping, cl::Prefix,
104                                    cl::aliasopt(ClBinaryName));
105
106static cl::opt<std::string>
107    ClDwpName("dwp", cl::init(""),
108              cl::desc("Path to DWP file to be use for any split CUs"));
109
110static cl::list<std::string>
111ClDsymHint("dsym-hint", cl::ZeroOrMore,
112           cl::desc("Path to .dSYM bundles to search for debug info for the "
113                    "object files"));
114
115// -print-address, -addresses, -a
116static cl::opt<bool>
117ClPrintAddress("print-address", cl::init(false),
118               cl::desc("Show address before line information"));
119static cl::alias
120ClPrintAddressAliasAddresses("addresses", cl::desc("Alias for -print-address"),
121                             cl::NotHidden, cl::aliasopt(ClPrintAddress));
122static cl::alias
123ClPrintAddressAliasA("a", cl::desc("Alias for -print-address"),
124                     cl::NotHidden, cl::aliasopt(ClPrintAddress), cl::Grouping);
125
126// -pretty-print, -p
127static cl::opt<bool>
128    ClPrettyPrint("pretty-print", cl::init(false),
129                  cl::desc("Make the output more human friendly"));
130static cl::alias ClPrettyPrintShort("p", cl::desc("Alias for -pretty-print"),
131                                    cl::NotHidden,
132                                    cl::aliasopt(ClPrettyPrint), cl::Grouping);
133
134static cl::opt<int> ClPrintSourceContextLines(
135    "print-source-context-lines", cl::init(0),
136    cl::desc("Print N number of source file context"));
137
138static cl::opt<bool> ClVerbose("verbose", cl::init(false),
139                               cl::desc("Print verbose line info"));
140
141// -adjust-vma
142static cl::opt<uint64_t>
143    ClAdjustVMA("adjust-vma", cl::init(0), cl::value_desc("offset"),
144                cl::desc("Add specified offset to object file addresses"));
145
146static cl::list<std::string> ClInputAddresses(cl::Positional,
147                                              cl::desc("<input addresses>..."),
148                                              cl::ZeroOrMore);
149
150static cl::opt<std::string>
151    ClFallbackDebugPath("fallback-debug-path", cl::init(""),
152                        cl::desc("Fallback path for debug binaries."));
153
154static cl::list<std::string>
155    ClDebugFileDirectory("debug-file-directory", cl::ZeroOrMore,
156                         cl::value_desc("dir"),
157                         cl::desc("Path to directory where to look for debug "
158                                  "files."));
159
160static cl::opt<DIPrinter::OutputStyle>
161    ClOutputStyle("output-style", cl::init(DIPrinter::OutputStyle::LLVM),
162                  cl::desc("Specify print style"),
163                  cl::values(clEnumValN(DIPrinter::OutputStyle::LLVM, "LLVM",
164                                        "LLVM default style"),
165                             clEnumValN(DIPrinter::OutputStyle::GNU, "GNU",
166                                        "GNU addr2line style")));
167
168static cl::extrahelp
169    HelpResponse("\nPass @FILE as argument to read options from FILE.\n");
170
171template<typename T>
172static bool error(Expected<T> &ResOrErr) {
173  if (ResOrErr)
174    return false;
175  logAllUnhandledErrors(ResOrErr.takeError(), errs(),
176                        "LLVMSymbolizer: error reading file: ");
177  return true;
178}
179
/// Kind of query selected for a single input line.
enum class Command {
  /// Code address lookup.
  Code,
  /// Data address lookup.
  Data,
  /// Frame lookup.
  Frame,
};
185
186static bool parseCommand(StringRef InputString, Command &Cmd,
187                         std::string &ModuleName, uint64_t &ModuleOffset) {
188  const char kDelimiters[] = " \n\r";
189  ModuleName = "";
190  if (InputString.consume_front("CODE ")) {
191    Cmd = Command::Code;
192  } else if (InputString.consume_front("DATA ")) {
193    Cmd = Command::Data;
194  } else if (InputString.consume_front("FRAME ")) {
195    Cmd = Command::Frame;
196  } else {
197    // If no cmd, assume it's CODE.
198    Cmd = Command::Code;
199  }
200  const char *pos = InputString.data();
201  // Skip delimiters and parse input filename (if needed).
202  if (ClBinaryName.empty()) {
203    pos += strspn(pos, kDelimiters);
204    if (*pos == '"' || *pos == '\'') {
205      char quote = *pos;
206      pos++;
207      const char *end = strchr(pos, quote);
208      if (!end)
209        return false;
210      ModuleName = std::string(pos, end - pos);
211      pos = end + 1;
212    } else {
213      int name_length = strcspn(pos, kDelimiters);
214      ModuleName = std::string(pos, name_length);
215      pos += name_length;
216    }
217  } else {
218    ModuleName = ClBinaryName;
219  }
220  // Skip delimiters and parse module offset.
221  pos += strspn(pos, kDelimiters);
222  int offset_length = strcspn(pos, kDelimiters);
223  return !StringRef(pos, offset_length).getAsInteger(0, ModuleOffset);
224}
225
226static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer,
227                           DIPrinter &Printer) {
228  Command Cmd;
229  std::string ModuleName;
230  uint64_t Offset = 0;
231  if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) {
232    outs() << InputString << "\n";
233    return;
234  }
235
236  if (ClPrintAddress) {
237    outs() << "0x";
238    outs().write_hex(Offset);
239    StringRef Delimiter = ClPrettyPrint ? ": " : "\n";
240    outs() << Delimiter;
241  }
242  Offset -= ClAdjustVMA;
243  if (Cmd == Command::Data) {
244    auto ResOrErr = Symbolizer.symbolizeData(
245        ModuleName, {Offset, object::SectionedAddress::UndefSection});
246    Printer << (error(ResOrErr) ? DIGlobal() : ResOrErr.get());
247  } else if (Cmd == Command::Frame) {
248    auto ResOrErr = Symbolizer.symbolizeFrame(
249        ModuleName, {Offset, object::SectionedAddress::UndefSection});
250    if (!error(ResOrErr)) {
251      for (DILocal Local : *ResOrErr)
252        Printer << Local;
253      if (ResOrErr->empty())
254        outs() << "??\n";
255    }
256  } else if (ClPrintInlining) {
257    auto ResOrErr = Symbolizer.symbolizeInlinedCode(
258        ModuleName, {Offset, object::SectionedAddress::UndefSection});
259    Printer << (error(ResOrErr) ? DIInliningInfo() : ResOrErr.get());
260  } else if (ClOutputStyle == DIPrinter::OutputStyle::GNU) {
261    // With ClPrintFunctions == FunctionNameKind::LinkageName (default)
262    // and ClUseSymbolTable == true (also default), Symbolizer.symbolizeCode()
263    // may override the name of an inlined function with the name of the topmost
264    // caller function in the inlining chain. This contradicts the existing
265    // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
266    // the topmost function, which suits our needs better.
267    auto ResOrErr = Symbolizer.symbolizeInlinedCode(
268        ModuleName, {Offset, object::SectionedAddress::UndefSection});
269    Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get().getFrame(0));
270  } else {
271    auto ResOrErr = Symbolizer.symbolizeCode(
272        ModuleName, {Offset, object::SectionedAddress::UndefSection});
273    Printer << (error(ResOrErr) ? DILineInfo() : ResOrErr.get());
274  }
275  if (ClOutputStyle == DIPrinter::OutputStyle::LLVM)
276    outs() << "\n";
277}
278
279int main(int argc, char **argv) {
280  InitLLVM X(argc, argv);
281
282  bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
283
284  if (IsAddr2Line) {
285    ClDemangle.setInitialValue(false);
286    ClPrintFunctions.setInitialValue(FunctionNameKind::None);
287    ClPrintInlining.setInitialValue(false);
288    ClUntagAddresses.setInitialValue(false);
289    ClOutputStyle.setInitialValue(DIPrinter::OutputStyle::GNU);
290  }
291
292  llvm::sys::InitializeCOMRAII COM(llvm::sys::COMThreadingMode::MultiThreaded);
293  cl::ParseCommandLineOptions(
294      argc, argv, IsAddr2Line ? "llvm-addr2line\n" : "llvm-symbolizer\n",
295      /*Errs=*/nullptr,
296      IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" : "LLVM_SYMBOLIZER_OPTS");
297
298  // If both --demangle and --no-demangle are specified then pick the last one.
299  if (ClNoDemangle.getPosition() > ClDemangle.getPosition())
300    ClDemangle = !ClNoDemangle;
301
302  LLVMSymbolizer::Options Opts;
303  Opts.PrintFunctions = ClPrintFunctions;
304  Opts.UseSymbolTable = ClUseSymbolTable;
305  Opts.Demangle = ClDemangle;
306  Opts.RelativeAddresses = ClUseRelativeAddress;
307  Opts.UntagAddresses = ClUntagAddresses;
308  Opts.DefaultArch = ClDefaultArch;
309  Opts.FallbackDebugPath = ClFallbackDebugPath;
310  Opts.DWPName = ClDwpName;
311  Opts.DebugFileDirectory = ClDebugFileDirectory;
312
313  for (const auto &hint : ClDsymHint) {
314    if (sys::path::extension(hint) == ".dSYM") {
315      Opts.DsymHints.push_back(hint);
316    } else {
317      errs() << "Warning: invalid dSYM hint: \"" << hint <<
318                "\" (must have the '.dSYM' extension).\n";
319    }
320  }
321  LLVMSymbolizer Symbolizer(Opts);
322
323  DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
324                    ClPrettyPrint, ClPrintSourceContextLines, ClVerbose,
325                    ClBasenames, ClOutputStyle);
326
327  if (ClInputAddresses.empty()) {
328    const int kMaxInputStringLength = 1024;
329    char InputString[kMaxInputStringLength];
330
331    while (fgets(InputString, sizeof(InputString), stdin)) {
332      // Strip newline characters.
333      std::string StrippedInputString(InputString);
334      StrippedInputString.erase(
335          std::remove_if(StrippedInputString.begin(), StrippedInputString.end(),
336                         [](char c) { return c == '\r' || c == '\n'; }),
337          StrippedInputString.end());
338      symbolizeInput(StrippedInputString, Symbolizer, Printer);
339      outs().flush();
340    }
341  } else {
342    for (StringRef Address : ClInputAddresses)
343      symbolizeInput(Address, Symbolizer, Printer);
344  }
345
346  return 0;
347}
348