Symbolize.cpp revision 292941
1//===-- LLVMSymbolize.cpp -------------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation for LLVM symbolization library.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/DebugInfo/Symbolize/Symbolize.h"
15
16#include "SymbolizableObjectFile.h"
17
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/Config/config.h"
20#include "llvm/DebugInfo/DWARF/DWARFContext.h"
21#include "llvm/DebugInfo/PDB/PDB.h"
22#include "llvm/DebugInfo/PDB/PDBContext.h"
23#include "llvm/Object/ELFObjectFile.h"
24#include "llvm/Object/MachO.h"
25#include "llvm/Object/MachOUniversal.h"
26#include "llvm/Support/COFF.h"
27#include "llvm/Support/Casting.h"
28#include "llvm/Support/Compression.h"
29#include "llvm/Support/DataExtractor.h"
30#include "llvm/Support/Errc.h"
31#include "llvm/Support/FileSystem.h"
32#include "llvm/Support/MemoryBuffer.h"
33#include "llvm/Support/Path.h"
34#include <stdlib.h>
35
36#if defined(_MSC_VER)
37#include <Windows.h>
38#include <DbgHelp.h>
39#pragma comment(lib, "dbghelp.lib")
40
41// Windows.h conflicts with our COFF header definitions.
42#ifdef IMAGE_FILE_MACHINE_I386
43#undef IMAGE_FILE_MACHINE_I386
44#endif
45#endif
46
47namespace llvm {
48namespace symbolize {
49
50ErrorOr<DILineInfo> LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
51                                                  uint64_t ModuleOffset) {
52  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
53  if (auto EC = InfoOrErr.getError())
54    return EC;
55  SymbolizableModule *Info = InfoOrErr.get();
56
57  // If the user is giving us relative addresses, add the preferred base of the
58  // object to the offset before we do the query. It's what DIContext expects.
59  if (Opts.RelativeAddresses)
60    ModuleOffset += Info->getModulePreferredBase();
61
62  DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
63                                            Opts.UseSymbolTable);
64  if (Opts.Demangle)
65    LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
66  return LineInfo;
67}
68
69ErrorOr<DIInliningInfo>
70LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
71                                     uint64_t ModuleOffset) {
72  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
73  if (auto EC = InfoOrErr.getError())
74    return EC;
75  SymbolizableModule *Info = InfoOrErr.get();
76
77  // If the user is giving us relative addresses, add the preferred base of the
78  // object to the offset before we do the query. It's what DIContext expects.
79  if (Opts.RelativeAddresses)
80    ModuleOffset += Info->getModulePreferredBase();
81
82  DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
83      ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
84  if (Opts.Demangle) {
85    for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
86      auto *Frame = InlinedContext.getMutableFrame(i);
87      Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
88    }
89  }
90  return InlinedContext;
91}
92
93ErrorOr<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
94                                                uint64_t ModuleOffset) {
95  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
96  if (auto EC = InfoOrErr.getError())
97    return EC;
98  SymbolizableModule *Info = InfoOrErr.get();
99
100  // If the user is giving us relative addresses, add the preferred base of
101  // the object to the offset before we do the query. It's what DIContext
102  // expects.
103  if (Opts.RelativeAddresses)
104    ModuleOffset += Info->getModulePreferredBase();
105
106  DIGlobal Global = Info->symbolizeData(ModuleOffset);
107  if (Opts.Demangle)
108    Global.Name = DemangleName(Global.Name, Info);
109  return Global;
110}
111
// Empties every cache held by the symbolizer: the per-architecture objects
// extracted from universal binaries, the loaded binaries, the object/debug
// object pairs and the symbolizable modules.
// NOTE(review): the clearing order is kept exactly as written; whether the
// caches depend on this destruction order is not visible from this file.
void LLVMSymbolizer::flush() {
  ObjectForUBPathAndArch.clear();
  BinaryForPath.clear();
  ObjectPairForPathArch.clear();
  Modules.clear();
}
118
119// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
120// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
121// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
122// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
123static
124std::string getDarwinDWARFResourceForPath(
125    const std::string &Path, const std::string &Basename) {
126  SmallString<16> ResourceName = StringRef(Path);
127  if (sys::path::extension(Path) != ".dSYM") {
128    ResourceName += ".dSYM";
129  }
130  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
131  sys::path::append(ResourceName, Basename);
132  return ResourceName.str();
133}
134
135static bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
136  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
137      MemoryBuffer::getFileOrSTDIN(Path);
138  if (!MB)
139    return false;
140  return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
141}
142
143static bool findDebugBinary(const std::string &OrigPath,
144                            const std::string &DebuglinkName, uint32_t CRCHash,
145                            std::string &Result) {
146  std::string OrigRealPath = OrigPath;
147#if defined(HAVE_REALPATH)
148  if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
149    OrigRealPath = RP;
150    free(RP);
151  }
152#endif
153  SmallString<16> OrigDir(OrigRealPath);
154  llvm::sys::path::remove_filename(OrigDir);
155  SmallString<16> DebugPath = OrigDir;
156  // Try /path/to/original_binary/debuglink_name
157  llvm::sys::path::append(DebugPath, DebuglinkName);
158  if (checkFileCRC(DebugPath, CRCHash)) {
159    Result = DebugPath.str();
160    return true;
161  }
162  // Try /path/to/original_binary/.debug/debuglink_name
163  DebugPath = OrigRealPath;
164  llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
165  if (checkFileCRC(DebugPath, CRCHash)) {
166    Result = DebugPath.str();
167    return true;
168  }
169  // Try /usr/lib/debug/path/to/original_binary/debuglink_name
170  DebugPath = "/usr/lib/debug";
171  llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
172                          DebuglinkName);
173  if (checkFileCRC(DebugPath, CRCHash)) {
174    Result = DebugPath.str();
175    return true;
176  }
177  return false;
178}
179
180static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
181                                    uint32_t &CRCHash) {
182  if (!Obj)
183    return false;
184  for (const SectionRef &Section : Obj->sections()) {
185    StringRef Name;
186    Section.getName(Name);
187    Name = Name.substr(Name.find_first_not_of("._"));
188    if (Name == "gnu_debuglink") {
189      StringRef Data;
190      Section.getContents(Data);
191      DataExtractor DE(Data, Obj->isLittleEndian(), 0);
192      uint32_t Offset = 0;
193      if (const char *DebugNameStr = DE.getCStr(&Offset)) {
194        // 4-byte align the offset.
195        Offset = (Offset + 3) & ~0x3;
196        if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
197          DebugName = DebugNameStr;
198          CRCHash = DE.getU32(&Offset);
199          return true;
200        }
201      }
202      break;
203    }
204  }
205  return false;
206}
207
208static
209bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
210                             const MachOObjectFile *Obj) {
211  ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
212  ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
213  if (dbg_uuid.empty() || bin_uuid.empty())
214    return false;
215  return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
216}
217
218ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
219    const MachOObjectFile *MachExeObj, const std::string &ArchName) {
220  // On Darwin we may find DWARF in separate object file in
221  // resource directory.
222  std::vector<std::string> DsymPaths;
223  StringRef Filename = sys::path::filename(ExePath);
224  DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
225  for (const auto &Path : Opts.DsymHints) {
226    DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
227  }
228  for (const auto &Path : DsymPaths) {
229    auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
230    if (!DbgObjOrErr)
231      continue;
232    ObjectFile *DbgObj = DbgObjOrErr.get();
233    const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
234    if (!MachDbgObj)
235      continue;
236    if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
237      return DbgObj;
238  }
239  return nullptr;
240}
241
242ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
243                                                  const ObjectFile *Obj,
244                                                  const std::string &ArchName) {
245  std::string DebuglinkName;
246  uint32_t CRCHash;
247  std::string DebugBinaryPath;
248  if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
249    return nullptr;
250  if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
251    return nullptr;
252  auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
253  if (!DbgObjOrErr)
254    return nullptr;
255  return DbgObjOrErr.get();
256}
257
258ErrorOr<LLVMSymbolizer::ObjectPair>
259LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
260                                      const std::string &ArchName) {
261  const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
262  if (I != ObjectPairForPathArch.end())
263    return I->second;
264
265  auto ObjOrErr = getOrCreateObject(Path, ArchName);
266  if (auto EC = ObjOrErr.getError()) {
267    ObjectPairForPathArch.insert(
268        std::make_pair(std::make_pair(Path, ArchName), EC));
269    return EC;
270  }
271
272  ObjectFile *Obj = ObjOrErr.get();
273  assert(Obj != nullptr);
274  ObjectFile *DbgObj = nullptr;
275
276  if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
277    DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
278  if (!DbgObj)
279    DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
280  if (!DbgObj)
281    DbgObj = Obj;
282  ObjectPair Res = std::make_pair(Obj, DbgObj);
283  ObjectPairForPathArch.insert(
284      std::make_pair(std::make_pair(Path, ArchName), Res));
285  return Res;
286}
287
288ErrorOr<ObjectFile *>
289LLVMSymbolizer::getOrCreateObject(const std::string &Path,
290                                  const std::string &ArchName) {
291  const auto &I = BinaryForPath.find(Path);
292  Binary *Bin = nullptr;
293  if (I == BinaryForPath.end()) {
294    ErrorOr<OwningBinary<Binary>> BinOrErr = createBinary(Path);
295    if (auto EC = BinOrErr.getError()) {
296      BinaryForPath.insert(std::make_pair(Path, EC));
297      return EC;
298    }
299    Bin = BinOrErr->getBinary();
300    BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
301  } else if (auto EC = I->second.getError()) {
302    return EC;
303  } else {
304    Bin = I->second->getBinary();
305  }
306
307  assert(Bin != nullptr);
308
309  if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
310    const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
311    if (I != ObjectForUBPathAndArch.end()) {
312      if (auto EC = I->second.getError())
313        return EC;
314      return I->second->get();
315    }
316    ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
317        UB->getObjectForArch(ArchName);
318    if (auto EC = ObjOrErr.getError()) {
319      ObjectForUBPathAndArch.insert(
320          std::make_pair(std::make_pair(Path, ArchName), EC));
321      return EC;
322    }
323    ObjectFile *Res = ObjOrErr->get();
324    ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
325                                                 std::move(ObjOrErr.get())));
326    return Res;
327  }
328  if (Bin->isObject()) {
329    return cast<ObjectFile>(Bin);
330  }
331  return object_error::arch_not_found;
332}
333
334ErrorOr<SymbolizableModule *>
335LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
336  const auto &I = Modules.find(ModuleName);
337  if (I != Modules.end()) {
338    auto &InfoOrErr = I->second;
339    if (auto EC = InfoOrErr.getError())
340      return EC;
341    return InfoOrErr->get();
342  }
343  std::string BinaryName = ModuleName;
344  std::string ArchName = Opts.DefaultArch;
345  size_t ColonPos = ModuleName.find_last_of(':');
346  // Verify that substring after colon form a valid arch name.
347  if (ColonPos != std::string::npos) {
348    std::string ArchStr = ModuleName.substr(ColonPos + 1);
349    if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
350      BinaryName = ModuleName.substr(0, ColonPos);
351      ArchName = ArchStr;
352    }
353  }
354  auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
355  if (auto EC = ObjectsOrErr.getError()) {
356    // Failed to find valid object file.
357    Modules.insert(std::make_pair(ModuleName, EC));
358    return EC;
359  }
360  ObjectPair Objects = ObjectsOrErr.get();
361
362  std::unique_ptr<DIContext> Context;
363  if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
364    // If this is a COFF object, assume it contains PDB debug information.  If
365    // we don't find any we will fall back to the DWARF case.
366    std::unique_ptr<IPDBSession> Session;
367    PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA,
368                                         Objects.first->getFileName(), Session);
369    if (Error == PDB_ErrorCode::Success) {
370      Context.reset(new PDBContext(*CoffObject, std::move(Session)));
371    }
372  }
373  if (!Context)
374    Context.reset(new DWARFContextInMemory(*Objects.second));
375  assert(Context);
376  auto InfoOrErr =
377      SymbolizableObjectFile::create(Objects.first, std::move(Context));
378  auto InsertResult =
379      Modules.insert(std::make_pair(ModuleName, std::move(InfoOrErr)));
380  assert(InsertResult.second);
381  if (auto EC = InsertResult.first->second.getError())
382    return EC;
383  return InsertResult.first->second->get();
384}
385
386// Undo these various manglings for Win32 extern "C" functions:
387// cdecl       - _foo
388// stdcall     - _foo@12
389// fastcall    - @foo@12
390// vectorcall  - foo@@12
391// These are all different linkage names for 'foo'.
392static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
393  // Remove any '_' or '@' prefix.
394  char Front = SymbolName.empty() ? '\0' : SymbolName[0];
395  if (Front == '_' || Front == '@')
396    SymbolName = SymbolName.drop_front();
397
398  // Remove any '@[0-9]+' suffix.
399  if (Front != '?') {
400    size_t AtPos = SymbolName.rfind('@');
401    if (AtPos != StringRef::npos &&
402        std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
403                    [](char C) { return C >= '0' && C <= '9'; })) {
404      SymbolName = SymbolName.substr(0, AtPos);
405    }
406  }
407
408  // Remove any ending '@' for vectorcall.
409  if (SymbolName.endswith("@"))
410    SymbolName = SymbolName.drop_back();
411
412  return SymbolName;
413}
414
415#if !defined(_MSC_VER)
416// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
417extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
418                                size_t *length, int *status);
419#endif
420
421std::string LLVMSymbolizer::DemangleName(const std::string &Name,
422                                         const SymbolizableModule *ModInfo) {
423#if !defined(_MSC_VER)
424  // We can spoil names of symbols with C linkage, so use an heuristic
425  // approach to check if the name should be demangled.
426  if (Name.substr(0, 2) == "_Z") {
427    int status = 0;
428    char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
429    if (status != 0)
430      return Name;
431    std::string Result = DemangledName;
432    free(DemangledName);
433    return Result;
434  }
435#else
436  if (!Name.empty() && Name.front() == '?') {
437    // Only do MSVC C++ demangling on symbols starting with '?'.
438    char DemangledName[1024] = {0};
439    DWORD result = ::UnDecorateSymbolName(
440        Name.c_str(), DemangledName, 1023,
441        UNDNAME_NO_ACCESS_SPECIFIERS |       // Strip public, private, protected
442            UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
443            UNDNAME_NO_THROW_SIGNATURES |    // Strip throw() specifications
444            UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
445            UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
446            UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
447    return (result == 0) ? Name : std::string(DemangledName);
448  }
449#endif
450  if (ModInfo && ModInfo->isWin32Module())
451    return std::string(demanglePE32ExternCFunc(Name));
452  return Name;
453}
454
455} // namespace symbolize
456} // namespace llvm
457