1296465Sdelphij//===-- LLVMSymbolize.cpp -------------------------------------------------===//
2110010Smarkm//
3110010Smarkm//                     The LLVM Compiler Infrastructure
4142429Snectar//
5110010Smarkm// This file is distributed under the University of Illinois Open Source
6110010Smarkm// License. See LICENSE.TXT for details.
7110010Smarkm//
8110010Smarkm//===----------------------------------------------------------------------===//
9110010Smarkm//
10110010Smarkm// Implementation for LLVM symbolization library.
11110010Smarkm//
12110010Smarkm//===----------------------------------------------------------------------===//
13110010Smarkm
14110010Smarkm#include "LLVMSymbolize.h"
15110010Smarkm#include "llvm/ADT/STLExtras.h"
16110010Smarkm#include "llvm/Config/config.h"
17110010Smarkm#include "llvm/Object/MachO.h"
18110010Smarkm#include "llvm/Support/Casting.h"
19110010Smarkm#include "llvm/Support/Compression.h"
20215698Ssimon#include "llvm/Support/DataExtractor.h"
21215698Ssimon#include "llvm/Support/FileSystem.h"
22215698Ssimon#include "llvm/Support/MemoryBuffer.h"
23215698Ssimon#include "llvm/Support/Path.h"
24215698Ssimon
25110010Smarkm#include <sstream>
26110010Smarkm#include <stdlib.h>
27110010Smarkm
28110010Smarkmnamespace llvm {
29110010Smarkmnamespace symbolize {
30110010Smarkm
31110010Smarkmstatic bool error(error_code ec) {
32110010Smarkm  if (!ec)
33110010Smarkm    return false;
34110010Smarkm  errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n";
35110010Smarkm  return true;
36110010Smarkm}
37110010Smarkm
38110010Smarkmstatic uint32_t
39110010SmarkmgetDILineInfoSpecifierFlags(const LLVMSymbolizer::Options &Opts) {
40110010Smarkm  uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo |
41279265Sdelphij                   llvm::DILineInfoSpecifier::AbsoluteFilePath;
42279265Sdelphij  if (Opts.PrintFunctions)
43110010Smarkm    Flags |= llvm::DILineInfoSpecifier::FunctionName;
44110010Smarkm  return Flags;
45215698Ssimon}
46215698Ssimon
47215698Ssimonstatic void patchFunctionNameInDILineInfo(const std::string &NewFunctionName,
48215698Ssimon                                          DILineInfo &LineInfo) {
49142429Snectar  std::string FileName = LineInfo.getFileName();
50215698Ssimon  LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName),
51142429Snectar                        LineInfo.getLine(), LineInfo.getColumn());
52142429Snectar}
53279265Sdelphij
54279265SdelphijModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx)
55279265Sdelphij    : Module(Obj), DebugInfoContext(DICtx) {
56110010Smarkm  error_code ec;
57279265Sdelphij  for (symbol_iterator si = Module->begin_symbols(), se = Module->end_symbols();
58279265Sdelphij       si != se; si.increment(ec)) {
59279265Sdelphij    if (error(ec))
60279265Sdelphij      return;
61279265Sdelphij    SymbolRef::Type SymbolType;
62279265Sdelphij    if (error(si->getType(SymbolType)))
63215698Ssimon      continue;
64279265Sdelphij    if (SymbolType != SymbolRef::ST_Function &&
65279265Sdelphij        SymbolType != SymbolRef::ST_Data)
66279265Sdelphij      continue;
67279265Sdelphij    uint64_t SymbolAddress;
68279265Sdelphij    if (error(si->getAddress(SymbolAddress)) ||
69215698Ssimon        SymbolAddress == UnknownAddressOrSize)
70279265Sdelphij      continue;
71110010Smarkm    uint64_t SymbolSize;
72110010Smarkm    // Getting symbol size is linear for Mach-O files, so assume that symbol
73110010Smarkm    // occupies the memory range up to the following symbol.
74110010Smarkm    if (isa<MachOObjectFile>(Obj))
75110010Smarkm      SymbolSize = 0;
76110010Smarkm    else if (error(si->getSize(SymbolSize)) ||
77110010Smarkm             SymbolSize == UnknownAddressOrSize)
78110010Smarkm      continue;
79110010Smarkm    StringRef SymbolName;
80110010Smarkm    if (error(si->getName(SymbolName)))
81110010Smarkm      continue;
82110010Smarkm    // Mach-O symbol table names have leading underscore, skip it.
83110010Smarkm    if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
84110010Smarkm      SymbolName = SymbolName.drop_front();
85110010Smarkm    // FIXME: If a function has alias, there are two entries in symbol table
86110010Smarkm    // with same address size. Make sure we choose the correct one.
87110010Smarkm    SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
88110010Smarkm    SymbolDesc SD = { SymbolAddress, SymbolSize };
89110010Smarkm    M.insert(std::make_pair(SD, SymbolName));
90110010Smarkm  }
91110010Smarkm}
92110010Smarkm
93110010Smarkmbool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address,
94110010Smarkm                                        std::string &Name, uint64_t &Addr,
95110010Smarkm                                        uint64_t &Size) const {
96110010Smarkm  const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects;
97110010Smarkm  if (M.empty())
98110010Smarkm    return false;
99110010Smarkm  SymbolDesc SD = { Address, Address };
100110010Smarkm  SymbolMapTy::const_iterator it = M.upper_bound(SD);
101110010Smarkm  if (it == M.begin())
102110010Smarkm    return false;
103110010Smarkm  --it;
104110010Smarkm  if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address)
105110010Smarkm    return false;
106110010Smarkm  Name = it->second.str();
107110010Smarkm  Addr = it->first.Addr;
108110010Smarkm  Size = it->first.Size;
109110010Smarkm  return true;
110110010Smarkm}
111110010Smarkm
112110010SmarkmDILineInfo ModuleInfo::symbolizeCode(
113110010Smarkm    uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
114110010Smarkm  DILineInfo LineInfo;
115110010Smarkm  if (DebugInfoContext) {
116110010Smarkm    LineInfo = DebugInfoContext->getLineInfoForAddress(
117110010Smarkm        ModuleOffset, getDILineInfoSpecifierFlags(Opts));
118110010Smarkm  }
119110010Smarkm  // Override function name from symbol table if necessary.
120110010Smarkm  if (Opts.PrintFunctions && Opts.UseSymbolTable) {
121110010Smarkm    std::string FunctionName;
122110010Smarkm    uint64_t Start, Size;
123110010Smarkm    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
124110010Smarkm                               FunctionName, Start, Size)) {
125110010Smarkm      patchFunctionNameInDILineInfo(FunctionName, LineInfo);
126110010Smarkm    }
127110010Smarkm  }
128110010Smarkm  return LineInfo;
129110010Smarkm}
130110010Smarkm
131110010SmarkmDIInliningInfo ModuleInfo::symbolizeInlinedCode(
132110010Smarkm    uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const {
133142429Snectar  DIInliningInfo InlinedContext;
134110010Smarkm  if (DebugInfoContext) {
135110655Snectar    InlinedContext = DebugInfoContext->getInliningInfoForAddress(
136296465Sdelphij        ModuleOffset, getDILineInfoSpecifierFlags(Opts));
137215698Ssimon  }
138215698Ssimon  // Make sure there is at least one frame in context.
139215698Ssimon  if (InlinedContext.getNumberOfFrames() == 0) {
140215698Ssimon    InlinedContext.addFrame(DILineInfo());
141110010Smarkm  }
142142429Snectar  // Override the function name in lower frame with name from symbol table.
143110010Smarkm  if (Opts.PrintFunctions && Opts.UseSymbolTable) {
144110010Smarkm    DIInliningInfo PatchedInlinedContext;
145110010Smarkm    for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
146110010Smarkm      DILineInfo LineInfo = InlinedContext.getFrame(i);
147110010Smarkm      if (i == n - 1) {
148110010Smarkm        std::string FunctionName;
149110010Smarkm        uint64_t Start, Size;
150110010Smarkm        if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
151110010Smarkm                                   FunctionName, Start, Size)) {
152110010Smarkm          patchFunctionNameInDILineInfo(FunctionName, LineInfo);
153142429Snectar        }
154110010Smarkm      }
155111151Snectar      PatchedInlinedContext.addFrame(LineInfo);
156110010Smarkm    }
157110010Smarkm    InlinedContext = PatchedInlinedContext;
158110010Smarkm  }
159110010Smarkm  return InlinedContext;
160110010Smarkm}
161110010Smarkm
162142429Snectarbool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name,
163110010Smarkm                               uint64_t &Start, uint64_t &Size) const {
164279265Sdelphij  return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start,
165279265Sdelphij                                Size);
166110010Smarkm}
167110010Smarkm
// Placeholder printed in place of a function, file or data symbol name that
// could not be resolved.
const char LLVMSymbolizer::kBadString[] = "??";
169110010Smarkm
170110010Smarkmstd::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
171110010Smarkm                                          uint64_t ModuleOffset) {
172142429Snectar  ModuleInfo *Info = getOrCreateModuleInfo(ModuleName);
173110010Smarkm  if (Info == 0)
174110010Smarkm    return printDILineInfo(DILineInfo());
175110010Smarkm  if (Opts.PrintInlining) {
176110010Smarkm    DIInliningInfo InlinedContext =
177142429Snectar        Info->symbolizeInlinedCode(ModuleOffset, Opts);
178110010Smarkm    uint32_t FramesNum = InlinedContext.getNumberOfFrames();
179110010Smarkm    assert(FramesNum > 0);
180110010Smarkm    std::string Result;
181110010Smarkm    for (uint32_t i = 0; i < FramesNum; i++) {
182142429Snectar      DILineInfo LineInfo = InlinedContext.getFrame(i);
183110010Smarkm      Result += printDILineInfo(LineInfo);
184110010Smarkm    }
185142429Snectar    return Result;
186110010Smarkm  }
187110010Smarkm  DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts);
188142429Snectar  return printDILineInfo(LineInfo);
189110010Smarkm}
190110010Smarkm
191110010Smarkmstd::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
192142429Snectar                                          uint64_t ModuleOffset) {
193110010Smarkm  std::string Name = kBadString;
194110010Smarkm  uint64_t Start = 0;
195110010Smarkm  uint64_t Size = 0;
196142429Snectar  if (Opts.UseSymbolTable) {
197142429Snectar    if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) {
198110010Smarkm      if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle)
199142429Snectar        Name = DemangleGlobalName(Name);
200110010Smarkm    }
201215698Ssimon  }
202110010Smarkm  std::stringstream ss;
203142429Snectar  ss << Name << "\n" << Start << " " << Size << "\n";
204110010Smarkm  return ss.str();
205110010Smarkm}
206110010Smarkm
/// Discards everything the symbolizer has cached so far: module infos, parsed
/// binaries/objects, and the lookup maps that refer to them.
void LLVMSymbolizer::flush() {
  // ModuleInfo objects are created with `new` in getOrCreateModuleInfo.
  DeleteContainerSeconds(Modules);
  // Binaries and objects whose ownership was taken over when they were parsed.
  DeleteContainerPointers(ParsedBinariesAndObjects);
  // These maps only hold non-owning pointers to the objects released above.
  BinaryForPath.clear();
  ObjectFileForArch.clear();
}
213111151Snectar
214110010Smarkmstatic std::string getDarwinDWARFResourceForPath(const std::string &Path) {
215110010Smarkm  StringRef Basename = sys::path::filename(Path);
216110010Smarkm  const std::string &DSymDirectory = Path + ".dSYM";
217110010Smarkm  SmallString<16> ResourceName = StringRef(DSymDirectory);
218110010Smarkm  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
219215698Ssimon  sys::path::append(ResourceName, Basename);
220215698Ssimon  return ResourceName.str();
221110010Smarkm}
222142429Snectar
223110010Smarkmstatic bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
224110010Smarkm  OwningPtr<MemoryBuffer> MB;
225110010Smarkm  if (MemoryBuffer::getFileOrSTDIN(Path, MB))
226110010Smarkm    return false;
227142429Snectar  return !zlib::isAvailable() || CRCHash == zlib::crc32(MB->getBuffer());
228142429Snectar}
229
230static bool findDebugBinary(const std::string &OrigPath,
231                            const std::string &DebuglinkName, uint32_t CRCHash,
232                            std::string &Result) {
233  std::string OrigRealPath = OrigPath;
234#if defined(HAVE_REALPATH)
235  if (char *RP = realpath(OrigPath.c_str(), NULL)) {
236    OrigRealPath = RP;
237    free(RP);
238  }
239#endif
240  SmallString<16> OrigDir(OrigRealPath);
241  llvm::sys::path::remove_filename(OrigDir);
242  SmallString<16> DebugPath = OrigDir;
243  // Try /path/to/original_binary/debuglink_name
244  llvm::sys::path::append(DebugPath, DebuglinkName);
245  if (checkFileCRC(DebugPath, CRCHash)) {
246    Result = DebugPath.str();
247    return true;
248  }
249  // Try /path/to/original_binary/.debug/debuglink_name
250  DebugPath = OrigRealPath;
251  llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
252  if (checkFileCRC(DebugPath, CRCHash)) {
253    Result = DebugPath.str();
254    return true;
255  }
256  // Try /usr/lib/debug/path/to/original_binary/debuglink_name
257  DebugPath = "/usr/lib/debug";
258  llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
259                          DebuglinkName);
260  if (checkFileCRC(DebugPath, CRCHash)) {
261    Result = DebugPath.str();
262    return true;
263  }
264  return false;
265}
266
267static bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName,
268                                    uint32_t &CRCHash) {
269  const ObjectFile *Obj = dyn_cast<ObjectFile>(Bin);
270  if (!Obj)
271    return false;
272  error_code EC;
273  for (section_iterator I = Obj->begin_sections(), E = Obj->end_sections();
274       I != E; I.increment(EC)) {
275    StringRef Name;
276    I->getName(Name);
277    Name = Name.substr(Name.find_first_not_of("._"));
278    if (Name == "gnu_debuglink") {
279      StringRef Data;
280      I->getContents(Data);
281      DataExtractor DE(Data, Obj->isLittleEndian(), 0);
282      uint32_t Offset = 0;
283      if (const char *DebugNameStr = DE.getCStr(&Offset)) {
284        // 4-byte align the offset.
285        Offset = (Offset + 3) & ~0x3;
286        if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
287          DebugName = DebugNameStr;
288          CRCHash = DE.getU32(&Offset);
289          return true;
290        }
291      }
292      break;
293    }
294  }
295  return false;
296}
297
298LLVMSymbolizer::BinaryPair
299LLVMSymbolizer::getOrCreateBinary(const std::string &Path) {
300  BinaryMapTy::iterator I = BinaryForPath.find(Path);
301  if (I != BinaryForPath.end())
302    return I->second;
303  Binary *Bin = 0;
304  Binary *DbgBin = 0;
305  OwningPtr<Binary> ParsedBinary;
306  OwningPtr<Binary> ParsedDbgBinary;
307  if (!error(createBinary(Path, ParsedBinary))) {
308    // Check if it's a universal binary.
309    Bin = ParsedBinary.take();
310    ParsedBinariesAndObjects.push_back(Bin);
311    if (Bin->isMachO() || Bin->isMachOUniversalBinary()) {
312      // On Darwin we may find DWARF in separate object file in
313      // resource directory.
314      const std::string &ResourcePath =
315          getDarwinDWARFResourceForPath(Path);
316      bool ResourceFileExists = false;
317      if (!sys::fs::exists(ResourcePath, ResourceFileExists) &&
318          ResourceFileExists &&
319          !error(createBinary(ResourcePath, ParsedDbgBinary))) {
320        DbgBin = ParsedDbgBinary.take();
321        ParsedBinariesAndObjects.push_back(DbgBin);
322      }
323    }
324    // Try to locate the debug binary using .gnu_debuglink section.
325    if (DbgBin == 0) {
326      std::string DebuglinkName;
327      uint32_t CRCHash;
328      std::string DebugBinaryPath;
329      if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) &&
330          findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath) &&
331          !error(createBinary(DebugBinaryPath, ParsedDbgBinary))) {
332        DbgBin = ParsedDbgBinary.take();
333        ParsedBinariesAndObjects.push_back(DbgBin);
334      }
335    }
336  }
337  if (DbgBin == 0)
338    DbgBin = Bin;
339  BinaryPair Res = std::make_pair(Bin, DbgBin);
340  BinaryForPath[Path] = Res;
341  return Res;
342}
343
344ObjectFile *
345LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) {
346  if (Bin == 0)
347    return 0;
348  ObjectFile *Res = 0;
349  if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
350    ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find(
351        std::make_pair(UB, ArchName));
352    if (I != ObjectFileForArch.end())
353      return I->second;
354    OwningPtr<ObjectFile> ParsedObj;
355    if (!UB->getObjectForArch(Triple(ArchName).getArch(), ParsedObj)) {
356      Res = ParsedObj.take();
357      ParsedBinariesAndObjects.push_back(Res);
358    }
359    ObjectFileForArch[std::make_pair(UB, ArchName)] = Res;
360  } else if (Bin->isObject()) {
361    Res = cast<ObjectFile>(Bin);
362  }
363  return Res;
364}
365
366ModuleInfo *
367LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
368  ModuleMapTy::iterator I = Modules.find(ModuleName);
369  if (I != Modules.end())
370    return I->second;
371  std::string BinaryName = ModuleName;
372  std::string ArchName = Opts.DefaultArch;
373  size_t ColonPos = ModuleName.find_last_of(':');
374  // Verify that substring after colon form a valid arch name.
375  if (ColonPos != std::string::npos) {
376    std::string ArchStr = ModuleName.substr(ColonPos + 1);
377    if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
378      BinaryName = ModuleName.substr(0, ColonPos);
379      ArchName = ArchStr;
380    }
381  }
382  BinaryPair Binaries = getOrCreateBinary(BinaryName);
383  ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName);
384  ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName);
385
386  if (Obj == 0) {
387    // Failed to find valid object file.
388    Modules.insert(make_pair(ModuleName, (ModuleInfo *)0));
389    return 0;
390  }
391  DIContext *Context = DIContext::getDWARFContext(DbgObj);
392  assert(Context);
393  ModuleInfo *Info = new ModuleInfo(Obj, Context);
394  Modules.insert(make_pair(ModuleName, Info));
395  return Info;
396}
397
398std::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const {
399  // By default, DILineInfo contains "<invalid>" for function/filename it
400  // cannot fetch. We replace it to "??" to make our output closer to addr2line.
401  static const std::string kDILineInfoBadString = "<invalid>";
402  std::stringstream Result;
403  if (Opts.PrintFunctions) {
404    std::string FunctionName = LineInfo.getFunctionName();
405    if (FunctionName == kDILineInfoBadString)
406      FunctionName = kBadString;
407    else if (Opts.Demangle)
408      FunctionName = DemangleName(FunctionName);
409    Result << FunctionName << "\n";
410  }
411  std::string Filename = LineInfo.getFileName();
412  if (Filename == kDILineInfoBadString)
413    Filename = kBadString;
414  Result << Filename << ":" << LineInfo.getLine() << ":" << LineInfo.getColumn()
415         << "\n";
416  return Result.str();
417}
418
#if !defined(_MSC_VER)
// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
// It is declared here by hand rather than pulled in from a header; it is used
// by LLVMSymbolizer::DemangleName below.
extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
                                size_t *length, int *status);
#endif
424
425std::string LLVMSymbolizer::DemangleName(const std::string &Name) {
426#if !defined(_MSC_VER)
427  int status = 0;
428  char *DemangledName = __cxa_demangle(Name.c_str(), 0, 0, &status);
429  if (status != 0)
430    return Name;
431  std::string Result = DemangledName;
432  free(DemangledName);
433  return Result;
434#else
435  return Name;
436#endif
437}
438
439std::string LLVMSymbolizer::DemangleGlobalName(const std::string &Name) {
440  // We can spoil names of globals with C linkage, so use an heuristic
441  // approach to check if the name should be demangled.
442  return (Name.substr(0, 2) == "_Z") ? DemangleName(Name) : Name;
443}
444
445} // namespace symbolize
446} // namespace llvm
447