1249259Sdim//===-- LLVMSymbolize.cpp -------------------------------------------------===// 2249259Sdim// 3249259Sdim// The LLVM Compiler Infrastructure 4249259Sdim// 5249259Sdim// This file is distributed under the University of Illinois Open Source 6249259Sdim// License. See LICENSE.TXT for details. 7249259Sdim// 8249259Sdim//===----------------------------------------------------------------------===// 9249259Sdim// 10249259Sdim// Implementation for LLVM symbolization library. 11249259Sdim// 12249259Sdim//===----------------------------------------------------------------------===// 13249259Sdim 14249259Sdim#include "LLVMSymbolize.h" 15249259Sdim#include "llvm/ADT/STLExtras.h" 16263508Sdim#include "llvm/Config/config.h" 17249259Sdim#include "llvm/Object/MachO.h" 18249259Sdim#include "llvm/Support/Casting.h" 19263508Sdim#include "llvm/Support/Compression.h" 20263508Sdim#include "llvm/Support/DataExtractor.h" 21263508Sdim#include "llvm/Support/FileSystem.h" 22263508Sdim#include "llvm/Support/MemoryBuffer.h" 23249259Sdim#include "llvm/Support/Path.h" 24249259Sdim 25249259Sdim#include <sstream> 26263508Sdim#include <stdlib.h> 27249259Sdim 28249259Sdimnamespace llvm { 29249259Sdimnamespace symbolize { 30249259Sdim 31249259Sdimstatic bool error(error_code ec) { 32249259Sdim if (!ec) 33249259Sdim return false; 34249259Sdim errs() << "LLVMSymbolizer: error reading file: " << ec.message() << ".\n"; 35249259Sdim return true; 36249259Sdim} 37249259Sdim 38249259Sdimstatic uint32_t 39249259SdimgetDILineInfoSpecifierFlags(const LLVMSymbolizer::Options &Opts) { 40249259Sdim uint32_t Flags = llvm::DILineInfoSpecifier::FileLineInfo | 41249259Sdim llvm::DILineInfoSpecifier::AbsoluteFilePath; 42249259Sdim if (Opts.PrintFunctions) 43249259Sdim Flags |= llvm::DILineInfoSpecifier::FunctionName; 44249259Sdim return Flags; 45249259Sdim} 46249259Sdim 47249259Sdimstatic void patchFunctionNameInDILineInfo(const std::string &NewFunctionName, 48249259Sdim DILineInfo &LineInfo) { 49249259Sdim std::string FileName = LineInfo.getFileName(); 50249259Sdim LineInfo = DILineInfo(StringRef(FileName), StringRef(NewFunctionName), 51249259Sdim LineInfo.getLine(), LineInfo.getColumn()); 52249259Sdim} 53249259Sdim 54249259SdimModuleInfo::ModuleInfo(ObjectFile *Obj, DIContext *DICtx) 55249259Sdim : Module(Obj), DebugInfoContext(DICtx) { 56249259Sdim error_code ec; 57249259Sdim for (symbol_iterator si = Module->begin_symbols(), se = Module->end_symbols(); 58249259Sdim si != se; si.increment(ec)) { 59249259Sdim if (error(ec)) 60249259Sdim return; 61249259Sdim SymbolRef::Type SymbolType; 62249259Sdim if (error(si->getType(SymbolType))) 63249259Sdim continue; 64249259Sdim if (SymbolType != SymbolRef::ST_Function && 65249259Sdim SymbolType != SymbolRef::ST_Data) 66249259Sdim continue; 67249259Sdim uint64_t SymbolAddress; 68249259Sdim if (error(si->getAddress(SymbolAddress)) || 69249259Sdim SymbolAddress == UnknownAddressOrSize) 70249259Sdim continue; 71249259Sdim uint64_t SymbolSize; 72263508Sdim // Getting symbol size is linear for Mach-O files, so assume that symbol 73263508Sdim // occupies the memory range up to the following symbol. 74263508Sdim if (isa<MachOObjectFile>(Obj)) 75263508Sdim SymbolSize = 0; 76263508Sdim else if (error(si->getSize(SymbolSize)) || 77263508Sdim SymbolSize == UnknownAddressOrSize) 78249259Sdim continue; 79249259Sdim StringRef SymbolName; 80249259Sdim if (error(si->getName(SymbolName))) 81249259Sdim continue; 82263508Sdim // Mach-O symbol table names have leading underscore, skip it. 83263508Sdim if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_') 84263508Sdim SymbolName = SymbolName.drop_front(); 85249259Sdim // FIXME: If a function has alias, there are two entries in symbol table 86249259Sdim // with same address size. Make sure we choose the correct one. 87249259Sdim SymbolMapTy &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; 88263508Sdim SymbolDesc SD = { SymbolAddress, SymbolSize }; 89249259Sdim M.insert(std::make_pair(SD, SymbolName)); 90249259Sdim } 91249259Sdim} 92249259Sdim 93249259Sdimbool ModuleInfo::getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, 94249259Sdim std::string &Name, uint64_t &Addr, 95249259Sdim uint64_t &Size) const { 96249259Sdim const SymbolMapTy &M = Type == SymbolRef::ST_Function ? Functions : Objects; 97263508Sdim if (M.empty()) 98249259Sdim return false; 99263508Sdim SymbolDesc SD = { Address, Address }; 100263508Sdim SymbolMapTy::const_iterator it = M.upper_bound(SD); 101263508Sdim if (it == M.begin()) 102249259Sdim return false; 103263508Sdim --it; 104263508Sdim if (it->first.Size != 0 && it->first.Addr + it->first.Size <= Address) 105263508Sdim return false; 106249259Sdim Name = it->second.str(); 107249259Sdim Addr = it->first.Addr; 108263508Sdim Size = it->first.Size; 109249259Sdim return true; 110249259Sdim} 111249259Sdim 112249259SdimDILineInfo ModuleInfo::symbolizeCode( 113249259Sdim uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 114249259Sdim DILineInfo LineInfo; 115249259Sdim if (DebugInfoContext) { 116249259Sdim LineInfo = DebugInfoContext->getLineInfoForAddress( 117249259Sdim ModuleOffset, getDILineInfoSpecifierFlags(Opts)); 118249259Sdim } 119249259Sdim // Override function name from symbol table if necessary. 120249259Sdim if (Opts.PrintFunctions && Opts.UseSymbolTable) { 121249259Sdim std::string FunctionName; 122249259Sdim uint64_t Start, Size; 123249259Sdim if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 124249259Sdim FunctionName, Start, Size)) { 125249259Sdim patchFunctionNameInDILineInfo(FunctionName, LineInfo); 126249259Sdim } 127249259Sdim } 128249259Sdim return LineInfo; 129249259Sdim} 130249259Sdim 131249259SdimDIInliningInfo ModuleInfo::symbolizeInlinedCode( 132249259Sdim uint64_t ModuleOffset, const LLVMSymbolizer::Options &Opts) const { 133249259Sdim DIInliningInfo InlinedContext; 134249259Sdim if (DebugInfoContext) { 135249259Sdim InlinedContext = DebugInfoContext->getInliningInfoForAddress( 136249259Sdim ModuleOffset, getDILineInfoSpecifierFlags(Opts)); 137249259Sdim } 138249259Sdim // Make sure there is at least one frame in context. 139249259Sdim if (InlinedContext.getNumberOfFrames() == 0) { 140249259Sdim InlinedContext.addFrame(DILineInfo()); 141249259Sdim } 142249259Sdim // Override the function name in lower frame with name from symbol table. 143249259Sdim if (Opts.PrintFunctions && Opts.UseSymbolTable) { 144249259Sdim DIInliningInfo PatchedInlinedContext; 145249259Sdim for (uint32_t i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 146249259Sdim DILineInfo LineInfo = InlinedContext.getFrame(i); 147249259Sdim if (i == n - 1) { 148249259Sdim std::string FunctionName; 149249259Sdim uint64_t Start, Size; 150249259Sdim if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset, 151249259Sdim FunctionName, Start, Size)) { 152249259Sdim patchFunctionNameInDILineInfo(FunctionName, LineInfo); 153249259Sdim } 154249259Sdim } 155249259Sdim PatchedInlinedContext.addFrame(LineInfo); 156249259Sdim } 157249259Sdim InlinedContext = PatchedInlinedContext; 158249259Sdim } 159249259Sdim return InlinedContext; 160249259Sdim} 161249259Sdim 162249259Sdimbool ModuleInfo::symbolizeData(uint64_t ModuleOffset, std::string &Name, 163249259Sdim uint64_t &Start, uint64_t &Size) const { 164249259Sdim return getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Name, Start, 165249259Sdim Size); 166249259Sdim} 167249259Sdim 168249259Sdimconst char LLVMSymbolizer::kBadString[] = "??"; 169249259Sdim 170249259Sdimstd::string LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 171249259Sdim uint64_t ModuleOffset) { 172249259Sdim ModuleInfo *Info = getOrCreateModuleInfo(ModuleName); 173249259Sdim if (Info == 0) 174249259Sdim return printDILineInfo(DILineInfo()); 175249259Sdim if (Opts.PrintInlining) { 176249259Sdim DIInliningInfo InlinedContext = 177249259Sdim Info->symbolizeInlinedCode(ModuleOffset, Opts); 178249259Sdim uint32_t FramesNum = InlinedContext.getNumberOfFrames(); 179249259Sdim assert(FramesNum > 0); 180249259Sdim std::string Result; 181249259Sdim for (uint32_t i = 0; i < FramesNum; i++) { 182249259Sdim DILineInfo LineInfo = InlinedContext.getFrame(i); 183249259Sdim Result += printDILineInfo(LineInfo); 184249259Sdim } 185249259Sdim return Result; 186249259Sdim } 187249259Sdim DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts); 188249259Sdim return printDILineInfo(LineInfo); 189249259Sdim} 190249259Sdim 191249259Sdimstd::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 192249259Sdim uint64_t ModuleOffset) { 193249259Sdim std::string Name = kBadString; 194249259Sdim uint64_t Start = 0; 195249259Sdim uint64_t Size = 0; 196249259Sdim if (Opts.UseSymbolTable) { 197249259Sdim if (ModuleInfo *Info = getOrCreateModuleInfo(ModuleName)) { 198263508Sdim if (Info->symbolizeData(ModuleOffset, Name, Start, Size) && Opts.Demangle) 199263508Sdim Name = DemangleGlobalName(Name); 200249259Sdim } 201249259Sdim } 202249259Sdim std::stringstream ss; 203249259Sdim ss << Name << "\n" << Start << " " << Size << "\n"; 204249259Sdim return ss.str(); 205249259Sdim} 206249259Sdim 207249259Sdimvoid LLVMSymbolizer::flush() { 208249259Sdim DeleteContainerSeconds(Modules); 209263508Sdim DeleteContainerPointers(ParsedBinariesAndObjects); 210263508Sdim BinaryForPath.clear(); 211263508Sdim ObjectFileForArch.clear(); 212249259Sdim} 213249259Sdim 214263508Sdimstatic std::string getDarwinDWARFResourceForPath(const std::string &Path) { 215249259Sdim StringRef Basename = sys::path::filename(Path); 216249259Sdim const std::string &DSymDirectory = Path + ".dSYM"; 217249259Sdim SmallString<16> ResourceName = StringRef(DSymDirectory); 218249259Sdim sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 219249259Sdim sys::path::append(ResourceName, Basename); 220249259Sdim return ResourceName.str(); 221249259Sdim} 222249259Sdim 223263508Sdimstatic bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 224263508Sdim OwningPtr<MemoryBuffer> MB; 225263508Sdim if (MemoryBuffer::getFileOrSTDIN(Path, MB)) 226263508Sdim return false; 227263508Sdim return !zlib::isAvailable() || CRCHash == zlib::crc32(MB->getBuffer()); 228263508Sdim} 229263508Sdim 230263508Sdimstatic bool findDebugBinary(const std::string &OrigPath, 231263508Sdim const std::string &DebuglinkName, uint32_t CRCHash, 232263508Sdim std::string &Result) { 233263508Sdim std::string OrigRealPath = OrigPath; 234263508Sdim#if defined(HAVE_REALPATH) 235263508Sdim if (char *RP = realpath(OrigPath.c_str(), NULL)) { 236263508Sdim OrigRealPath = RP; 237263508Sdim free(RP); 238263508Sdim } 239263508Sdim#endif 240263508Sdim SmallString<16> OrigDir(OrigRealPath); 241263508Sdim llvm::sys::path::remove_filename(OrigDir); 242263508Sdim SmallString<16> DebugPath = OrigDir; 243263508Sdim // Try /path/to/original_binary/debuglink_name 244263508Sdim llvm::sys::path::append(DebugPath, DebuglinkName); 245263508Sdim if (checkFileCRC(DebugPath, CRCHash)) { 246263508Sdim Result = DebugPath.str(); 247263508Sdim return true; 248263508Sdim } 249263508Sdim // Try /path/to/original_binary/.debug/debuglink_name 250263508Sdim DebugPath = OrigRealPath; 251263508Sdim llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 252263508Sdim if (checkFileCRC(DebugPath, CRCHash)) { 253263508Sdim Result = DebugPath.str(); 254263508Sdim return true; 255263508Sdim } 256263508Sdim // Try /usr/lib/debug/path/to/original_binary/debuglink_name 257263508Sdim DebugPath = "/usr/lib/debug"; 258263508Sdim llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 259263508Sdim DebuglinkName); 260263508Sdim if (checkFileCRC(DebugPath, CRCHash)) { 261263508Sdim Result = DebugPath.str(); 262263508Sdim return true; 263263508Sdim } 264263508Sdim return false; 265263508Sdim} 266263508Sdim 267263508Sdimstatic bool getGNUDebuglinkContents(const Binary *Bin, std::string &DebugName, 268263508Sdim uint32_t &CRCHash) { 269263508Sdim const ObjectFile *Obj = dyn_cast<ObjectFile>(Bin); 270263508Sdim if (!Obj) 271263508Sdim return false; 272263508Sdim error_code EC; 273263508Sdim for (section_iterator I = Obj->begin_sections(), E = Obj->end_sections(); 274263508Sdim I != E; I.increment(EC)) { 275263508Sdim StringRef Name; 276263508Sdim I->getName(Name); 277263508Sdim Name = Name.substr(Name.find_first_not_of("._")); 278263508Sdim if (Name == "gnu_debuglink") { 279263508Sdim StringRef Data; 280263508Sdim I->getContents(Data); 281263508Sdim DataExtractor DE(Data, Obj->isLittleEndian(), 0); 282263508Sdim uint32_t Offset = 0; 283263508Sdim if (const char *DebugNameStr = DE.getCStr(&Offset)) { 284263508Sdim // 4-byte align the offset. 285263508Sdim Offset = (Offset + 3) & ~0x3; 286263508Sdim if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 287263508Sdim DebugName = DebugNameStr; 288263508Sdim CRCHash = DE.getU32(&Offset); 289263508Sdim return true; 290263508Sdim } 291263508Sdim } 292263508Sdim break; 293263508Sdim } 294263508Sdim } 295263508Sdim return false; 296263508Sdim} 297263508Sdim 298263508SdimLLVMSymbolizer::BinaryPair 299263508SdimLLVMSymbolizer::getOrCreateBinary(const std::string &Path) { 300263508Sdim BinaryMapTy::iterator I = BinaryForPath.find(Path); 301263508Sdim if (I != BinaryForPath.end()) 302263508Sdim return I->second; 303263508Sdim Binary *Bin = 0; 304263508Sdim Binary *DbgBin = 0; 305263508Sdim OwningPtr<Binary> ParsedBinary; 306263508Sdim OwningPtr<Binary> ParsedDbgBinary; 307263508Sdim if (!error(createBinary(Path, ParsedBinary))) { 308263508Sdim // Check if it's a universal binary. 309263508Sdim Bin = ParsedBinary.take(); 310263508Sdim ParsedBinariesAndObjects.push_back(Bin); 311263508Sdim if (Bin->isMachO() || Bin->isMachOUniversalBinary()) { 312263508Sdim // On Darwin we may find DWARF in separate object file in 313263508Sdim // resource directory. 314263508Sdim const std::string &ResourcePath = 315263508Sdim getDarwinDWARFResourceForPath(Path); 316263508Sdim bool ResourceFileExists = false; 317263508Sdim if (!sys::fs::exists(ResourcePath, ResourceFileExists) && 318263508Sdim ResourceFileExists && 319263508Sdim !error(createBinary(ResourcePath, ParsedDbgBinary))) { 320263508Sdim DbgBin = ParsedDbgBinary.take(); 321263508Sdim ParsedBinariesAndObjects.push_back(DbgBin); 322263508Sdim } 323263508Sdim } 324263508Sdim // Try to locate the debug binary using .gnu_debuglink section. 325263508Sdim if (DbgBin == 0) { 326263508Sdim std::string DebuglinkName; 327263508Sdim uint32_t CRCHash; 328263508Sdim std::string DebugBinaryPath; 329263508Sdim if (getGNUDebuglinkContents(Bin, DebuglinkName, CRCHash) && 330263508Sdim findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath) && 331263508Sdim !error(createBinary(DebugBinaryPath, ParsedDbgBinary))) { 332263508Sdim DbgBin = ParsedDbgBinary.take(); 333263508Sdim ParsedBinariesAndObjects.push_back(DbgBin); 334263508Sdim } 335263508Sdim } 336263508Sdim } 337263508Sdim if (DbgBin == 0) 338263508Sdim DbgBin = Bin; 339263508Sdim BinaryPair Res = std::make_pair(Bin, DbgBin); 340263508Sdim BinaryForPath[Path] = Res; 341263508Sdim return Res; 342263508Sdim} 343263508Sdim 344263508SdimObjectFile * 345263508SdimLLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) { 346263508Sdim if (Bin == 0) 347263508Sdim return 0; 348263508Sdim ObjectFile *Res = 0; 349263508Sdim if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) { 350263508Sdim ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find( 351263508Sdim std::make_pair(UB, ArchName)); 352263508Sdim if (I != ObjectFileForArch.end()) 353263508Sdim return I->second; 354263508Sdim OwningPtr<ObjectFile> ParsedObj; 355263508Sdim if (!UB->getObjectForArch(Triple(ArchName).getArch(), ParsedObj)) { 356263508Sdim Res = ParsedObj.take(); 357263508Sdim ParsedBinariesAndObjects.push_back(Res); 358263508Sdim } 359263508Sdim ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; 360263508Sdim } else if (Bin->isObject()) { 361263508Sdim Res = cast<ObjectFile>(Bin); 362263508Sdim } 363263508Sdim return Res; 364263508Sdim} 365263508Sdim 366249259SdimModuleInfo * 367249259SdimLLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 368249259Sdim ModuleMapTy::iterator I = Modules.find(ModuleName); 369249259Sdim if (I != Modules.end()) 370249259Sdim return I->second; 371263508Sdim std::string BinaryName = ModuleName; 372263508Sdim std::string ArchName = Opts.DefaultArch; 373263508Sdim size_t ColonPos = ModuleName.find_last_of(':'); 374263508Sdim // Verify that substring after colon form a valid arch name. 375263508Sdim if (ColonPos != std::string::npos) { 376263508Sdim std::string ArchStr = ModuleName.substr(ColonPos + 1); 377263508Sdim if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 378263508Sdim BinaryName = ModuleName.substr(0, ColonPos); 379263508Sdim ArchName = ArchStr; 380263508Sdim } 381263508Sdim } 382263508Sdim BinaryPair Binaries = getOrCreateBinary(BinaryName); 383263508Sdim ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName); 384263508Sdim ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName); 385249259Sdim 386249259Sdim if (Obj == 0) { 387263508Sdim // Failed to find valid object file. 388249259Sdim Modules.insert(make_pair(ModuleName, (ModuleInfo *)0)); 389249259Sdim return 0; 390249259Sdim } 391263508Sdim DIContext *Context = DIContext::getDWARFContext(DbgObj); 392263508Sdim assert(Context); 393249259Sdim ModuleInfo *Info = new ModuleInfo(Obj, Context); 394249259Sdim Modules.insert(make_pair(ModuleName, Info)); 395249259Sdim return Info; 396249259Sdim} 397249259Sdim 398249259Sdimstd::string LLVMSymbolizer::printDILineInfo(DILineInfo LineInfo) const { 399249259Sdim // By default, DILineInfo contains "<invalid>" for function/filename it 400249259Sdim // cannot fetch. We replace it to "??" to make our output closer to addr2line. 401249259Sdim static const std::string kDILineInfoBadString = "<invalid>"; 402249259Sdim std::stringstream Result; 403249259Sdim if (Opts.PrintFunctions) { 404249259Sdim std::string FunctionName = LineInfo.getFunctionName(); 405249259Sdim if (FunctionName == kDILineInfoBadString) 406249259Sdim FunctionName = kBadString; 407263508Sdim else if (Opts.Demangle) 408263508Sdim FunctionName = DemangleName(FunctionName); 409249259Sdim Result << FunctionName << "\n"; 410249259Sdim } 411249259Sdim std::string Filename = LineInfo.getFileName(); 412249259Sdim if (Filename == kDILineInfoBadString) 413249259Sdim Filename = kBadString; 414249259Sdim Result << Filename << ":" << LineInfo.getLine() << ":" << LineInfo.getColumn() 415249259Sdim << "\n"; 416249259Sdim return Result.str(); 417249259Sdim} 418249259Sdim 419249259Sdim#if !defined(_MSC_VER) 420249259Sdim// Assume that __cxa_demangle is provided by libcxxabi (except for Windows). 421249259Sdimextern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, 422249259Sdim size_t *length, int *status); 423249259Sdim#endif 424249259Sdim 425263508Sdimstd::string LLVMSymbolizer::DemangleName(const std::string &Name) { 426249259Sdim#if !defined(_MSC_VER) 427249259Sdim int status = 0; 428249259Sdim char *DemangledName = __cxa_demangle(Name.c_str(), 0, 0, &status); 429249259Sdim if (status != 0) 430263508Sdim return Name; 431263508Sdim std::string Result = DemangledName; 432249259Sdim free(DemangledName); 433263508Sdim return Result; 434263508Sdim#else 435263508Sdim return Name; 436249259Sdim#endif 437249259Sdim} 438249259Sdim 439263508Sdimstd::string LLVMSymbolizer::DemangleGlobalName(const std::string &Name) { 440263508Sdim // We can spoil names of globals with C linkage, so use an heuristic 441263508Sdim // approach to check if the name should be demangled. 442263508Sdim return (Name.substr(0, 2) == "_Z") ? DemangleName(Name) : Name; 443263508Sdim} 444263508Sdim 445249259Sdim} // namespace symbolize 446249259Sdim} // namespace llvm 447