// Symbolize.cpp revision 321369
1//===-- LLVMSymbolize.cpp -------------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implementation for LLVM symbolization library. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/DebugInfo/Symbolize/Symbolize.h" 15 16#include "SymbolizableObjectFile.h" 17 18#include "llvm/ADT/STLExtras.h" 19#include "llvm/BinaryFormat/COFF.h" 20#include "llvm/Config/config.h" 21#include "llvm/DebugInfo/DWARF/DWARFContext.h" 22#include "llvm/DebugInfo/PDB/PDB.h" 23#include "llvm/DebugInfo/PDB/PDBContext.h" 24#include "llvm/Object/COFF.h" 25#include "llvm/Object/ELFObjectFile.h" 26#include "llvm/Object/MachO.h" 27#include "llvm/Object/MachOUniversal.h" 28#include "llvm/Support/Casting.h" 29#include "llvm/Support/Compression.h" 30#include "llvm/Support/DataExtractor.h" 31#include "llvm/Support/Errc.h" 32#include "llvm/Support/FileSystem.h" 33#include "llvm/Support/MemoryBuffer.h" 34#include "llvm/Support/Path.h" 35#include <algorithm> 36#include <cassert> 37#include <cstdlib> 38#include <cstring> 39 40#if defined(_MSC_VER) 41#include <Windows.h> 42 43// This must be included after windows.h. 44#include <DbgHelp.h> 45#pragma comment(lib, "dbghelp.lib") 46 47// Windows.h conflicts with our COFF header definitions. 48#ifdef IMAGE_FILE_MACHINE_I386 49#undef IMAGE_FILE_MACHINE_I386 50#endif 51#endif 52 53namespace llvm { 54namespace symbolize { 55 56Expected<DILineInfo> LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 57 uint64_t ModuleOffset) { 58 SymbolizableModule *Info; 59 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 60 Info = InfoOrErr.get(); 61 else 62 return InfoOrErr.takeError(); 63 64 // A null module means an error has already been reported. 
Return an empty 65 // result. 66 if (!Info) 67 return DILineInfo(); 68 69 // If the user is giving us relative addresses, add the preferred base of the 70 // object to the offset before we do the query. It's what DIContext expects. 71 if (Opts.RelativeAddresses) 72 ModuleOffset += Info->getModulePreferredBase(); 73 74 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions, 75 Opts.UseSymbolTable); 76 if (Opts.Demangle) 77 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 78 return LineInfo; 79} 80 81Expected<DIInliningInfo> 82LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, 83 uint64_t ModuleOffset) { 84 SymbolizableModule *Info; 85 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 86 Info = InfoOrErr.get(); 87 else 88 return InfoOrErr.takeError(); 89 90 // A null module means an error has already been reported. Return an empty 91 // result. 92 if (!Info) 93 return DIInliningInfo(); 94 95 // If the user is giving us relative addresses, add the preferred base of the 96 // object to the offset before we do the query. It's what DIContext expects. 97 if (Opts.RelativeAddresses) 98 ModuleOffset += Info->getModulePreferredBase(); 99 100 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 101 ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable); 102 if (Opts.Demangle) { 103 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 104 auto *Frame = InlinedContext.getMutableFrame(i); 105 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 106 } 107 } 108 return InlinedContext; 109} 110 111Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 112 uint64_t ModuleOffset) { 113 SymbolizableModule *Info; 114 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 115 Info = InfoOrErr.get(); 116 else 117 return InfoOrErr.takeError(); 118 119 // A null module means an error has already been reported. Return an empty 120 // result. 
121 if (!Info) 122 return DIGlobal(); 123 124 // If the user is giving us relative addresses, add the preferred base of 125 // the object to the offset before we do the query. It's what DIContext 126 // expects. 127 if (Opts.RelativeAddresses) 128 ModuleOffset += Info->getModulePreferredBase(); 129 130 DIGlobal Global = Info->symbolizeData(ModuleOffset); 131 if (Opts.Demangle) 132 Global.Name = DemangleName(Global.Name, Info); 133 return Global; 134} 135 136void LLVMSymbolizer::flush() { 137 ObjectForUBPathAndArch.clear(); 138 BinaryForPath.clear(); 139 ObjectPairForPathArch.clear(); 140 Modules.clear(); 141} 142 143namespace { 144 145// For Path="/path/to/foo" and Basename="foo" assume that debug info is in 146// /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 147// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 148// /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 149std::string getDarwinDWARFResourceForPath( 150 const std::string &Path, const std::string &Basename) { 151 SmallString<16> ResourceName = StringRef(Path); 152 if (sys::path::extension(Path) != ".dSYM") { 153 ResourceName += ".dSYM"; 154 } 155 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 156 sys::path::append(ResourceName, Basename); 157 return ResourceName.str(); 158} 159 160bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 161 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 162 MemoryBuffer::getFileOrSTDIN(Path); 163 if (!MB) 164 return false; 165 return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); 166} 167 168bool findDebugBinary(const std::string &OrigPath, 169 const std::string &DebuglinkName, uint32_t CRCHash, 170 std::string &Result) { 171 std::string OrigRealPath = OrigPath; 172#if defined(HAVE_REALPATH) 173 if (char *RP = realpath(OrigPath.c_str(), nullptr)) { 174 OrigRealPath = RP; 175 free(RP); 176 } 177#endif 178 SmallString<16> OrigDir(OrigRealPath); 179 llvm::sys::path::remove_filename(OrigDir); 180 
SmallString<16> DebugPath = OrigDir; 181 // Try /path/to/original_binary/debuglink_name 182 llvm::sys::path::append(DebugPath, DebuglinkName); 183 if (checkFileCRC(DebugPath, CRCHash)) { 184 Result = DebugPath.str(); 185 return true; 186 } 187 // Try /path/to/original_binary/.debug/debuglink_name 188 DebugPath = OrigRealPath; 189 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 190 if (checkFileCRC(DebugPath, CRCHash)) { 191 Result = DebugPath.str(); 192 return true; 193 } 194 // Try /usr/lib/debug/path/to/original_binary/debuglink_name 195 DebugPath = "/usr/lib/debug"; 196 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 197 DebuglinkName); 198 if (checkFileCRC(DebugPath, CRCHash)) { 199 Result = DebugPath.str(); 200 return true; 201 } 202 return false; 203} 204 205bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 206 uint32_t &CRCHash) { 207 if (!Obj) 208 return false; 209 for (const SectionRef &Section : Obj->sections()) { 210 StringRef Name; 211 Section.getName(Name); 212 Name = Name.substr(Name.find_first_not_of("._")); 213 if (Name == "gnu_debuglink") { 214 StringRef Data; 215 Section.getContents(Data); 216 DataExtractor DE(Data, Obj->isLittleEndian(), 0); 217 uint32_t Offset = 0; 218 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 219 // 4-byte align the offset. 
220 Offset = (Offset + 3) & ~0x3; 221 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 222 DebugName = DebugNameStr; 223 CRCHash = DE.getU32(&Offset); 224 return true; 225 } 226 } 227 break; 228 } 229 } 230 return false; 231} 232 233bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 234 const MachOObjectFile *Obj) { 235 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 236 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 237 if (dbg_uuid.empty() || bin_uuid.empty()) 238 return false; 239 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 240} 241 242} // end anonymous namespace 243 244ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 245 const MachOObjectFile *MachExeObj, const std::string &ArchName) { 246 // On Darwin we may find DWARF in separate object file in 247 // resource directory. 248 std::vector<std::string> DsymPaths; 249 StringRef Filename = sys::path::filename(ExePath); 250 DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); 251 for (const auto &Path : Opts.DsymHints) { 252 DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); 253 } 254 for (const auto &Path : DsymPaths) { 255 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 256 if (!DbgObjOrErr) { 257 // Ignore errors, the file might not exist. 
258 consumeError(DbgObjOrErr.takeError()); 259 continue; 260 } 261 ObjectFile *DbgObj = DbgObjOrErr.get(); 262 if (!DbgObj) 263 continue; 264 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 265 if (!MachDbgObj) 266 continue; 267 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 268 return DbgObj; 269 } 270 return nullptr; 271} 272 273ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 274 const ObjectFile *Obj, 275 const std::string &ArchName) { 276 std::string DebuglinkName; 277 uint32_t CRCHash; 278 std::string DebugBinaryPath; 279 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 280 return nullptr; 281 if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) 282 return nullptr; 283 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 284 if (!DbgObjOrErr) { 285 // Ignore errors, the file might not exist. 286 consumeError(DbgObjOrErr.takeError()); 287 return nullptr; 288 } 289 return DbgObjOrErr.get(); 290} 291 292Expected<LLVMSymbolizer::ObjectPair> 293LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 294 const std::string &ArchName) { 295 const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 296 if (I != ObjectPairForPathArch.end()) { 297 return I->second; 298 } 299 300 auto ObjOrErr = getOrCreateObject(Path, ArchName); 301 if (!ObjOrErr) { 302 ObjectPairForPathArch.insert(std::make_pair(std::make_pair(Path, ArchName), 303 ObjectPair(nullptr, nullptr))); 304 return ObjOrErr.takeError(); 305 } 306 307 ObjectFile *Obj = ObjOrErr.get(); 308 assert(Obj != nullptr); 309 ObjectFile *DbgObj = nullptr; 310 311 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 312 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 313 if (!DbgObj) 314 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 315 if (!DbgObj) 316 DbgObj = Obj; 317 ObjectPair Res = std::make_pair(Obj, DbgObj); 318 ObjectPairForPathArch.insert( 319 std::make_pair(std::make_pair(Path, 
ArchName), Res)); 320 return Res; 321} 322 323Expected<ObjectFile *> 324LLVMSymbolizer::getOrCreateObject(const std::string &Path, 325 const std::string &ArchName) { 326 const auto &I = BinaryForPath.find(Path); 327 Binary *Bin = nullptr; 328 if (I == BinaryForPath.end()) { 329 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 330 if (!BinOrErr) { 331 BinaryForPath.insert(std::make_pair(Path, OwningBinary<Binary>())); 332 return BinOrErr.takeError(); 333 } 334 Bin = BinOrErr->getBinary(); 335 BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get()))); 336 } else { 337 Bin = I->second.getBinary(); 338 } 339 340 if (!Bin) 341 return static_cast<ObjectFile *>(nullptr); 342 343 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 344 const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 345 if (I != ObjectForUBPathAndArch.end()) { 346 return I->second.get(); 347 } 348 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 349 UB->getObjectForArch(ArchName); 350 if (!ObjOrErr) { 351 ObjectForUBPathAndArch.insert(std::make_pair( 352 std::make_pair(Path, ArchName), std::unique_ptr<ObjectFile>())); 353 return ObjOrErr.takeError(); 354 } 355 ObjectFile *Res = ObjOrErr->get(); 356 ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName), 357 std::move(ObjOrErr.get()))); 358 return Res; 359 } 360 if (Bin->isObject()) { 361 return cast<ObjectFile>(Bin); 362 } 363 return errorCodeToError(object_error::arch_not_found); 364} 365 366Expected<SymbolizableModule *> 367LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { 368 const auto &I = Modules.find(ModuleName); 369 if (I != Modules.end()) { 370 return I->second.get(); 371 } 372 std::string BinaryName = ModuleName; 373 std::string ArchName = Opts.DefaultArch; 374 size_t ColonPos = ModuleName.find_last_of(':'); 375 // Verify that substring after colon form a valid arch name. 
376 if (ColonPos != std::string::npos) { 377 std::string ArchStr = ModuleName.substr(ColonPos + 1); 378 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 379 BinaryName = ModuleName.substr(0, ColonPos); 380 ArchName = ArchStr; 381 } 382 } 383 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); 384 if (!ObjectsOrErr) { 385 // Failed to find valid object file. 386 Modules.insert( 387 std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>())); 388 return ObjectsOrErr.takeError(); 389 } 390 ObjectPair Objects = ObjectsOrErr.get(); 391 392 std::unique_ptr<DIContext> Context; 393 // If this is a COFF object containing PDB info, use a PDBContext to 394 // symbolize. Otherwise, use DWARF. 395 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 396 const codeview::DebugInfo *DebugInfo; 397 StringRef PDBFileName; 398 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 399 if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { 400#if 0 401 using namespace pdb; 402 std::unique_ptr<IPDBSession> Session; 403 if (auto Err = loadDataForEXE(PDB_ReaderType::DIA, 404 Objects.first->getFileName(), Session)) { 405 Modules.insert( 406 std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>())); 407 return std::move(Err); 408 } 409 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 410#else 411 return make_error<StringError>( 412 "PDB support not compiled in", 413 std::make_error_code(std::errc::not_supported)); 414#endif 415 } 416 } 417 if (!Context) 418 Context.reset(new DWARFContextInMemory(*Objects.second)); 419 assert(Context); 420 auto InfoOrErr = 421 SymbolizableObjectFile::create(Objects.first, std::move(Context)); 422 std::unique_ptr<SymbolizableModule> SymMod; 423 if (InfoOrErr) 424 SymMod = std::move(InfoOrErr.get()); 425 auto InsertResult = 426 Modules.insert(std::make_pair(ModuleName, std::move(SymMod))); 427 assert(InsertResult.second); 428 if (auto EC = InfoOrErr.getError()) 429 return 
errorCodeToError(EC); 430 return InsertResult.first->second.get(); 431} 432 433namespace { 434 435// Undo these various manglings for Win32 extern "C" functions: 436// cdecl - _foo 437// stdcall - _foo@12 438// fastcall - @foo@12 439// vectorcall - foo@@12 440// These are all different linkage names for 'foo'. 441StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 442 // Remove any '_' or '@' prefix. 443 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 444 if (Front == '_' || Front == '@') 445 SymbolName = SymbolName.drop_front(); 446 447 // Remove any '@[0-9]+' suffix. 448 if (Front != '?') { 449 size_t AtPos = SymbolName.rfind('@'); 450 if (AtPos != StringRef::npos && 451 std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), 452 [](char C) { return C >= '0' && C <= '9'; })) { 453 SymbolName = SymbolName.substr(0, AtPos); 454 } 455 } 456 457 // Remove any ending '@' for vectorcall. 458 if (SymbolName.endswith("@")) 459 SymbolName = SymbolName.drop_back(); 460 461 return SymbolName; 462} 463 464} // end anonymous namespace 465 466#if !defined(_MSC_VER) 467// Assume that __cxa_demangle is provided by libcxxabi (except for Windows). 468extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, 469 size_t *length, int *status); 470#endif 471 472std::string 473LLVMSymbolizer::DemangleName(const std::string &Name, 474 const SymbolizableModule *DbiModuleDescriptor) { 475#if !defined(_MSC_VER) 476 // We can spoil names of symbols with C linkage, so use an heuristic 477 // approach to check if the name should be demangled. 478 if (Name.substr(0, 2) == "_Z") { 479 int status = 0; 480 char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status); 481 if (status != 0) 482 return Name; 483 std::string Result = DemangledName; 484 free(DemangledName); 485 return Result; 486 } 487#else 488 if (!Name.empty() && Name.front() == '?') { 489 // Only do MSVC C++ demangling on symbols starting with '?'. 
490 char DemangledName[1024] = {0}; 491 DWORD result = ::UnDecorateSymbolName( 492 Name.c_str(), DemangledName, 1023, 493 UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected 494 UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc 495 UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications 496 UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers 497 UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords 498 UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types 499 return (result == 0) ? Name : std::string(DemangledName); 500 } 501#endif 502 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) 503 return std::string(demanglePE32ExternCFunc(Name)); 504 return Name; 505} 506 507} // namespace symbolize 508} // namespace llvm 509