Symbolize.cpp revision 327952
1//===-- LLVMSymbolize.cpp -------------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Implementation for LLVM symbolization library. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/DebugInfo/Symbolize/Symbolize.h" 15 16#include "SymbolizableObjectFile.h" 17 18#include "llvm/ADT/STLExtras.h" 19#include "llvm/BinaryFormat/COFF.h" 20#include "llvm/Config/config.h" 21#include "llvm/DebugInfo/DWARF/DWARFContext.h" 22#include "llvm/DebugInfo/PDB/PDB.h" 23#include "llvm/DebugInfo/PDB/PDBContext.h" 24#include "llvm/Object/COFF.h" 25#include "llvm/Object/MachO.h" 26#include "llvm/Object/MachOUniversal.h" 27#include "llvm/Support/Casting.h" 28#include "llvm/Support/Compression.h" 29#include "llvm/Support/DataExtractor.h" 30#include "llvm/Support/Errc.h" 31#include "llvm/Support/FileSystem.h" 32#include "llvm/Support/MemoryBuffer.h" 33#include "llvm/Support/Path.h" 34#include <algorithm> 35#include <cassert> 36#include <cstdlib> 37#include <cstring> 38 39#if defined(_MSC_VER) 40#include <Windows.h> 41 42// This must be included after windows.h. 43#include <DbgHelp.h> 44#pragma comment(lib, "dbghelp.lib") 45 46// Windows.h conflicts with our COFF header definitions. 47#ifdef IMAGE_FILE_MACHINE_I386 48#undef IMAGE_FILE_MACHINE_I386 49#endif 50#endif 51 52namespace llvm { 53namespace symbolize { 54 55Expected<DILineInfo> 56LLVMSymbolizer::symbolizeCode(const std::string &ModuleName, 57 uint64_t ModuleOffset, StringRef DWPName) { 58 SymbolizableModule *Info; 59 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName)) 60 Info = InfoOrErr.get(); 61 else 62 return InfoOrErr.takeError(); 63 64 // A null module means an error has already been reported. Return an empty 65 // result. 66 if (!Info) 67 return DILineInfo(); 68 69 // If the user is giving us relative addresses, add the preferred base of the 70 // object to the offset before we do the query. It's what DIContext expects. 71 if (Opts.RelativeAddresses) 72 ModuleOffset += Info->getModulePreferredBase(); 73 74 DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions, 75 Opts.UseSymbolTable); 76 if (Opts.Demangle) 77 LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); 78 return LineInfo; 79} 80 81Expected<DIInliningInfo> 82LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, 83 uint64_t ModuleOffset, StringRef DWPName) { 84 SymbolizableModule *Info; 85 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName)) 86 Info = InfoOrErr.get(); 87 else 88 return InfoOrErr.takeError(); 89 90 // A null module means an error has already been reported. Return an empty 91 // result. 92 if (!Info) 93 return DIInliningInfo(); 94 95 // If the user is giving us relative addresses, add the preferred base of the 96 // object to the offset before we do the query. It's what DIContext expects. 97 if (Opts.RelativeAddresses) 98 ModuleOffset += Info->getModulePreferredBase(); 99 100 DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( 101 ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable); 102 if (Opts.Demangle) { 103 for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { 104 auto *Frame = InlinedContext.getMutableFrame(i); 105 Frame->FunctionName = DemangleName(Frame->FunctionName, Info); 106 } 107 } 108 return InlinedContext; 109} 110 111Expected<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName, 112 uint64_t ModuleOffset) { 113 SymbolizableModule *Info; 114 if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName)) 115 Info = InfoOrErr.get(); 116 else 117 return InfoOrErr.takeError(); 118 119 // A null module means an error has already been reported. Return an empty 120 // result. 121 if (!Info) 122 return DIGlobal(); 123 124 // If the user is giving us relative addresses, add the preferred base of 125 // the object to the offset before we do the query. It's what DIContext 126 // expects. 127 if (Opts.RelativeAddresses) 128 ModuleOffset += Info->getModulePreferredBase(); 129 130 DIGlobal Global = Info->symbolizeData(ModuleOffset); 131 if (Opts.Demangle) 132 Global.Name = DemangleName(Global.Name, Info); 133 return Global; 134} 135 136void LLVMSymbolizer::flush() { 137 ObjectForUBPathAndArch.clear(); 138 BinaryForPath.clear(); 139 ObjectPairForPathArch.clear(); 140 Modules.clear(); 141} 142 143namespace { 144 145// For Path="/path/to/foo" and Basename="foo" assume that debug info is in 146// /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 147// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in 148// /path/to/bar.dSYM/Contents/Resources/DWARF/foo. 149std::string getDarwinDWARFResourceForPath( 150 const std::string &Path, const std::string &Basename) { 151 SmallString<16> ResourceName = StringRef(Path); 152 if (sys::path::extension(Path) != ".dSYM") { 153 ResourceName += ".dSYM"; 154 } 155 sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); 156 sys::path::append(ResourceName, Basename); 157 return ResourceName.str(); 158} 159 160bool checkFileCRC(StringRef Path, uint32_t CRCHash) { 161 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 162 MemoryBuffer::getFileOrSTDIN(Path); 163 if (!MB) 164 return false; 165 return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer()); 166} 167 168bool findDebugBinary(const std::string &OrigPath, 169 const std::string &DebuglinkName, uint32_t CRCHash, 170 std::string &Result) { 171 std::string OrigRealPath = OrigPath; 172#if defined(HAVE_REALPATH) 173 if (char *RP = realpath(OrigPath.c_str(), nullptr)) { 174 OrigRealPath = RP; 175 free(RP); 176 } 177#endif 178 SmallString<16> OrigDir(OrigRealPath); 179 llvm::sys::path::remove_filename(OrigDir); 180 SmallString<16> DebugPath = OrigDir; 181 // Try /path/to/original_binary/debuglink_name 182 llvm::sys::path::append(DebugPath, DebuglinkName); 183 if (checkFileCRC(DebugPath, CRCHash)) { 184 Result = DebugPath.str(); 185 return true; 186 } 187 // Try /path/to/original_binary/.debug/debuglink_name 188 DebugPath = OrigRealPath; 189 llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); 190 if (checkFileCRC(DebugPath, CRCHash)) { 191 Result = DebugPath.str(); 192 return true; 193 } 194 // Try /usr/lib/debug/path/to/original_binary/debuglink_name 195 DebugPath = "/usr/lib/debug"; 196 llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), 197 DebuglinkName); 198 if (checkFileCRC(DebugPath, CRCHash)) { 199 Result = DebugPath.str(); 200 return true; 201 } 202 return false; 203} 204 205bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName, 206 uint32_t &CRCHash) { 207 if (!Obj) 208 return false; 209 for (const SectionRef &Section : Obj->sections()) { 210 StringRef Name; 211 Section.getName(Name); 212 Name = Name.substr(Name.find_first_not_of("._")); 213 if (Name == "gnu_debuglink") { 214 StringRef Data; 215 Section.getContents(Data); 216 DataExtractor DE(Data, Obj->isLittleEndian(), 0); 217 uint32_t Offset = 0; 218 if (const char *DebugNameStr = DE.getCStr(&Offset)) { 219 // 4-byte align the offset. 220 Offset = (Offset + 3) & ~0x3; 221 if (DE.isValidOffsetForDataOfSize(Offset, 4)) { 222 DebugName = DebugNameStr; 223 CRCHash = DE.getU32(&Offset); 224 return true; 225 } 226 } 227 break; 228 } 229 } 230 return false; 231} 232 233bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj, 234 const MachOObjectFile *Obj) { 235 ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid(); 236 ArrayRef<uint8_t> bin_uuid = Obj->getUuid(); 237 if (dbg_uuid.empty() || bin_uuid.empty()) 238 return false; 239 return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size()); 240} 241 242} // end anonymous namespace 243 244ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, 245 const MachOObjectFile *MachExeObj, const std::string &ArchName) { 246 // On Darwin we may find DWARF in separate object file in 247 // resource directory. 248 std::vector<std::string> DsymPaths; 249 StringRef Filename = sys::path::filename(ExePath); 250 DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); 251 for (const auto &Path : Opts.DsymHints) { 252 DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); 253 } 254 for (const auto &Path : DsymPaths) { 255 auto DbgObjOrErr = getOrCreateObject(Path, ArchName); 256 if (!DbgObjOrErr) { 257 // Ignore errors, the file might not exist. 258 consumeError(DbgObjOrErr.takeError()); 259 continue; 260 } 261 ObjectFile *DbgObj = DbgObjOrErr.get(); 262 if (!DbgObj) 263 continue; 264 const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj); 265 if (!MachDbgObj) 266 continue; 267 if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj)) 268 return DbgObj; 269 } 270 return nullptr; 271} 272 273ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path, 274 const ObjectFile *Obj, 275 const std::string &ArchName) { 276 std::string DebuglinkName; 277 uint32_t CRCHash; 278 std::string DebugBinaryPath; 279 if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash)) 280 return nullptr; 281 if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath)) 282 return nullptr; 283 auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); 284 if (!DbgObjOrErr) { 285 // Ignore errors, the file might not exist. 286 consumeError(DbgObjOrErr.takeError()); 287 return nullptr; 288 } 289 return DbgObjOrErr.get(); 290} 291 292Expected<LLVMSymbolizer::ObjectPair> 293LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path, 294 const std::string &ArchName) { 295 const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName)); 296 if (I != ObjectPairForPathArch.end()) { 297 return I->second; 298 } 299 300 auto ObjOrErr = getOrCreateObject(Path, ArchName); 301 if (!ObjOrErr) { 302 ObjectPairForPathArch.insert(std::make_pair(std::make_pair(Path, ArchName), 303 ObjectPair(nullptr, nullptr))); 304 return ObjOrErr.takeError(); 305 } 306 307 ObjectFile *Obj = ObjOrErr.get(); 308 assert(Obj != nullptr); 309 ObjectFile *DbgObj = nullptr; 310 311 if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj)) 312 DbgObj = lookUpDsymFile(Path, MachObj, ArchName); 313 if (!DbgObj) 314 DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName); 315 if (!DbgObj) 316 DbgObj = Obj; 317 ObjectPair Res = std::make_pair(Obj, DbgObj); 318 ObjectPairForPathArch.insert( 319 std::make_pair(std::make_pair(Path, ArchName), Res)); 320 return Res; 321} 322 323Expected<ObjectFile *> 324LLVMSymbolizer::getOrCreateObject(const std::string &Path, 325 const std::string &ArchName) { 326 const auto &I = BinaryForPath.find(Path); 327 Binary *Bin = nullptr; 328 if (I == BinaryForPath.end()) { 329 Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path); 330 if (!BinOrErr) { 331 BinaryForPath.insert(std::make_pair(Path, OwningBinary<Binary>())); 332 return BinOrErr.takeError(); 333 } 334 Bin = BinOrErr->getBinary(); 335 BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get()))); 336 } else { 337 Bin = I->second.getBinary(); 338 } 339 340 if (!Bin) 341 return static_cast<ObjectFile *>(nullptr); 342 343 if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) { 344 const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName)); 345 if (I != ObjectForUBPathAndArch.end()) { 346 return I->second.get(); 347 } 348 Expected<std::unique_ptr<ObjectFile>> ObjOrErr = 349 UB->getObjectForArch(ArchName); 350 if (!ObjOrErr) { 351 ObjectForUBPathAndArch.insert(std::make_pair( 352 std::make_pair(Path, ArchName), std::unique_ptr<ObjectFile>())); 353 return ObjOrErr.takeError(); 354 } 355 ObjectFile *Res = ObjOrErr->get(); 356 ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName), 357 std::move(ObjOrErr.get()))); 358 return Res; 359 } 360 if (Bin->isObject()) { 361 return cast<ObjectFile>(Bin); 362 } 363 return errorCodeToError(object_error::arch_not_found); 364} 365 366Expected<SymbolizableModule *> 367LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName, 368 StringRef DWPName) { 369 const auto &I = Modules.find(ModuleName); 370 if (I != Modules.end()) { 371 return I->second.get(); 372 } 373 std::string BinaryName = ModuleName; 374 std::string ArchName = Opts.DefaultArch; 375 size_t ColonPos = ModuleName.find_last_of(':'); 376 // Verify that substring after colon form a valid arch name. 377 if (ColonPos != std::string::npos) { 378 std::string ArchStr = ModuleName.substr(ColonPos + 1); 379 if (Triple(ArchStr).getArch() != Triple::UnknownArch) { 380 BinaryName = ModuleName.substr(0, ColonPos); 381 ArchName = ArchStr; 382 } 383 } 384 auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName); 385 if (!ObjectsOrErr) { 386 // Failed to find valid object file. 387 Modules.insert( 388 std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>())); 389 return ObjectsOrErr.takeError(); 390 } 391 ObjectPair Objects = ObjectsOrErr.get(); 392 393 std::unique_ptr<DIContext> Context; 394 // If this is a COFF object containing PDB info, use a PDBContext to 395 // symbolize. Otherwise, use DWARF. 396 if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) { 397 const codeview::DebugInfo *DebugInfo; 398 StringRef PDBFileName; 399 auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); 400 if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { 401#if 0 402 using namespace pdb; 403 std::unique_ptr<IPDBSession> Session; 404 if (auto Err = loadDataForEXE(PDB_ReaderType::DIA, 405 Objects.first->getFileName(), Session)) { 406 Modules.insert( 407 std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>())); 408 return std::move(Err); 409 } 410 Context.reset(new PDBContext(*CoffObject, std::move(Session))); 411#else 412 return make_error<StringError>( 413 "PDB support not compiled in", 414 std::make_error_code(std::errc::not_supported)); 415#endif 416 } 417 } 418 if (!Context) 419 Context = DWARFContext::create(*Objects.second, nullptr, 420 DWARFContext::defaultErrorHandler, DWPName); 421 assert(Context); 422 auto InfoOrErr = 423 SymbolizableObjectFile::create(Objects.first, std::move(Context)); 424 std::unique_ptr<SymbolizableModule> SymMod; 425 if (InfoOrErr) 426 SymMod = std::move(InfoOrErr.get()); 427 auto InsertResult = 428 Modules.insert(std::make_pair(ModuleName, std::move(SymMod))); 429 assert(InsertResult.second); 430 if (auto EC = InfoOrErr.getError()) 431 return errorCodeToError(EC); 432 return InsertResult.first->second.get(); 433} 434 435namespace { 436 437// Undo these various manglings for Win32 extern "C" functions: 438// cdecl - _foo 439// stdcall - _foo@12 440// fastcall - @foo@12 441// vectorcall - foo@@12 442// These are all different linkage names for 'foo'. 443StringRef demanglePE32ExternCFunc(StringRef SymbolName) { 444 // Remove any '_' or '@' prefix. 445 char Front = SymbolName.empty() ? '\0' : SymbolName[0]; 446 if (Front == '_' || Front == '@') 447 SymbolName = SymbolName.drop_front(); 448 449 // Remove any '@[0-9]+' suffix. 450 if (Front != '?') { 451 size_t AtPos = SymbolName.rfind('@'); 452 if (AtPos != StringRef::npos && 453 std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(), 454 [](char C) { return C >= '0' && C <= '9'; })) { 455 SymbolName = SymbolName.substr(0, AtPos); 456 } 457 } 458 459 // Remove any ending '@' for vectorcall. 460 if (SymbolName.endswith("@")) 461 SymbolName = SymbolName.drop_back(); 462 463 return SymbolName; 464} 465 466} // end anonymous namespace 467 468#if !defined(_MSC_VER) 469// Assume that __cxa_demangle is provided by libcxxabi (except for Windows). 470extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer, 471 size_t *length, int *status); 472#endif 473 474std::string 475LLVMSymbolizer::DemangleName(const std::string &Name, 476 const SymbolizableModule *DbiModuleDescriptor) { 477#if !defined(_MSC_VER) 478 // We can spoil names of symbols with C linkage, so use an heuristic 479 // approach to check if the name should be demangled. 480 if (Name.substr(0, 2) == "_Z") { 481 int status = 0; 482 char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status); 483 if (status != 0) 484 return Name; 485 std::string Result = DemangledName; 486 free(DemangledName); 487 return Result; 488 } 489#else 490 if (!Name.empty() && Name.front() == '?') { 491 // Only do MSVC C++ demangling on symbols starting with '?'. 492 char DemangledName[1024] = {0}; 493 DWORD result = ::UnDecorateSymbolName( 494 Name.c_str(), DemangledName, 1023, 495 UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected 496 UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc 497 UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications 498 UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers 499 UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords 500 UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types 501 return (result == 0) ? Name : std::string(DemangledName); 502 } 503#endif 504 if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) 505 return std::string(demanglePE32ExternCFunc(Name)); 506 return Name; 507} 508 509} // namespace symbolize 510} // namespace llvm 511