InputFile.cpp revision 360784
1//===- InputFile.cpp ------------------------------------------ *- C++ --*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "InputFile.h" 10 11#include "FormatUtil.h" 12#include "LinePrinter.h" 13 14#include "llvm/BinaryFormat/Magic.h" 15#include "llvm/DebugInfo/CodeView/CodeView.h" 16#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" 17#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" 18#include "llvm/DebugInfo/PDB/Native/DbiStream.h" 19#include "llvm/DebugInfo/PDB/Native/NativeSession.h" 20#include "llvm/DebugInfo/PDB/Native/PDBFile.h" 21#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" 22#include "llvm/DebugInfo/PDB/Native/RawError.h" 23#include "llvm/DebugInfo/PDB/Native/TpiStream.h" 24#include "llvm/DebugInfo/PDB/PDB.h" 25#include "llvm/Object/COFF.h" 26#include "llvm/Support/FileSystem.h" 27#include "llvm/Support/FormatVariadic.h" 28 29using namespace llvm; 30using namespace llvm::codeview; 31using namespace llvm::object; 32using namespace llvm::pdb; 33 34InputFile::InputFile() {} 35InputFile::~InputFile() {} 36 37static Expected<ModuleDebugStreamRef> 38getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) { 39 ExitOnError Err("Unexpected error: "); 40 41 auto &Dbi = Err(File.getPDBDbiStream()); 42 const auto &Modules = Dbi.modules(); 43 if (Index >= Modules.getModuleCount()) 44 return make_error<RawError>(raw_error_code::index_out_of_bounds, 45 "Invalid module index"); 46 47 auto Modi = Modules.getModuleDescriptor(Index); 48 49 ModuleName = Modi.getModuleName(); 50 51 uint16_t ModiStream = Modi.getModuleStreamIndex(); 52 if (ModiStream == kInvalidStreamIndex) 53 return make_error<RawError>(raw_error_code::no_stream, 54 "Module stream not present"); 55 56 auto ModStreamData = File.createIndexedStream(ModiStream); 57 58 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); 59 if (auto EC = ModS.reload()) 60 return make_error<RawError>(raw_error_code::corrupt_file, 61 "Invalid module stream"); 62 63 return std::move(ModS); 64} 65 66static inline bool isCodeViewDebugSubsection(object::SectionRef Section, 67 StringRef Name, 68 BinaryStreamReader &Reader) { 69 if (Expected<StringRef> NameOrErr = Section.getName()) { 70 if (*NameOrErr != Name) 71 return false; 72 } else { 73 consumeError(NameOrErr.takeError()); 74 return false; 75 } 76 77 Expected<StringRef> ContentsOrErr = Section.getContents(); 78 if (!ContentsOrErr) { 79 consumeError(ContentsOrErr.takeError()); 80 return false; 81 } 82 83 Reader = BinaryStreamReader(*ContentsOrErr, support::little); 84 uint32_t Magic; 85 if (Reader.bytesRemaining() < sizeof(uint32_t)) 86 return false; 87 cantFail(Reader.readInteger(Magic)); 88 if (Magic != COFF::DEBUG_SECTION_MAGIC) 89 return false; 90 return true; 91} 92 93static inline bool isDebugSSection(object::SectionRef Section, 94 DebugSubsectionArray &Subsections) { 95 BinaryStreamReader Reader; 96 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) 97 return false; 98 99 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); 100 return true; 101} 102 103static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { 104 BinaryStreamReader Reader; 105 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && 106 !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) 107 return false; 108 cantFail(Reader.readArray(Types, Reader.bytesRemaining())); 109 return true; 110} 111 112static std::string formatChecksumKind(FileChecksumKind Kind) { 113 switch (Kind) { 114 RETURN_CASE(FileChecksumKind, None, "None"); 115 RETURN_CASE(FileChecksumKind, MD5, "MD5"); 116 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); 117 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); 118 } 119 return formatUnknownEnum(Kind); 120} 121 122template <typename... Args> 123static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) { 124 if (Append) 125 Printer.format(std::forward<Args>(args)...); 126 else 127 Printer.formatLine(std::forward<Args>(args)...); 128} 129 130SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { 131 if (!File) 132 return; 133 134 if (File->isPdb()) 135 initializeForPdb(GroupIndex); 136 else { 137 Name = ".debug$S"; 138 uint32_t I = 0; 139 for (const auto &S : File->obj().sections()) { 140 DebugSubsectionArray SS; 141 if (!isDebugSSection(S, SS)) 142 continue; 143 144 if (!SC.hasChecksums() || !SC.hasStrings()) 145 SC.initialize(SS); 146 147 if (I == GroupIndex) 148 Subsections = SS; 149 150 if (SC.hasChecksums() && SC.hasStrings()) 151 break; 152 } 153 rebuildChecksumMap(); 154 } 155} 156 157StringRef SymbolGroup::name() const { return Name; } 158 159void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { 160 Subsections = SS; 161} 162 163void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } 164 165void SymbolGroup::initializeForPdb(uint32_t Modi) { 166 assert(File && File->isPdb()); 167 168 // PDB always uses the same string table, but each module has its own 169 // checksums. So we only set the strings if they're not already set. 170 if (!SC.hasStrings()) { 171 auto StringTable = File->pdb().getStringTable(); 172 if (StringTable) 173 SC.setStrings(StringTable->getStringTable()); 174 else 175 consumeError(StringTable.takeError()); 176 } 177 178 SC.resetChecksums(); 179 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); 180 if (!MDS) { 181 consumeError(MDS.takeError()); 182 return; 183 } 184 185 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS)); 186 Subsections = DebugStream->getSubsectionsArray(); 187 SC.initialize(Subsections); 188 rebuildChecksumMap(); 189} 190 191void SymbolGroup::rebuildChecksumMap() { 192 if (!SC.hasChecksums()) 193 return; 194 195 for (const auto &Entry : SC.checksums()) { 196 auto S = SC.strings().getString(Entry.FileNameOffset); 197 if (!S) 198 continue; 199 ChecksumsByFile[*S] = Entry; 200 } 201} 202 203const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { 204 assert(File && File->isPdb() && DebugStream); 205 return *DebugStream; 206} 207 208Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const { 209 return SC.strings().getString(Offset); 210} 211 212void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, 213 bool Append) const { 214 auto FC = ChecksumsByFile.find(File); 215 if (FC == ChecksumsByFile.end()) { 216 formatInternal(Printer, Append, "- (no checksum) {0}", File); 217 return; 218 } 219 220 formatInternal(Printer, Append, "- ({0}: {1}) {2}", 221 formatChecksumKind(FC->getValue().Kind), 222 toHex(FC->getValue().Checksum), File); 223} 224 225void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, 226 uint32_t Offset, 227 bool Append) const { 228 if (!SC.hasChecksums()) { 229 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 230 return; 231 } 232 233 auto Iter = SC.checksums().getArray().at(Offset); 234 if (Iter == SC.checksums().getArray().end()) { 235 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 236 return; 237 } 238 239 uint32_t FO = Iter->FileNameOffset; 240 auto ExpectedFile = getNameFromStringTable(FO); 241 if (!ExpectedFile) { 242 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); 243 consumeError(ExpectedFile.takeError()); 244 return; 245 } 246 if (Iter->Kind == FileChecksumKind::None) { 247 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); 248 } else { 249 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, 250 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); 251 } 252} 253 254Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) { 255 InputFile IF; 256 if (!llvm::sys::fs::exists(Path)) 257 return make_error<StringError>(formatv("File {0} not found", Path), 258 inconvertibleErrorCode()); 259 260 file_magic Magic; 261 if (auto EC = identify_magic(Path, Magic)) 262 return make_error<StringError>( 263 formatv("Unable to identify file type for file {0}", Path), EC); 264 265 if (Magic == file_magic::coff_object) { 266 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path); 267 if (!BinaryOrErr) 268 return BinaryOrErr.takeError(); 269 270 IF.CoffObject = std::move(*BinaryOrErr); 271 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary()); 272 return std::move(IF); 273 } 274 275 if (Magic == file_magic::pdb) { 276 std::unique_ptr<IPDBSession> Session; 277 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) 278 return std::move(Err); 279 280 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release())); 281 IF.PdbOrObj = &IF.PdbSession->getPDBFile(); 282 283 return std::move(IF); 284 } 285 286 if (!AllowUnknownFile) 287 return make_error<StringError>( 288 formatv("File {0} is not a supported file type", Path), 289 inconvertibleErrorCode()); 290 291 auto Result = MemoryBuffer::getFile(Path, -1LL, false); 292 if (!Result) 293 return make_error<StringError>( 294 formatv("File {0} could not be opened", Path), Result.getError()); 295 296 IF.UnknownFile = std::move(*Result); 297 IF.PdbOrObj = IF.UnknownFile.get(); 298 return std::move(IF); 299} 300 301PDBFile &InputFile::pdb() { 302 assert(isPdb()); 303 return *PdbOrObj.get<PDBFile *>(); 304} 305 306const PDBFile &InputFile::pdb() const { 307 assert(isPdb()); 308 return *PdbOrObj.get<PDBFile *>(); 309} 310 311object::COFFObjectFile &InputFile::obj() { 312 assert(isObj()); 313 return *PdbOrObj.get<object::COFFObjectFile *>(); 314} 315 316const object::COFFObjectFile &InputFile::obj() const { 317 assert(isObj()); 318 return *PdbOrObj.get<object::COFFObjectFile *>(); 319} 320 321MemoryBuffer &InputFile::unknown() { 322 assert(isUnknown()); 323 return *PdbOrObj.get<MemoryBuffer *>(); 324} 325 326const MemoryBuffer &InputFile::unknown() const { 327 assert(isUnknown()); 328 return *PdbOrObj.get<MemoryBuffer *>(); 329} 330 331StringRef InputFile::getFilePath() const { 332 if (isPdb()) 333 return pdb().getFilePath(); 334 if (isObj()) 335 return obj().getFileName(); 336 assert(isUnknown()); 337 return unknown().getBufferIdentifier(); 338} 339 340bool InputFile::hasTypes() const { 341 if (isPdb()) 342 return pdb().hasPDBTpiStream(); 343 344 for (const auto &Section : obj().sections()) { 345 CVTypeArray Types; 346 if (isDebugTSection(Section, Types)) 347 return true; 348 } 349 return false; 350} 351 352bool InputFile::hasIds() const { 353 if (isObj()) 354 return false; 355 return pdb().hasPDBIpiStream(); 356} 357 358bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); } 359 360bool InputFile::isObj() const { 361 return PdbOrObj.is<object::COFFObjectFile *>(); 362} 363 364bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); } 365 366codeview::LazyRandomTypeCollection & 367InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { 368 if (Types && Kind == kTypes) 369 return *Types; 370 if (Ids && Kind == kIds) 371 return *Ids; 372 373 if (Kind == kIds) { 374 assert(isPdb() && pdb().hasPDBIpiStream()); 375 } 376 377 // If the collection was already initialized, we should have just returned it 378 // in step 1. 379 if (isPdb()) { 380 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; 381 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() 382 : pdb().getPDBTpiStream()); 383 384 auto &Array = Stream.typeArray(); 385 uint32_t Count = Stream.getNumTypeRecords(); 386 auto Offsets = Stream.getTypeIndexOffsets(); 387 Collection = 388 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets); 389 return *Collection; 390 } 391 392 assert(isObj()); 393 assert(Kind == kTypes); 394 assert(!Types); 395 396 for (const auto &Section : obj().sections()) { 397 CVTypeArray Records; 398 if (!isDebugTSection(Section, Records)) 399 continue; 400 401 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100); 402 return *Types; 403 } 404 405 Types = std::make_unique<LazyRandomTypeCollection>(100); 406 return *Types; 407} 408 409codeview::LazyRandomTypeCollection &InputFile::types() { 410 return getOrCreateTypeCollection(kTypes); 411} 412 413codeview::LazyRandomTypeCollection &InputFile::ids() { 414 // Object files have only one type stream that contains both types and ids. 415 // Similarly, some PDBs don't contain an IPI stream, and for those both types 416 // and IDs are in the same stream. 417 if (isObj() || !pdb().hasPDBIpiStream()) 418 return types(); 419 420 return getOrCreateTypeCollection(kIds); 421} 422 423iterator_range<SymbolGroupIterator> InputFile::symbol_groups() { 424 return make_range<SymbolGroupIterator>(symbol_groups_begin(), 425 symbol_groups_end()); 426} 427 428SymbolGroupIterator InputFile::symbol_groups_begin() { 429 return SymbolGroupIterator(*this); 430} 431 432SymbolGroupIterator InputFile::symbol_groups_end() { 433 return SymbolGroupIterator(); 434} 435 436SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} 437 438SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { 439 if (File.isObj()) { 440 SectionIter = File.obj().section_begin(); 441 scanToNextDebugS(); 442 } 443} 444 445bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { 446 bool E = isEnd(); 447 bool RE = R.isEnd(); 448 if (E || RE) 449 return E == RE; 450 451 if (Value.File != R.Value.File) 452 return false; 453 return Index == R.Index; 454} 455 456const SymbolGroup &SymbolGroupIterator::operator*() const { 457 assert(!isEnd()); 458 return Value; 459} 460SymbolGroup &SymbolGroupIterator::operator*() { 461 assert(!isEnd()); 462 return Value; 463} 464 465SymbolGroupIterator &SymbolGroupIterator::operator++() { 466 assert(Value.File && !isEnd()); 467 ++Index; 468 if (isEnd()) 469 return *this; 470 471 if (Value.File->isPdb()) { 472 Value.updatePdbModi(Index); 473 return *this; 474 } 475 476 scanToNextDebugS(); 477 return *this; 478} 479 480void SymbolGroupIterator::scanToNextDebugS() { 481 assert(SectionIter.hasValue()); 482 auto End = Value.File->obj().section_end(); 483 auto &Iter = *SectionIter; 484 assert(!isEnd()); 485 486 while (++Iter != End) { 487 DebugSubsectionArray SS; 488 SectionRef SR = *Iter; 489 if (!isDebugSSection(SR, SS)) 490 continue; 491 492 Value.updateDebugS(SS); 493 return; 494 } 495} 496 497bool SymbolGroupIterator::isEnd() const { 498 if (!Value.File) 499 return true; 500 if (Value.File->isPdb()) { 501 auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); 502 uint32_t Count = Dbi.modules().getModuleCount(); 503 assert(Index <= Count); 504 return Index == Count; 505 } 506 507 assert(SectionIter.hasValue()); 508 return *SectionIter == Value.File->obj().section_end(); 509} 510