1//===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" 10#include "llvm/ADT/SmallVector.h" 11#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" 12#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" 13#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" 14#include "llvm/DebugInfo/CodeView/TypeIndex.h" 15#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" 16#include "llvm/DebugInfo/CodeView/TypeRecord.h" 17#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" 18#include "llvm/Support/Error.h" 19#include <optional> 20 21using namespace llvm; 22using namespace llvm::codeview; 23 24static inline size_t slotForIndex(TypeIndex Idx) { 25 assert(!Idx.isSimple() && "simple type indices have no slots"); 26 return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex; 27} 28 29namespace { 30 31/// Implementation of CodeView type stream merging. 32/// 33/// A CodeView type stream is a series of records that reference each other 34/// through type indices. A type index is either "simple", meaning it is less 35/// than 0x1000 and refers to a builtin type, or it is complex, meaning it 36/// refers to a prior type record in the current stream. The type index of a 37/// record is equal to the number of records before it in the stream plus 38/// 0x1000. 39/// 40/// Type records are only allowed to use type indices smaller than their own, so 41/// a type stream is effectively a topologically sorted DAG. Cycles occurring in 42/// the type graph of the source program are resolved with forward declarations 43/// of composite types. This class implements the following type stream merging 44/// algorithm, which relies on this DAG structure: 45/// 46/// - Begin with a new empty stream, and a new empty hash table that maps from 47/// type record contents to new type index. 48/// - For each new type stream, maintain a map from source type index to 49/// destination type index. 50/// - For each record, copy it and rewrite its type indices to be valid in the 51/// destination type stream. 52/// - If the new type record is not already present in the destination stream 53/// hash table, append it to the destination type stream, assign it the next 54/// type index, and update the two hash tables. 55/// - If the type record already exists in the destination stream, discard it 56/// and update the type index map to forward the source type index to the 57/// existing destination type index. 58/// 59/// As an additional complication, type stream merging actually produces two 60/// streams: an item (or IPI) stream and a type stream, as this is what is 61/// actually stored in the final PDB. We choose which records go where by 62/// looking at the record kind. 63class TypeStreamMerger { 64public: 65 explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest) 66 : IndexMap(SourceToDest) { 67 // When dealing with precompiled headers objects, all data in SourceToDest 68 // belongs to the precompiled headers object, and is assumed to be already 69 // remapped to the target PDB. Any forthcoming type that will be merged in 70 // might potentially back-reference this data. We also don't want to resolve 71 // twice the types in the precompiled object. 72 CurIndex += SourceToDest.size(); 73 } 74 75 static const TypeIndex Untranslated; 76 77 // Local hashing entry points 78 Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds, 79 MergingTypeTableBuilder &DestTypes, 80 const CVTypeArray &IdsAndTypes, 81 std::optional<PCHMergerInfo> &PCHInfo); 82 Error mergeIdRecords(MergingTypeTableBuilder &Dest, 83 ArrayRef<TypeIndex> TypeSourceToDest, 84 const CVTypeArray &Ids); 85 Error mergeTypeRecords(MergingTypeTableBuilder &Dest, 86 const CVTypeArray &Types); 87 88 // Global hashing entry points 89 Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds, 90 GlobalTypeTableBuilder &DestTypes, 91 const CVTypeArray &IdsAndTypes, 92 ArrayRef<GloballyHashedType> Hashes, 93 std::optional<PCHMergerInfo> &PCHInfo); 94 Error mergeIdRecords(GlobalTypeTableBuilder &Dest, 95 ArrayRef<TypeIndex> TypeSourceToDest, 96 const CVTypeArray &Ids, 97 ArrayRef<GloballyHashedType> Hashes); 98 Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, 99 ArrayRef<GloballyHashedType> Hashes, 100 std::optional<PCHMergerInfo> &PCHInfo); 101 102private: 103 Error doit(const CVTypeArray &Types); 104 105 Error remapAllTypes(const CVTypeArray &Types); 106 107 Error remapType(const CVType &Type); 108 109 void addMapping(TypeIndex Idx); 110 111 inline bool remapTypeIndex(TypeIndex &Idx) { 112 // If we're mapping a pure index stream, then IndexMap only contains 113 // mappings from OldIdStream -> NewIdStream, in which case we will need to 114 // use the special mapping from OldTypeStream -> NewTypeStream which was 115 // computed externally. Regardless, we use this special map if and only if 116 // we are doing an id-only mapping. 117 if (!hasTypeStream()) 118 return remapIndex(Idx, TypeLookup); 119 120 assert(TypeLookup.empty()); 121 return remapIndex(Idx, IndexMap); 122 } 123 inline bool remapItemIndex(TypeIndex &Idx) { 124 assert(hasIdStream()); 125 return remapIndex(Idx, IndexMap); 126 } 127 128 bool hasTypeStream() const { 129 return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream); 130 } 131 132 bool hasIdStream() const { 133 return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream); 134 } 135 136 ArrayRef<uint8_t> remapIndices(const CVType &OriginalType, 137 MutableArrayRef<uint8_t> Storage); 138 139 inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) { 140 if (LLVM_LIKELY(remapIndexSimple(Idx, Map))) 141 return true; 142 143 return remapIndexFallback(Idx, Map); 144 } 145 146 inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const { 147 // Simple types are unchanged. 148 if (Idx.isSimple()) 149 return true; 150 151 // Check if this type index refers to a record we've already translated 152 // successfully. If it refers to a type later in the stream or a record we 153 // had to defer, defer it until later pass. 154 unsigned MapPos = slotForIndex(Idx); 155 if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated)) 156 return false; 157 158 Idx = Map[MapPos]; 159 return true; 160 } 161 162 bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map); 163 164 Error errorCorruptRecord() const { 165 return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record); 166 } 167 168 Expected<bool> shouldRemapType(const CVType &Type); 169 170 std::optional<Error> LastError; 171 172 bool UseGlobalHashes = false; 173 174 bool IsSecondPass = false; 175 176 unsigned NumBadIndices = 0; 177 178 TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; 179 180 MergingTypeTableBuilder *DestIdStream = nullptr; 181 MergingTypeTableBuilder *DestTypeStream = nullptr; 182 183 GlobalTypeTableBuilder *DestGlobalIdStream = nullptr; 184 GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr; 185 186 ArrayRef<GloballyHashedType> GlobalHashes; 187 188 // If we're only mapping id records, this array contains the mapping for 189 // type records. 190 ArrayRef<TypeIndex> TypeLookup; 191 192 /// Map from source type index to destination type index. Indexed by source 193 /// type index minus 0x1000. 194 SmallVectorImpl<TypeIndex> &IndexMap; 195 196 /// Temporary storage that we use to copy a record's data while re-writing 197 /// its type indices. 198 SmallVector<uint8_t, 256> RemapStorage; 199 200 std::optional<PCHMergerInfo> PCHInfo; 201}; 202 203} // end anonymous namespace 204 205const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); 206 207void TypeStreamMerger::addMapping(TypeIndex Idx) { 208 if (!IsSecondPass) { 209 assert(IndexMap.size() == slotForIndex(CurIndex) && 210 "visitKnownRecord should add one index map entry"); 211 IndexMap.push_back(Idx); 212 } else { 213 assert(slotForIndex(CurIndex) < IndexMap.size()); 214 IndexMap[slotForIndex(CurIndex)] = Idx; 215 } 216} 217 218bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx, 219 ArrayRef<TypeIndex> Map) { 220 size_t MapPos = slotForIndex(Idx); 221 222 // If this is the second pass and this index isn't in the map, then it points 223 // outside the current type stream, and this is a corrupt record. 224 if (IsSecondPass && MapPos >= Map.size()) { 225 // FIXME: Print a more useful error. We can give the current record and the 226 // index that we think its pointing to. 227 if (LastError) 228 LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); 229 else 230 LastError = errorCorruptRecord(); 231 } 232 233 ++NumBadIndices; 234 235 // This type index is invalid. Remap this to "not translated by cvpack", 236 // and return failure. 237 Idx = Untranslated; 238 return false; 239} 240 241// Local hashing entry points 242Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest, 243 const CVTypeArray &Types) { 244 DestTypeStream = &Dest; 245 UseGlobalHashes = false; 246 247 return doit(Types); 248} 249 250Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest, 251 ArrayRef<TypeIndex> TypeSourceToDest, 252 const CVTypeArray &Ids) { 253 DestIdStream = &Dest; 254 TypeLookup = TypeSourceToDest; 255 UseGlobalHashes = false; 256 257 return doit(Ids); 258} 259 260Error TypeStreamMerger::mergeTypesAndIds( 261 MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, 262 const CVTypeArray &IdsAndTypes, std::optional<PCHMergerInfo> &PCHInfo) { 263 DestIdStream = &DestIds; 264 DestTypeStream = &DestTypes; 265 UseGlobalHashes = false; 266 auto Err = doit(IdsAndTypes); 267 PCHInfo = this->PCHInfo; 268 return Err; 269} 270 271// Global hashing entry points 272Error TypeStreamMerger::mergeTypeRecords( 273 GlobalTypeTableBuilder &Dest, const CVTypeArray &Types, 274 ArrayRef<GloballyHashedType> Hashes, 275 std::optional<PCHMergerInfo> &PCHInfo) { 276 DestGlobalTypeStream = &Dest; 277 UseGlobalHashes = true; 278 GlobalHashes = Hashes; 279 auto Err = doit(Types); 280 PCHInfo = this->PCHInfo; 281 return Err; 282} 283 284Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest, 285 ArrayRef<TypeIndex> TypeSourceToDest, 286 const CVTypeArray &Ids, 287 ArrayRef<GloballyHashedType> Hashes) { 288 DestGlobalIdStream = &Dest; 289 TypeLookup = TypeSourceToDest; 290 UseGlobalHashes = true; 291 GlobalHashes = Hashes; 292 293 return doit(Ids); 294} 295 296Error TypeStreamMerger::mergeTypesAndIds( 297 GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, 298 const CVTypeArray &IdsAndTypes, ArrayRef<GloballyHashedType> Hashes, 299 std::optional<PCHMergerInfo> &PCHInfo) { 300 DestGlobalIdStream = &DestIds; 301 DestGlobalTypeStream = &DestTypes; 302 UseGlobalHashes = true; 303 GlobalHashes = Hashes; 304 auto Err = doit(IdsAndTypes); 305 PCHInfo = this->PCHInfo; 306 return Err; 307} 308 309Error TypeStreamMerger::doit(const CVTypeArray &Types) { 310 if (auto EC = remapAllTypes(Types)) 311 return EC; 312 313 // If we found bad indices but no other errors, try doing another pass and see 314 // if we can resolve the indices that weren't in the map on the first pass. 315 // This may require multiple passes, but we should always make progress. MASM 316 // is the only known CodeView producer that makes type streams that aren't 317 // topologically sorted. The standard library contains MASM-produced objects, 318 // so this is important to handle correctly, but we don't have to be too 319 // efficient. MASM type streams are usually very small. 320 while (!LastError && NumBadIndices > 0) { 321 unsigned BadIndicesRemaining = NumBadIndices; 322 IsSecondPass = true; 323 NumBadIndices = 0; 324 CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); 325 326 if (auto EC = remapAllTypes(Types)) 327 return EC; 328 329 assert(NumBadIndices <= BadIndicesRemaining && 330 "second pass found more bad indices"); 331 if (!LastError && NumBadIndices == BadIndicesRemaining) { 332 return llvm::make_error<CodeViewError>( 333 cv_error_code::corrupt_record, "Input type graph contains cycles"); 334 } 335 } 336 337 if (LastError) 338 return std::move(*LastError); 339 return Error::success(); 340} 341 342Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { 343 BinaryStreamRef Stream = Types.getUnderlyingStream(); 344 ArrayRef<uint8_t> Buffer; 345 cantFail(Stream.readBytes(0, Stream.getLength(), Buffer)); 346 347 return forEachCodeViewRecord<CVType>( 348 Buffer, [this](const CVType &T) { return remapType(T); }); 349} 350 351Error TypeStreamMerger::remapType(const CVType &Type) { 352 auto R = shouldRemapType(Type); 353 if (!R) 354 return R.takeError(); 355 356 TypeIndex DestIdx = Untranslated; 357 if (*R) { 358 auto DoSerialize = 359 [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> { 360 return remapIndices(Type, Storage); 361 }; 362 unsigned AlignedSize = alignTo(Type.RecordData.size(), 4); 363 364 if (LLVM_LIKELY(UseGlobalHashes)) { 365 GlobalTypeTableBuilder &Dest = 366 isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; 367 GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; 368 DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize); 369 } else { 370 MergingTypeTableBuilder &Dest = 371 isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; 372 373 RemapStorage.resize(AlignedSize); 374 ArrayRef<uint8_t> Result = DoSerialize(RemapStorage); 375 if (!Result.empty()) 376 DestIdx = Dest.insertRecordBytes(Result); 377 } 378 } 379 addMapping(DestIdx); 380 381 ++CurIndex; 382 assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) && 383 "visitKnownRecord should add one index map entry"); 384 return Error::success(); 385} 386 387ArrayRef<uint8_t> 388TypeStreamMerger::remapIndices(const CVType &OriginalType, 389 MutableArrayRef<uint8_t> Storage) { 390 unsigned Align = OriginalType.RecordData.size() & 3; 391 assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) && 392 "The storage buffer size is not a multiple of 4 bytes which will " 393 "cause misalignment in the output TPI stream!"); 394 395 SmallVector<TiReference, 4> Refs; 396 discoverTypeIndices(OriginalType.RecordData, Refs); 397 if (Refs.empty() && Align == 0) 398 return OriginalType.RecordData; 399 400 ::memcpy(Storage.data(), OriginalType.RecordData.data(), 401 OriginalType.RecordData.size()); 402 403 uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix); 404 405 for (auto &Ref : Refs) { 406 TypeIndex *DestTIs = 407 reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset); 408 409 for (size_t I = 0; I < Ref.Count; ++I) { 410 TypeIndex &TI = DestTIs[I]; 411 bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI) 412 : remapTypeIndex(TI); 413 if (LLVM_UNLIKELY(!Success)) 414 return {}; 415 } 416 } 417 418 if (Align > 0) { 419 RecordPrefix *StorageHeader = 420 reinterpret_cast<RecordPrefix *>(Storage.data()); 421 StorageHeader->RecordLen += 4 - Align; 422 423 DestContent = Storage.data() + OriginalType.RecordData.size(); 424 for (; Align < 4; ++Align) 425 *DestContent++ = LF_PAD4 - Align; 426 } 427 return Storage; 428} 429 430Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest, 431 SmallVectorImpl<TypeIndex> &SourceToDest, 432 const CVTypeArray &Types) { 433 TypeStreamMerger M(SourceToDest); 434 return M.mergeTypeRecords(Dest, Types); 435} 436 437Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest, 438 ArrayRef<TypeIndex> TypeSourceToDest, 439 SmallVectorImpl<TypeIndex> &SourceToDest, 440 const CVTypeArray &Ids) { 441 TypeStreamMerger M(SourceToDest); 442 return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); 443} 444 445Error llvm::codeview::mergeTypeAndIdRecords( 446 MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, 447 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes, 448 std::optional<PCHMergerInfo> &PCHInfo) { 449 TypeStreamMerger M(SourceToDest); 450 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHInfo); 451} 452 453Error llvm::codeview::mergeTypeAndIdRecords( 454 GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes, 455 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes, 456 ArrayRef<GloballyHashedType> Hashes, 457 std::optional<PCHMergerInfo> &PCHInfo) { 458 TypeStreamMerger M(SourceToDest); 459 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, PCHInfo); 460} 461 462Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest, 463 SmallVectorImpl<TypeIndex> &SourceToDest, 464 const CVTypeArray &Types, 465 ArrayRef<GloballyHashedType> Hashes, 466 std::optional<PCHMergerInfo> &PCHInfo) { 467 TypeStreamMerger M(SourceToDest); 468 return M.mergeTypeRecords(Dest, Types, Hashes, PCHInfo); 469} 470 471Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest, 472 ArrayRef<TypeIndex> Types, 473 SmallVectorImpl<TypeIndex> &SourceToDest, 474 const CVTypeArray &Ids, 475 ArrayRef<GloballyHashedType> Hashes) { 476 TypeStreamMerger M(SourceToDest); 477 return M.mergeIdRecords(Dest, Types, Ids, Hashes); 478} 479 480Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) { 481 // For object files containing precompiled types, we need to extract the 482 // signature, through EndPrecompRecord. This is done here for performance 483 // reasons, to avoid re-parsing the Types stream. 484 if (Type.kind() == LF_ENDPRECOMP) { 485 EndPrecompRecord EP; 486 if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type), 487 EP)) 488 return joinErrors(std::move(EC), errorCorruptRecord()); 489 // Only one record of this kind can appear in a OBJ. 490 if (PCHInfo) 491 return errorCorruptRecord(); 492 PCHInfo.emplace(PCHMergerInfo{EP.getSignature(), CurIndex.toArrayIndex()}); 493 return false; 494 } 495 return true; 496} 497