1//===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
10#include "llvm/ADT/SmallVector.h"
11#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
12#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
13#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
14#include "llvm/DebugInfo/CodeView/TypeIndex.h"
15#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
16#include "llvm/DebugInfo/CodeView/TypeRecord.h"
17#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
18#include "llvm/Support/Error.h"
19#include <optional>
20
21using namespace llvm;
22using namespace llvm::codeview;
23
24static inline size_t slotForIndex(TypeIndex Idx) {
25  assert(!Idx.isSimple() && "simple type indices have no slots");
26  return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
27}
28
29namespace {
30
31/// Implementation of CodeView type stream merging.
32///
33/// A CodeView type stream is a series of records that reference each other
34/// through type indices. A type index is either "simple", meaning it is less
35/// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36/// refers to a prior type record in the current stream. The type index of a
37/// record is equal to the number of records before it in the stream plus
38/// 0x1000.
39///
40/// Type records are only allowed to use type indices smaller than their own, so
41/// a type stream is effectively a topologically sorted DAG. Cycles occurring in
42/// the type graph of the source program are resolved with forward declarations
43/// of composite types. This class implements the following type stream merging
44/// algorithm, which relies on this DAG structure:
45///
46/// - Begin with a new empty stream, and a new empty hash table that maps from
47///   type record contents to new type index.
48/// - For each new type stream, maintain a map from source type index to
49///   destination type index.
50/// - For each record, copy it and rewrite its type indices to be valid in the
51///   destination type stream.
52/// - If the new type record is not already present in the destination stream
53///   hash table, append it to the destination type stream, assign it the next
54///   type index, and update the two hash tables.
55/// - If the type record already exists in the destination stream, discard it
56///   and update the type index map to forward the source type index to the
57///   existing destination type index.
58///
59/// As an additional complication, type stream merging actually produces two
60/// streams: an item (or IPI) stream and a type stream, as this is what is
61/// actually stored in the final PDB. We choose which records go where by
62/// looking at the record kind.
63class TypeStreamMerger {
64public:
65  explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
66      : IndexMap(SourceToDest) {
67    // When dealing with precompiled headers objects, all data in SourceToDest
68    // belongs to the precompiled headers object, and is assumed to be already
69    // remapped to the target PDB. Any forthcoming type that will be merged in
70    // might potentially back-reference this data. We also don't want to resolve
71    // twice the types in the precompiled object.
72    CurIndex += SourceToDest.size();
73  }
74
75  static const TypeIndex Untranslated;
76
77  // Local hashing entry points
78  Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
79                         MergingTypeTableBuilder &DestTypes,
80                         const CVTypeArray &IdsAndTypes,
81                         std::optional<PCHMergerInfo> &PCHInfo);
82  Error mergeIdRecords(MergingTypeTableBuilder &Dest,
83                       ArrayRef<TypeIndex> TypeSourceToDest,
84                       const CVTypeArray &Ids);
85  Error mergeTypeRecords(MergingTypeTableBuilder &Dest,
86                         const CVTypeArray &Types);
87
88  // Global hashing entry points
89  Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
90                         GlobalTypeTableBuilder &DestTypes,
91                         const CVTypeArray &IdsAndTypes,
92                         ArrayRef<GloballyHashedType> Hashes,
93                         std::optional<PCHMergerInfo> &PCHInfo);
94  Error mergeIdRecords(GlobalTypeTableBuilder &Dest,
95                       ArrayRef<TypeIndex> TypeSourceToDest,
96                       const CVTypeArray &Ids,
97                       ArrayRef<GloballyHashedType> Hashes);
98  Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
99                         ArrayRef<GloballyHashedType> Hashes,
100                         std::optional<PCHMergerInfo> &PCHInfo);
101
102private:
103  Error doit(const CVTypeArray &Types);
104
105  Error remapAllTypes(const CVTypeArray &Types);
106
107  Error remapType(const CVType &Type);
108
109  void addMapping(TypeIndex Idx);
110
111  inline bool remapTypeIndex(TypeIndex &Idx) {
112    // If we're mapping a pure index stream, then IndexMap only contains
113    // mappings from OldIdStream -> NewIdStream, in which case we will need to
114    // use the special mapping from OldTypeStream -> NewTypeStream which was
115    // computed externally.  Regardless, we use this special map if and only if
116    // we are doing an id-only mapping.
117    if (!hasTypeStream())
118      return remapIndex(Idx, TypeLookup);
119
120    assert(TypeLookup.empty());
121    return remapIndex(Idx, IndexMap);
122  }
123  inline bool remapItemIndex(TypeIndex &Idx) {
124    assert(hasIdStream());
125    return remapIndex(Idx, IndexMap);
126  }
127
128  bool hasTypeStream() const {
129    return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
130  }
131
132  bool hasIdStream() const {
133    return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
134  }
135
136  ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
137                                 MutableArrayRef<uint8_t> Storage);
138
139  inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
140    if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
141      return true;
142
143    return remapIndexFallback(Idx, Map);
144  }
145
146  inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
147    // Simple types are unchanged.
148    if (Idx.isSimple())
149      return true;
150
151    // Check if this type index refers to a record we've already translated
152    // successfully. If it refers to a type later in the stream or a record we
153    // had to defer, defer it until later pass.
154    unsigned MapPos = slotForIndex(Idx);
155    if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
156      return false;
157
158    Idx = Map[MapPos];
159    return true;
160  }
161
162  bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
163
164  Error errorCorruptRecord() const {
165    return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
166  }
167
168  Expected<bool> shouldRemapType(const CVType &Type);
169
170  std::optional<Error> LastError;
171
172  bool UseGlobalHashes = false;
173
174  bool IsSecondPass = false;
175
176  unsigned NumBadIndices = 0;
177
178  TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
179
180  MergingTypeTableBuilder *DestIdStream = nullptr;
181  MergingTypeTableBuilder *DestTypeStream = nullptr;
182
183  GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
184  GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
185
186  ArrayRef<GloballyHashedType> GlobalHashes;
187
188  // If we're only mapping id records, this array contains the mapping for
189  // type records.
190  ArrayRef<TypeIndex> TypeLookup;
191
192  /// Map from source type index to destination type index. Indexed by source
193  /// type index minus 0x1000.
194  SmallVectorImpl<TypeIndex> &IndexMap;
195
196  /// Temporary storage that we use to copy a record's data while re-writing
197  /// its type indices.
198  SmallVector<uint8_t, 256> RemapStorage;
199
200  std::optional<PCHMergerInfo> PCHInfo;
201};
202
203} // end anonymous namespace
204
205const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
206
207void TypeStreamMerger::addMapping(TypeIndex Idx) {
208  if (!IsSecondPass) {
209    assert(IndexMap.size() == slotForIndex(CurIndex) &&
210           "visitKnownRecord should add one index map entry");
211    IndexMap.push_back(Idx);
212  } else {
213    assert(slotForIndex(CurIndex) < IndexMap.size());
214    IndexMap[slotForIndex(CurIndex)] = Idx;
215  }
216}
217
218bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
219                                          ArrayRef<TypeIndex> Map) {
220  size_t MapPos = slotForIndex(Idx);
221
222  // If this is the second pass and this index isn't in the map, then it points
223  // outside the current type stream, and this is a corrupt record.
224  if (IsSecondPass && MapPos >= Map.size()) {
225    // FIXME: Print a more useful error. We can give the current record and the
226    // index that we think its pointing to.
227    if (LastError)
228      LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
229    else
230      LastError = errorCorruptRecord();
231  }
232
233  ++NumBadIndices;
234
235  // This type index is invalid. Remap this to "not translated by cvpack",
236  // and return failure.
237  Idx = Untranslated;
238  return false;
239}
240
241// Local hashing entry points
242Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest,
243                                         const CVTypeArray &Types) {
244  DestTypeStream = &Dest;
245  UseGlobalHashes = false;
246
247  return doit(Types);
248}
249
250Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
251                                       ArrayRef<TypeIndex> TypeSourceToDest,
252                                       const CVTypeArray &Ids) {
253  DestIdStream = &Dest;
254  TypeLookup = TypeSourceToDest;
255  UseGlobalHashes = false;
256
257  return doit(Ids);
258}
259
260Error TypeStreamMerger::mergeTypesAndIds(
261    MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
262    const CVTypeArray &IdsAndTypes, std::optional<PCHMergerInfo> &PCHInfo) {
263  DestIdStream = &DestIds;
264  DestTypeStream = &DestTypes;
265  UseGlobalHashes = false;
266  auto Err = doit(IdsAndTypes);
267  PCHInfo = this->PCHInfo;
268  return Err;
269}
270
271// Global hashing entry points
272Error TypeStreamMerger::mergeTypeRecords(
273    GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
274    ArrayRef<GloballyHashedType> Hashes,
275    std::optional<PCHMergerInfo> &PCHInfo) {
276  DestGlobalTypeStream = &Dest;
277  UseGlobalHashes = true;
278  GlobalHashes = Hashes;
279  auto Err = doit(Types);
280  PCHInfo = this->PCHInfo;
281  return Err;
282}
283
284Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
285                                       ArrayRef<TypeIndex> TypeSourceToDest,
286                                       const CVTypeArray &Ids,
287                                       ArrayRef<GloballyHashedType> Hashes) {
288  DestGlobalIdStream = &Dest;
289  TypeLookup = TypeSourceToDest;
290  UseGlobalHashes = true;
291  GlobalHashes = Hashes;
292
293  return doit(Ids);
294}
295
296Error TypeStreamMerger::mergeTypesAndIds(
297    GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
298    const CVTypeArray &IdsAndTypes, ArrayRef<GloballyHashedType> Hashes,
299    std::optional<PCHMergerInfo> &PCHInfo) {
300  DestGlobalIdStream = &DestIds;
301  DestGlobalTypeStream = &DestTypes;
302  UseGlobalHashes = true;
303  GlobalHashes = Hashes;
304  auto Err = doit(IdsAndTypes);
305  PCHInfo = this->PCHInfo;
306  return Err;
307}
308
309Error TypeStreamMerger::doit(const CVTypeArray &Types) {
310  if (auto EC = remapAllTypes(Types))
311    return EC;
312
313  // If we found bad indices but no other errors, try doing another pass and see
314  // if we can resolve the indices that weren't in the map on the first pass.
315  // This may require multiple passes, but we should always make progress. MASM
316  // is the only known CodeView producer that makes type streams that aren't
317  // topologically sorted. The standard library contains MASM-produced objects,
318  // so this is important to handle correctly, but we don't have to be too
319  // efficient. MASM type streams are usually very small.
320  while (!LastError && NumBadIndices > 0) {
321    unsigned BadIndicesRemaining = NumBadIndices;
322    IsSecondPass = true;
323    NumBadIndices = 0;
324    CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
325
326    if (auto EC = remapAllTypes(Types))
327      return EC;
328
329    assert(NumBadIndices <= BadIndicesRemaining &&
330           "second pass found more bad indices");
331    if (!LastError && NumBadIndices == BadIndicesRemaining) {
332      return llvm::make_error<CodeViewError>(
333          cv_error_code::corrupt_record, "Input type graph contains cycles");
334    }
335  }
336
337  if (LastError)
338    return std::move(*LastError);
339  return Error::success();
340}
341
342Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
343  BinaryStreamRef Stream = Types.getUnderlyingStream();
344  ArrayRef<uint8_t> Buffer;
345  cantFail(Stream.readBytes(0, Stream.getLength(), Buffer));
346
347  return forEachCodeViewRecord<CVType>(
348      Buffer, [this](const CVType &T) { return remapType(T); });
349}
350
351Error TypeStreamMerger::remapType(const CVType &Type) {
352  auto R = shouldRemapType(Type);
353  if (!R)
354    return R.takeError();
355
356  TypeIndex DestIdx = Untranslated;
357  if (*R) {
358    auto DoSerialize =
359        [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
360      return remapIndices(Type, Storage);
361    };
362    unsigned AlignedSize = alignTo(Type.RecordData.size(), 4);
363
364    if (LLVM_LIKELY(UseGlobalHashes)) {
365      GlobalTypeTableBuilder &Dest =
366          isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
367      GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
368      DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize);
369    } else {
370      MergingTypeTableBuilder &Dest =
371          isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
372
373      RemapStorage.resize(AlignedSize);
374      ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
375      if (!Result.empty())
376        DestIdx = Dest.insertRecordBytes(Result);
377    }
378  }
379  addMapping(DestIdx);
380
381  ++CurIndex;
382  assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
383         "visitKnownRecord should add one index map entry");
384  return Error::success();
385}
386
387ArrayRef<uint8_t>
388TypeStreamMerger::remapIndices(const CVType &OriginalType,
389                               MutableArrayRef<uint8_t> Storage) {
390  unsigned Align = OriginalType.RecordData.size() & 3;
391  assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) &&
392         "The storage buffer size is not a multiple of 4 bytes which will "
393         "cause misalignment in the output TPI stream!");
394
395  SmallVector<TiReference, 4> Refs;
396  discoverTypeIndices(OriginalType.RecordData, Refs);
397  if (Refs.empty() && Align == 0)
398    return OriginalType.RecordData;
399
400  ::memcpy(Storage.data(), OriginalType.RecordData.data(),
401           OriginalType.RecordData.size());
402
403  uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
404
405  for (auto &Ref : Refs) {
406    TypeIndex *DestTIs =
407        reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
408
409    for (size_t I = 0; I < Ref.Count; ++I) {
410      TypeIndex &TI = DestTIs[I];
411      bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI)
412                                                       : remapTypeIndex(TI);
413      if (LLVM_UNLIKELY(!Success))
414        return {};
415    }
416  }
417
418  if (Align > 0) {
419    RecordPrefix *StorageHeader =
420        reinterpret_cast<RecordPrefix *>(Storage.data());
421    StorageHeader->RecordLen += 4 - Align;
422
423    DestContent = Storage.data() + OriginalType.RecordData.size();
424    for (; Align < 4; ++Align)
425      *DestContent++ = LF_PAD4 - Align;
426  }
427  return Storage;
428}
429
430Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest,
431                                       SmallVectorImpl<TypeIndex> &SourceToDest,
432                                       const CVTypeArray &Types) {
433  TypeStreamMerger M(SourceToDest);
434  return M.mergeTypeRecords(Dest, Types);
435}
436
437Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest,
438                                     ArrayRef<TypeIndex> TypeSourceToDest,
439                                     SmallVectorImpl<TypeIndex> &SourceToDest,
440                                     const CVTypeArray &Ids) {
441  TypeStreamMerger M(SourceToDest);
442  return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
443}
444
445Error llvm::codeview::mergeTypeAndIdRecords(
446    MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
447    SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
448    std::optional<PCHMergerInfo> &PCHInfo) {
449  TypeStreamMerger M(SourceToDest);
450  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHInfo);
451}
452
453Error llvm::codeview::mergeTypeAndIdRecords(
454    GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
455    SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
456    ArrayRef<GloballyHashedType> Hashes,
457    std::optional<PCHMergerInfo> &PCHInfo) {
458  TypeStreamMerger M(SourceToDest);
459  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, PCHInfo);
460}
461
462Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
463                                       SmallVectorImpl<TypeIndex> &SourceToDest,
464                                       const CVTypeArray &Types,
465                                       ArrayRef<GloballyHashedType> Hashes,
466                                       std::optional<PCHMergerInfo> &PCHInfo) {
467  TypeStreamMerger M(SourceToDest);
468  return M.mergeTypeRecords(Dest, Types, Hashes, PCHInfo);
469}
470
471Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
472                                     ArrayRef<TypeIndex> Types,
473                                     SmallVectorImpl<TypeIndex> &SourceToDest,
474                                     const CVTypeArray &Ids,
475                                     ArrayRef<GloballyHashedType> Hashes) {
476  TypeStreamMerger M(SourceToDest);
477  return M.mergeIdRecords(Dest, Types, Ids, Hashes);
478}
479
480Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
481  // For object files containing precompiled types, we need to extract the
482  // signature, through EndPrecompRecord. This is done here for performance
483  // reasons, to avoid re-parsing the Types stream.
484  if (Type.kind() == LF_ENDPRECOMP) {
485    EndPrecompRecord EP;
486    if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
487                                                  EP))
488      return joinErrors(std::move(EC), errorCorruptRecord());
489    // Only one record of this kind can appear in a OBJ.
490    if (PCHInfo)
491      return errorCorruptRecord();
492    PCHInfo.emplace(PCHMergerInfo{EP.getSignature(), CurIndex.toArrayIndex()});
493    return false;
494  }
495  return true;
496}
497