TypeStreamMerger.cpp revision 363496
1//===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
10#include "llvm/ADT/SmallString.h"
11#include "llvm/ADT/StringExtras.h"
12#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
13#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
14#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
15#include "llvm/DebugInfo/CodeView/TypeIndex.h"
16#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
17#include "llvm/DebugInfo/CodeView/TypeRecord.h"
18#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
19#include "llvm/Support/Error.h"
20
21using namespace llvm;
22using namespace llvm::codeview;
23
24static inline size_t slotForIndex(TypeIndex Idx) {
25  assert(!Idx.isSimple() && "simple type indices have no slots");
26  return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
27}
28
29namespace {
30
31/// Implementation of CodeView type stream merging.
32///
33/// A CodeView type stream is a series of records that reference each other
34/// through type indices. A type index is either "simple", meaning it is less
35/// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36/// refers to a prior type record in the current stream. The type index of a
37/// record is equal to the number of records before it in the stream plus
38/// 0x1000.
39///
40/// Type records are only allowed to use type indices smaller than their own, so
41/// a type stream is effectively a topologically sorted DAG. Cycles occuring in
42/// the type graph of the source program are resolved with forward declarations
43/// of composite types. This class implements the following type stream merging
44/// algorithm, which relies on this DAG structure:
45///
46/// - Begin with a new empty stream, and a new empty hash table that maps from
47///   type record contents to new type index.
48/// - For each new type stream, maintain a map from source type index to
49///   destination type index.
50/// - For each record, copy it and rewrite its type indices to be valid in the
51///   destination type stream.
52/// - If the new type record is not already present in the destination stream
53///   hash table, append it to the destination type stream, assign it the next
54///   type index, and update the two hash tables.
55/// - If the type record already exists in the destination stream, discard it
56///   and update the type index map to forward the source type index to the
57///   existing destination type index.
58///
59/// As an additional complication, type stream merging actually produces two
60/// streams: an item (or IPI) stream and a type stream, as this is what is
61/// actually stored in the final PDB. We choose which records go where by
62/// looking at the record kind.
63class TypeStreamMerger {
64public:
65  explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
66      : IndexMap(SourceToDest) {
67    // When dealing with precompiled headers objects, all data in SourceToDest
68    // belongs to the precompiled headers object, and is assumed to be already
69    // remapped to the target PDB. Any forthcoming type that will be merged in
70    // might potentially back-reference this data. We also don't want to resolve
71    // twice the types in the precompiled object.
72    CurIndex += SourceToDest.size();
73  }
74
75  static const TypeIndex Untranslated;
76
77  // Local hashing entry points
78  Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
79                         MergingTypeTableBuilder &DestTypes,
80                         const CVTypeArray &IdsAndTypes, Optional<uint32_t> &S);
81  Error mergeIdRecords(MergingTypeTableBuilder &Dest,
82                       ArrayRef<TypeIndex> TypeSourceToDest,
83                       const CVTypeArray &Ids);
84  Error mergeTypeRecords(MergingTypeTableBuilder &Dest,
85                         const CVTypeArray &Types);
86
87  // Global hashing entry points
88  Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
89                         GlobalTypeTableBuilder &DestTypes,
90                         const CVTypeArray &IdsAndTypes,
91                         ArrayRef<GloballyHashedType> Hashes,
92                         Optional<uint32_t> &S);
93  Error mergeIdRecords(GlobalTypeTableBuilder &Dest,
94                       ArrayRef<TypeIndex> TypeSourceToDest,
95                       const CVTypeArray &Ids,
96                       ArrayRef<GloballyHashedType> Hashes);
97  Error mergeTypeRecords(GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
98                         ArrayRef<GloballyHashedType> Hashes,
99                         Optional<uint32_t> &S);
100
101private:
102  Error doit(const CVTypeArray &Types);
103
104  Error remapAllTypes(const CVTypeArray &Types);
105
106  Error remapType(const CVType &Type);
107
108  void addMapping(TypeIndex Idx);
109
110  inline bool remapTypeIndex(TypeIndex &Idx) {
111    // If we're mapping a pure index stream, then IndexMap only contains
112    // mappings from OldIdStream -> NewIdStream, in which case we will need to
113    // use the special mapping from OldTypeStream -> NewTypeStream which was
114    // computed externally.  Regardless, we use this special map if and only if
115    // we are doing an id-only mapping.
116    if (!hasTypeStream())
117      return remapIndex(Idx, TypeLookup);
118
119    assert(TypeLookup.empty());
120    return remapIndex(Idx, IndexMap);
121  }
122  inline bool remapItemIndex(TypeIndex &Idx) {
123    assert(hasIdStream());
124    return remapIndex(Idx, IndexMap);
125  }
126
127  bool hasTypeStream() const {
128    return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
129  }
130
131  bool hasIdStream() const {
132    return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
133  }
134
135  ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
136                                 MutableArrayRef<uint8_t> Storage);
137
138  inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
139    if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
140      return true;
141
142    return remapIndexFallback(Idx, Map);
143  }
144
145  inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
146    // Simple types are unchanged.
147    if (Idx.isSimple())
148      return true;
149
150    // Check if this type index refers to a record we've already translated
151    // successfully. If it refers to a type later in the stream or a record we
152    // had to defer, defer it until later pass.
153    unsigned MapPos = slotForIndex(Idx);
154    if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
155      return false;
156
157    Idx = Map[MapPos];
158    return true;
159  }
160
161  bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
162
163  Error errorCorruptRecord() const {
164    return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
165  }
166
167  Expected<bool> shouldRemapType(const CVType &Type);
168
169  Optional<Error> LastError;
170
171  bool UseGlobalHashes = false;
172
173  bool IsSecondPass = false;
174
175  unsigned NumBadIndices = 0;
176
177  TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
178
179  MergingTypeTableBuilder *DestIdStream = nullptr;
180  MergingTypeTableBuilder *DestTypeStream = nullptr;
181
182  GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
183  GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
184
185  ArrayRef<GloballyHashedType> GlobalHashes;
186
187  // If we're only mapping id records, this array contains the mapping for
188  // type records.
189  ArrayRef<TypeIndex> TypeLookup;
190
191  /// Map from source type index to destination type index. Indexed by source
192  /// type index minus 0x1000.
193  SmallVectorImpl<TypeIndex> &IndexMap;
194
195  /// Temporary storage that we use to copy a record's data while re-writing
196  /// its type indices.
197  SmallVector<uint8_t, 256> RemapStorage;
198
199  Optional<uint32_t> PCHSignature;
200};
201
202} // end anonymous namespace
203
204const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
205
206void TypeStreamMerger::addMapping(TypeIndex Idx) {
207  if (!IsSecondPass) {
208    assert(IndexMap.size() == slotForIndex(CurIndex) &&
209           "visitKnownRecord should add one index map entry");
210    IndexMap.push_back(Idx);
211  } else {
212    assert(slotForIndex(CurIndex) < IndexMap.size());
213    IndexMap[slotForIndex(CurIndex)] = Idx;
214  }
215}
216
217bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
218                                          ArrayRef<TypeIndex> Map) {
219  size_t MapPos = slotForIndex(Idx);
220
221  // If this is the second pass and this index isn't in the map, then it points
222  // outside the current type stream, and this is a corrupt record.
223  if (IsSecondPass && MapPos >= Map.size()) {
224    // FIXME: Print a more useful error. We can give the current record and the
225    // index that we think its pointing to.
226    if (LastError)
227      LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
228    else
229      LastError = errorCorruptRecord();
230  }
231
232  ++NumBadIndices;
233
234  // This type index is invalid. Remap this to "not translated by cvpack",
235  // and return failure.
236  Idx = Untranslated;
237  return false;
238}
239
240// Local hashing entry points
241Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest,
242                                         const CVTypeArray &Types) {
243  DestTypeStream = &Dest;
244  UseGlobalHashes = false;
245
246  return doit(Types);
247}
248
249Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
250                                       ArrayRef<TypeIndex> TypeSourceToDest,
251                                       const CVTypeArray &Ids) {
252  DestIdStream = &Dest;
253  TypeLookup = TypeSourceToDest;
254  UseGlobalHashes = false;
255
256  return doit(Ids);
257}
258
259Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
260                                         MergingTypeTableBuilder &DestTypes,
261                                         const CVTypeArray &IdsAndTypes,
262                                         Optional<uint32_t> &S) {
263  DestIdStream = &DestIds;
264  DestTypeStream = &DestTypes;
265  UseGlobalHashes = false;
266  auto Err = doit(IdsAndTypes);
267  S = PCHSignature;
268  return Err;
269}
270
271// Global hashing entry points
272Error TypeStreamMerger::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
273                                         const CVTypeArray &Types,
274                                         ArrayRef<GloballyHashedType> Hashes,
275                                         Optional<uint32_t> &S) {
276  DestGlobalTypeStream = &Dest;
277  UseGlobalHashes = true;
278  GlobalHashes = Hashes;
279  auto Err = doit(Types);
280  S = PCHSignature;
281  return Err;
282}
283
284Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
285                                       ArrayRef<TypeIndex> TypeSourceToDest,
286                                       const CVTypeArray &Ids,
287                                       ArrayRef<GloballyHashedType> Hashes) {
288  DestGlobalIdStream = &Dest;
289  TypeLookup = TypeSourceToDest;
290  UseGlobalHashes = true;
291  GlobalHashes = Hashes;
292
293  return doit(Ids);
294}
295
296Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
297                                         GlobalTypeTableBuilder &DestTypes,
298                                         const CVTypeArray &IdsAndTypes,
299                                         ArrayRef<GloballyHashedType> Hashes,
300                                         Optional<uint32_t> &S) {
301  DestGlobalIdStream = &DestIds;
302  DestGlobalTypeStream = &DestTypes;
303  UseGlobalHashes = true;
304  GlobalHashes = Hashes;
305  auto Err = doit(IdsAndTypes);
306  S = PCHSignature;
307  return Err;
308}
309
310Error TypeStreamMerger::doit(const CVTypeArray &Types) {
311  if (auto EC = remapAllTypes(Types))
312    return EC;
313
314  // If we found bad indices but no other errors, try doing another pass and see
315  // if we can resolve the indices that weren't in the map on the first pass.
316  // This may require multiple passes, but we should always make progress. MASM
317  // is the only known CodeView producer that makes type streams that aren't
318  // topologically sorted. The standard library contains MASM-produced objects,
319  // so this is important to handle correctly, but we don't have to be too
320  // efficient. MASM type streams are usually very small.
321  while (!LastError && NumBadIndices > 0) {
322    unsigned BadIndicesRemaining = NumBadIndices;
323    IsSecondPass = true;
324    NumBadIndices = 0;
325    CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
326
327    if (auto EC = remapAllTypes(Types))
328      return EC;
329
330    assert(NumBadIndices <= BadIndicesRemaining &&
331           "second pass found more bad indices");
332    if (!LastError && NumBadIndices == BadIndicesRemaining) {
333      return llvm::make_error<CodeViewError>(
334          cv_error_code::corrupt_record, "Input type graph contains cycles");
335    }
336  }
337
338  if (LastError)
339    return std::move(*LastError);
340  return Error::success();
341}
342
343Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
344  BinaryStreamRef Stream = Types.getUnderlyingStream();
345  ArrayRef<uint8_t> Buffer;
346  cantFail(Stream.readBytes(0, Stream.getLength(), Buffer));
347
348  return forEachCodeViewRecord<CVType>(
349      Buffer, [this](const CVType &T) { return remapType(T); });
350}
351
352Error TypeStreamMerger::remapType(const CVType &Type) {
353  auto R = shouldRemapType(Type);
354  if (!R)
355    return R.takeError();
356
357  TypeIndex DestIdx = Untranslated;
358  if (*R) {
359    auto DoSerialize =
360        [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
361      return remapIndices(Type, Storage);
362    };
363    unsigned AlignedSize = alignTo(Type.RecordData.size(), 4);
364
365    if (LLVM_LIKELY(UseGlobalHashes)) {
366      GlobalTypeTableBuilder &Dest =
367          isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
368      GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
369      DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize);
370    } else {
371      MergingTypeTableBuilder &Dest =
372          isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
373
374      RemapStorage.resize(AlignedSize);
375      ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
376      if (!Result.empty())
377        DestIdx = Dest.insertRecordBytes(Result);
378    }
379  }
380  addMapping(DestIdx);
381
382  ++CurIndex;
383  assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
384         "visitKnownRecord should add one index map entry");
385  return Error::success();
386}
387
388ArrayRef<uint8_t>
389TypeStreamMerger::remapIndices(const CVType &OriginalType,
390                               MutableArrayRef<uint8_t> Storage) {
391  unsigned Align = OriginalType.RecordData.size() & 3;
392  unsigned AlignedSize = alignTo(OriginalType.RecordData.size(), 4);
393  assert(Storage.size() == AlignedSize &&
394         "The storage buffer size is not a multiple of 4 bytes which will "
395         "cause misalignment in the output TPI stream!");
396
397  SmallVector<TiReference, 4> Refs;
398  discoverTypeIndices(OriginalType.RecordData, Refs);
399  if (Refs.empty() && Align == 0)
400    return OriginalType.RecordData;
401
402  ::memcpy(Storage.data(), OriginalType.RecordData.data(),
403           OriginalType.RecordData.size());
404
405  uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
406
407  for (auto &Ref : Refs) {
408    TypeIndex *DestTIs =
409        reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
410
411    for (size_t I = 0; I < Ref.Count; ++I) {
412      TypeIndex &TI = DestTIs[I];
413      bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI)
414                                                       : remapTypeIndex(TI);
415      if (LLVM_UNLIKELY(!Success))
416        return {};
417    }
418  }
419
420  if (Align > 0) {
421    RecordPrefix *StorageHeader =
422        reinterpret_cast<RecordPrefix *>(Storage.data());
423    StorageHeader->RecordLen += 4 - Align;
424
425    DestContent = Storage.data() + OriginalType.RecordData.size();
426    for (; Align < 4; ++Align)
427      *DestContent++ = LF_PAD4 - Align;
428  }
429  return Storage;
430}
431
432Error llvm::codeview::mergeTypeRecords(MergingTypeTableBuilder &Dest,
433                                       SmallVectorImpl<TypeIndex> &SourceToDest,
434                                       const CVTypeArray &Types) {
435  TypeStreamMerger M(SourceToDest);
436  return M.mergeTypeRecords(Dest, Types);
437}
438
439Error llvm::codeview::mergeIdRecords(MergingTypeTableBuilder &Dest,
440                                     ArrayRef<TypeIndex> TypeSourceToDest,
441                                     SmallVectorImpl<TypeIndex> &SourceToDest,
442                                     const CVTypeArray &Ids) {
443  TypeStreamMerger M(SourceToDest);
444  return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
445}
446
447Error llvm::codeview::mergeTypeAndIdRecords(
448    MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes,
449    SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
450    Optional<uint32_t> &PCHSignature) {
451  TypeStreamMerger M(SourceToDest);
452  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature);
453}
454
455Error llvm::codeview::mergeTypeAndIdRecords(
456    GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
457    SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
458    ArrayRef<GloballyHashedType> Hashes, Optional<uint32_t> &PCHSignature) {
459  TypeStreamMerger M(SourceToDest);
460  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes,
461                            PCHSignature);
462}
463
464Error llvm::codeview::mergeTypeRecords(GlobalTypeTableBuilder &Dest,
465                                       SmallVectorImpl<TypeIndex> &SourceToDest,
466                                       const CVTypeArray &Types,
467                                       ArrayRef<GloballyHashedType> Hashes,
468                                       Optional<uint32_t> &PCHSignature) {
469  TypeStreamMerger M(SourceToDest);
470  return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature);
471}
472
473Error llvm::codeview::mergeIdRecords(GlobalTypeTableBuilder &Dest,
474                                     ArrayRef<TypeIndex> Types,
475                                     SmallVectorImpl<TypeIndex> &SourceToDest,
476                                     const CVTypeArray &Ids,
477                                     ArrayRef<GloballyHashedType> Hashes) {
478  TypeStreamMerger M(SourceToDest);
479  return M.mergeIdRecords(Dest, Types, Ids, Hashes);
480}
481
482Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
483  // For object files containing precompiled types, we need to extract the
484  // signature, through EndPrecompRecord. This is done here for performance
485  // reasons, to avoid re-parsing the Types stream.
486  if (Type.kind() == LF_ENDPRECOMP) {
487    EndPrecompRecord EP;
488    if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
489                                                  EP))
490      return joinErrors(std::move(EC), errorCorruptRecord());
491    if (PCHSignature.hasValue())
492      return errorCorruptRecord();
493    PCHSignature.emplace(EP.getSignature());
494    return false;
495  }
496  return true;
497}
498