1//===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h"
9
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Endian.h"
#include <cstring>
12
13using namespace llvm;
14using namespace llvm::codeview;
15
16static inline MethodKind getMethodKind(uint16_t Attrs) {
17  Attrs &= uint16_t(MethodOptions::MethodKindMask);
18  Attrs >>= 2;
19  return MethodKind(Attrs);
20}
21
22static inline bool isIntroVirtual(uint16_t Attrs) {
23  MethodKind MK = getMethodKind(Attrs);
24  return MK == MethodKind::IntroducingVirtual ||
25         MK == MethodKind::PureIntroducingVirtual;
26}
27
28static inline PointerMode getPointerMode(uint32_t Attrs) {
29  return static_cast<PointerMode>((Attrs >> PointerRecord::PointerModeShift) &
30                                  PointerRecord::PointerModeMask);
31}
32
33static inline bool isMemberPointer(uint32_t Attrs) {
34  PointerMode Mode = getPointerMode(Attrs);
35  return Mode == PointerMode::PointerToDataMember ||
36         Mode == PointerMode::PointerToMemberFunction;
37}
38
39static inline uint32_t getEncodedIntegerLength(ArrayRef<uint8_t> Data) {
40  uint16_t N = support::endian::read16le(Data.data());
41  if (N < LF_NUMERIC)
42    return 2;
43
44  assert(N <= LF_UQUADWORD);
45
46  constexpr uint32_t Sizes[] = {
47      1,  // LF_CHAR
48      2,  // LF_SHORT
49      2,  // LF_USHORT
50      4,  // LF_LONG
51      4,  // LF_ULONG
52      4,  // LF_REAL32
53      8,  // LF_REAL64
54      10, // LF_REAL80
55      16, // LF_REAL128
56      8,  // LF_QUADWORD
57      8,  // LF_UQUADWORD
58  };
59
60  return 2 + Sizes[N - LF_NUMERIC];
61}
62
63static inline uint32_t getCStringLength(ArrayRef<uint8_t> Data) {
64  const char *S = reinterpret_cast<const char *>(Data.data());
65  return strlen(S) + 1;
66}
67
68static void handleMethodOverloadList(ArrayRef<uint8_t> Content,
69                                     SmallVectorImpl<TiReference> &Refs) {
70  uint32_t Offset = 0;
71
72  while (!Content.empty()) {
73    // Array of:
74    //   0: Attrs
75    //   2: Padding
76    //   4: TypeIndex
77    //   if (isIntroVirtual())
78    //     8: VFTableOffset
79
80    // At least 8 bytes are guaranteed.  4 extra bytes come iff function is an
81    // intro virtual.
82    uint32_t Len = 8;
83
84    uint16_t Attrs = support::endian::read16le(Content.data());
85    Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
86
87    if (LLVM_UNLIKELY(isIntroVirtual(Attrs)))
88      Len += 4;
89    Offset += Len;
90    Content = Content.drop_front(Len);
91  }
92}
93
94static uint32_t handleBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset,
95                                SmallVectorImpl<TiReference> &Refs) {
96  // 0: Kind
97  // 2: Padding
98  // 4: TypeIndex
99  // 8: Encoded Integer
100  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
101  return 8 + getEncodedIntegerLength(Data.drop_front(8));
102}
103
104static uint32_t handleEnumerator(ArrayRef<uint8_t> Data, uint32_t Offset,
105                                 SmallVectorImpl<TiReference> &Refs) {
106  // 0: Kind
107  // 2: Padding
108  // 4: Encoded Integer
109  // <next>: Name
110  uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4));
111  return Size + getCStringLength(Data.drop_front(Size));
112}
113
114static uint32_t handleDataMember(ArrayRef<uint8_t> Data, uint32_t Offset,
115                                 SmallVectorImpl<TiReference> &Refs) {
116  // 0: Kind
117  // 2: Padding
118  // 4: TypeIndex
119  // 8: Encoded Integer
120  // <next>: Name
121  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
122  uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8));
123  return Size + getCStringLength(Data.drop_front(Size));
124}
125
126static uint32_t handleOverloadedMethod(ArrayRef<uint8_t> Data, uint32_t Offset,
127                                       SmallVectorImpl<TiReference> &Refs) {
128  // 0: Kind
129  // 2: Padding
130  // 4: TypeIndex
131  // 8: Name
132  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
133  return 8 + getCStringLength(Data.drop_front(8));
134}
135
136static uint32_t handleOneMethod(ArrayRef<uint8_t> Data, uint32_t Offset,
137                                SmallVectorImpl<TiReference> &Refs) {
138  // 0: Kind
139  // 2: Attributes
140  // 4: Type
141  // if (isIntroVirtual)
142  //   8: VFTableOffset
143  // <next>: Name
144  uint32_t Size = 8;
145  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
146
147  uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data());
148  if (LLVM_UNLIKELY(isIntroVirtual(Attrs)))
149    Size += 4;
150
151  return Size + getCStringLength(Data.drop_front(Size));
152}
153
154static uint32_t handleNestedType(ArrayRef<uint8_t> Data, uint32_t Offset,
155                                 SmallVectorImpl<TiReference> &Refs) {
156  // 0: Kind
157  // 2: Padding
158  // 4: TypeIndex
159  // 8: Name
160  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
161  return 8 + getCStringLength(Data.drop_front(8));
162}
163
164static uint32_t handleStaticDataMember(ArrayRef<uint8_t> Data, uint32_t Offset,
165                                       SmallVectorImpl<TiReference> &Refs) {
166  // 0: Kind
167  // 2: Padding
168  // 4: TypeIndex
169  // 8: Name
170  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
171  return 8 + getCStringLength(Data.drop_front(8));
172}
173
174static uint32_t handleVirtualBaseClass(ArrayRef<uint8_t> Data, uint32_t Offset,
175                                       bool IsIndirect,
176                                       SmallVectorImpl<TiReference> &Refs) {
177  // 0: Kind
178  // 2: Attrs
179  // 4: TypeIndex
180  // 8: TypeIndex
181  // 12: Encoded Integer
182  // <next>: Encoded Integer
183  uint32_t Size = 12;
184  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2});
185  Size += getEncodedIntegerLength(Data.drop_front(Size));
186  Size += getEncodedIntegerLength(Data.drop_front(Size));
187  return Size;
188}
189
190static uint32_t handleVFPtr(ArrayRef<uint8_t> Data, uint32_t Offset,
191                            SmallVectorImpl<TiReference> &Refs) {
192  // 0: Kind
193  // 2: Padding
194  // 4: TypeIndex
195  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
196  return 8;
197}
198
199static uint32_t handleListContinuation(ArrayRef<uint8_t> Data, uint32_t Offset,
200                                       SmallVectorImpl<TiReference> &Refs) {
201  // 0: Kind
202  // 2: Padding
203  // 4: TypeIndex
204  Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1});
205  return 8;
206}
207
208static void handleFieldList(ArrayRef<uint8_t> Content,
209                            SmallVectorImpl<TiReference> &Refs) {
210  uint32_t Offset = 0;
211  uint32_t ThisLen = 0;
212  while (!Content.empty()) {
213    TypeLeafKind Kind =
214        static_cast<TypeLeafKind>(support::endian::read16le(Content.data()));
215    switch (Kind) {
216    case LF_BCLASS:
217      ThisLen = handleBaseClass(Content, Offset, Refs);
218      break;
219    case LF_ENUMERATE:
220      ThisLen = handleEnumerator(Content, Offset, Refs);
221      break;
222    case LF_MEMBER:
223      ThisLen = handleDataMember(Content, Offset, Refs);
224      break;
225    case LF_METHOD:
226      ThisLen = handleOverloadedMethod(Content, Offset, Refs);
227      break;
228    case LF_ONEMETHOD:
229      ThisLen = handleOneMethod(Content, Offset, Refs);
230      break;
231    case LF_NESTTYPE:
232      ThisLen = handleNestedType(Content, Offset, Refs);
233      break;
234    case LF_STMEMBER:
235      ThisLen = handleStaticDataMember(Content, Offset, Refs);
236      break;
237    case LF_VBCLASS:
238    case LF_IVBCLASS:
239      ThisLen =
240          handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs);
241      break;
242    case LF_VFUNCTAB:
243      ThisLen = handleVFPtr(Content, Offset, Refs);
244      break;
245    case LF_INDEX:
246      ThisLen = handleListContinuation(Content, Offset, Refs);
247      break;
248    default:
249      return;
250    }
251    Content = Content.drop_front(ThisLen);
252    Offset += ThisLen;
253    if (!Content.empty()) {
254      uint8_t Pad = Content.front();
255      if (Pad >= LF_PAD0) {
256        uint32_t Skip = Pad & 0x0F;
257        Content = Content.drop_front(Skip);
258        Offset += Skip;
259      }
260    }
261  }
262}
263
264static void handlePointer(ArrayRef<uint8_t> Content,
265                          SmallVectorImpl<TiReference> &Refs) {
266  Refs.push_back({TiRefKind::TypeRef, 0, 1});
267
268  uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data());
269  if (isMemberPointer(Attrs))
270    Refs.push_back({TiRefKind::TypeRef, 8, 1});
271}
272
273static void discoverTypeIndices(ArrayRef<uint8_t> Content, TypeLeafKind Kind,
274                                SmallVectorImpl<TiReference> &Refs) {
275  uint32_t Count;
276  // FIXME: In the future it would be nice if we could avoid hardcoding these
277  // values.  One idea is to define some structures representing these types
278  // that would allow the use of offsetof().
279  switch (Kind) {
280  case TypeLeafKind::LF_FUNC_ID:
281    Refs.push_back({TiRefKind::IndexRef, 0, 1});
282    Refs.push_back({TiRefKind::TypeRef, 4, 1});
283    break;
284  case TypeLeafKind::LF_MFUNC_ID:
285    Refs.push_back({TiRefKind::TypeRef, 0, 2});
286    break;
287  case TypeLeafKind::LF_STRING_ID:
288    Refs.push_back({TiRefKind::IndexRef, 0, 1});
289    break;
290  case TypeLeafKind::LF_SUBSTR_LIST:
291    Count = support::endian::read32le(Content.data());
292    if (Count > 0)
293      Refs.push_back({TiRefKind::IndexRef, 4, Count});
294    break;
295  case TypeLeafKind::LF_BUILDINFO:
296    Count = support::endian::read16le(Content.data());
297    if (Count > 0)
298      Refs.push_back({TiRefKind::IndexRef, 2, Count});
299    break;
300  case TypeLeafKind::LF_UDT_SRC_LINE:
301    Refs.push_back({TiRefKind::TypeRef, 0, 1});
302    Refs.push_back({TiRefKind::IndexRef, 4, 1});
303    break;
304  case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
305    Refs.push_back({TiRefKind::TypeRef, 0, 1});
306    break;
307  case TypeLeafKind::LF_MODIFIER:
308    Refs.push_back({TiRefKind::TypeRef, 0, 1});
309    break;
310  case TypeLeafKind::LF_PROCEDURE:
311    Refs.push_back({TiRefKind::TypeRef, 0, 1});
312    Refs.push_back({TiRefKind::TypeRef, 8, 1});
313    break;
314  case TypeLeafKind::LF_MFUNCTION:
315    Refs.push_back({TiRefKind::TypeRef, 0, 3});
316    Refs.push_back({TiRefKind::TypeRef, 16, 1});
317    break;
318  case TypeLeafKind::LF_ARGLIST:
319    Count = support::endian::read32le(Content.data());
320    if (Count > 0)
321      Refs.push_back({TiRefKind::TypeRef, 4, Count});
322    break;
323  case TypeLeafKind::LF_ARRAY:
324    Refs.push_back({TiRefKind::TypeRef, 0, 2});
325    break;
326  case TypeLeafKind::LF_CLASS:
327  case TypeLeafKind::LF_STRUCTURE:
328  case TypeLeafKind::LF_INTERFACE:
329    Refs.push_back({TiRefKind::TypeRef, 4, 3});
330    break;
331  case TypeLeafKind::LF_UNION:
332    Refs.push_back({TiRefKind::TypeRef, 4, 1});
333    break;
334  case TypeLeafKind::LF_ENUM:
335    Refs.push_back({TiRefKind::TypeRef, 4, 2});
336    break;
337  case TypeLeafKind::LF_BITFIELD:
338    Refs.push_back({TiRefKind::TypeRef, 0, 1});
339    break;
340  case TypeLeafKind::LF_VFTABLE:
341    Refs.push_back({TiRefKind::TypeRef, 0, 2});
342    break;
343  case TypeLeafKind::LF_VTSHAPE:
344    break;
345  case TypeLeafKind::LF_METHODLIST:
346    handleMethodOverloadList(Content, Refs);
347    break;
348  case TypeLeafKind::LF_FIELDLIST:
349    handleFieldList(Content, Refs);
350    break;
351  case TypeLeafKind::LF_POINTER:
352    handlePointer(Content, Refs);
353    break;
354  default:
355    break;
356  }
357}
358
359static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind,
360                                SmallVectorImpl<TiReference> &Refs) {
361  uint32_t Count;
362  // FIXME: In the future it would be nice if we could avoid hardcoding these
363  // values.  One idea is to define some structures representing these types
364  // that would allow the use of offsetof().
365  switch (Kind) {
366  case SymbolKind::S_GPROC32_ID:
367  case SymbolKind::S_LPROC32_ID:
368  case SymbolKind::S_LPROC32_DPC:
369  case SymbolKind::S_LPROC32_DPC_ID:
370    Refs.push_back({TiRefKind::IndexRef, 24, 1}); // LF_FUNC_ID
371    break;
372  case SymbolKind::S_GPROC32:
373  case SymbolKind::S_LPROC32:
374    Refs.push_back({TiRefKind::TypeRef, 24, 1}); // Type
375    break;
376  case SymbolKind::S_UDT:
377    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // UDT
378    break;
379  case SymbolKind::S_GDATA32:
380  case SymbolKind::S_LDATA32:
381    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
382    break;
383  case SymbolKind::S_BUILDINFO:
384    Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags
385    break;
386  case SymbolKind::S_LTHREAD32:
387  case SymbolKind::S_GTHREAD32:
388    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
389    break;
390  case SymbolKind::S_FILESTATIC:
391    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
392    break;
393  case SymbolKind::S_LOCAL:
394    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
395    break;
396  case SymbolKind::S_REGISTER:
397    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
398    break;
399  case SymbolKind::S_CONSTANT:
400    Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type
401    break;
402  case SymbolKind::S_BPREL32:
403  case SymbolKind::S_REGREL32:
404    Refs.push_back({TiRefKind::TypeRef, 4, 1}); // Type
405    break;
406  case SymbolKind::S_CALLSITEINFO:
407    Refs.push_back({TiRefKind::TypeRef, 8, 1}); // Call signature
408    break;
409  case SymbolKind::S_CALLERS:
410  case SymbolKind::S_CALLEES:
411  case SymbolKind::S_INLINEES:
412    // The record is a count followed by an array of type indices.
413    Count = *reinterpret_cast<const ulittle32_t *>(Content.data());
414    Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees
415    break;
416  case SymbolKind::S_INLINESITE:
417    Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee
418    break;
419  case SymbolKind::S_HEAPALLOCSITE:
420    Refs.push_back({TiRefKind::TypeRef, 8, 1}); // UDT allocated
421    break;
422
423  // Defranges don't have types, just registers and code offsets.
424  case SymbolKind::S_DEFRANGE_REGISTER:
425  case SymbolKind::S_DEFRANGE_REGISTER_REL:
426  case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL:
427  case SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE:
428  case SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER:
429  case SymbolKind::S_DEFRANGE_SUBFIELD:
430    break;
431
432  // No type references.
433  case SymbolKind::S_LABEL32:
434  case SymbolKind::S_OBJNAME:
435  case SymbolKind::S_COMPILE:
436  case SymbolKind::S_COMPILE2:
437  case SymbolKind::S_COMPILE3:
438  case SymbolKind::S_ENVBLOCK:
439  case SymbolKind::S_BLOCK32:
440  case SymbolKind::S_FRAMEPROC:
441  case SymbolKind::S_THUNK32:
442  case SymbolKind::S_FRAMECOOKIE:
443  case SymbolKind::S_UNAMESPACE:
444    break;
445  // Scope ending symbols.
446  case SymbolKind::S_END:
447  case SymbolKind::S_INLINESITE_END:
448  case SymbolKind::S_PROC_ID_END:
449    break;
450  default:
451    return false; // Unknown symbol.
452  }
453  return true;
454}
455
456void llvm::codeview::discoverTypeIndices(const CVType &Type,
457                                         SmallVectorImpl<TiReference> &Refs) {
458  ::discoverTypeIndices(Type.content(), Type.kind(), Refs);
459}
460
461static void resolveTypeIndexReferences(ArrayRef<uint8_t> RecordData,
462                                       ArrayRef<TiReference> Refs,
463                                       SmallVectorImpl<TypeIndex> &Indices) {
464  Indices.clear();
465
466  if (Refs.empty())
467    return;
468
469  RecordData = RecordData.drop_front(sizeof(RecordPrefix));
470
471  BinaryStreamReader Reader(RecordData, support::little);
472  for (const auto &Ref : Refs) {
473    Reader.setOffset(Ref.Offset);
474    FixedStreamArray<TypeIndex> Run;
475    cantFail(Reader.readArray(Run, Ref.Count));
476    Indices.append(Run.begin(), Run.end());
477  }
478}
479
480void llvm::codeview::discoverTypeIndices(const CVType &Type,
481                                         SmallVectorImpl<TypeIndex> &Indices) {
482  return discoverTypeIndices(Type.RecordData, Indices);
483}
484
485void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
486                                         SmallVectorImpl<TypeIndex> &Indices) {
487  SmallVector<TiReference, 4> Refs;
488  discoverTypeIndices(RecordData, Refs);
489  resolveTypeIndexReferences(RecordData, Refs, Indices);
490}
491
492void llvm::codeview::discoverTypeIndices(ArrayRef<uint8_t> RecordData,
493                                         SmallVectorImpl<TiReference> &Refs) {
494  const RecordPrefix *P =
495      reinterpret_cast<const RecordPrefix *>(RecordData.data());
496  TypeLeafKind K = static_cast<TypeLeafKind>(uint16_t(P->RecordKind));
497  ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs);
498}
499
500bool llvm::codeview::discoverTypeIndicesInSymbol(
501    const CVSymbol &Sym, SmallVectorImpl<TiReference> &Refs) {
502  SymbolKind K = Sym.kind();
503  return ::discoverTypeIndices(Sym.content(), K, Refs);
504}
505
506bool llvm::codeview::discoverTypeIndicesInSymbol(
507    ArrayRef<uint8_t> RecordData, SmallVectorImpl<TiReference> &Refs) {
508  const RecordPrefix *P =
509      reinterpret_cast<const RecordPrefix *>(RecordData.data());
510  SymbolKind K = static_cast<SymbolKind>(uint16_t(P->RecordKind));
511  return ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K,
512                               Refs);
513}
514
515bool llvm::codeview::discoverTypeIndicesInSymbol(
516    ArrayRef<uint8_t> RecordData, SmallVectorImpl<TypeIndex> &Indices) {
517  SmallVector<TiReference, 2> Refs;
518  if (!discoverTypeIndicesInSymbol(RecordData, Refs))
519    return false;
520  resolveTypeIndexReferences(RecordData, Refs, Indices);
521  return true;
522}
523