1//===- DwarfTransformer.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include <thread>
10#include <unordered_set>
11
12#include "llvm/DebugInfo/DIContext.h"
13#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
14#include "llvm/DebugInfo/DWARF/DWARFContext.h"
15#include "llvm/Support/Error.h"
16#include "llvm/Support/ThreadPool.h"
17#include "llvm/Support/raw_ostream.h"
18
19#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
20#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
21#include "llvm/DebugInfo/GSYM/GsymCreator.h"
22#include "llvm/DebugInfo/GSYM/GsymReader.h"
23#include "llvm/DebugInfo/GSYM/InlineInfo.h"
24#include <optional>
25
26using namespace llvm;
27using namespace gsym;
28
29struct llvm::gsym::CUInfo {
30  const DWARFDebugLine::LineTable *LineTable;
31  const char *CompDir;
32  std::vector<uint32_t> FileCache;
33  uint64_t Language = 0;
34  uint8_t AddrSize = 0;
35
36  CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) {
37    LineTable = DICtx.getLineTableForUnit(CU);
38    CompDir = CU->getCompilationDir();
39    FileCache.clear();
40    if (LineTable)
41      FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX);
42    DWARFDie Die = CU->getUnitDIE();
43    Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0);
44    AddrSize = CU->getAddressByteSize();
45  }
46
47  /// Return true if Addr is the highest address for a given compile unit. The
48  /// highest address is encoded as -1, of all ones in the address. These high
49  /// addresses are used by some linkers to indicate that a function has been
50  /// dead stripped or didn't end up in the linked executable.
51  bool isHighestAddress(uint64_t Addr) const {
52    if (AddrSize == 4)
53      return Addr == UINT32_MAX;
54    else if (AddrSize == 8)
55      return Addr == UINT64_MAX;
56    return false;
57  }
58
59  /// Convert a DWARF compile unit file index into a GSYM global file index.
60  ///
61  /// Each compile unit in DWARF has its own file table in the line table
62  /// prologue. GSYM has a single large file table that applies to all files
63  /// from all of the info in a GSYM file. This function converts between the
64  /// two and caches and DWARF CU file index that has already been converted so
65  /// the first client that asks for a compile unit file index will end up
66  /// doing the conversion, and subsequent clients will get the cached GSYM
67  /// index.
68  std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
69                                               uint32_t DwarfFileIdx) {
70    if (!LineTable || DwarfFileIdx >= FileCache.size())
71      return std::nullopt;
72    uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
73    if (GsymFileIdx != UINT32_MAX)
74      return GsymFileIdx;
75    std::string File;
76    if (LineTable->getFileNameByIndex(
77            DwarfFileIdx, CompDir,
78            DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File))
79      GsymFileIdx = Gsym.insertFile(File);
80    else
81      GsymFileIdx = 0;
82    return GsymFileIdx;
83  }
84};
85
86
87static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
88  if (DWARFDie SpecDie =
89          Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) {
90    if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie))
91      return SpecParent;
92  }
93  if (DWARFDie AbstDie =
94          Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) {
95    if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie))
96      return AbstParent;
97  }
98
99  // We never want to follow parent for inlined subroutine - that would
100  // give us information about where the function is inlined, not what
101  // function is inlined
102  if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine)
103    return DWARFDie();
104
105  DWARFDie ParentDie = Die.getParent();
106  if (!ParentDie)
107    return DWARFDie();
108
109  switch (ParentDie.getTag()) {
110  case dwarf::DW_TAG_namespace:
111  case dwarf::DW_TAG_structure_type:
112  case dwarf::DW_TAG_union_type:
113  case dwarf::DW_TAG_class_type:
114  case dwarf::DW_TAG_subprogram:
115    return ParentDie; // Found parent decl context DIE
116  case dwarf::DW_TAG_lexical_block:
117    return GetParentDeclContextDIE(ParentDie);
118  default:
119    break;
120  }
121
122  return DWARFDie();
123}
124
125/// Get the GsymCreator string table offset for the qualified name for the
126/// DIE passed in. This function will avoid making copies of any strings in
127/// the GsymCreator when possible. We don't need to copy a string when the
128/// string comes from our .debug_str section or is an inlined string in the
129/// .debug_info. If we create a qualified name string in this function by
130/// combining multiple strings in the DWARF string table or info, we will make
131/// a copy of the string when we add it to the string table.
132static std::optional<uint32_t>
133getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
134  // If the dwarf has mangled name, use mangled name
135  if (auto LinkageName = Die.getLinkageName()) {
136    // We have seen cases were linkage name is actually empty.
137    if (strlen(LinkageName) > 0)
138      return Gsym.insertString(LinkageName, /* Copy */ false);
139  }
140
141  StringRef ShortName(Die.getName(DINameKind::ShortName));
142  if (ShortName.empty())
143    return std::nullopt;
144
145  // For C++ and ObjC, prepend names of all parent declaration contexts
146  if (!(Language == dwarf::DW_LANG_C_plus_plus ||
147        Language == dwarf::DW_LANG_C_plus_plus_03 ||
148        Language == dwarf::DW_LANG_C_plus_plus_11 ||
149        Language == dwarf::DW_LANG_C_plus_plus_14 ||
150        Language == dwarf::DW_LANG_ObjC_plus_plus ||
151        // This should not be needed for C, but we see C++ code marked as C
152        // in some binaries. This should hurt, so let's do it for C as well
153        Language == dwarf::DW_LANG_C))
154    return Gsym.insertString(ShortName, /* Copy */ false);
155
156  // Some GCC optimizations create functions with names ending with .isra.<num>
157  // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name
158  // If it looks like it could be the case, don't add any prefix
159  if (ShortName.starts_with("_Z") &&
160      (ShortName.contains(".isra.") || ShortName.contains(".part.")))
161    return Gsym.insertString(ShortName, /* Copy */ false);
162
163  DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die);
164  if (ParentDeclCtxDie) {
165    std::string Name = ShortName.str();
166    while (ParentDeclCtxDie) {
167      StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName));
168      if (!ParentName.empty()) {
169        // "lambda" names are wrapped in < >. Replace with { }
170        // to be consistent with demangled names and not to confuse with
171        // templates
172        if (ParentName.front() == '<' && ParentName.back() == '>')
173          Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" +
174                "::" + Name;
175        else
176          Name = ParentName.str() + "::" + Name;
177      }
178      ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie);
179    }
180    // Copy the name since we created a new name in a std::string.
181    return Gsym.insertString(Name, /* Copy */ true);
182  }
183  // Don't copy the name since it exists in the DWARF object file.
184  return Gsym.insertString(ShortName, /* Copy */ false);
185}
186
187static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) {
188  bool CheckChildren = true;
189  switch (Die.getTag()) {
190  case dwarf::DW_TAG_subprogram:
191    // Don't look into functions within functions.
192    CheckChildren = Depth == 0;
193    break;
194  case dwarf::DW_TAG_inlined_subroutine:
195    return true;
196  default:
197    break;
198  }
199  if (!CheckChildren)
200    return false;
201  for (DWARFDie ChildDie : Die.children()) {
202    if (hasInlineInfo(ChildDie, Depth + 1))
203      return true;
204  }
205  return false;
206}
207
208static AddressRanges
209ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
210  AddressRanges Ranges;
211  for (const DWARFAddressRange &DwarfRange : DwarfRanges) {
212    if (DwarfRange.LowPC < DwarfRange.HighPC)
213      Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC});
214  }
215  return Ranges;
216}
217
218static void parseInlineInfo(GsymCreator &Gsym, raw_ostream *Log, CUInfo &CUI,
219                            DWARFDie Die, uint32_t Depth, FunctionInfo &FI,
220                            InlineInfo &Parent,
221                            const AddressRanges &AllParentRanges,
222                            bool &WarnIfEmpty) {
223  if (!hasInlineInfo(Die, Depth))
224    return;
225
226  dwarf::Tag Tag = Die.getTag();
227  if (Tag == dwarf::DW_TAG_inlined_subroutine) {
228    // create new InlineInfo and append to parent.children
229    InlineInfo II;
230    AddressRanges AllInlineRanges;
231    Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
232    if (RangesOrError) {
233      AllInlineRanges = ConvertDWARFRanges(RangesOrError.get());
234      uint32_t EmptyCount = 0;
235      for (const AddressRange &InlineRange : AllInlineRanges) {
236        // Check for empty inline range in case inline function was outlined
237        // or has not code
238        if (InlineRange.empty()) {
239          ++EmptyCount;
240        } else {
241          if (Parent.Ranges.contains(InlineRange)) {
242            II.Ranges.insert(InlineRange);
243          } else {
244            // Only warn if the current inline range is not within any of all
245            // of the parent ranges. If we have a DW_TAG_subpgram with multiple
246            // ranges we will emit a FunctionInfo for each range of that
247            // function that only emits information within the current range,
248            // so we only want to emit an error if the DWARF has issues, not
249            // when a range currently just isn't in the range we are currently
250            // parsing for.
251            if (AllParentRanges.contains(InlineRange)) {
252              WarnIfEmpty = false;
253            } else if (Log) {
254              *Log << "error: inlined function DIE at "
255                   << HEX32(Die.getOffset()) << " has a range ["
256                   << HEX64(InlineRange.start()) << " - "
257                   << HEX64(InlineRange.end()) << ") that isn't contained in "
258                   << "any parent address ranges, this inline range will be "
259                      "removed.\n";
260            }
261          }
262        }
263      }
264      // If we have all empty ranges for the inlines, then don't warn if we
265      // have an empty InlineInfo at the top level as all inline functions
266      // were elided.
267      if (EmptyCount == AllInlineRanges.size())
268        WarnIfEmpty = false;
269    }
270    if (II.Ranges.empty())
271      return;
272
273    if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
274      II.Name = *NameIndex;
275    const uint64_t DwarfFileIdx = dwarf::toUnsigned(
276        Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
277    std::optional<uint32_t> OptGSymFileIdx =
278        CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
279    if (OptGSymFileIdx) {
280      II.CallFile = OptGSymFileIdx.value();
281      II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
282      // parse all children and append to parent
283      for (DWARFDie ChildDie : Die.children())
284        parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
285                        AllInlineRanges, WarnIfEmpty);
286      Parent.Children.emplace_back(std::move(II));
287    } else if (Log) {
288      *Log << "error: inlined function DIE at " << HEX32(Die.getOffset())
289           << " has an invalid file index " << DwarfFileIdx
290           << " in its DW_AT_call_file attribute, this inline entry and all "
291           << "children will be removed.\n";
292    }
293    return;
294  }
295  if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
296    // skip this Die and just recurse down
297    for (DWARFDie ChildDie : Die.children())
298      parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, Parent,
299                      AllParentRanges, WarnIfEmpty);
300  }
301}
302
303static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
304                                     DWARFDie Die, GsymCreator &Gsym,
305                                     FunctionInfo &FI) {
306  std::vector<uint32_t> RowVector;
307  const uint64_t StartAddress = FI.startAddress();
308  const uint64_t EndAddress = FI.endAddress();
309  const uint64_t RangeSize = EndAddress - StartAddress;
310  const object::SectionedAddress SecAddress{
311      StartAddress, object::SectionedAddress::UndefSection};
312
313
314  if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
315    // If we have a DW_TAG_subprogram but no line entries, fall back to using
316    // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
317    std::string FilePath = Die.getDeclFile(
318        DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
319    if (FilePath.empty()) {
320      // If we had a DW_AT_decl_file, but got no file then we need to emit a
321      // warning.
322      if (Log) {
323        const uint64_t DwarfFileIdx = dwarf::toUnsigned(
324            Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
325        *Log << "error: function DIE at " << HEX32(Die.getOffset())
326             << " has an invalid file index " << DwarfFileIdx
327             << " in its DW_AT_decl_file attribute, unable to create a single "
328             << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
329             << "attributes.\n";
330      }
331      return;
332    }
333    if (auto Line =
334            dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
335      LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
336      FI.OptLineTable = LineTable();
337      FI.OptLineTable->push(LE);
338    }
339    return;
340  }
341
342  FI.OptLineTable = LineTable();
343  DWARFDebugLine::Row PrevRow;
344  for (uint32_t RowIndex : RowVector) {
345    // Take file number and line/column from the row.
346    const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
347    std::optional<uint32_t> OptFileIdx =
348        CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
349    if (!OptFileIdx) {
350      if (Log) {
351        *Log << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
352             << "a line entry with invalid DWARF file index, this entry will "
353             << "be removed:\n";
354        Row.dumpTableHeader(*Log, /*Indent=*/0);
355        Row.dump(*Log);
356        *Log << "\n";
357      }
358      continue;
359    }
360    const uint32_t FileIdx = OptFileIdx.value();
361    uint64_t RowAddress = Row.Address.Address;
362    // Watch out for a RowAddress that is in the middle of a line table entry
363    // in the DWARF. If we pass an address in between two line table entries
364    // we will get a RowIndex for the previous valid line table row which won't
365    // be contained in our function. This is usually a bug in the DWARF due to
366    // linker problems or LTO or other DWARF re-linking so it is worth emitting
367    // an error, but not worth stopping the creation of the GSYM.
368    if (!FI.Range.contains(RowAddress)) {
369      if (RowAddress < FI.Range.start()) {
370        if (Log) {
371          *Log << "error: DIE has a start address whose LowPC is between the "
372                  "line table Row[" << RowIndex << "] with address "
373               << HEX64(RowAddress) << " and the next one.\n";
374          Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
375        }
376        RowAddress = FI.Range.start();
377      } else {
378        continue;
379      }
380    }
381
382    LineEntry LE(RowAddress, FileIdx, Row.Line);
383    if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) {
384      // We have seen full duplicate line tables for functions in some
385      // DWARF files. Watch for those here by checking the last
386      // row was the function's end address (HighPC) and that the
387      // current line table entry's address is the same as the first
388      // line entry we already have in our "function_info.Lines". If
389      // so break out after printing a warning.
390      auto FirstLE = FI.OptLineTable->first();
391      if (FirstLE && *FirstLE == LE) {
392        if (Log && !Gsym.isQuiet()) {
393          *Log << "warning: duplicate line table detected for DIE:\n";
394          Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
395        }
396      } else {
397        if (Log) {
398          *Log << "error: line table has addresses that do not "
399               << "monotonically increase:\n";
400          for (uint32_t RowIndex2 : RowVector)
401            CUI.LineTable->Rows[RowIndex2].dump(*Log);
402          Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE());
403        }
404      }
405      break;
406    }
407
408    // Skip multiple line entries for the same file and line.
409    auto LastLE = FI.OptLineTable->last();
410    if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line)
411        continue;
412    // Only push a row if it isn't an end sequence. End sequence markers are
413    // included for the last address in a function or the last contiguous
414    // address in a sequence.
415    if (Row.EndSequence) {
416      // End sequence means that the next line entry could have a lower address
417      // that the previous entries. So we clear the previous row so we don't
418      // trigger the line table error about address that do not monotonically
419      // increase.
420      PrevRow = DWARFDebugLine::Row();
421    } else {
422      FI.OptLineTable->push(LE);
423      PrevRow = Row;
424    }
425  }
426  // If not line table rows were added, clear the line table so we don't encode
427  // on in the GSYM file.
428  if (FI.OptLineTable->empty())
429    FI.OptLineTable = std::nullopt;
430}
431
432void DwarfTransformer::handleDie(raw_ostream *OS, CUInfo &CUI, DWARFDie Die) {
433  switch (Die.getTag()) {
434  case dwarf::DW_TAG_subprogram: {
435    Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges();
436    if (!RangesOrError) {
437      consumeError(RangesOrError.takeError());
438      break;
439    }
440    const DWARFAddressRangesVector &Ranges = RangesOrError.get();
441    if (Ranges.empty())
442      break;
443    auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym);
444    if (!NameIndex) {
445      if (OS) {
446        *OS << "error: function at " << HEX64(Die.getOffset())
447            << " has no name\n ";
448        Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
449      }
450      break;
451    }
452    // All ranges for the subprogram DIE in case it has multiple. We need to
453    // pass this down into parseInlineInfo so we don't warn about inline
454    // ranges that are not in the current subrange of a function when they
455    // actually are in another subgrange. We do this because when a function
456    // has discontiguos ranges, we create multiple function entries with only
457    // the info for that range contained inside of it.
458    AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges);
459
460    // Create a function_info for each range
461    for (const DWARFAddressRange &Range : Ranges) {
462      // The low PC must be less than the high PC. Many linkers don't remove
463      // DWARF for functions that don't get linked into the final executable.
464      // If both the high and low pc have relocations, linkers will often set
465      // the address values for both to the same value to indicate the function
466      // has been remove. Other linkers have been known to set the one or both
467      // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8
468      // byte addresses to indicate the function isn't valid. The check below
469      // tries to watch for these cases and abort if it runs into them.
470      if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC))
471        break;
472
473      // Many linkers can't remove DWARF and might set the LowPC to zero. Since
474      // high PC can be an offset from the low PC in more recent DWARF versions
475      // we need to watch for a zero'ed low pc which we do using
476      // ValidTextRanges below.
477      if (!Gsym.IsValidTextAddress(Range.LowPC)) {
478        // We expect zero and -1 to be invalid addresses in DWARF depending
479        // on the linker of the DWARF. This indicates a function was stripped
480        // and the debug info wasn't able to be stripped from the DWARF. If
481        // the LowPC isn't zero or -1, then we should emit an error.
482        if (Range.LowPC != 0) {
483          if (!Gsym.isQuiet()) {
484            // Unexpected invalid address, emit a warning
485            if (OS) {
486              *OS << "warning: DIE has an address range whose start address "
487                     "is not in any executable sections ("
488                  << *Gsym.GetValidTextRanges()
489                  << ") and will not be processed:\n";
490              Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
491            }
492          }
493        }
494        break;
495      }
496
497      FunctionInfo FI;
498      FI.Range = {Range.LowPC, Range.HighPC};
499      FI.Name = *NameIndex;
500      if (CUI.LineTable)
501        convertFunctionLineTable(OS, CUI, Die, Gsym, FI);
502
503      if (hasInlineInfo(Die, 0)) {
504        FI.Inline = InlineInfo();
505        FI.Inline->Name = *NameIndex;
506        FI.Inline->Ranges.insert(FI.Range);
507        bool WarnIfEmpty = true;
508        parseInlineInfo(Gsym, OS, CUI, Die, 0, FI, *FI.Inline,
509                        AllSubprogramRanges, WarnIfEmpty);
510        // Make sure we at least got some valid inline info other than just
511        // the top level function. If we didn't then remove the inline info
512        // from the function info. We have seen cases where LTO tries to modify
513        // the DWARF for functions and it messes up the address ranges for
514        // the inline functions so it is no longer valid.
515        //
516        // By checking if there are any valid children on the top level inline
517        // information object, we will know if we got anything valid from the
518        // debug info.
519        if (FI.Inline->Children.empty()) {
520          if (WarnIfEmpty && OS && !Gsym.isQuiet()) {
521            *OS << "warning: DIE contains inline function information that has "
522                  "no valid ranges, removing inline information:\n";
523            Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE());
524          }
525          FI.Inline = std::nullopt;
526        }
527      }
528      Gsym.addFunctionInfo(std::move(FI));
529    }
530  } break;
531  default:
532    break;
533  }
534  for (DWARFDie ChildDie : Die.children())
535    handleDie(OS, CUI, ChildDie);
536}
537
538Error DwarfTransformer::convert(uint32_t NumThreads, raw_ostream *OS) {
539  size_t NumBefore = Gsym.getNumFunctionInfos();
540  auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
541    DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false);
542    if (DwarfUnit.getDWOId()) {
543      DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit();
544      if (OS && !DWOCU->isDWOUnit()) {
545        std::string DWOName = dwarf::toString(
546            DwarfUnit.getUnitDIE().find(
547                {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
548            "");
549        *OS << "warning: Unable to retrieve DWO .debug_info section for "
550            << DWOName << "\n";
551      } else {
552        ReturnDie = DWOCU->getUnitDIE(false);
553      }
554    }
555    return ReturnDie;
556  };
557  if (NumThreads == 1) {
558    // Parse all DWARF data from this thread, use the same string/file table
559    // for everything
560    for (const auto &CU : DICtx.compile_units()) {
561      DWARFDie Die = getDie(*CU);
562      CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
563      handleDie(OS, CUI, Die);
564    }
565  } else {
566    // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
567    // front before we start accessing any DIEs since there might be
568    // cross compile unit references in the DWARF. If we don't do this we can
569    // end up crashing.
570
571    // We need to call getAbbreviations sequentially first so that getUnitDIE()
572    // only works with its local data.
573    for (const auto &CU : DICtx.compile_units())
574      CU->getAbbreviations();
575
576    // Now parse all DIEs in case we have cross compile unit references in a
577    // thread pool.
578    ThreadPool pool(hardware_concurrency(NumThreads));
579    for (const auto &CU : DICtx.compile_units())
580      pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
581    pool.wait();
582
583    // Now convert all DWARF to GSYM in a thread pool.
584    std::mutex LogMutex;
585    for (const auto &CU : DICtx.compile_units()) {
586      DWARFDie Die = getDie(*CU);
587      if (Die) {
588        CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
589        pool.async([this, CUI, &LogMutex, OS, Die]() mutable {
590          std::string ThreadLogStorage;
591          raw_string_ostream ThreadOS(ThreadLogStorage);
592          handleDie(OS ? &ThreadOS: nullptr, CUI, Die);
593          ThreadOS.flush();
594          if (OS && !ThreadLogStorage.empty()) {
595            // Print ThreadLogStorage lines into an actual stream under a lock
596            std::lock_guard<std::mutex> guard(LogMutex);
597            *OS << ThreadLogStorage;
598          }
599        });
600      }
601    }
602    pool.wait();
603  }
604  size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
605  if (OS)
606    *OS << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
607  return Error::success();
608}
609
610llvm::Error DwarfTransformer::verify(StringRef GsymPath, raw_ostream &Log) {
611  Log << "Verifying GSYM file \"" << GsymPath << "\":\n";
612
613  auto Gsym = GsymReader::openFile(GsymPath);
614  if (!Gsym)
615    return Gsym.takeError();
616
617  auto NumAddrs = Gsym->getNumAddresses();
618  DILineInfoSpecifier DLIS(
619      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
620      DILineInfoSpecifier::FunctionNameKind::LinkageName);
621  std::string gsymFilename;
622  for (uint32_t I = 0; I < NumAddrs; ++I) {
623    auto FuncAddr = Gsym->getAddress(I);
624    if (!FuncAddr)
625        return createStringError(std::errc::invalid_argument,
626                                  "failed to extract address[%i]", I);
627
628    auto FI = Gsym->getFunctionInfo(*FuncAddr);
629    if (!FI)
630      return createStringError(std::errc::invalid_argument,
631                            "failed to extract function info for address 0x%"
632                            PRIu64, *FuncAddr);
633
634    for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) {
635      const object::SectionedAddress SectAddr{
636          Addr, object::SectionedAddress::UndefSection};
637      auto LR = Gsym->lookup(Addr);
638      if (!LR)
639        return LR.takeError();
640
641      auto DwarfInlineInfos =
642          DICtx.getInliningInfoForAddress(SectAddr, DLIS);
643      uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames();
644      if (NumDwarfInlineInfos == 0) {
645        DwarfInlineInfos.addFrame(
646            DICtx.getLineInfoForAddress(SectAddr, DLIS));
647      }
648
649      // Check for 1 entry that has no file and line info
650      if (NumDwarfInlineInfos == 1 &&
651          DwarfInlineInfos.getFrame(0).FileName == "<invalid>") {
652        DwarfInlineInfos = DIInliningInfo();
653        NumDwarfInlineInfos = 0;
654      }
655      if (NumDwarfInlineInfos > 0 &&
656          NumDwarfInlineInfos != LR->Locations.size()) {
657        Log << "error: address " << HEX64(Addr) << " has "
658            << NumDwarfInlineInfos << " DWARF inline frames and GSYM has "
659            << LR->Locations.size() << "\n";
660        Log << "    " << NumDwarfInlineInfos << " DWARF frames:\n";
661        for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) {
662          const auto &dii = DwarfInlineInfos.getFrame(Idx);
663          Log << "    [" << Idx << "]: " << dii.FunctionName << " @ "
664              << dii.FileName << ':' << dii.Line << '\n';
665        }
666        Log << "    " << LR->Locations.size() << " GSYM frames:\n";
667        for (size_t Idx = 0, count = LR->Locations.size();
668              Idx < count; ++Idx) {
669          const auto &gii = LR->Locations[Idx];
670          Log << "    [" << Idx << "]: " << gii.Name << " @ " << gii.Dir
671              << '/' << gii.Base << ':' << gii.Line << '\n';
672        }
673        DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS);
674        Gsym->dump(Log, *FI);
675        continue;
676      }
677
678      for (size_t Idx = 0, count = LR->Locations.size(); Idx < count;
679            ++Idx) {
680        const auto &gii = LR->Locations[Idx];
681        if (Idx < NumDwarfInlineInfos) {
682          const auto &dii = DwarfInlineInfos.getFrame(Idx);
683          gsymFilename = LR->getSourceFile(Idx);
684          // Verify function name
685          if (dii.FunctionName.find(gii.Name.str()) != 0)
686            Log << "error: address " << HEX64(Addr) << " DWARF function \""
687                << dii.FunctionName.c_str()
688                << "\" doesn't match GSYM function \"" << gii.Name << "\"\n";
689          // Verify source file path
690          if (dii.FileName != gsymFilename)
691            Log << "error: address " << HEX64(Addr) << " DWARF path \""
692                << dii.FileName.c_str() << "\" doesn't match GSYM path \""
693                << gsymFilename.c_str() << "\"\n";
694          // Verify source file line
695          if (dii.Line != gii.Line)
696            Log << "error: address " << HEX64(Addr) << " DWARF line "
697                << dii.Line << " != GSYM line " << gii.Line << "\n";
698        }
699      }
700    }
701  }
702  return Error::success();
703}
704