1//===- GsymCreator.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//===----------------------------------------------------------------------===//
7
8#include "llvm/DebugInfo/GSYM/GsymCreator.h"
9#include "llvm/DebugInfo/GSYM/FileWriter.h"
10#include "llvm/DebugInfo/GSYM/Header.h"
11#include "llvm/DebugInfo/GSYM/LineTable.h"
12#include "llvm/MC/StringTableBuilder.h"
13#include "llvm/Support/raw_ostream.h"
14
15#include <algorithm>
16#include <cassert>
17#include <functional>
18#include <vector>
19
20using namespace llvm;
21using namespace gsym;
22
23GsymCreator::GsymCreator(bool Quiet)
24    : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
25  insertFile(StringRef());
26}
27
28uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
29  llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
30  llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
31  // We must insert the strings first, then call the FileEntry constructor.
32  // If we inline the insertString() function call into the constructor, the
33  // call order is undefined due to parameter lists not having any ordering
34  // requirements.
35  const uint32_t Dir = insertString(directory);
36  const uint32_t Base = insertString(filename);
37  FileEntry FE(Dir, Base);
38
39  std::lock_guard<std::mutex> Guard(Mutex);
40  const auto NextIndex = Files.size();
41  // Find FE in hash map and insert if not present.
42  auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
43  if (R.second)
44    Files.emplace_back(FE);
45  return R.first->second;
46}
47
48llvm::Error GsymCreator::save(StringRef Path,
49                              llvm::support::endianness ByteOrder) const {
50  std::error_code EC;
51  raw_fd_ostream OutStrm(Path, EC);
52  if (EC)
53    return llvm::errorCodeToError(EC);
54  FileWriter O(OutStrm, ByteOrder);
55  return encode(O);
56}
57
58llvm::Error GsymCreator::encode(FileWriter &O) const {
59  std::lock_guard<std::mutex> Guard(Mutex);
60  if (Funcs.empty())
61    return createStringError(std::errc::invalid_argument,
62                             "no functions to encode");
63  if (!Finalized)
64    return createStringError(std::errc::invalid_argument,
65                             "GsymCreator wasn't finalized prior to encoding");
66
67  if (Funcs.size() > UINT32_MAX)
68    return createStringError(std::errc::invalid_argument,
69                             "too many FunctionInfos");
70
71  const uint64_t MinAddr =
72      BaseAddress ? *BaseAddress : Funcs.front().startAddress();
73  const uint64_t MaxAddr = Funcs.back().startAddress();
74  const uint64_t AddrDelta = MaxAddr - MinAddr;
75  Header Hdr;
76  Hdr.Magic = GSYM_MAGIC;
77  Hdr.Version = GSYM_VERSION;
78  Hdr.AddrOffSize = 0;
79  Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
80  Hdr.BaseAddress = MinAddr;
81  Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
82  Hdr.StrtabOffset = 0; // We will fix this up later.
83  Hdr.StrtabSize = 0;   // We will fix this up later.
84  memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
85  if (UUID.size() > sizeof(Hdr.UUID))
86    return createStringError(std::errc::invalid_argument,
87                             "invalid UUID size %u", (uint32_t)UUID.size());
88  // Set the address offset size correctly in the GSYM header.
89  if (AddrDelta <= UINT8_MAX)
90    Hdr.AddrOffSize = 1;
91  else if (AddrDelta <= UINT16_MAX)
92    Hdr.AddrOffSize = 2;
93  else if (AddrDelta <= UINT32_MAX)
94    Hdr.AddrOffSize = 4;
95  else
96    Hdr.AddrOffSize = 8;
97  // Copy the UUID value if we have one.
98  if (UUID.size() > 0)
99    memcpy(Hdr.UUID, UUID.data(), UUID.size());
100  // Write out the header.
101  llvm::Error Err = Hdr.encode(O);
102  if (Err)
103    return Err;
104
105  // Write out the address offsets.
106  O.alignTo(Hdr.AddrOffSize);
107  for (const auto &FuncInfo : Funcs) {
108    uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
109    switch (Hdr.AddrOffSize) {
110    case 1:
111      O.writeU8(static_cast<uint8_t>(AddrOffset));
112      break;
113    case 2:
114      O.writeU16(static_cast<uint16_t>(AddrOffset));
115      break;
116    case 4:
117      O.writeU32(static_cast<uint32_t>(AddrOffset));
118      break;
119    case 8:
120      O.writeU64(AddrOffset);
121      break;
122    }
123  }
124
125  // Write out all zeros for the AddrInfoOffsets.
126  O.alignTo(4);
127  const off_t AddrInfoOffsetsOffset = O.tell();
128  for (size_t i = 0, n = Funcs.size(); i < n; ++i)
129    O.writeU32(0);
130
131  // Write out the file table
132  O.alignTo(4);
133  assert(!Files.empty());
134  assert(Files[0].Dir == 0);
135  assert(Files[0].Base == 0);
136  size_t NumFiles = Files.size();
137  if (NumFiles > UINT32_MAX)
138    return createStringError(std::errc::invalid_argument, "too many files");
139  O.writeU32(static_cast<uint32_t>(NumFiles));
140  for (auto File : Files) {
141    O.writeU32(File.Dir);
142    O.writeU32(File.Base);
143  }
144
145  // Write out the sting table.
146  const off_t StrtabOffset = O.tell();
147  StrTab.write(O.get_stream());
148  const off_t StrtabSize = O.tell() - StrtabOffset;
149  std::vector<uint32_t> AddrInfoOffsets;
150
151  // Write out the address infos for each function info.
152  for (const auto &FuncInfo : Funcs) {
153    if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
154      AddrInfoOffsets.push_back(OffsetOrErr.get());
155    else
156      return OffsetOrErr.takeError();
157  }
158  // Fixup the string table offset and size in the header
159  O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
160  O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
161
162  // Fixup all address info offsets
163  uint64_t Offset = 0;
164  for (auto AddrInfoOffset : AddrInfoOffsets) {
165    O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
166    Offset += 4;
167  }
168  return ErrorSuccess();
169}
170
// Similar to std::remove_if, but the predicate is binary: it is called as
// Pred(Prev, Curr) where Curr is the element under consideration and Prev is
// the most recent element that was kept. Curr is removed when the predicate
// returns true. The first element is always kept.
//
// Kept elements are moved to the front of the range in their original order
// and the iterator one past the last kept element is returned; elements from
// there to LastIt are left in a moved-from or stale state, as with
// std::remove_if. An empty range is returned unchanged.
template <class ForwardIt, class BinaryPredicate>
static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt,
                                BinaryPredicate Pred) {
  if (FirstIt == LastIt)
    return FirstIt;

  // KeptIt always refers to the last element retained so far. It is only
  // ever moved *into*, never moved from, so it is always safe to pass to the
  // predicate.
  ForwardIt KeptIt = FirstIt;
  for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;) {
    if (Pred(*KeptIt, *CurrIt))
      continue;
    ++KeptIt;
    // Skip the self-move while nothing has been removed yet.
    if (KeptIt != CurrIt)
      *KeptIt = std::move(*CurrIt);
  }
  return ++KeptIt;
}
190
191llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
192  std::lock_guard<std::mutex> Guard(Mutex);
193  if (Finalized)
194    return createStringError(std::errc::invalid_argument, "already finalized");
195  Finalized = true;
196
197  // Sort function infos so we can emit sorted functions.
198  llvm::sort(Funcs);
199
200  // Don't let the string table indexes change by finalizing in order.
201  StrTab.finalizeInOrder();
202
203  // Remove duplicates function infos that have both entries from debug info
204  // (DWARF or Breakpad) and entries from the SymbolTable.
205  //
206  // Also handle overlapping function. Usually there shouldn't be any, but they
207  // can and do happen in some rare cases.
208  //
209  // (a)          (b)         (c)
210  //     ^  ^       ^            ^
211  //     |X |Y      |X ^         |X
212  //     |  |       |  |Y        |  ^
213  //     |  |       |  v         v  |Y
214  //     v  v       v               v
215  //
216  // In (a) and (b), Y is ignored and X will be reported for the full range.
217  // In (c), both functions will be included in the result and lookups for an
218  // address in the intersection will return Y because of binary search.
219  //
220  // Note that in case of (b), we cannot include Y in the result because then
221  // we wouldn't find any function for range (end of Y, end of X)
222  // with binary search
223  auto NumBefore = Funcs.size();
224  Funcs.erase(
225      removeIfBinary(Funcs.begin(), Funcs.end(),
226                     [&](const auto &Prev, const auto &Curr) {
227                       // Empty ranges won't intersect, but we still need to
228                       // catch the case where we have multiple symbols at the
229                       // same address and coalesce them.
230                       const bool ranges_equal = Prev.Range == Curr.Range;
231                       if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
232                         // Overlapping ranges or empty identical ranges.
233                         if (ranges_equal) {
234                           // Same address range. Check if one is from debug
235                           // info and the other is from a symbol table. If
236                           // so, then keep the one with debug info. Our
237                           // sorting guarantees that entries with matching
238                           // address ranges that have debug info are last in
239                           // the sort.
240                           if (Prev == Curr) {
241                             // FunctionInfo entries match exactly (range,
242                             // lines, inlines)
243
244                             // We used to output a warning here, but this was
245                             // so frequent on some binaries, in particular
246                             // when those were built with GCC, that it slowed
247                             // down processing extremely.
248                             return true;
249                           } else {
250                             if (!Prev.hasRichInfo() && Curr.hasRichInfo()) {
251                               // Same address range, one with no debug info
252                               // (symbol) and the next with debug info. Keep
253                               // the latter.
254                               return true;
255                             } else {
256                               if (!Quiet) {
257                                 OS << "warning: same address range contains "
258                                       "different debug "
259                                    << "info. Removing:\n"
260                                    << Prev << "\nIn favor of this one:\n"
261                                    << Curr << "\n";
262                               }
263                               return true;
264                             }
265                           }
266                         } else {
267                           if (!Quiet) { // print warnings about overlaps
268                             OS << "warning: function ranges overlap:\n"
269                                << Prev << "\n"
270                                << Curr << "\n";
271                           }
272                         }
273                       } else if (Prev.Range.size() == 0 &&
274                                  Curr.Range.contains(Prev.Range.start())) {
275                         if (!Quiet) {
276                           OS << "warning: removing symbol:\n"
277                              << Prev << "\nKeeping:\n"
278                              << Curr << "\n";
279                         }
280                         return true;
281                       }
282
283                       return false;
284                     }),
285      Funcs.end());
286
287  // If our last function info entry doesn't have a size and if we have valid
288  // text ranges, we should set the size of the last entry since any search for
289  // a high address might match our last entry. By fixing up this size, we can
290  // help ensure we don't cause lookups to always return the last symbol that
291  // has no size when doing lookups.
292  if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
293    if (auto Range =
294            ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
295      Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
296    }
297  }
298  OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
299     << Funcs.size() << " total\n";
300  return Error::success();
301}
302
303uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
304  if (S.empty())
305    return 0;
306
307  // The hash can be calculated outside the lock.
308  CachedHashStringRef CHStr(S);
309  std::lock_guard<std::mutex> Guard(Mutex);
310  if (Copy) {
311    // We need to provide backing storage for the string if requested
312    // since StringTableBuilder stores references to strings. Any string
313    // that comes from a section in an object file doesn't need to be
314    // copied, but any string created by code will need to be copied.
315    // This allows GsymCreator to be really fast when parsing DWARF and
316    // other object files as most strings don't need to be copied.
317    if (!StrTab.contains(CHStr))
318      CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
319                                  CHStr.hash()};
320  }
321  return StrTab.add(CHStr);
322}
323
324void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
325  std::lock_guard<std::mutex> Guard(Mutex);
326  Ranges.insert(FI.Range);
327  Funcs.emplace_back(std::move(FI));
328}
329
330void GsymCreator::forEachFunctionInfo(
331    std::function<bool(FunctionInfo &)> const &Callback) {
332  std::lock_guard<std::mutex> Guard(Mutex);
333  for (auto &FI : Funcs) {
334    if (!Callback(FI))
335      break;
336  }
337}
338
339void GsymCreator::forEachFunctionInfo(
340    std::function<bool(const FunctionInfo &)> const &Callback) const {
341  std::lock_guard<std::mutex> Guard(Mutex);
342  for (const auto &FI : Funcs) {
343    if (!Callback(FI))
344      break;
345  }
346}
347
348size_t GsymCreator::getNumFunctionInfos() const {
349  std::lock_guard<std::mutex> Guard(Mutex);
350  return Funcs.size();
351}
352
353bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
354  if (ValidTextRanges)
355    return ValidTextRanges->contains(Addr);
356  return true; // No valid text ranges has been set, so accept all ranges.
357}
358
359bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
360  std::lock_guard<std::mutex> Guard(Mutex);
361  return Ranges.contains(Addr);
362}
363