1321369Sdim//===- SymbolizableObjectFile.cpp -----------------------------------------===//
2292915Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6292915Sdim//
7292915Sdim//===----------------------------------------------------------------------===//
8292915Sdim//
9292915Sdim// Implementation of SymbolizableObjectFile class.
10292915Sdim//
11292915Sdim//===----------------------------------------------------------------------===//
12292915Sdim
13292915Sdim#include "SymbolizableObjectFile.h"
14321369Sdim#include "llvm/ADT/STLExtras.h"
15321369Sdim#include "llvm/ADT/StringRef.h"
16321369Sdim#include "llvm/ADT/Triple.h"
17321369Sdim#include "llvm/BinaryFormat/COFF.h"
18321369Sdim#include "llvm/DebugInfo/DWARF/DWARFContext.h"
19321369Sdim#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20309124Sdim#include "llvm/Object/COFF.h"
21321369Sdim#include "llvm/Object/ObjectFile.h"
22292915Sdim#include "llvm/Object/SymbolSize.h"
23321369Sdim#include "llvm/Support/Casting.h"
24292915Sdim#include "llvm/Support/DataExtractor.h"
25321369Sdim#include "llvm/Support/Error.h"
26321369Sdim#include <algorithm>
27321369Sdim#include <cstdint>
28321369Sdim#include <memory>
29321369Sdim#include <string>
30321369Sdim#include <system_error>
31321369Sdim#include <utility>
32321369Sdim#include <vector>
33292915Sdim
34321369Sdimusing namespace llvm;
35292915Sdimusing namespace object;
36321369Sdimusing namespace symbolize;
37292915Sdim
38292915Sdimstatic DILineInfoSpecifier
39292915SdimgetDILineInfoSpecifier(FunctionNameKind FNKind) {
40292915Sdim  return DILineInfoSpecifier(
41292915Sdim      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
42292915Sdim}
43292915Sdim
44292915SdimErrorOr<std::unique_ptr<SymbolizableObjectFile>>
45353358SdimSymbolizableObjectFile::create(const object::ObjectFile *Obj,
46360784Sdim                               std::unique_ptr<DIContext> DICtx,
47360784Sdim                               bool UntagAddresses) {
48353358Sdim  assert(DICtx);
49292915Sdim  std::unique_ptr<SymbolizableObjectFile> res(
50360784Sdim      new SymbolizableObjectFile(Obj, std::move(DICtx), UntagAddresses));
51292915Sdim  std::unique_ptr<DataExtractor> OpdExtractor;
52292915Sdim  uint64_t OpdAddress = 0;
53292915Sdim  // Find the .opd (function descriptor) section if any, for big-endian
54292915Sdim  // PowerPC64 ELF.
55292915Sdim  if (Obj->getArch() == Triple::ppc64) {
56292915Sdim    for (section_iterator Section : Obj->sections()) {
57360784Sdim      Expected<StringRef> NameOrErr = Section->getName();
58360784Sdim      if (!NameOrErr)
59360784Sdim        return errorToErrorCode(NameOrErr.takeError());
60360784Sdim
61360784Sdim      if (*NameOrErr == ".opd") {
62353358Sdim        Expected<StringRef> E = Section->getContents();
63353358Sdim        if (!E)
64353358Sdim          return errorToErrorCode(E.takeError());
65353358Sdim        OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
66292915Sdim                                             Obj->getBytesInAddress()));
67292915Sdim        OpdAddress = Section->getAddress();
68292915Sdim        break;
69292915Sdim      }
70292915Sdim    }
71292915Sdim  }
72292915Sdim  std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
73292915Sdim      computeSymbolSizes(*Obj);
74292915Sdim  for (auto &P : Symbols)
75292915Sdim    res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
76292915Sdim
77292915Sdim  // If this is a COFF object and we didn't find any symbols, try the export
78292915Sdim  // table.
79292915Sdim  if (Symbols.empty()) {
80292915Sdim    if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
81292915Sdim      if (auto EC = res->addCoffExportSymbols(CoffObj))
82292915Sdim        return EC;
83292915Sdim  }
84353358Sdim
85353358Sdim  std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
86353358Sdim                                                &Os = res->Objects;
87353358Sdim  auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
88353358Sdim    // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
89353358Sdim    // pick the one with the largest Size. This helps us avoid symbols with no
90353358Sdim    // size information (Size=0).
91353358Sdim    llvm::sort(S);
92353358Sdim    auto I = S.begin(), E = S.end(), J = S.begin();
93353358Sdim    while (I != E) {
94353358Sdim      auto OI = I;
95353358Sdim      while (++I != E && OI->first.Addr == I->first.Addr) {
96353358Sdim      }
97353358Sdim      *J++ = I[-1];
98353358Sdim    }
99353358Sdim    S.erase(J, S.end());
100353358Sdim  };
101353358Sdim  Uniquify(Fs);
102353358Sdim  Uniquify(Os);
103353358Sdim
104292915Sdim  return std::move(res);
105292915Sdim}
106292915Sdim
107353358SdimSymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile *Obj,
108360784Sdim                                               std::unique_ptr<DIContext> DICtx,
109360784Sdim                                               bool UntagAddresses)
110360784Sdim    : Module(Obj), DebugInfoContext(std::move(DICtx)),
111360784Sdim      UntagAddresses(UntagAddresses) {}
112292915Sdim
113292915Sdimnamespace {
114321369Sdim
115292915Sdimstruct OffsetNamePair {
116292915Sdim  uint32_t Offset;
117292915Sdim  StringRef Name;
118321369Sdim
119292915Sdim  bool operator<(const OffsetNamePair &R) const {
120292915Sdim    return Offset < R.Offset;
121292915Sdim  }
122292915Sdim};
123292915Sdim
124321369Sdim} // end anonymous namespace
125321369Sdim
126292915Sdimstd::error_code SymbolizableObjectFile::addCoffExportSymbols(
127292915Sdim    const COFFObjectFile *CoffObj) {
128292915Sdim  // Get all export names and offsets.
129292915Sdim  std::vector<OffsetNamePair> ExportSyms;
130292915Sdim  for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
131292915Sdim    StringRef Name;
132292915Sdim    uint32_t Offset;
133292915Sdim    if (auto EC = Ref.getSymbolName(Name))
134292915Sdim      return EC;
135292915Sdim    if (auto EC = Ref.getExportRVA(Offset))
136292915Sdim      return EC;
137292915Sdim    ExportSyms.push_back(OffsetNamePair{Offset, Name});
138292915Sdim  }
139292915Sdim  if (ExportSyms.empty())
140292915Sdim    return std::error_code();
141292915Sdim
142292915Sdim  // Sort by ascending offset.
143292915Sdim  array_pod_sort(ExportSyms.begin(), ExportSyms.end());
144292915Sdim
145292915Sdim  // Approximate the symbol sizes by assuming they run to the next symbol.
146292915Sdim  // FIXME: This assumes all exports are functions.
147292915Sdim  uint64_t ImageBase = CoffObj->getImageBase();
148292915Sdim  for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
149292915Sdim    OffsetNamePair &Export = *I;
150292915Sdim    // FIXME: The last export has a one byte size now.
151292915Sdim    uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
152292915Sdim    uint64_t SymbolStart = ImageBase + Export.Offset;
153292915Sdim    uint64_t SymbolSize = NextOffset - Export.Offset;
154292915Sdim    SymbolDesc SD = {SymbolStart, SymbolSize};
155353358Sdim    Functions.emplace_back(SD, Export.Name);
156292915Sdim  }
157292915Sdim  return std::error_code();
158292915Sdim}
159292915Sdim
160292915Sdimstd::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
161292915Sdim                                                  uint64_t SymbolSize,
162292915Sdim                                                  DataExtractor *OpdExtractor,
163292915Sdim                                                  uint64_t OpdAddress) {
164353358Sdim  // Avoid adding symbols from an unknown/undefined section.
165353358Sdim  const ObjectFile *Obj = Symbol.getObject();
166353358Sdim  Expected<section_iterator> Sec = Symbol.getSection();
167353358Sdim  if (!Sec || (Obj && Obj->section_end() == *Sec))
168353358Sdim    return std::error_code();
169309124Sdim  Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
170309124Sdim  if (!SymbolTypeOrErr)
171309124Sdim    return errorToErrorCode(SymbolTypeOrErr.takeError());
172309124Sdim  SymbolRef::Type SymbolType = *SymbolTypeOrErr;
173292915Sdim  if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
174292915Sdim    return std::error_code();
175309124Sdim  Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
176309124Sdim  if (!SymbolAddressOrErr)
177309124Sdim    return errorToErrorCode(SymbolAddressOrErr.takeError());
178292915Sdim  uint64_t SymbolAddress = *SymbolAddressOrErr;
179360784Sdim  if (UntagAddresses) {
180360784Sdim    // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55
181360784Sdim    // into bits 56-63 instead of masking them out.
182360784Sdim    SymbolAddress &= (1ull << 56) - 1;
183360784Sdim    SymbolAddress = (int64_t(SymbolAddress) << 8) >> 8;
184360784Sdim  }
185292915Sdim  if (OpdExtractor) {
186292915Sdim    // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
187292915Sdim    // function descriptors. The first word of the descriptor is a pointer to
188292915Sdim    // the function's code.
189292915Sdim    // For the purposes of symbolization, pretend the symbol's address is that
190292915Sdim    // of the function's code, not the descriptor.
191292915Sdim    uint64_t OpdOffset = SymbolAddress - OpdAddress;
192360784Sdim    if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
193360784Sdim      SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
194292915Sdim  }
195309124Sdim  Expected<StringRef> SymbolNameOrErr = Symbol.getName();
196309124Sdim  if (!SymbolNameOrErr)
197309124Sdim    return errorToErrorCode(SymbolNameOrErr.takeError());
198292915Sdim  StringRef SymbolName = *SymbolNameOrErr;
199292915Sdim  // Mach-O symbol table names have leading underscore, skip it.
200321369Sdim  if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
201292915Sdim    SymbolName = SymbolName.drop_front();
202292915Sdim  // FIXME: If a function has alias, there are two entries in symbol table
203292915Sdim  // with same address size. Make sure we choose the correct one.
204292915Sdim  auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
205292915Sdim  SymbolDesc SD = { SymbolAddress, SymbolSize };
206353358Sdim  M.emplace_back(SD, SymbolName);
207292915Sdim  return std::error_code();
208292915Sdim}
209292915Sdim
210292915Sdim// Return true if this is a 32-bit x86 PE COFF module.
211292915Sdimbool SymbolizableObjectFile::isWin32Module() const {
212292915Sdim  auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
213292915Sdim  return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
214292915Sdim}
215292915Sdim
216292915Sdimuint64_t SymbolizableObjectFile::getModulePreferredBase() const {
217292915Sdim  if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
218292915Sdim    return CoffObject->getImageBase();
219292915Sdim  return 0;
220292915Sdim}
221292915Sdim
222292915Sdimbool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
223292915Sdim                                                    uint64_t Address,
224292915Sdim                                                    std::string &Name,
225292915Sdim                                                    uint64_t &Addr,
226292915Sdim                                                    uint64_t &Size) const {
227353358Sdim  const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
228353358Sdim  std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
229353358Sdim  auto SymbolIterator = llvm::upper_bound(Symbols, SD);
230353358Sdim  if (SymbolIterator == Symbols.begin())
231292915Sdim    return false;
232292915Sdim  --SymbolIterator;
233292915Sdim  if (SymbolIterator->first.Size != 0 &&
234292915Sdim      SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
235292915Sdim    return false;
236292915Sdim  Name = SymbolIterator->second.str();
237292915Sdim  Addr = SymbolIterator->first.Addr;
238292915Sdim  Size = SymbolIterator->first.Size;
239292915Sdim  return true;
240292915Sdim}
241292915Sdim
242292915Sdimbool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
243292915Sdim    FunctionNameKind FNKind, bool UseSymbolTable) const {
244292915Sdim  // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
245292915Sdim  // better answers for linkage names than the DIContext. Otherwise, we are
246292915Sdim  // probably using PEs and PDBs, and we shouldn't do the override. PE files
247292915Sdim  // generally only contain the names of exported symbols.
248292915Sdim  return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
249292915Sdim         isa<DWARFContext>(DebugInfoContext.get());
250292915Sdim}
251292915Sdim
252353358SdimDILineInfo
253353358SdimSymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
254353358Sdim                                      FunctionNameKind FNKind,
255353358Sdim                                      bool UseSymbolTable) const {
256353358Sdim  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
257353358Sdim    ModuleOffset.SectionIndex =
258353358Sdim        getModuleSectionIndexForAddress(ModuleOffset.Address);
259353358Sdim  DILineInfo LineInfo = DebugInfoContext->getLineInfoForAddress(
260353358Sdim      ModuleOffset, getDILineInfoSpecifier(FNKind));
261353358Sdim
262292915Sdim  // Override function name from symbol table if necessary.
263292915Sdim  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
264292915Sdim    std::string FunctionName;
265292915Sdim    uint64_t Start, Size;
266353358Sdim    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
267292915Sdim                               FunctionName, Start, Size)) {
268292915Sdim      LineInfo.FunctionName = FunctionName;
269292915Sdim    }
270292915Sdim  }
271292915Sdim  return LineInfo;
272292915Sdim}
273292915Sdim
274292915SdimDIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
275353358Sdim    object::SectionedAddress ModuleOffset, FunctionNameKind FNKind,
276353358Sdim    bool UseSymbolTable) const {
277353358Sdim  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
278353358Sdim    ModuleOffset.SectionIndex =
279353358Sdim        getModuleSectionIndexForAddress(ModuleOffset.Address);
280353358Sdim  DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress(
281353358Sdim      ModuleOffset, getDILineInfoSpecifier(FNKind));
282292915Sdim
283292915Sdim  // Make sure there is at least one frame in context.
284292915Sdim  if (InlinedContext.getNumberOfFrames() == 0)
285292915Sdim    InlinedContext.addFrame(DILineInfo());
286292915Sdim
287292915Sdim  // Override the function name in lower frame with name from symbol table.
288292915Sdim  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
289292915Sdim    std::string FunctionName;
290292915Sdim    uint64_t Start, Size;
291353358Sdim    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
292292915Sdim                               FunctionName, Start, Size)) {
293292915Sdim      InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
294292915Sdim          ->FunctionName = FunctionName;
295292915Sdim    }
296292915Sdim  }
297292915Sdim
298292915Sdim  return InlinedContext;
299292915Sdim}
300292915Sdim
301353358SdimDIGlobal SymbolizableObjectFile::symbolizeData(
302353358Sdim    object::SectionedAddress ModuleOffset) const {
303292915Sdim  DIGlobal Res;
304353358Sdim  getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
305353358Sdim                         Res.Start, Res.Size);
306292915Sdim  return Res;
307292915Sdim}
308353358Sdim
309353358Sdimstd::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
310353358Sdim    object::SectionedAddress ModuleOffset) const {
311353358Sdim  if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
312353358Sdim    ModuleOffset.SectionIndex =
313353358Sdim        getModuleSectionIndexForAddress(ModuleOffset.Address);
314353358Sdim  return DebugInfoContext->getLocalsForAddress(ModuleOffset);
315353358Sdim}
316353358Sdim
317353358Sdim/// Search for the first occurence of specified Address in ObjectFile.
318353358Sdimuint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
319353358Sdim    uint64_t Address) const {
320353358Sdim
321353358Sdim  for (SectionRef Sec : Module->sections()) {
322353358Sdim    if (!Sec.isText() || Sec.isVirtual())
323353358Sdim      continue;
324353358Sdim
325353358Sdim    if (Address >= Sec.getAddress() &&
326353358Sdim        Address < Sec.getAddress() + Sec.getSize())
327353358Sdim      return Sec.getIndex();
328353358Sdim  }
329353358Sdim
330353358Sdim  return object::SectionedAddress::UndefSection;
331353358Sdim}
332