1353940Sdim//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
2353940Sdim//
3357095Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4357095Sdim// See https://llvm.org/LICENSE.txt for license information.
5357095Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6353940Sdim//
7353940Sdim//===----------------------------------------------------------------------===//
8353940Sdim
9353940Sdim#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
10353940Sdim#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
11353940Sdim
12353940Sdim
13353940Sdim#include "llvm/ADT/ArrayRef.h"
14353940Sdim#include "llvm/DebugInfo/GSYM/FileEntry.h"
15353940Sdim#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
16353940Sdim#include "llvm/DebugInfo/GSYM/Header.h"
17353940Sdim#include "llvm/DebugInfo/GSYM/LineEntry.h"
18353940Sdim#include "llvm/DebugInfo/GSYM/StringTable.h"
19353940Sdim#include "llvm/Support/DataExtractor.h"
20353940Sdim#include "llvm/Support/Endian.h"
21353940Sdim#include "llvm/Support/ErrorOr.h"
22353940Sdim
23353940Sdim#include <inttypes.h>
24353940Sdim#include <memory>
25353940Sdim#include <stdint.h>
26353940Sdim#include <string>
27353940Sdim#include <vector>
28353940Sdim
29353940Sdimnamespace llvm {
30353940Sdimclass MemoryBuffer;
31353940Sdimclass raw_ostream;
32353940Sdim
33353940Sdimnamespace gsym {
34353940Sdim
35353940Sdim/// GsymReader is used to read GSYM data from a file or buffer.
36353940Sdim///
37353940Sdim/// This class is optimized for very quick lookups when the endianness matches
38353940Sdim/// the host system. The Header, address table, address info offsets, and file
/// table are designed to be mmap'ed as read only into memory and used without
40353940Sdim/// any parsing needed. If the endianness doesn't match, we swap these objects
41353940Sdim/// and tables into GsymReader::SwappedData and then point our header and
42353940Sdim/// ArrayRefs to this swapped internal data.
43353940Sdim///
44353940Sdim/// GsymReader objects must use one of the static functions to create an
45353940Sdim/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
46353940Sdim
47353940Sdimclass GsymReader {
48353940Sdim  GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
49353940Sdim  llvm::Error parse();
50353940Sdim
51353940Sdim  std::unique_ptr<MemoryBuffer> MemBuffer;
52353940Sdim  StringRef GsymBytes;
53353940Sdim  llvm::support::endianness Endian;
54353940Sdim  const Header *Hdr = nullptr;
55353940Sdim  ArrayRef<uint8_t> AddrOffsets;
56353940Sdim  ArrayRef<uint32_t> AddrInfoOffsets;
57353940Sdim  ArrayRef<FileEntry> Files;
58353940Sdim  StringTable StrTab;
59353940Sdim  /// When the GSYM file's endianness doesn't match the host system then
60353940Sdim  /// we must decode all data structures that need to be swapped into
61353940Sdim  /// local storage and set point the ArrayRef objects above to these swapped
62353940Sdim  /// copies.
63353940Sdim  struct SwappedData {
64353940Sdim    Header Hdr;
65353940Sdim    std::vector<uint8_t> AddrOffsets;
66353940Sdim    std::vector<uint32_t> AddrInfoOffsets;
67353940Sdim    std::vector<FileEntry> Files;
68353940Sdim  };
69353940Sdim  std::unique_ptr<SwappedData> Swap;
70353940Sdim
71353940Sdimpublic:
72353940Sdim  GsymReader(GsymReader &&RHS);
73353940Sdim  ~GsymReader();
74353940Sdim
75353940Sdim  /// Construct a GsymReader from a file on disk.
76353940Sdim  ///
77353940Sdim  /// \param Path The file path the GSYM file to read.
78353940Sdim  /// \returns An expected GsymReader that contains the object or an error
79353940Sdim  /// object that indicates reason for failing to read the GSYM.
80353940Sdim  static llvm::Expected<GsymReader> openFile(StringRef Path);
81353940Sdim
82353940Sdim  /// Construct a GsymReader from a buffer.
83353940Sdim  ///
84353940Sdim  /// \param Bytes A set of bytes that will be copied and owned by the
85353940Sdim  /// returned object on success.
86353940Sdim  /// \returns An expected GsymReader that contains the object or an error
87353940Sdim  /// object that indicates reason for failing to read the GSYM.
88353940Sdim  static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
89353940Sdim
90353940Sdim  /// Access the GSYM header.
91353940Sdim  /// \returns A native endian version of the GSYM header.
92353940Sdim  const Header &getHeader() const;
93353940Sdim
94353940Sdim  /// Get the full function info for an address.
95353940Sdim  ///
96357095Sdim  /// This should be called when a client will store a copy of the complete
97357095Sdim  /// FunctionInfo for a given address. For one off lookups, use the lookup()
98357095Sdim  /// function below.
99357095Sdim  ///
100357095Sdim  /// Symbolication server processes might want to parse the entire function
101357095Sdim  /// info for a given address and cache it if the process stays around to
102357095Sdim  /// service many symbolication addresses, like for parsing profiling
103357095Sdim  /// information.
104357095Sdim  ///
105353940Sdim  /// \param Addr A virtual address from the orignal object file to lookup.
106357095Sdim  ///
107353940Sdim  /// \returns An expected FunctionInfo that contains the function info object
108353940Sdim  /// or an error object that indicates reason for failing to lookup the
109357095Sdim  /// address.
110353940Sdim  llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
111353940Sdim
112357095Sdim  /// Lookup an address in the a GSYM.
113357095Sdim  ///
114357095Sdim  /// Lookup just the information needed for a specific address \a Addr. This
115357095Sdim  /// function is faster that calling getFunctionInfo() as it will only return
116357095Sdim  /// information that pertains to \a Addr and allows the parsing to skip any
117357095Sdim  /// extra information encoded for other addresses. For example the line table
118357095Sdim  /// parsing can stop when a matching LineEntry has been fouhnd, and the
119357095Sdim  /// InlineInfo can stop parsing early once a match has been found and also
120357095Sdim  /// skip information that doesn't match. This avoids memory allocations and
121357095Sdim  /// is much faster for lookups.
122357095Sdim  ///
123357095Sdim  /// \param Addr A virtual address from the orignal object file to lookup.
124357095Sdim  /// \returns An expected LookupResult that contains only the information
125357095Sdim  /// needed for the current address, or an error object that indicates reason
126357095Sdim  /// for failing to lookup the address.
127357095Sdim  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
128357095Sdim
129353940Sdim  /// Get a string from the string table.
130353940Sdim  ///
131353940Sdim  /// \param Offset The string table offset for the string to retrieve.
132353940Sdim  /// \returns The string from the strin table.
133353940Sdim  StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
134353940Sdim
135353940Sdim  /// Get the a file entry for the suppplied file index.
136353940Sdim  ///
137353940Sdim  /// Used to convert any file indexes in the FunctionInfo data back into
138353940Sdim  /// files. This function can be used for iteration, but is more commonly used
139353940Sdim  /// for random access when doing lookups.
140353940Sdim  ///
141353940Sdim  /// \param Index An index into the file table.
142353940Sdim  /// \returns An optional FileInfo that will be valid if the file index is
143353940Sdim  /// valid, or llvm::None if the file index is out of bounds,
144353940Sdim  Optional<FileEntry> getFile(uint32_t Index) const {
145353940Sdim    if (Index < Files.size())
146353940Sdim      return Files[Index];
147353940Sdim    return llvm::None;
148353940Sdim  }
149353940Sdim
150357095Sdimprotected:
151357095Sdim  /// Gets an address from the address table.
152357095Sdim  ///
153357095Sdim  /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
154357095Sdim  ///
155357095Sdim  /// \param Index A index into the address table.
156357095Sdim  /// \returns A resolved virtual address for adddress in the address table
157357095Sdim  /// or llvm::None if Index is out of bounds.
158357095Sdim  Optional<uint64_t> getAddress(size_t Index) const;
159357095Sdim
160353940Sdim  /// Get an appropriate address info offsets array.
161353940Sdim  ///
162353940Sdim  /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
163353940Sdim  /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
164353940Sdim  /// internally as a array of bytes that are in the correct endianness. When
165353940Sdim  /// we access this table we must get an array that matches those sizes. This
166353940Sdim  /// templatized helper function is used when accessing address offsets in the
167353940Sdim  /// AddrOffsets member variable.
168353940Sdim  ///
169353940Sdim  /// \returns An ArrayRef of an appropriate address offset size.
170353940Sdim  template <class T> ArrayRef<T>
171353940Sdim  getAddrOffsets() const {
172353940Sdim    return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
173353940Sdim                       AddrOffsets.size()/sizeof(T));
174353940Sdim  }
175353940Sdim
176353940Sdim  /// Get an appropriate address from the address table.
177353940Sdim  ///
178353940Sdim  /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
179353940Sdim  /// byte address offsets from the The gsym::Header::BaseAddress. The table is
180353940Sdim  /// stored internally as a array of bytes that are in the correct endianness.
181353940Sdim  /// In order to extract an address from the address table we must access the
182353940Sdim  /// address offset using the correct size and then add it to the BaseAddress
183353940Sdim  /// in the header.
184353940Sdim  ///
185353940Sdim  /// \param Index An index into the AddrOffsets array.
186353940Sdim  /// \returns An virtual address that matches the original object file for the
187353940Sdim  /// address as the specified index, or llvm::None if Index is out of bounds.
188353940Sdim  template <class T> Optional<uint64_t>
189353940Sdim  addressForIndex(size_t Index) const {
190353940Sdim    ArrayRef<T> AIO = getAddrOffsets<T>();
191353940Sdim    if (Index < AIO.size())
192353940Sdim      return AIO[Index] + Hdr->BaseAddress;
193353940Sdim    return llvm::None;
194353940Sdim  }
195353940Sdim  /// Lookup an address offset in the AddrOffsets table.
196353940Sdim  ///
197353940Sdim  /// Given an address offset, look it up using a binary search of the
198353940Sdim  /// AddrOffsets table.
199353940Sdim  ///
200353940Sdim  /// \param AddrOffset An address offset, that has already been computed by
201353940Sdim  /// subtracting the gsym::Header::BaseAddress.
202353940Sdim  /// \returns The matching address offset index. This index will be used to
203353940Sdim  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
204353940Sdim  template <class T>
205353940Sdim  uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const {
206353940Sdim    ArrayRef<T> AIO = getAddrOffsets<T>();
207353940Sdim    const auto Begin = AIO.begin();
208353940Sdim    const auto End = AIO.end();
209353940Sdim    auto Iter = std::lower_bound(Begin, End, AddrOffset);
210353940Sdim    if (Iter == End || AddrOffset < *Iter)
211353940Sdim      --Iter;
212353940Sdim    return std::distance(Begin, Iter);
213353940Sdim  }
214353940Sdim
215353940Sdim  /// Create a GSYM from a memory buffer.
216353940Sdim  ///
217353940Sdim  /// Called by both openFile() and copyBuffer(), this function does all of the
218353940Sdim  /// work of parsing the GSYM file and returning an error.
219353940Sdim  ///
220353940Sdim  /// \param MemBuffer A memory buffer that will transfer ownership into the
221353940Sdim  /// GsymReader.
222353940Sdim  /// \returns An expected GsymReader that contains the object or an error
223353940Sdim  /// object that indicates reason for failing to read the GSYM.
224353940Sdim  static llvm::Expected<llvm::gsym::GsymReader>
225353940Sdim  create(std::unique_ptr<MemoryBuffer> &MemBuffer);
226353940Sdim
227353940Sdim
228353940Sdim  /// Given an address, find the address index.
229353940Sdim  ///
230353940Sdim  /// Binary search the address table and find the matching address index.
231353940Sdim  ///
232353940Sdim  /// \param Addr A virtual address that matches the original object file
233353940Sdim  /// to lookup.
234353940Sdim  /// \returns An index into the address table. This index can be used to
235353940Sdim  /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
236353940Sdim  /// Returns an error if the address isn't in the GSYM with details of why.
237353940Sdim  Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
238353940Sdim
239353940Sdim  /// Given an address index, get the offset for the FunctionInfo.
240353940Sdim  ///
241353940Sdim  /// Looking up an address is done by finding the corresponding address
242353940Sdim  /// index for the address. This index is then used to get the offset of the
243353940Sdim  /// FunctionInfo data that we will decode using this function.
244353940Sdim  ///
245353940Sdim  /// \param Index An index into the address table.
246353940Sdim  /// \returns An optional GSYM data offset for the offset of the FunctionInfo
247353940Sdim  /// that needs to be decoded.
248353940Sdim  Optional<uint64_t> getAddressInfoOffset(size_t Index) const;
249353940Sdim};
250353940Sdim
251353940Sdim} // namespace gsym
252353940Sdim} // namespace llvm
253353940Sdim
254353940Sdim#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
255