//===- GsymReader.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
#define LLVM_DEBUGINFO_GSYM_GSYMREADER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineEntry.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"

#include <algorithm>
#include <inttypes.h>
#include <iterator>
#include <memory>
#include <stdint.h>
#include <string>
#include <vector>

namespace llvm {
class MemoryBuffer;
class raw_ostream;

namespace gsym {

/// GsymReader is used to read GSYM data from a file or buffer.
///
/// This class is optimized for very quick lookups when the endianness matches
/// the host system. The Header, address table, address info offsets, and file
/// table is designed to be mmap'ed as read only into memory and used without
/// any parsing needed. If the endianness doesn't match, we swap these objects
/// and tables into GsymReader::SwappedData and then point our header and
/// ArrayRefs to this swapped internal data.
43353940Sdim/// 44353940Sdim/// GsymReader objects must use one of the static functions to create an 45353940Sdim/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...). 46353940Sdim 47353940Sdimclass GsymReader { 48353940Sdim GsymReader(std::unique_ptr<MemoryBuffer> Buffer); 49353940Sdim llvm::Error parse(); 50353940Sdim 51353940Sdim std::unique_ptr<MemoryBuffer> MemBuffer; 52353940Sdim StringRef GsymBytes; 53353940Sdim llvm::support::endianness Endian; 54353940Sdim const Header *Hdr = nullptr; 55353940Sdim ArrayRef<uint8_t> AddrOffsets; 56353940Sdim ArrayRef<uint32_t> AddrInfoOffsets; 57353940Sdim ArrayRef<FileEntry> Files; 58353940Sdim StringTable StrTab; 59353940Sdim /// When the GSYM file's endianness doesn't match the host system then 60353940Sdim /// we must decode all data structures that need to be swapped into 61353940Sdim /// local storage and set point the ArrayRef objects above to these swapped 62353940Sdim /// copies. 63353940Sdim struct SwappedData { 64353940Sdim Header Hdr; 65353940Sdim std::vector<uint8_t> AddrOffsets; 66353940Sdim std::vector<uint32_t> AddrInfoOffsets; 67353940Sdim std::vector<FileEntry> Files; 68353940Sdim }; 69353940Sdim std::unique_ptr<SwappedData> Swap; 70353940Sdim 71353940Sdimpublic: 72353940Sdim GsymReader(GsymReader &&RHS); 73353940Sdim ~GsymReader(); 74353940Sdim 75353940Sdim /// Construct a GsymReader from a file on disk. 76353940Sdim /// 77353940Sdim /// \param Path The file path the GSYM file to read. 78353940Sdim /// \returns An expected GsymReader that contains the object or an error 79353940Sdim /// object that indicates reason for failing to read the GSYM. 80353940Sdim static llvm::Expected<GsymReader> openFile(StringRef Path); 81353940Sdim 82353940Sdim /// Construct a GsymReader from a buffer. 83353940Sdim /// 84353940Sdim /// \param Bytes A set of bytes that will be copied and owned by the 85353940Sdim /// returned object on success. 
86353940Sdim /// \returns An expected GsymReader that contains the object or an error 87353940Sdim /// object that indicates reason for failing to read the GSYM. 88353940Sdim static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes); 89353940Sdim 90353940Sdim /// Access the GSYM header. 91353940Sdim /// \returns A native endian version of the GSYM header. 92353940Sdim const Header &getHeader() const; 93353940Sdim 94353940Sdim /// Get the full function info for an address. 95353940Sdim /// 96357095Sdim /// This should be called when a client will store a copy of the complete 97357095Sdim /// FunctionInfo for a given address. For one off lookups, use the lookup() 98357095Sdim /// function below. 99357095Sdim /// 100357095Sdim /// Symbolication server processes might want to parse the entire function 101357095Sdim /// info for a given address and cache it if the process stays around to 102357095Sdim /// service many symbolication addresses, like for parsing profiling 103357095Sdim /// information. 104357095Sdim /// 105353940Sdim /// \param Addr A virtual address from the orignal object file to lookup. 106357095Sdim /// 107353940Sdim /// \returns An expected FunctionInfo that contains the function info object 108353940Sdim /// or an error object that indicates reason for failing to lookup the 109357095Sdim /// address. 110353940Sdim llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const; 111353940Sdim 112357095Sdim /// Lookup an address in the a GSYM. 113357095Sdim /// 114357095Sdim /// Lookup just the information needed for a specific address \a Addr. This 115357095Sdim /// function is faster that calling getFunctionInfo() as it will only return 116357095Sdim /// information that pertains to \a Addr and allows the parsing to skip any 117357095Sdim /// extra information encoded for other addresses. 
For example the line table 118357095Sdim /// parsing can stop when a matching LineEntry has been fouhnd, and the 119357095Sdim /// InlineInfo can stop parsing early once a match has been found and also 120357095Sdim /// skip information that doesn't match. This avoids memory allocations and 121357095Sdim /// is much faster for lookups. 122357095Sdim /// 123357095Sdim /// \param Addr A virtual address from the orignal object file to lookup. 124357095Sdim /// \returns An expected LookupResult that contains only the information 125357095Sdim /// needed for the current address, or an error object that indicates reason 126357095Sdim /// for failing to lookup the address. 127357095Sdim llvm::Expected<LookupResult> lookup(uint64_t Addr) const; 128357095Sdim 129353940Sdim /// Get a string from the string table. 130353940Sdim /// 131353940Sdim /// \param Offset The string table offset for the string to retrieve. 132353940Sdim /// \returns The string from the strin table. 133353940Sdim StringRef getString(uint32_t Offset) const { return StrTab[Offset]; } 134353940Sdim 135353940Sdim /// Get the a file entry for the suppplied file index. 136353940Sdim /// 137353940Sdim /// Used to convert any file indexes in the FunctionInfo data back into 138353940Sdim /// files. This function can be used for iteration, but is more commonly used 139353940Sdim /// for random access when doing lookups. 140353940Sdim /// 141353940Sdim /// \param Index An index into the file table. 142353940Sdim /// \returns An optional FileInfo that will be valid if the file index is 143353940Sdim /// valid, or llvm::None if the file index is out of bounds, 144353940Sdim Optional<FileEntry> getFile(uint32_t Index) const { 145353940Sdim if (Index < Files.size()) 146353940Sdim return Files[Index]; 147353940Sdim return llvm::None; 148353940Sdim } 149353940Sdim 150357095Sdimprotected: 151357095Sdim /// Gets an address from the address table. 
152357095Sdim /// 153357095Sdim /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress. 154357095Sdim /// 155357095Sdim /// \param Index A index into the address table. 156357095Sdim /// \returns A resolved virtual address for adddress in the address table 157357095Sdim /// or llvm::None if Index is out of bounds. 158357095Sdim Optional<uint64_t> getAddress(size_t Index) const; 159357095Sdim 160353940Sdim /// Get an appropriate address info offsets array. 161353940Sdim /// 162353940Sdim /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 163353940Sdim /// byte offsets from the The gsym::Header::BaseAddress. The table is stored 164353940Sdim /// internally as a array of bytes that are in the correct endianness. When 165353940Sdim /// we access this table we must get an array that matches those sizes. This 166353940Sdim /// templatized helper function is used when accessing address offsets in the 167353940Sdim /// AddrOffsets member variable. 168353940Sdim /// 169353940Sdim /// \returns An ArrayRef of an appropriate address offset size. 170353940Sdim template <class T> ArrayRef<T> 171353940Sdim getAddrOffsets() const { 172353940Sdim return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()), 173353940Sdim AddrOffsets.size()/sizeof(T)); 174353940Sdim } 175353940Sdim 176353940Sdim /// Get an appropriate address from the address table. 177353940Sdim /// 178353940Sdim /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8 179353940Sdim /// byte address offsets from the The gsym::Header::BaseAddress. The table is 180353940Sdim /// stored internally as a array of bytes that are in the correct endianness. 181353940Sdim /// In order to extract an address from the address table we must access the 182353940Sdim /// address offset using the correct size and then add it to the BaseAddress 183353940Sdim /// in the header. 184353940Sdim /// 185353940Sdim /// \param Index An index into the AddrOffsets array. 
186353940Sdim /// \returns An virtual address that matches the original object file for the 187353940Sdim /// address as the specified index, or llvm::None if Index is out of bounds. 188353940Sdim template <class T> Optional<uint64_t> 189353940Sdim addressForIndex(size_t Index) const { 190353940Sdim ArrayRef<T> AIO = getAddrOffsets<T>(); 191353940Sdim if (Index < AIO.size()) 192353940Sdim return AIO[Index] + Hdr->BaseAddress; 193353940Sdim return llvm::None; 194353940Sdim } 195353940Sdim /// Lookup an address offset in the AddrOffsets table. 196353940Sdim /// 197353940Sdim /// Given an address offset, look it up using a binary search of the 198353940Sdim /// AddrOffsets table. 199353940Sdim /// 200353940Sdim /// \param AddrOffset An address offset, that has already been computed by 201353940Sdim /// subtracting the gsym::Header::BaseAddress. 202353940Sdim /// \returns The matching address offset index. This index will be used to 203353940Sdim /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 204353940Sdim template <class T> 205353940Sdim uint64_t getAddressOffsetIndex(const uint64_t AddrOffset) const { 206353940Sdim ArrayRef<T> AIO = getAddrOffsets<T>(); 207353940Sdim const auto Begin = AIO.begin(); 208353940Sdim const auto End = AIO.end(); 209353940Sdim auto Iter = std::lower_bound(Begin, End, AddrOffset); 210353940Sdim if (Iter == End || AddrOffset < *Iter) 211353940Sdim --Iter; 212353940Sdim return std::distance(Begin, Iter); 213353940Sdim } 214353940Sdim 215353940Sdim /// Create a GSYM from a memory buffer. 216353940Sdim /// 217353940Sdim /// Called by both openFile() and copyBuffer(), this function does all of the 218353940Sdim /// work of parsing the GSYM file and returning an error. 219353940Sdim /// 220353940Sdim /// \param MemBuffer A memory buffer that will transfer ownership into the 221353940Sdim /// GsymReader. 
222353940Sdim /// \returns An expected GsymReader that contains the object or an error 223353940Sdim /// object that indicates reason for failing to read the GSYM. 224353940Sdim static llvm::Expected<llvm::gsym::GsymReader> 225353940Sdim create(std::unique_ptr<MemoryBuffer> &MemBuffer); 226353940Sdim 227353940Sdim 228353940Sdim /// Given an address, find the address index. 229353940Sdim /// 230353940Sdim /// Binary search the address table and find the matching address index. 231353940Sdim /// 232353940Sdim /// \param Addr A virtual address that matches the original object file 233353940Sdim /// to lookup. 234353940Sdim /// \returns An index into the address table. This index can be used to 235353940Sdim /// extract the FunctionInfo data's offset from the AddrInfoOffsets array. 236353940Sdim /// Returns an error if the address isn't in the GSYM with details of why. 237353940Sdim Expected<uint64_t> getAddressIndex(const uint64_t Addr) const; 238353940Sdim 239353940Sdim /// Given an address index, get the offset for the FunctionInfo. 240353940Sdim /// 241353940Sdim /// Looking up an address is done by finding the corresponding address 242353940Sdim /// index for the address. This index is then used to get the offset of the 243353940Sdim /// FunctionInfo data that we will decode using this function. 244353940Sdim /// 245353940Sdim /// \param Index An index into the address table. 246353940Sdim /// \returns An optional GSYM data offset for the offset of the FunctionInfo 247353940Sdim /// that needs to be decoded. 248353940Sdim Optional<uint64_t> getAddressInfoOffset(size_t Index) const; 249353940Sdim}; 250353940Sdim 251353940Sdim} // namespace gsym 252353940Sdim} // namespace llvm 253353940Sdim 254353940Sdim#endif // #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H 255