1#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
2#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
3//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===//
4//
5// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6// See https://llvm.org/LICENSE.txt for license information.
7// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8//
9//===----------------------------------------------------------------------===//
10//
11// This file contains support for reading MemProf profiling data.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
19#include "llvm/DebugInfo/Symbolize/Symbolize.h"
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/Object/Binary.h"
22#include "llvm/Object/ObjectFile.h"
23#include "llvm/ProfileData/InstrProfReader.h"
24#include "llvm/ProfileData/MemProf.h"
25#include "llvm/ProfileData/MemProfData.inc"
26#include "llvm/Support/Error.h"
27#include "llvm/Support/MemoryBuffer.h"
28
29#include <functional>
30
31namespace llvm {
32namespace memprof {
33// A class for memprof profile data populated directly from external
34// sources.
35// TODO: Rename this file to MemProfReader.h to better reflect the contents.
36class MemProfReader {
37public:
38  // The MemProfReader only holds memory profile information.
39  InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
40
41  using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
42  using Iterator = InstrProfIterator<GuidMemProfRecordPair, MemProfReader>;
43  Iterator end() { return Iterator(); }
44  Iterator begin() {
45    Iter = FunctionProfileData.begin();
46    return Iterator(this);
47  }
48
49  // Return a const reference to the internal Id to Frame mappings.
50  const llvm::DenseMap<FrameId, Frame> &getFrameMapping() const {
51    return IdToFrame;
52  }
53
54  // Return a const reference to the internal function profile data.
55  const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> &
56  getProfileData() const {
57    return FunctionProfileData;
58  }
59
60  virtual Error
61  readNextRecord(GuidMemProfRecordPair &GuidRecord,
62                 std::function<const Frame(const FrameId)> Callback = nullptr) {
63    if (FunctionProfileData.empty())
64      return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
65
66    if (Iter == FunctionProfileData.end())
67      return make_error<InstrProfError>(instrprof_error::eof);
68
69    if (Callback == nullptr)
70      Callback =
71          std::bind(&MemProfReader::idToFrame, this, std::placeholders::_1);
72
73    const IndexedMemProfRecord &IndexedRecord = Iter->second;
74    GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, Callback)};
75    Iter++;
76    return Error::success();
77  }
78
79  // Allow default construction for derived classes which can populate the
80  // contents after construction.
81  MemProfReader() = default;
82  virtual ~MemProfReader() = default;
83
84  // Initialize the MemProfReader with the frame mappings and profile contents.
85  MemProfReader(
86      llvm::DenseMap<FrameId, Frame> FrameIdMap,
87      llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
88      : IdToFrame(std::move(FrameIdMap)),
89        FunctionProfileData(std::move(ProfData)) {}
90
91protected:
92  // A helper method to extract the frame from the IdToFrame map.
93  const Frame &idToFrame(const FrameId Id) const {
94    auto It = IdToFrame.find(Id);
95    assert(It != IdToFrame.end() && "Id not found in map.");
96    return It->getSecond();
97  }
98  // A mapping from FrameId (a hash of the contents) to the frame.
99  llvm::DenseMap<FrameId, Frame> IdToFrame;
100  // A mapping from function GUID, hash of the canonical function symbol to the
101  // memprof profile data for that function, i.e allocation and callsite info.
102  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> FunctionProfileData;
103  // An iterator to the internal function profile data structure.
104  llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
105};
106
107// Map from id (recorded from sanitizer stack depot) to virtual addresses for
108// each program counter address in the callstack.
109using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t>>;
110
111// Specializes the MemProfReader class to populate the contents from raw binary
112// memprof profiles from instrumentation based profiling.
113class RawMemProfReader final : public MemProfReader {
114public:
115  RawMemProfReader(const RawMemProfReader &) = delete;
116  RawMemProfReader &operator=(const RawMemProfReader &) = delete;
117  virtual ~RawMemProfReader() override = default;
118
119  // Prints the contents of the profile in YAML format.
120  void printYAML(raw_ostream &OS);
121
122  // Return true if the \p DataBuffer starts with magic bytes indicating it is
123  // a raw binary memprof profile.
124  static bool hasFormat(const MemoryBuffer &DataBuffer);
125  // Return true if the file at \p Path starts with magic bytes indicating it is
126  // a raw binary memprof profile.
127  static bool hasFormat(const StringRef Path);
128
129  // Create a RawMemProfReader after sanity checking the contents of the file at
130  // \p Path or the \p Buffer. The binary from which the profile has been
131  // collected is specified via a path in \p ProfiledBinary.
132  static Expected<std::unique_ptr<RawMemProfReader>>
133  create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
134  static Expected<std::unique_ptr<RawMemProfReader>>
135  create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
136         bool KeepName = false);
137
138  // Returns a list of build ids recorded in the segment information.
139  static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
140
141  virtual Error
142  readNextRecord(GuidMemProfRecordPair &GuidRecord,
143                 std::function<const Frame(const FrameId)> Callback) override;
144
145  // Constructor for unittests only.
146  RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
147                   llvm::SmallVectorImpl<SegmentEntry> &Seg,
148                   llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
149                   CallStackMap &SM, bool KeepName = false)
150      : SegmentInfo(Seg.begin(), Seg.end()), CallstackProfileData(Prof),
151        StackMap(SM), KeepSymbolName(KeepName) {
152    // We don't call initialize here since there is no raw profile to read. The
153    // test should pass in the raw profile as structured data.
154
155    // If there is an error here then the mock symbolizer has not been
156    // initialized properly.
157    if (Error E = symbolizeAndFilterStackFrames(std::move(Sym)))
158      report_fatal_error(std::move(E));
159    if (Error E = mapRawProfileToRecords())
160      report_fatal_error(std::move(E));
161  }
162
163private:
164  RawMemProfReader(object::OwningBinary<object::Binary> &&Bin, bool KeepName)
165      : Binary(std::move(Bin)), KeepSymbolName(KeepName) {}
166  // Initializes the RawMemProfReader with the contents in `DataBuffer`.
167  Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
168  // Read and parse the contents of the `DataBuffer` as a binary format profile.
169  Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
170  // Initialize the segment mapping information for symbolization.
171  Error setupForSymbolization();
172  // Symbolize and cache all the virtual addresses we encounter in the
173  // callstacks from the raw profile. Also prune callstack frames which we can't
174  // symbolize or those that belong to the runtime. For profile entries where
175  // the entire callstack is pruned, we drop the entry from the profile.
176  Error symbolizeAndFilterStackFrames(
177      std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer);
178  // Construct memprof records for each function and store it in the
179  // `FunctionProfileData` map. A function may have allocation profile data or
180  // callsite data or both.
181  Error mapRawProfileToRecords();
182
183  object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
184
185  // The profiled binary.
186  object::OwningBinary<object::Binary> Binary;
187  // The preferred load address of the executable segment.
188  uint64_t PreferredTextSegmentAddress = 0;
189  // The base address of the text segment in the process during profiling.
190  uint64_t ProfiledTextSegmentStart = 0;
191  // The limit address of the text segment in the process during profiling.
192  uint64_t ProfiledTextSegmentEnd = 0;
193
194  // The memory mapped segment information for all executable segments in the
195  // profiled binary (filtered from the raw profile using the build id).
196  llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
197
198  // A map from callstack id (same as key in CallStackMap below) to the heap
199  // information recorded for that allocation context.
200  llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
201  CallStackMap StackMap;
202
203  // Cached symbolization from PC to Frame.
204  llvm::DenseMap<uint64_t, llvm::SmallVector<FrameId>> SymbolizedFrame;
205
206  // Whether to keep the symbol name for each frame after hashing.
207  bool KeepSymbolName = false;
208  // A mapping of the hash to symbol name, only used if KeepSymbolName is true.
209  llvm::DenseMap<uint64_t, std::string> GuidToSymbolName;
210};
211} // namespace memprof
212} // namespace llvm
213
214#endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
215