1//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading profiling data for instrumentation
10// based PGO and coverage.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
15#define LLVM_PROFILEDATA_INSTRPROFREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/ProfileSummary.h"
20#include "llvm/ProfileData/InstrProf.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/Error.h"
23#include "llvm/Support/LineIterator.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/OnDiskHashTable.h"
26#include "llvm/Support/SwapByteOrder.h"
27#include <algorithm>
28#include <cassert>
29#include <cstddef>
30#include <cstdint>
31#include <iterator>
32#include <memory>
33#include <utility>
34#include <vector>
35
36namespace llvm {
37
38class InstrProfReader;
39
40/// A file format agnostic iterator over profiling data.
41class InstrProfIterator : public std::iterator<std::input_iterator_tag,
42                                               NamedInstrProfRecord> {
43  InstrProfReader *Reader = nullptr;
44  value_type Record;
45
46  void Increment();
47
48public:
49  InstrProfIterator() = default;
50  InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
51
52  InstrProfIterator &operator++() { Increment(); return *this; }
53  bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
54  bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
55  value_type &operator*() { return Record; }
56  value_type *operator->() { return &Record; }
57};
58
59/// Base class and interface for reading profiling data of any known instrprof
60/// format. Provides an iterator over NamedInstrProfRecords.
61class InstrProfReader {
62  instrprof_error LastError = instrprof_error::success;
63
64public:
65  InstrProfReader() = default;
66  virtual ~InstrProfReader() = default;
67
68  /// Read the header.  Required before reading first record.
69  virtual Error readHeader() = 0;
70
71  /// Read a single record.
72  virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
73
74  /// Iterator over profile data.
75  InstrProfIterator begin() { return InstrProfIterator(this); }
76  InstrProfIterator end() { return InstrProfIterator(); }
77
78  virtual bool isIRLevelProfile() const = 0;
79
80  virtual bool hasCSIRLevelProfile() const = 0;
81
82  /// Return the PGO symtab. There are three different readers:
83  /// Raw, Text, and Indexed profile readers. The first two types
84  /// of readers are used only by llvm-profdata tool, while the indexed
85  /// profile reader is also used by llvm-cov tool and the compiler (
86  /// backend or frontend). Since creating PGO symtab can create
87  /// significant runtime and memory overhead (as it touches data
88  /// for the whole program), InstrProfSymtab for the indexed profile
89  /// reader should be created on demand and it is recommended to be
90  /// only used for dumping purpose with llvm-proftool, not with the
91  /// compiler.
92  virtual InstrProfSymtab &getSymtab() = 0;
93
94  /// Compute the sum of counts and return in Sum.
95  void accumulateCounts(CountSumOrPercent &Sum, bool IsCS);
96
97protected:
98  std::unique_ptr<InstrProfSymtab> Symtab;
99
100  /// Set the current error and return same.
101  Error error(instrprof_error Err) {
102    LastError = Err;
103    if (Err == instrprof_error::success)
104      return Error::success();
105    return make_error<InstrProfError>(Err);
106  }
107
108  Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
109
110  /// Clear the current error and return a successful one.
111  Error success() { return error(instrprof_error::success); }
112
113public:
114  /// Return true if the reader has finished reading the profile data.
115  bool isEOF() { return LastError == instrprof_error::eof; }
116
117  /// Return true if the reader encountered an error reading profiling data.
118  bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
119
120  /// Get the current error.
121  Error getError() {
122    if (hasError())
123      return make_error<InstrProfError>(LastError);
124    return Error::success();
125  }
126
127  /// Factory method to create an appropriately typed reader for the given
128  /// instrprof file.
129  static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
130
131  static Expected<std::unique_ptr<InstrProfReader>>
132  create(std::unique_ptr<MemoryBuffer> Buffer);
133};
134
135/// Reader for the simple text based instrprof format.
136///
137/// This format is a simple text format that's suitable for test data. Records
138/// are separated by one or more blank lines, and record fields are separated by
139/// new lines.
140///
141/// Each record consists of a function name, a function hash, a number of
142/// counters, and then each counter value, in that order.
143class TextInstrProfReader : public InstrProfReader {
144private:
145  /// The profile data file contents.
146  std::unique_ptr<MemoryBuffer> DataBuffer;
147  /// Iterator over the profile data.
148  line_iterator Line;
149  bool IsIRLevelProfile = false;
150  bool HasCSIRLevelProfile = false;
151
152  Error readValueProfileData(InstrProfRecord &Record);
153
154public:
155  TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
156      : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
157  TextInstrProfReader(const TextInstrProfReader &) = delete;
158  TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
159
160  /// Return true if the given buffer is in text instrprof format.
161  static bool hasFormat(const MemoryBuffer &Buffer);
162
163  bool isIRLevelProfile() const override { return IsIRLevelProfile; }
164
165  bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; }
166
167  /// Read the header.
168  Error readHeader() override;
169
170  /// Read a single record.
171  Error readNextRecord(NamedInstrProfRecord &Record) override;
172
173  InstrProfSymtab &getSymtab() override {
174    assert(Symtab.get());
175    return *Symtab.get();
176  }
177};
178
179/// Reader for the raw instrprof binary format from runtime.
180///
181/// This format is a raw memory dump of the instrumentation-baed profiling data
182/// from the runtime.  It has no index.
183///
184/// Templated on the unsigned type whose size matches pointers on the platform
185/// that wrote the profile.
186template <class IntPtrT>
187class RawInstrProfReader : public InstrProfReader {
188private:
189  /// The profile data file contents.
190  std::unique_ptr<MemoryBuffer> DataBuffer;
191  bool ShouldSwapBytes;
192  // The value of the version field of the raw profile data header. The lower 56
193  // bits specifies the format version and the most significant 8 bits specify
194  // the variant types of the profile.
195  uint64_t Version;
196  uint64_t CountersDelta;
197  uint64_t NamesDelta;
198  const RawInstrProf::ProfileData<IntPtrT> *Data;
199  const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
200  const uint64_t *CountersStart;
201  const char *NamesStart;
202  uint64_t NamesSize;
203  // After value profile is all read, this pointer points to
204  // the header of next profile data (if exists)
205  const uint8_t *ValueDataStart;
206  uint32_t ValueKindLast;
207  uint32_t CurValueDataSize;
208
209public:
210  RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
211      : DataBuffer(std::move(DataBuffer)) {}
212  RawInstrProfReader(const RawInstrProfReader &) = delete;
213  RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
214
215  static bool hasFormat(const MemoryBuffer &DataBuffer);
216  Error readHeader() override;
217  Error readNextRecord(NamedInstrProfRecord &Record) override;
218
219  bool isIRLevelProfile() const override {
220    return (Version & VARIANT_MASK_IR_PROF) != 0;
221  }
222
223  bool hasCSIRLevelProfile() const override {
224    return (Version & VARIANT_MASK_CSIR_PROF) != 0;
225  }
226
227  InstrProfSymtab &getSymtab() override {
228    assert(Symtab.get());
229    return *Symtab.get();
230  }
231
232private:
233  Error createSymtab(InstrProfSymtab &Symtab);
234  Error readNextHeader(const char *CurrentPos);
235  Error readHeader(const RawInstrProf::Header &Header);
236
237  template <class IntT> IntT swap(IntT Int) const {
238    return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
239  }
240
241  support::endianness getDataEndianness() const {
242    support::endianness HostEndian = getHostEndianness();
243    if (!ShouldSwapBytes)
244      return HostEndian;
245    if (HostEndian == support::little)
246      return support::big;
247    else
248      return support::little;
249  }
250
251  inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
252    return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
253  }
254
255  Error readName(NamedInstrProfRecord &Record);
256  Error readFuncHash(NamedInstrProfRecord &Record);
257  Error readRawCounts(InstrProfRecord &Record);
258  Error readValueProfilingData(InstrProfRecord &Record);
259  bool atEnd() const { return Data == DataEnd; }
260
261  void advanceData() {
262    Data++;
263    ValueDataStart += CurValueDataSize;
264  }
265
266  const char *getNextHeaderPos() const {
267      assert(atEnd());
268      return (const char *)ValueDataStart;
269  }
270
271  /// Get the offset of \p CounterPtr from the start of the counters section of
272  /// the profile. The offset has units of "number of counters", i.e. increasing
273  /// the offset by 1 corresponds to an increase in the *byte offset* by 8.
274  ptrdiff_t getCounterOffset(IntPtrT CounterPtr) const {
275    return (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
276  }
277
278  const uint64_t *getCounter(ptrdiff_t Offset) const {
279    return CountersStart + Offset;
280  }
281
282  StringRef getName(uint64_t NameRef) const {
283    return Symtab->getFuncName(swap(NameRef));
284  }
285};
286
287using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
288using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
289
290namespace IndexedInstrProf {
291
292enum class HashT : uint32_t;
293
294} // end namespace IndexedInstrProf
295
296/// Trait for lookups into the on-disk hash table for the binary instrprof
297/// format.
298class InstrProfLookupTrait {
299  std::vector<NamedInstrProfRecord> DataBuffer;
300  IndexedInstrProf::HashT HashType;
301  unsigned FormatVersion;
302  // Endianness of the input value profile data.
303  // It should be LE by default, but can be changed
304  // for testing purpose.
305  support::endianness ValueProfDataEndianness = support::little;
306
307public:
308  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
309      : HashType(HashType), FormatVersion(FormatVersion) {}
310
311  using data_type = ArrayRef<NamedInstrProfRecord>;
312
313  using internal_key_type = StringRef;
314  using external_key_type = StringRef;
315  using hash_value_type = uint64_t;
316  using offset_type = uint64_t;
317
318  static bool EqualKey(StringRef A, StringRef B) { return A == B; }
319  static StringRef GetInternalKey(StringRef K) { return K; }
320  static StringRef GetExternalKey(StringRef K) { return K; }
321
322  hash_value_type ComputeHash(StringRef K);
323
324  static std::pair<offset_type, offset_type>
325  ReadKeyDataLength(const unsigned char *&D) {
326    using namespace support;
327
328    offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
329    offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
330    return std::make_pair(KeyLen, DataLen);
331  }
332
333  StringRef ReadKey(const unsigned char *D, offset_type N) {
334    return StringRef((const char *)D, N);
335  }
336
337  bool readValueProfilingData(const unsigned char *&D,
338                              const unsigned char *const End);
339  data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
340
341  // Used for testing purpose only.
342  void setValueProfDataEndianness(support::endianness Endianness) {
343    ValueProfDataEndianness = Endianness;
344  }
345};
346
347struct InstrProfReaderIndexBase {
348  virtual ~InstrProfReaderIndexBase() = default;
349
350  // Read all the profile records with the same key pointed to the current
351  // iterator.
352  virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
353
354  // Read all the profile records with the key equal to FuncName
355  virtual Error getRecords(StringRef FuncName,
356                                     ArrayRef<NamedInstrProfRecord> &Data) = 0;
357  virtual void advanceToNextKey() = 0;
358  virtual bool atEnd() const = 0;
359  virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
360  virtual uint64_t getVersion() const = 0;
361  virtual bool isIRLevelProfile() const = 0;
362  virtual bool hasCSIRLevelProfile() const = 0;
363  virtual Error populateSymtab(InstrProfSymtab &) = 0;
364};
365
366using OnDiskHashTableImplV3 =
367    OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
368
369template <typename HashTableImpl>
370class InstrProfReaderItaniumRemapper;
371
372template <typename HashTableImpl>
373class InstrProfReaderIndex : public InstrProfReaderIndexBase {
374private:
375  std::unique_ptr<HashTableImpl> HashTable;
376  typename HashTableImpl::data_iterator RecordIterator;
377  uint64_t FormatVersion;
378
379  friend class InstrProfReaderItaniumRemapper<HashTableImpl>;
380
381public:
382  InstrProfReaderIndex(const unsigned char *Buckets,
383                       const unsigned char *const Payload,
384                       const unsigned char *const Base,
385                       IndexedInstrProf::HashT HashType, uint64_t Version);
386  ~InstrProfReaderIndex() override = default;
387
388  Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
389  Error getRecords(StringRef FuncName,
390                   ArrayRef<NamedInstrProfRecord> &Data) override;
391  void advanceToNextKey() override { RecordIterator++; }
392
393  bool atEnd() const override {
394    return RecordIterator == HashTable->data_end();
395  }
396
397  void setValueProfDataEndianness(support::endianness Endianness) override {
398    HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
399  }
400
401  uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
402
403  bool isIRLevelProfile() const override {
404    return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
405  }
406
407  bool hasCSIRLevelProfile() const override {
408    return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0;
409  }
410
411  Error populateSymtab(InstrProfSymtab &Symtab) override {
412    return Symtab.create(HashTable->keys());
413  }
414};
415
416/// Name matcher supporting fuzzy matching of symbol names to names in profiles.
417class InstrProfReaderRemapper {
418public:
419  virtual ~InstrProfReaderRemapper() {}
420  virtual Error populateRemappings() { return Error::success(); }
421  virtual Error getRecords(StringRef FuncName,
422                           ArrayRef<NamedInstrProfRecord> &Data) = 0;
423};
424
425/// Reader for the indexed binary instrprof format.
426class IndexedInstrProfReader : public InstrProfReader {
427private:
428  /// The profile data file contents.
429  std::unique_ptr<MemoryBuffer> DataBuffer;
430  /// The profile remapping file contents.
431  std::unique_ptr<MemoryBuffer> RemappingBuffer;
432  /// The index into the profile data.
433  std::unique_ptr<InstrProfReaderIndexBase> Index;
434  /// The profile remapping file contents.
435  std::unique_ptr<InstrProfReaderRemapper> Remapper;
436  /// Profile summary data.
437  std::unique_ptr<ProfileSummary> Summary;
438  /// Context sensitive profile summary data.
439  std::unique_ptr<ProfileSummary> CS_Summary;
440  // Index to the current record in the record array.
441  unsigned RecordIndex;
442
443  // Read the profile summary. Return a pointer pointing to one byte past the
444  // end of the summary data if it exists or the input \c Cur.
445  // \c UseCS indicates whether to use the context-sensitive profile summary.
446  const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
447                                   const unsigned char *Cur, bool UseCS);
448
449public:
450  IndexedInstrProfReader(
451      std::unique_ptr<MemoryBuffer> DataBuffer,
452      std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr)
453      : DataBuffer(std::move(DataBuffer)),
454        RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {}
455  IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
456  IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
457
458  /// Return the profile version.
459  uint64_t getVersion() const { return Index->getVersion(); }
460  bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
461  bool hasCSIRLevelProfile() const override {
462    return Index->hasCSIRLevelProfile();
463  }
464
465  /// Return true if the given buffer is in an indexed instrprof format.
466  static bool hasFormat(const MemoryBuffer &DataBuffer);
467
468  /// Read the file header.
469  Error readHeader() override;
470  /// Read a single record.
471  Error readNextRecord(NamedInstrProfRecord &Record) override;
472
473  /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
474  Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
475                                               uint64_t FuncHash);
476
477  /// Fill Counts with the profile data for the given function name.
478  Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
479                          std::vector<uint64_t> &Counts);
480
481  /// Return the maximum of all known function counts.
482  /// \c UseCS indicates whether to use the context-sensitive count.
483  uint64_t getMaximumFunctionCount(bool UseCS) {
484    if (UseCS) {
485      assert(CS_Summary && "No context sensitive profile summary");
486      return CS_Summary->getMaxFunctionCount();
487    } else {
488      assert(Summary && "No profile summary");
489      return Summary->getMaxFunctionCount();
490    }
491  }
492
493  /// Factory method to create an indexed reader.
494  static Expected<std::unique_ptr<IndexedInstrProfReader>>
495  create(const Twine &Path, const Twine &RemappingPath = "");
496
497  static Expected<std::unique_ptr<IndexedInstrProfReader>>
498  create(std::unique_ptr<MemoryBuffer> Buffer,
499         std::unique_ptr<MemoryBuffer> RemappingBuffer = nullptr);
500
501  // Used for testing purpose only.
502  void setValueProfDataEndianness(support::endianness Endianness) {
503    Index->setValueProfDataEndianness(Endianness);
504  }
505
506  // See description in the base class. This interface is designed
507  // to be used by llvm-profdata (for dumping). Avoid using this when
508  // the client is the compiler.
509  InstrProfSymtab &getSymtab() override;
510
511  /// Return the profile summary.
512  /// \c UseCS indicates whether to use the context-sensitive summary.
513  ProfileSummary &getSummary(bool UseCS) {
514    if (UseCS) {
515      assert(CS_Summary && "No context sensitive summary");
516      return *(CS_Summary.get());
517    } else {
518      assert(Summary && "No profile summary");
519      return *(Summary.get());
520    }
521  }
522};
523
524} // end namespace llvm
525
526#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
527