1//===- InstrProf.h - Instrumented profiling format support ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Instrumentation-based profiling data is generated by instrumented
10// binaries through library functions in compiler-rt, and read by the clang
11// frontend to feed PGO.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_PROFILEDATA_INSTRPROF_H
16#define LLVM_PROFILEDATA_INSTRPROF_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/StringSet.h"
22#include "llvm/ADT/Triple.h"
23#include "llvm/IR/GlobalValue.h"
24#include "llvm/IR/ProfileSummary.h"
25#include "llvm/ProfileData/InstrProfData.inc"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Compiler.h"
28#include "llvm/Support/Endian.h"
29#include "llvm/Support/Error.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/Host.h"
32#include "llvm/Support/MD5.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Support/raw_ostream.h"
35#include <algorithm>
36#include <cassert>
37#include <cstddef>
38#include <cstdint>
39#include <cstring>
40#include <list>
41#include <memory>
42#include <string>
43#include <system_error>
44#include <utility>
45#include <vector>
46
47namespace llvm {
48
// Forward declarations for types that the declarations below refer to before
// (or without) their full definitions being visible.
class Function;
class GlobalVariable;
struct InstrProfRecord;
class InstrProfSymtab;
class Instruction;
class MDNode;
class Module;
56
/// Kinds of profile sections. The enumerators are produced by expanding
/// INSTR_PROF_SECT_ENTRY over the entries of InstrProfData.inc; only the
/// \c Kind field of each entry is used here.
enum InstrProfSectKind {
#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/ProfileData/InstrProfData.inc"
};
61
/// Return the name of the profile section corresponding to \p IPSK.
///
/// The name of the section depends on the object format type \p OF. If
/// \p AddSegmentInfo is true (the default), a segment prefix and additional
/// linker hints may be added to the section name.
std::string getInstrProfSectionName(InstrProfSectKind IPSK,
                                    Triple::ObjectFormatType OF,
                                    bool AddSegmentInfo = true);
70
71/// Return the name profile runtime entry point to do value profiling
72/// for a given site.
73inline StringRef getInstrProfValueProfFuncName() {
74  return INSTR_PROF_VALUE_PROF_FUNC_STR;
75}
76
77/// Return the name profile runtime entry point to do value range profiling.
78inline StringRef getInstrProfValueRangeProfFuncName() {
79  return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR;
80}
81
82/// Return the name prefix of variables containing instrumented function names.
83inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; }
84
85/// Return the name prefix of variables containing per-function control data.
86inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; }
87
88/// Return the name prefix of profile counter variables.
89inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; }
90
91/// Return the name prefix of value profile variables.
92inline StringRef getInstrProfValuesVarPrefix() { return "__profvp_"; }
93
94/// Return the name of value profile node array variables:
95inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; }
96
97/// Return the name of the variable holding the strings (possibly compressed)
98/// of all function's PGO names.
99inline StringRef getInstrProfNamesVarName() {
100  return "__llvm_prf_nm";
101}
102
103/// Return the name of a covarage mapping variable (internal linkage)
104/// for each instrumented source module. Such variables are allocated
105/// in the __llvm_covmap section.
106inline StringRef getCoverageMappingVarName() {
107  return "__llvm_coverage_mapping";
108}
109
110/// Return the name of the internal variable recording the array
111/// of PGO name vars referenced by the coverage mapping. The owning
112/// functions of those names are not emitted by FE (e.g, unused inline
113/// functions.)
114inline StringRef getCoverageUnusedNamesVarName() {
115  return "__llvm_coverage_names";
116}
117
118/// Return the name of function that registers all the per-function control
119/// data at program startup time by calling __llvm_register_function. This
120/// function has internal linkage and is called by  __llvm_profile_init
121/// runtime method. This function is not generated for these platforms:
122/// Darwin, Linux, and FreeBSD.
123inline StringRef getInstrProfRegFuncsName() {
124  return "__llvm_profile_register_functions";
125}
126
127/// Return the name of the runtime interface that registers per-function control
128/// data for one instrumented function.
129inline StringRef getInstrProfRegFuncName() {
130  return "__llvm_profile_register_function";
131}
132
133/// Return the name of the runtime interface that registers the PGO name strings.
134inline StringRef getInstrProfNamesRegFuncName() {
135  return "__llvm_profile_register_names_function";
136}
137
138/// Return the name of the runtime initialization method that is generated by
139/// the compiler. The function calls __llvm_profile_register_functions and
140/// __llvm_profile_override_default_filename functions if needed. This function
141/// has internal linkage and invoked at startup time via init_array.
142inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }
143
144/// Return the name of the hook variable defined in profile runtime library.
145/// A reference to the variable causes the linker to link in the runtime
146/// initialization module (which defines the hook variable).
147inline StringRef getInstrProfRuntimeHookVarName() {
148  return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR);
149}
150
151/// Return the name of the compiler generated function that references the
152/// runtime hook variable. The function is a weak global.
153inline StringRef getInstrProfRuntimeHookVarUseFuncName() {
154  return "__llvm_profile_runtime_user";
155}
156
157inline StringRef getInstrProfCounterBiasVarName() {
158  return "__llvm_profile_counter_bias";
159}
160
161/// Return the marker used to separate PGO names during serialization.
162inline StringRef getInstrProfNameSeparator() { return "\01"; }
163
/// Return the modified name for function \c F suitable to be
/// used as the key for profile lookup. Variable \c InLTO indicates if this
/// is called in LTO optimization passes. \c Version defaults to
/// INSTR_PROF_INDEX_VERSION.
std::string getPGOFuncName(const Function &F, bool InLTO = false,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);

/// Return the modified name for a function suitable to be
/// used as the key for profile lookup. The function's original
/// name is \c RawFuncName and has linkage of type \c Linkage.
/// The function is defined in module \c FileName. \c Version defaults to
/// INSTR_PROF_INDEX_VERSION.
std::string getPGOFuncName(StringRef RawFuncName,
                           GlobalValue::LinkageTypes Linkage,
                           StringRef FileName,
                           uint64_t Version = INSTR_PROF_INDEX_VERSION);
178
/// Return the name of the global variable used to store a function
/// name in PGO instrumentation. \c FuncName is the name of the function
/// returned by the \c getPGOFuncName call, and \c Linkage is a linkage type
/// supplied by the caller.
std::string getPGOFuncNameVarName(StringRef FuncName,
                                  GlobalValue::LinkageTypes Linkage);
184
/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function returned
/// by \c getPGOFuncName call.
GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName);

/// Create and return the global variable for function name used in PGO
/// instrumentation. \c PGOFuncName is the name of the function
/// returned by \c getPGOFuncName call, \c M is the owning module,
/// and \c Linkage is the linkage of the instrumented function.
GlobalVariable *createPGOFuncNameVar(Module &M,
                                     GlobalValue::LinkageTypes Linkage,
                                     StringRef PGOFuncName);
197
/// Return the initializer, as a string, of the PGO name var \c NameVar.
StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar);
200
/// Given a PGO function name, remove the filename prefix and return
/// the original (static) function name. \c FileName defaults to
/// "<unknown>".
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
                                   StringRef FileName = "<unknown>");
205
/// Given a vector of strings (function PGO names) \c NameStrs, the
/// method generates a combined string \c Result that is ready to be
/// serialized.  The \c Result string is comprised of three fields:
/// The first field is the length of the uncompressed strings, and the
/// second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128.  If \c doCompression is false, the
/// third field is the uncompressed strings; otherwise it is the
/// compressed string. When the string compression is off, the
/// second field will have value zero.
Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
                                bool doCompression, std::string &Result);

/// Produce \c Result string with the same format described above. The input
/// is a vector of PGO function name variables that are referenced.
/// Note that the parameter order differs from the overload above and that
/// \c doCompression defaults to true here.
Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
                                std::string &Result, bool doCompression = true);
222
/// \c NameStrings is a string composed of one or more sub-strings encoded in
/// the format described above. The substrings are separated by 0 or more zero
/// bytes. This method decodes the string and populates the \c Symtab.
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
227
/// Check if INSTR_PROF_RAW_VERSION_VAR is defined in module \p M. This global
/// is only set in IR PGO compilation.
bool isIRPGOFlagSet(const Module *M);
231
/// Check if we can safely rename this Comdat function. Instances of the same
/// comdat function may have different control flows and thus cannot share the
/// same counter variable. \p CheckAddressTaken defaults to false.
bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken = false);
236
/// Kinds of value profiling. The enumerators and their values are produced by
/// expanding VALUE_PROF_KIND over the entries of InstrProfData.inc.
enum InstrProfValueKind : uint32_t {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
#include "llvm/ProfileData/InstrProfData.inc"
};
241
/// Get the value profile data for value site \p SiteIndx from \p InstrProfR
/// and annotate the instruction \p Inst with the value profile meta data.
/// Annotate up to \p MaxMDCount (default 3) number of records per value site.
void annotateValueSite(Module &M, Instruction &Inst,
                       const InstrProfRecord &InstrProfR,
                       InstrProfValueKind ValueKind, uint32_t SiteIndx,
                       uint32_t MaxMDCount = 3);

/// Same as the above interface but taking the value data as an ArrayRef
/// \p VDs together with \p Sum. \p MaxMDCount has no default here.
void annotateValueSite(Module &M, Instruction &Inst,
                       ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
                       InstrProfValueKind ValueKind, uint32_t MaxMDCount);
254
/// Extract the value profile data from \p Inst which is annotated with
/// value profile meta data. Return false if there is no value data annotated,
/// otherwise return true. Results are delivered through the out-parameters
/// \p ValueData, \p ActualNumValueData and \p TotalC.
bool getValueProfDataFromInst(const Instruction &Inst,
                              InstrProfValueKind ValueKind,
                              uint32_t MaxNumValueData,
                              InstrProfValueData ValueData[],
                              uint32_t &ActualNumValueData, uint64_t &TotalC);
263
264inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; }
265
/// Return the PGOFuncName meta data associated with function \p F.
MDNode *getPGOFuncNameMetadata(const Function &F);
268
/// Create the PGOFuncName meta data on \p F if \p PGOFuncName is different
/// from the function's raw name. This should only apply to internal linkage
/// functions declared by users.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
273
/// Check if we can use Comdat for the profile variables of function \p F in
/// module \p M. This will eliminate the duplicated profile variables for
/// Comdat functions.
bool needsComdatForCounter(const Function &F, const Module &M);
277
/// Return the std::error_category that make_error_code pairs with
/// instrprof_error values.
const std::error_category &instrprof_category();
279
/// Status codes used by the instrumentation-profile support in this header.
///
/// Enumerators are numbered consecutively starting at success == 0.
/// make_error_code forwards the underlying integer value, so inserting or
/// reordering enumerators changes the reported codes.
enum class instrprof_error {
  // The only non-error value; InstrProfError asserts it is never wrapped.
  success = 0,

  eof,
  unrecognized_format,
  bad_magic,
  bad_header,
  unsupported_version,
  unsupported_hash_type,
  too_large,
  truncated,
  malformed,
  unknown_function,

  // SoftInstrProfErrors keeps a dedicated counter for each of these four.
  hash_mismatch,
  count_mismatch,
  counter_overflow,
  value_site_count_mismatch,

  compress_failed,
  uncompress_failed,
  empty_raw_profile,
  zlib_unavailable,
};
301
302inline std::error_code make_error_code(instrprof_error E) {
303  return std::error_code(static_cast<int>(E), instrprof_category());
304}
305
306class InstrProfError : public ErrorInfo<InstrProfError> {
307public:
308  InstrProfError(instrprof_error Err) : Err(Err) {
309    assert(Err != instrprof_error::success && "Not an error");
310  }
311
312  std::string message() const override;
313
314  void log(raw_ostream &OS) const override { OS << message(); }
315
316  std::error_code convertToErrorCode() const override {
317    return make_error_code(Err);
318  }
319
320  instrprof_error get() const { return Err; }
321
322  /// Consume an Error and return the raw enum value contained within it. The
323  /// Error must either be a success value, or contain a single InstrProfError.
324  static instrprof_error take(Error E) {
325    auto Err = instrprof_error::success;
326    handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) {
327      assert(Err == instrprof_error::success && "Multiple errors encountered");
328      Err = IPE.get();
329    });
330    return Err;
331  }
332
333  static char ID;
334
335private:
336  instrprof_error Err;
337};
338
339class SoftInstrProfErrors {
340  /// Count the number of soft instrprof_errors encountered and keep track of
341  /// the first such error for reporting purposes.
342
343  /// The first soft error encountered.
344  instrprof_error FirstError = instrprof_error::success;
345
346  /// The number of hash mismatches.
347  unsigned NumHashMismatches = 0;
348
349  /// The number of count mismatches.
350  unsigned NumCountMismatches = 0;
351
352  /// The number of counter overflows.
353  unsigned NumCounterOverflows = 0;
354
355  /// The number of value site count mismatches.
356  unsigned NumValueSiteCountMismatches = 0;
357
358public:
359  SoftInstrProfErrors() = default;
360
361  ~SoftInstrProfErrors() {
362    assert(FirstError == instrprof_error::success &&
363           "Unchecked soft error encountered");
364  }
365
366  /// Track a soft error (\p IE) and increment its associated counter.
367  void addError(instrprof_error IE);
368
369  /// Get the number of hash mismatches.
370  unsigned getNumHashMismatches() const { return NumHashMismatches; }
371
372  /// Get the number of count mismatches.
373  unsigned getNumCountMismatches() const { return NumCountMismatches; }
374
375  /// Get the number of counter overflows.
376  unsigned getNumCounterOverflows() const { return NumCounterOverflows; }
377
378  /// Get the number of value site count mismatches.
379  unsigned getNumValueSiteCountMismatches() const {
380    return NumValueSiteCountMismatches;
381  }
382
383  /// Return the first encountered error and reset FirstError to a success
384  /// value.
385  Error takeError() {
386    if (FirstError == instrprof_error::success)
387      return Error::success();
388    auto E = make_error<InstrProfError>(FirstError);
389    FirstError = instrprof_error::success;
390    return E;
391  }
392};
393
namespace object {

// Forward declaration; InstrProfSymtab::create takes a SectionRef by
// reference.
class SectionRef;

} // end namespace object
399
namespace IndexedInstrProf {

/// Compute the hash of key \p K. InstrProfSymtab::addFuncName stores the
/// result as the key of its MD5 name map.
uint64_t ComputeHash(StringRef K);

} // end namespace IndexedInstrProf
405
406/// A symbol table used for function PGO name look-up with keys
407/// (such as pointers, md5hash values) to the function. A function's
408/// PGO name or name's md5hash are used in retrieving the profile
409/// data of the function. See \c getPGOFuncName() method for details
410/// on how PGO name is formed.
411class InstrProfSymtab {
412public:
413  using AddrHashMap = std::vector<std::pair<uint64_t, uint64_t>>;
414
415private:
416  StringRef Data;
417  uint64_t Address = 0;
418  // Unique name strings.
419  StringSet<> NameTab;
420  // A map from MD5 keys to function name strings.
421  std::vector<std::pair<uint64_t, StringRef>> MD5NameMap;
422  // A map from MD5 keys to function define. We only populate this map
423  // when build the Symtab from a Module.
424  std::vector<std::pair<uint64_t, Function *>> MD5FuncMap;
425  // A map from function runtime address to function name MD5 hash.
426  // This map is only populated and used by raw instr profile reader.
427  AddrHashMap AddrToMD5Map;
428  bool Sorted = false;
429
430  static StringRef getExternalSymbol() {
431    return "** External Symbol **";
432  }
433
434  // If the symtab is created by a series of calls to \c addFuncName, \c
435  // finalizeSymtab needs to be called before looking up function names.
436  // This is required because the underlying map is a vector (for space
437  // efficiency) which needs to be sorted.
438  inline void finalizeSymtab();
439
440public:
441  InstrProfSymtab() = default;
442
443  /// Create InstrProfSymtab from an object file section which
444  /// contains function PGO names. When section may contain raw
445  /// string data or string data in compressed form. This method
446  /// only initialize the symtab with reference to the data and
447  /// the section base address. The decompression will be delayed
448  /// until before it is used. See also \c create(StringRef) method.
449  Error create(object::SectionRef &Section);
450
451  /// This interface is used by reader of CoverageMapping test
452  /// format.
453  inline Error create(StringRef D, uint64_t BaseAddr);
454
455  /// \c NameStrings is a string composed of one of more sub-strings
456  ///  encoded in the format described in \c collectPGOFuncNameStrings.
457  /// This method is a wrapper to \c readPGOFuncNameStrings method.
458  inline Error create(StringRef NameStrings);
459
460  /// A wrapper interface to populate the PGO symtab with functions
461  /// decls from module \c M. This interface is used by transformation
462  /// passes such as indirect function call promotion. Variable \c InLTO
463  /// indicates if this is called from LTO optimization passes.
464  Error create(Module &M, bool InLTO = false);
465
466  /// Create InstrProfSymtab from a set of names iteratable from
467  /// \p IterRange. This interface is used by IndexedProfReader.
468  template <typename NameIterRange> Error create(const NameIterRange &IterRange);
469
470  /// Update the symtab by adding \p FuncName to the table. This interface
471  /// is used by the raw and text profile readers.
472  Error addFuncName(StringRef FuncName) {
473    if (FuncName.empty())
474      return make_error<InstrProfError>(instrprof_error::malformed);
475    auto Ins = NameTab.insert(FuncName);
476    if (Ins.second) {
477      MD5NameMap.push_back(std::make_pair(
478          IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey()));
479      Sorted = false;
480    }
481    return Error::success();
482  }
483
484  /// Map a function address to its name's MD5 hash. This interface
485  /// is only used by the raw profiler reader.
486  void mapAddress(uint64_t Addr, uint64_t MD5Val) {
487    AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
488  }
489
490  /// Return a function's hash, or 0, if the function isn't in this SymTab.
491  uint64_t getFunctionHashFromAddress(uint64_t Address);
492
493  /// Return function's PGO name from the function name's symbol
494  /// address in the object file. If an error occurs, return
495  /// an empty string.
496  StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
497
498  /// Return function's PGO name from the name's md5 hash value.
499  /// If not found, return an empty string.
500  inline StringRef getFuncName(uint64_t FuncMD5Hash);
501
502  /// Just like getFuncName, except that it will return a non-empty StringRef
503  /// if the function is external to this symbol table. All such cases
504  /// will be represented using the same StringRef value.
505  inline StringRef getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash);
506
507  /// True if Symbol is the value used to represent external symbols.
508  static bool isExternalSymbol(const StringRef &Symbol) {
509    return Symbol == InstrProfSymtab::getExternalSymbol();
510  }
511
512  /// Return function from the name's md5 hash. Return nullptr if not found.
513  inline Function *getFunction(uint64_t FuncMD5Hash);
514
515  /// Return the function's original assembly name by stripping off
516  /// the prefix attached (to symbols with priviate linkage). For
517  /// global functions, it returns the same string as getFuncName.
518  inline StringRef getOrigFuncName(uint64_t FuncMD5Hash);
519
520  /// Return the name section data.
521  inline StringRef getNameData() const { return Data; }
522};
523
524Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
525  Data = D;
526  Address = BaseAddr;
527  return Error::success();
528}
529
530Error InstrProfSymtab::create(StringRef NameStrings) {
531  return readPGOFuncNameStrings(NameStrings, *this);
532}
533
534template <typename NameIterRange>
535Error InstrProfSymtab::create(const NameIterRange &IterRange) {
536  for (auto Name : IterRange)
537    if (Error E = addFuncName(Name))
538      return E;
539
540  finalizeSymtab();
541  return Error::success();
542}
543
544void InstrProfSymtab::finalizeSymtab() {
545  if (Sorted)
546    return;
547  llvm::sort(MD5NameMap, less_first());
548  llvm::sort(MD5FuncMap, less_first());
549  llvm::sort(AddrToMD5Map, less_first());
550  AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
551                     AddrToMD5Map.end());
552  Sorted = true;
553}
554
555StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) {
556  StringRef ret = getFuncName(FuncMD5Hash);
557  if (ret.empty())
558    return InstrProfSymtab::getExternalSymbol();
559  return ret;
560}
561
562StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
563  finalizeSymtab();
564  auto Result =
565      std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash,
566                       [](const std::pair<uint64_t, StringRef> &LHS,
567                          uint64_t RHS) { return LHS.first < RHS; });
568  if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash)
569    return Result->second;
570  return StringRef();
571}
572
573Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
574  finalizeSymtab();
575  auto Result =
576      std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash,
577                       [](const std::pair<uint64_t, Function*> &LHS,
578                          uint64_t RHS) { return LHS.first < RHS; });
579  if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash)
580    return Result->second;
581  return nullptr;
582}
583
584// See also getPGOFuncName implementation. These two need to be
585// matched.
586StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
587  StringRef PGOName = getFuncName(FuncMD5Hash);
588  size_t S = PGOName.find_first_of(':');
589  if (S == StringRef::npos)
590    return PGOName;
591  return PGOName.drop_front(S + 1);
592}
593
594// To store the sums of profile count values, or the percentage of
595// the sums of the total count values.
596struct CountSumOrPercent {
597  uint64_t NumEntries;
598  double CountSum;
599  double ValueCounts[IPVK_Last - IPVK_First + 1];
600  CountSumOrPercent() : NumEntries(0), CountSum(0.0f), ValueCounts() {}
601  void reset() {
602    NumEntries = 0;
603    CountSum = 0.0f;
604    for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++)
605      ValueCounts[I] = 0.0f;
606  }
607};
608
609// Function level or program level overlap information.
610struct OverlapStats {
611  enum OverlapStatsLevel { ProgramLevel, FunctionLevel };
612  // Sum of the total count values for the base profile.
613  CountSumOrPercent Base;
614  // Sum of the total count values for the test profile.
615  CountSumOrPercent Test;
616  // Overlap lap score. Should be in range of [0.0f to 1.0f].
617  CountSumOrPercent Overlap;
618  CountSumOrPercent Mismatch;
619  CountSumOrPercent Unique;
620  OverlapStatsLevel Level;
621  const std::string *BaseFilename;
622  const std::string *TestFilename;
623  StringRef FuncName;
624  uint64_t FuncHash;
625  bool Valid;
626
627  OverlapStats(OverlapStatsLevel L = ProgramLevel)
628      : Level(L), BaseFilename(nullptr), TestFilename(nullptr), FuncHash(0),
629        Valid(false) {}
630
631  void dump(raw_fd_ostream &OS) const;
632
633  void setFuncInfo(StringRef Name, uint64_t Hash) {
634    FuncName = Name;
635    FuncHash = Hash;
636  }
637
638  Error accumulateCounts(const std::string &BaseFilename,
639                         const std::string &TestFilename, bool IsCS);
640  void addOneMismatch(const CountSumOrPercent &MismatchFunc);
641  void addOneUnique(const CountSumOrPercent &UniqueFunc);
642
643  static inline double score(uint64_t Val1, uint64_t Val2, double Sum1,
644                             double Sum2) {
645    if (Sum1 < 1.0f || Sum2 < 1.0f)
646      return 0.0f;
647    return std::min(Val1 / Sum1, Val2 / Sum2);
648  }
649};
650
// This is used to filter the functions whose overlap information
// is to be output.
struct OverlapFuncFilters {
  // NOTE(review): presumably a count threshold a function has to reach to be
  // reported; confirm against the users of this struct.
  uint64_t ValueCutoff;
  // NOTE(review): presumably text that function names are matched against;
  // confirm against the users of this struct.
  const std::string NameFilter;
};
657
658struct InstrProfValueSiteRecord {
659  /// Value profiling data pairs at a given value site.
660  std::list<InstrProfValueData> ValueData;
661
662  InstrProfValueSiteRecord() { ValueData.clear(); }
663  template <class InputIterator>
664  InstrProfValueSiteRecord(InputIterator F, InputIterator L)
665      : ValueData(F, L) {}
666
667  /// Sort ValueData ascending by Value
668  void sortByTargetValues() {
669    ValueData.sort(
670        [](const InstrProfValueData &left, const InstrProfValueData &right) {
671          return left.Value < right.Value;
672        });
673  }
674  /// Sort ValueData Descending by Count
675  inline void sortByCount();
676
677  /// Merge data from another InstrProfValueSiteRecord
678  /// Optionally scale merged counts by \p Weight.
679  void merge(InstrProfValueSiteRecord &Input, uint64_t Weight,
680             function_ref<void(instrprof_error)> Warn);
681  /// Scale up value profile data counts.
682  void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
683
684  /// Compute the overlap b/w this record and Input record.
685  void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind,
686               OverlapStats &Overlap, OverlapStats &FuncLevelOverlap);
687};
688
689/// Profiling information for a single function.
690struct InstrProfRecord {
691  std::vector<uint64_t> Counts;
692
693  InstrProfRecord() = default;
694  InstrProfRecord(std::vector<uint64_t> Counts) : Counts(std::move(Counts)) {}
695  InstrProfRecord(InstrProfRecord &&) = default;
696  InstrProfRecord(const InstrProfRecord &RHS)
697      : Counts(RHS.Counts),
698        ValueData(RHS.ValueData
699                      ? std::make_unique<ValueProfData>(*RHS.ValueData)
700                      : nullptr) {}
701  InstrProfRecord &operator=(InstrProfRecord &&) = default;
702  InstrProfRecord &operator=(const InstrProfRecord &RHS) {
703    Counts = RHS.Counts;
704    if (!RHS.ValueData) {
705      ValueData = nullptr;
706      return *this;
707    }
708    if (!ValueData)
709      ValueData = std::make_unique<ValueProfData>(*RHS.ValueData);
710    else
711      *ValueData = *RHS.ValueData;
712    return *this;
713  }
714
715  /// Return the number of value profile kinds with non-zero number
716  /// of profile sites.
717  inline uint32_t getNumValueKinds() const;
718  /// Return the number of instrumented sites for ValueKind.
719  inline uint32_t getNumValueSites(uint32_t ValueKind) const;
720
721  /// Return the total number of ValueData for ValueKind.
722  inline uint32_t getNumValueData(uint32_t ValueKind) const;
723
724  /// Return the number of value data collected for ValueKind at profiling
725  /// site: Site.
726  inline uint32_t getNumValueDataForSite(uint32_t ValueKind,
727                                         uint32_t Site) const;
728
729  /// Return the array of profiled values at \p Site. If \p TotalC
730  /// is not null, the total count of all target values at this site
731  /// will be stored in \c *TotalC.
732  inline std::unique_ptr<InstrProfValueData[]>
733  getValueForSite(uint32_t ValueKind, uint32_t Site,
734                  uint64_t *TotalC = nullptr) const;
735
736  /// Get the target value/counts of kind \p ValueKind collected at site
737  /// \p Site and store the result in array \p Dest. Return the total
738  /// counts of all target values at this site.
739  inline uint64_t getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind,
740                                  uint32_t Site) const;
741
742  /// Reserve space for NumValueSites sites.
743  inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites);
744
745  /// Add ValueData for ValueKind at value Site.
746  void addValueData(uint32_t ValueKind, uint32_t Site,
747                    InstrProfValueData *VData, uint32_t N,
748                    InstrProfSymtab *SymTab);
749
750  /// Merge the counts in \p Other into this one.
751  /// Optionally scale merged counts by \p Weight.
752  void merge(InstrProfRecord &Other, uint64_t Weight,
753             function_ref<void(instrprof_error)> Warn);
754
755  /// Scale up profile counts (including value profile data) by
756  /// \p Weight.
757  void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn);
758
759  /// Sort value profile data (per site) by count.
760  void sortValueData() {
761    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
762      for (auto &SR : getValueSitesForKind(Kind))
763        SR.sortByCount();
764  }
765
766  /// Clear value data entries and edge counters.
767  void Clear() {
768    Counts.clear();
769    clearValueData();
770  }
771
772  /// Clear value data entries
773  void clearValueData() { ValueData = nullptr; }
774
775  /// Compute the sums of all counts and store in Sum.
776  void accumulateCounts(CountSumOrPercent &Sum) const;
777
778  /// Compute the overlap b/w this IntrprofRecord and Other.
779  void overlap(InstrProfRecord &Other, OverlapStats &Overlap,
780               OverlapStats &FuncLevelOverlap, uint64_t ValueCutoff);
781
782  /// Compute the overlap of value profile counts.
783  void overlapValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
784                            OverlapStats &Overlap,
785                            OverlapStats &FuncLevelOverlap);
786
787private:
788  struct ValueProfData {
789    std::vector<InstrProfValueSiteRecord> IndirectCallSites;
790    std::vector<InstrProfValueSiteRecord> MemOPSizes;
791  };
792  std::unique_ptr<ValueProfData> ValueData;
793
794  MutableArrayRef<InstrProfValueSiteRecord>
795  getValueSitesForKind(uint32_t ValueKind) {
796    // Cast to /add/ const (should be an implicit_cast, ideally, if that's ever
797    // implemented in LLVM) to call the const overload of this function, then
798    // cast away the constness from the result.
799    auto AR = const_cast<const InstrProfRecord *>(this)->getValueSitesForKind(
800        ValueKind);
801    return makeMutableArrayRef(
802        const_cast<InstrProfValueSiteRecord *>(AR.data()), AR.size());
803  }
804  ArrayRef<InstrProfValueSiteRecord>
805  getValueSitesForKind(uint32_t ValueKind) const {
806    if (!ValueData)
807      return None;
808    switch (ValueKind) {
809    case IPVK_IndirectCallTarget:
810      return ValueData->IndirectCallSites;
811    case IPVK_MemOPSize:
812      return ValueData->MemOPSizes;
813    default:
814      llvm_unreachable("Unknown value kind!");
815    }
816  }
817
818  std::vector<InstrProfValueSiteRecord> &
819  getOrCreateValueSitesForKind(uint32_t ValueKind) {
820    if (!ValueData)
821      ValueData = std::make_unique<ValueProfData>();
822    switch (ValueKind) {
823    case IPVK_IndirectCallTarget:
824      return ValueData->IndirectCallSites;
825    case IPVK_MemOPSize:
826      return ValueData->MemOPSizes;
827    default:
828      llvm_unreachable("Unknown value kind!");
829    }
830  }
831
  // Map indirect call target name hash to name string.
  // NOTE(review): \p SymTab is presumably the symbol table consulted for the
  // mapping, and \p ValueKind selects whether remapping applies -- confirm
  // against the definition.
  uint64_t remapValue(uint64_t Value, uint32_t ValueKind,
                      InstrProfSymtab *SymTab);
835
836  // Merge Value Profile data from Src record to this record for ValueKind.
837  // Scale merged value counts by \p Weight.
838  void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src,
839                          uint64_t Weight,
840                          function_ref<void(instrprof_error)> Warn);
841
  // Scale up value profile data count.
  // NOTE(review): presumably applies to the sites of kind \p ValueKind, with
  // \p Weight as the scale factor -- confirm against the definition.
  void scaleValueProfData(uint32_t ValueKind, uint64_t Weight,
                          function_ref<void(instrprof_error)> Warn);
845};
846
847struct NamedInstrProfRecord : InstrProfRecord {
848  StringRef Name;
849  uint64_t Hash;
850
851  // We reserve this bit as the flag for context sensitive profile record.
852  static const int CS_FLAG_IN_FUNC_HASH = 60;
853
854  NamedInstrProfRecord() = default;
855  NamedInstrProfRecord(StringRef Name, uint64_t Hash,
856                       std::vector<uint64_t> Counts)
857      : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {}
858
859  static bool hasCSFlagInHash(uint64_t FuncHash) {
860    return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1);
861  }
862  static void setCSFlagInHash(uint64_t &FuncHash) {
863    FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH);
864  }
865};
866
867uint32_t InstrProfRecord::getNumValueKinds() const {
868  uint32_t NumValueKinds = 0;
869  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
870    NumValueKinds += !(getValueSitesForKind(Kind).empty());
871  return NumValueKinds;
872}
873
874uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const {
875  uint32_t N = 0;
876  for (auto &SR : getValueSitesForKind(ValueKind))
877    N += SR.ValueData.size();
878  return N;
879}
880
881uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const {
882  return getValueSitesForKind(ValueKind).size();
883}
884
885uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind,
886                                                 uint32_t Site) const {
887  return getValueSitesForKind(ValueKind)[Site].ValueData.size();
888}
889
890std::unique_ptr<InstrProfValueData[]>
891InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site,
892                                 uint64_t *TotalC) const {
893  uint64_t Dummy = 0;
894  uint64_t &TotalCount = (TotalC == nullptr ? Dummy : *TotalC);
895  uint32_t N = getNumValueDataForSite(ValueKind, Site);
896  if (N == 0) {
897    TotalCount = 0;
898    return std::unique_ptr<InstrProfValueData[]>(nullptr);
899  }
900
901  auto VD = std::make_unique<InstrProfValueData[]>(N);
902  TotalCount = getValueForSite(VD.get(), ValueKind, Site);
903
904  return VD;
905}
906
907uint64_t InstrProfRecord::getValueForSite(InstrProfValueData Dest[],
908                                          uint32_t ValueKind,
909                                          uint32_t Site) const {
910  uint32_t I = 0;
911  uint64_t TotalCount = 0;
912  for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) {
913    Dest[I].Value = V.Value;
914    Dest[I].Count = V.Count;
915    TotalCount = SaturatingAdd(TotalCount, V.Count);
916    I++;
917  }
918  return TotalCount;
919}
920
921void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) {
922  if (!NumValueSites)
923    return;
924  getOrCreateValueSitesForKind(ValueKind).reserve(NumValueSites);
925}
926
927inline support::endianness getHostEndianness() {
928  return sys::IsLittleEndianHost ? support::little : support::big;
929}
930
931// Include definitions for value profile data
932#define INSTR_PROF_VALUE_PROF_DATA
933#include "llvm/ProfileData/InstrProfData.inc"
934
935void InstrProfValueSiteRecord::sortByCount() {
936  ValueData.sort(
937      [](const InstrProfValueData &left, const InstrProfValueData &right) {
938        return left.Count > right.Count;
939      });
940  // Now truncate
941  size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
942  if (ValueData.size() > max_s)
943    ValueData.resize(max_s);
944}
945
946namespace IndexedInstrProf {
947
// Identifies a hash algorithm accepted by ComputeHash(). MD5 is the only
// algorithm defined; Last aliases the final enumerator.
enum class HashT : uint32_t {
  MD5,
  Last = MD5
};
952
953inline uint64_t ComputeHash(HashT Type, StringRef K) {
954  switch (Type) {
955  case HashT::MD5:
956    return MD5Hash(K);
957  }
958  llvm_unreachable("Unhandled hash type");
959}
960
// Magic number for the indexed format. Read from the least-significant byte
// upwards, its bytes are ff 6c 70 72 6f 66 69 81, i.e. the string quoted
// below.
const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
962
// Versions of the indexed profile format.
enum ProfVersion {
  // Version 1 is the first version. In this version, the value of
  // a key/value pair can only include profile data of a single function.
  // Due to this restriction, the number of block counters for a given
  // function is not recorded but derived from the length of the value.
  Version1 = 1,
  // The version 2 format supports recording profile data of multiple
  // functions which share the same key in one value field. To support this,
  // the number of block counters is recorded as a uint64_t field right after
  // the function structural hash.
  Version2 = 2,
  // Version 3 supports value profile data. The value profile data is expected
  // to follow the block counter profile data.
  Version3 = 3,
  // In this version, profile summary data \c IndexedInstrProf::Summary is
  // stored after the profile header.
  Version4 = 4,
  // In this version, the frontend PGO stable hash algorithm defaults to V2.
  Version5 = 5,
  // In this version, the frontend PGO stable hash algorithm got fixed and
  // may produce hashes different from Version5.
  Version6 = 6,
  // The current version is whatever INSTR_PROF_INDEX_VERSION expands to.
  CurrentVersion = INSTR_PROF_INDEX_VERSION
};
// Numeric value of ProfVersion::CurrentVersion.
const uint64_t Version = ProfVersion::CurrentVersion;

// Hash algorithm used by the single-argument ComputeHash() overload.
const HashT HashType = HashT::MD5;
991
992inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
993
// This structure defines the file header of the LLVM profile
// data file in indexed-format. Every field is a uint64_t.
struct Header {
  uint64_t Magic;      // NOTE(review): presumably IndexedInstrProf::Magic.
  uint64_t Version;    // NOTE(review): presumably a ProfVersion value.
  uint64_t Unused; // Becomes unused since version 4
  uint64_t HashType;   // NOTE(review): presumably a HashT value.
  uint64_t HashOffset; // NOTE(review): meaning not visible here -- confirm.
};
1003
// Profile summary data recorded in the profile data file in indexed
// format. It is introduced in version 4. The summary data follows
// right after the profile file header.
//
// Layout implied by the accessors below: the two counters declared in this
// struct come first, followed by NumSummaryFields uint64_t values indexed by
// SummaryFieldKind, followed by the cutoff Entry records. The object must
// therefore live in storage large enough for all of that (see getSize();
// presumably obtained through allocSummary()).
struct Summary {
  struct Entry {
    uint64_t Cutoff; ///< The required percentile of total execution count.
    uint64_t
        MinBlockCount;  ///< The minimum execution count for this percentile.
    uint64_t NumBlocks; ///< Number of blocks >= the minimum execution count.
  };
  // The field kind enumerator to assigned value mapping should remain
  // unchanged when a new kind is added or an old kind gets deleted in
  // the future.
  enum SummaryFieldKind {
    /// The total number of functions instrumented.
    TotalNumFunctions = 0,
    /// Total number of instrumented blocks/edges.
    TotalNumBlocks = 1,
    /// The maximal execution count among all functions.
    /// This field does not exist for profile data from IR based
    /// instrumentation.
    MaxFunctionCount = 2,
    /// Max block count of the program.
    MaxBlockCount = 3,
    /// Max internal block count of the program (excluding entry blocks).
    MaxInternalBlockCount = 4,
    /// The sum of all instrumented block counts.
    TotalBlockCount = 5,
    NumKinds = TotalBlockCount + 1
  };

  // The number of summary fields following the summary header.
  uint64_t NumSummaryFields;
  // The number of Cutoff Entries (Summary::Entry) following summary fields.
  uint64_t NumCutoffEntries;

  Summary() = delete;
  // Zero-fill \p Size bytes starting at this object. \p Size may exceed
  // sizeof(Summary) so that the trailing fields and entries are cleared too;
  // the caller must have provided at least that much storage.
  Summary(uint32_t Size) { memset(this, 0, Size); }

  // Release the storage through the global ::operator delete, matching the
  // raw ::operator new used by allocSummary().
  void operator delete(void *ptr) { ::operator delete(ptr); }

  // Number of bytes needed for a Summary with \p NumSumFields summary fields
  // and \p NumCutoffEntries cutoff entries, including this header.
  static uint32_t getSize(uint32_t NumSumFields, uint32_t NumCutoffEntries) {
    return sizeof(Summary) + NumCutoffEntries * sizeof(Entry) +
           NumSumFields * sizeof(uint64_t);
  }

  // Pointer to the first summary field, located immediately after this
  // object.
  const uint64_t *getSummaryDataBase() const {
    return reinterpret_cast<const uint64_t *>(this + 1);
  }

  // Mutable counterpart of the accessor above.
  uint64_t *getSummaryDataBase() {
    return reinterpret_cast<uint64_t *>(this + 1);
  }

  // Pointer to the first cutoff entry, located right after the
  // NumSummaryFields summary fields.
  const Entry *getCutoffEntryBase() const {
    return reinterpret_cast<const Entry *>(
        &getSummaryDataBase()[NumSummaryFields]);
  }

  // Mutable counterpart of the accessor above.
  Entry *getCutoffEntryBase() {
    return reinterpret_cast<Entry *>(&getSummaryDataBase()[NumSummaryFields]);
  }

  // Read the summary field \p K. No bounds check is performed.
  uint64_t get(SummaryFieldKind K) const {
    return getSummaryDataBase()[K];
  }

  // Store \p V into the summary field \p K. No bounds check is performed.
  void set(SummaryFieldKind K, uint64_t V) {
    getSummaryDataBase()[K] = V;
  }

  // Cutoff entry at index \p I. No bounds check is performed.
  const Entry &getEntry(uint32_t I) const { return getCutoffEntryBase()[I]; }

  // Fill cutoff entry \p I from \p E: Cutoff, MinCount and NumCounts are
  // copied into Cutoff, MinBlockCount and NumBlocks respectively.
  void setEntry(uint32_t I, const ProfileSummaryEntry &E) {
    Entry &ER = getCutoffEntryBase()[I];
    ER.Cutoff = E.Cutoff;
    ER.MinBlockCount = E.MinCount;
    ER.NumBlocks = E.NumCounts;
  }
};
1084
1085inline std::unique_ptr<Summary> allocSummary(uint32_t TotalSize) {
1086  return std::unique_ptr<Summary>(new (::operator new(TotalSize))
1087                                      Summary(TotalSize));
1088}
1089
1090} // end namespace IndexedInstrProf
1091
1092namespace RawInstrProf {
1093
// Version of the raw profile format, taken from INSTR_PROF_RAW_VERSION.
// History:
// Version 1: First version
// Version 2: Added value profile data section. Per-function control data
// struct has more fields to describe value profile information.
// Version 3: Compressed name section support. Function PGO name reference
// from control data struct is changed from raw pointer to Name's MD5 value.
// Version 4: ValueDataBegin and ValueDataSizes fields are removed from the
// raw header.
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
// sensitive records.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
1104
// Magic number of the raw profile format, selected by the integer type
// \p IntPtrT. Only the declaration of the primary template appears here; the
// two specializations below supply the values.
template <class IntPtrT> inline uint64_t getMagic();
// IntPtrT == uint64_t: INSTR_PROF_RAW_MAGIC_64.
template <> inline uint64_t getMagic<uint64_t>() {
  return INSTR_PROF_RAW_MAGIC_64;
}

// IntPtrT == uint32_t: INSTR_PROF_RAW_MAGIC_32.
template <> inline uint64_t getMagic<uint32_t>() {
  return INSTR_PROF_RAW_MAGIC_32;
}
1113
// Per-function profile data header/control structure.
// The definition should match the structure defined in
// compiler-rt/lib/profile/InstrProfiling.h.
// It should also match the synthesized type in
// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
//
// The members are generated from InstrProfData.inc: with the macro below,
// every INSTR_PROF_DATA(Type, LLVMType, Name, Init) entry expands to the
// member declaration `Type Name;`. The struct is aligned to 8 bytes.
template <class IntPtrT> struct alignas(8) ProfileData {
  #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name;
  #include "llvm/ProfileData/InstrProfData.inc"
};
1123
// File header structure of the LLVM profile data in raw format.
// The definition should match the header referenced in
// compiler-rt/lib/profile/InstrProfilingFile.c  and
// InstrProfilingBuffer.c.
//
// The members are generated from InstrProfData.inc: with the macro below,
// every INSTR_PROF_RAW_HEADER(Type, Name, Init) entry expands to the const
// member declaration `const Type Name;`.
struct Header {
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};
1132
1133} // end namespace RawInstrProf
1134
// Parse MemOP Size range option.
// NOTE(review): presumably \p Str is the option text and the parsed bounds
// are written to \p RangeStart and \p RangeLast -- confirm against the
// definition.
void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
                                 int64_t &RangeLast);

// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
// NOTE(review): \p IsCS presumably marks a context-sensitive profile --
// confirm against the definition.
void createIRLevelProfileFlagVar(Module &M, bool IsCS);

// Create the variable for the profile file name.
// NOTE(review): presumably created in \p M and initialized from
// \p InstrProfileOutput -- confirm against the definition.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);

// Whether to compress function names in profile records, and filenames in
// code coverage mappings. Used by the Instrumentation library and unit tests.
// Declared here only; the definition lives elsewhere.
extern cl::opt<bool> DoInstrProfNameCompression;
1149
1150} // end namespace llvm
1151#endif // LLVM_PROFILEDATA_INSTRPROF_H
1152