195267Ssheldonh//===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
287866Ssheldonh//
387866Ssheldonh// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
487866Ssheldonh// See https://llvm.org/LICENSE.txt for license information.
587866Ssheldonh// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
687866Ssheldonh//
787866Ssheldonh//===----------------------------------------------------------------------===//
887866Ssheldonh//
987866Ssheldonh// llvm-profdata merges .profdata files.
1087866Ssheldonh//
1187866Ssheldonh//===----------------------------------------------------------------------===//
1287866Ssheldonh
1387866Ssheldonh#include "llvm/ADT/SmallSet.h"
1487866Ssheldonh#include "llvm/ADT/SmallVector.h"
1587866Ssheldonh#include "llvm/ADT/StringRef.h"
1687866Ssheldonh#include "llvm/IR/LLVMContext.h"
1787866Ssheldonh#include "llvm/Object/Binary.h"
1887866Ssheldonh#include "llvm/ProfileData/InstrProfCorrelator.h"
1987866Ssheldonh#include "llvm/ProfileData/InstrProfReader.h"
2087866Ssheldonh#include "llvm/ProfileData/InstrProfWriter.h"
2187866Ssheldonh#include "llvm/ProfileData/MemProf.h"
2287866Ssheldonh#include "llvm/ProfileData/ProfileCommon.h"
2387866Ssheldonh#include "llvm/ProfileData/RawMemProfReader.h"
2487866Ssheldonh#include "llvm/ProfileData/SampleProfReader.h"
2588282Ssheldonh#include "llvm/ProfileData/SampleProfWriter.h"
2688282Ssheldonh#include "llvm/Support/BalancedPartitioning.h"
2787866Ssheldonh#include "llvm/Support/CommandLine.h"
2887866Ssheldonh#include "llvm/Support/Discriminator.h"
2995267Ssheldonh#include "llvm/Support/Errc.h"
3095267Ssheldonh#include "llvm/Support/FileSystem.h"
3187866Ssheldonh#include "llvm/Support/Format.h"
3287866Ssheldonh#include "llvm/Support/FormattedStream.h"
3387866Ssheldonh#include "llvm/Support/LLVMDriver.h"
3487866Ssheldonh#include "llvm/Support/MD5.h"
3587866Ssheldonh#include "llvm/Support/MemoryBuffer.h"
3687866Ssheldonh#include "llvm/Support/Path.h"
3787866Ssheldonh#include "llvm/Support/Regex.h"
3887866Ssheldonh#include "llvm/Support/ThreadPool.h"
3987866Ssheldonh#include "llvm/Support/Threading.h"
4087866Ssheldonh#include "llvm/Support/VirtualFileSystem.h"
4187866Ssheldonh#include "llvm/Support/WithColor.h"
4287866Ssheldonh#include "llvm/Support/raw_ostream.h"
4387866Ssheldonh#include <algorithm>
4487866Ssheldonh#include <cmath>
4587866Ssheldonh#include <optional>
4687866Ssheldonh#include <queue>
4787866Ssheldonh
48using namespace llvm;
49using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
50
// The four llvm-profdata subcommands defined in this part of the file. Options
// declared below attach themselves to one or more of them through cl::sub().
// https://llvm.org/docs/CommandGuide/llvm-profdata.html has documentation
// on each subcommand.
cl::SubCommand ShowSubcommand(
    "show",
    "Takes a profile data file and displays the profiles. See detailed "
    "documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
cl::SubCommand OrderSubcommand(
    "order",
    "Reads temporal profiling traces from a profile and outputs a function "
    "order that reduces the number of page faults for those traces. See "
    "detailed documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
cl::SubCommand OverlapSubcommand(
    "overlap",
    "Computes and displays the overlap between two profiles. See detailed "
    "documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
cl::SubCommand MergeSubcommand(
    "merge",
    "Takes several profiles and merge them together. See detailed "
    "documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
74
namespace {
/// Kind of profile being processed. The option tables in this file describe
/// the values as: instrumentation profile, sample profile, and MemProf memory
/// access profile.
enum ProfileKinds { instr, sample, memory };
/// Policy for invalid input profiles, selected with --failure-mode: only
/// print warnings, fail if any profile is invalid, or fail only if all
/// profiles are invalid.
enum FailureMode { warnOnly, failIfAnyAreInvalid, failIfAllAreInvalid };
} // namespace
79
/// Encodings for the output profile. The merge option table in this file
/// exposes PF_Binary, PF_Ext_Binary, PF_Text and PF_GCC as command-line
/// values.
// NOTE(review): PF_None and PF_Compact_Binary are not offered by the option
// table visible here; confirm how the rest of the file uses them.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};

/// Output formats selectable with --show-format for the show subcommand.
enum class ShowFormat { Text, Json, Yaml };
90
91// Common options.
92cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
93                                    cl::init("-"), cl::desc("Output file"),
94                                    cl::sub(ShowSubcommand),
95                                    cl::sub(OrderSubcommand),
96                                    cl::sub(OverlapSubcommand),
97                                    cl::sub(MergeSubcommand));
98// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
99// will be used. llvm::cl::alias::done() method asserts this condition.
100cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
101                          cl::aliasopt(OutputFilename));
102
103// Options common to at least two commands.
104cl::opt<ProfileKinds> ProfileKind(
105    cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
106    cl::sub(OverlapSubcommand), cl::init(instr),
107    cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
108               clEnumVal(sample, "Sample profile")));
109cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"),
110                              cl::sub(ShowSubcommand),
111                              cl::sub(OrderSubcommand));
112cl::opt<unsigned> MaxDbgCorrelationWarnings(
113    "max-debug-info-correlation-warnings",
114    cl::desc("The maximum number of warnings to emit when correlating "
115             "profile from debug info (0 = no limit)"),
116    cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(5));
117cl::opt<std::string> ProfiledBinary(
118    "profiled-binary", cl::init(""),
119    cl::desc("Path to binary from which the profile was collected."),
120    cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
121cl::opt<std::string> DebugInfoFilename(
122    "debug-info", cl::init(""),
123    cl::desc(
124        "For show, read and extract profile metadata from debug info and show "
125        "the functions it found. For merge, use the provided debug info to "
126        "correlate the raw profile."),
127    cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
128cl::opt<std::string>
129    BinaryFilename("binary-file", cl::init(""),
130                   cl::desc("For merge, use the provided unstripped bianry to "
131                            "correlate the raw profile."),
132                   cl::sub(MergeSubcommand));
133cl::opt<std::string> FuncNameFilter(
134    "function",
135    cl::desc("Only functions matching the filter are shown in the output. For "
136             "overlapping CSSPGO, this takes a function name with calling "
137             "context."),
138    cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
139    cl::sub(MergeSubcommand));
140
141// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
142// factor out the common cl::sub in cl::opt constructor for subcommand-specific
143// options.
144
145// Options specific to merge subcommand.
146cl::list<std::string> InputFilenames(cl::Positional, cl::sub(MergeSubcommand),
147                                     cl::desc("<filename...>"));
148cl::list<std::string> WeightedInputFilenames("weighted-input",
149                                             cl::sub(MergeSubcommand),
150                                             cl::desc("<weight>,<filename>"));
151cl::opt<ProfileFormat> OutputFormat(
152    cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
153    cl::init(PF_Ext_Binary),
154    cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
155               clEnumValN(PF_Ext_Binary, "extbinary",
156                          "Extensible binary encoding "
157                          "(default)"),
158               clEnumValN(PF_Text, "text", "Text encoding"),
159               clEnumValN(PF_GCC, "gcc",
160                          "GCC encoding (only meaningful for -sample)")));
161cl::opt<std::string>
162    InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand),
163                       cl::desc("Path to file containing newline-separated "
164                                "[<weight>,]<filename> entries"));
165cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
166                              cl::aliasopt(InputFilenamesFile));
167cl::opt<bool> DumpInputFileList(
168    "dump-input-file-list", cl::init(false), cl::Hidden,
169    cl::sub(MergeSubcommand),
170    cl::desc("Dump the list of input files and their weights, then exit"));
171cl::opt<std::string> RemappingFile("remapping-file", cl::value_desc("file"),
172                                   cl::sub(MergeSubcommand),
173                                   cl::desc("Symbol remapping file"));
174cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
175                         cl::aliasopt(RemappingFile));
176cl::opt<bool>
177    UseMD5("use-md5", cl::init(false), cl::Hidden,
178           cl::desc("Choose to use MD5 to represent string in name table (only "
179                    "meaningful for -extbinary)"),
180           cl::sub(MergeSubcommand));
181cl::opt<bool> CompressAllSections(
182    "compress-all-sections", cl::init(false), cl::Hidden,
183    cl::sub(MergeSubcommand),
184    cl::desc("Compress all sections when writing the profile (only "
185             "meaningful for -extbinary)"));
186cl::opt<bool> SampleMergeColdContext(
187    "sample-merge-cold-context", cl::init(false), cl::Hidden,
188    cl::sub(MergeSubcommand),
189    cl::desc(
190        "Merge context sample profiles whose count is below cold threshold"));
191cl::opt<bool> SampleTrimColdContext(
192    "sample-trim-cold-context", cl::init(false), cl::Hidden,
193    cl::sub(MergeSubcommand),
194    cl::desc(
195        "Trim context sample profiles whose count is below cold threshold"));
196cl::opt<uint32_t> SampleColdContextFrameDepth(
197    "sample-frame-depth-for-cold-context", cl::init(1),
198    cl::sub(MergeSubcommand),
199    cl::desc("Keep the last K frames while merging cold profile. 1 means the "
200             "context-less base profile"));
201cl::opt<size_t> OutputSizeLimit(
202    "output-size-limit", cl::init(0), cl::Hidden, cl::sub(MergeSubcommand),
203    cl::desc("Trim cold functions until profile size is below specified "
204             "limit in bytes. This uses a heursitic and functions may be "
205             "excessively trimmed"));
206cl::opt<bool> GenPartialProfile(
207    "gen-partial-profile", cl::init(false), cl::Hidden,
208    cl::sub(MergeSubcommand),
209    cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
210cl::opt<std::string> SupplInstrWithSample(
211    "supplement-instr-with-sample", cl::init(""), cl::Hidden,
212    cl::sub(MergeSubcommand),
213    cl::desc("Supplement an instr profile with sample profile, to correct "
214             "the profile unrepresentativeness issue. The sample "
215             "profile is the input of the flag. Output will be in instr "
216             "format (The flag only works with -instr)"));
217cl::opt<float> ZeroCounterThreshold(
218    "zero-counter-threshold", cl::init(0.7), cl::Hidden,
219    cl::sub(MergeSubcommand),
220    cl::desc("For the function which is cold in instr profile but hot in "
221             "sample profile, if the ratio of the number of zero counters "
222             "divided by the total number of counters is above the "
223             "threshold, the profile of the function will be regarded as "
224             "being harmful for performance and will be dropped."));
225cl::opt<unsigned> SupplMinSizeThreshold(
226    "suppl-min-size-threshold", cl::init(10), cl::Hidden,
227    cl::sub(MergeSubcommand),
228    cl::desc("If the size of a function is smaller than the threshold, "
229             "assume it can be inlined by PGO early inliner and it won't "
230             "be adjusted based on sample profile."));
231cl::opt<unsigned> InstrProfColdThreshold(
232    "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
233    cl::sub(MergeSubcommand),
234    cl::desc("User specified cold threshold for instr profile which will "
235             "override the cold threshold got from profile summary. "));
236// WARNING: This reservoir size value is propagated to any input indexed
237// profiles for simplicity. Changing this value between invocations could
238// result in sample bias.
239cl::opt<uint64_t> TemporalProfTraceReservoirSize(
240    "temporal-profile-trace-reservoir-size", cl::init(100),
241    cl::sub(MergeSubcommand),
242    cl::desc("The maximum number of stored temporal profile traces (default: "
243             "100)"));
244cl::opt<uint64_t> TemporalProfMaxTraceLength(
245    "temporal-profile-max-trace-length", cl::init(10000),
246    cl::sub(MergeSubcommand),
247    cl::desc("The maximum length of a single temporal profile trace "
248             "(default: 10000)"));
249cl::opt<std::string> FuncNameNegativeFilter(
250    "no-function", cl::init(""),
251    cl::sub(MergeSubcommand),
252    cl::desc("Exclude functions matching the filter from the output."));
253
254cl::opt<FailureMode>
255    FailMode("failure-mode", cl::init(failIfAnyAreInvalid),
256             cl::desc("Failure mode:"), cl::sub(MergeSubcommand),
257             cl::values(clEnumValN(warnOnly, "warn",
258                                   "Do not fail and just print warnings."),
259                        clEnumValN(failIfAnyAreInvalid, "any",
260                                   "Fail if any profile is invalid."),
261                        clEnumValN(failIfAllAreInvalid, "all",
262                                   "Fail only if all profiles are invalid.")));
263
264cl::opt<bool> OutputSparse(
265    "sparse", cl::init(false), cl::sub(MergeSubcommand),
266    cl::desc("Generate a sparse profile (only meaningful for -instr)"));
267cl::opt<unsigned> NumThreads(
268    "num-threads", cl::init(0), cl::sub(MergeSubcommand),
269    cl::desc("Number of merge threads to use (default: autodetect)"));
270cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
271                      cl::aliasopt(NumThreads));
272
273cl::opt<std::string> ProfileSymbolListFile(
274    "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand),
275    cl::desc("Path to file containing the list of function symbols "
276             "used to populate profile symbol list"));
277
278cl::opt<SampleProfileLayout> ProfileLayout(
279    "convert-sample-profile-layout",
280    cl::desc("Convert the generated profile to a profile with a new layout"),
281    cl::sub(MergeSubcommand), cl::init(SPL_None),
282    cl::values(
283        clEnumValN(SPL_Nest, "nest",
284                   "Nested profile, the input should be CS flat profile"),
285        clEnumValN(SPL_Flat, "flat",
286                   "Profile with nested inlinee flatten out")));
287
288cl::opt<bool> DropProfileSymbolList(
289    "drop-profile-symbol-list", cl::init(false), cl::Hidden,
290    cl::sub(MergeSubcommand),
291    cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
292             "(only meaningful for -sample)"));
293
294// Options specific to overlap subcommand.
295cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
296                                  cl::desc("<base profile file>"),
297                                  cl::sub(OverlapSubcommand));
298cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
299                                  cl::desc("<test profile file>"),
300                                  cl::sub(OverlapSubcommand));
301
302cl::opt<unsigned long long> SimilarityCutoff(
303    "similarity-cutoff", cl::init(0),
304    cl::desc("For sample profiles, list function names (with calling context "
305             "for csspgo) for overlapped functions "
306             "with similarities below the cutoff (percentage times 10000)."),
307    cl::sub(OverlapSubcommand));
308
309cl::opt<bool> IsCS(
310    "cs", cl::init(false),
311    cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
312    cl::sub(OverlapSubcommand));
313
314cl::opt<unsigned long long> OverlapValueCutoff(
315    "value-cutoff", cl::init(-1),
316    cl::desc(
317        "Function level overlap information for every function (with calling "
318        "context for csspgo) in test "
319        "profile with max count value greater then the parameter value"),
320    cl::sub(OverlapSubcommand));
321
// Options unique to show subcommand.
cl::opt<bool> ShowCounts("counts", cl::init(false),
                         cl::desc("Show counter values for shown functions"),
                         cl::sub(ShowSubcommand));
// Output format selector; text is the default.
cl::opt<ShowFormat>
    SFormat("show-format", cl::init(ShowFormat::Text),
            cl::desc("Emit output in the selected format if supported"),
            cl::sub(ShowSubcommand),
            cl::values(clEnumValN(ShowFormat::Text, "text",
                                  "emit normal text output (default)"),
                       clEnumValN(ShowFormat::Json, "json", "emit JSON"),
                       clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
// TODO: Consider replacing this with `--show-format=text-encoding`.
cl::opt<bool>
    TextFormat("text", cl::init(false),
               cl::desc("Show instr profile data in text dump format"),
               cl::sub(ShowSubcommand));
// Deprecated spelling of --show-format=json (see the description string).
cl::opt<bool>
    JsonFormat("json",
               cl::desc("Show sample profile data in the JSON format "
                        "(deprecated, please use --show-format=json)"),
               cl::sub(ShowSubcommand));
cl::opt<bool> ShowIndirectCallTargets(
    "ic-targets", cl::init(false),
    cl::desc("Show indirect call site target values for shown functions"),
    cl::sub(ShowSubcommand));
cl::opt<bool> ShowMemOPSizes(
    "memop-sizes", cl::init(false),
    cl::desc("Show the profiled sizes of the memory intrinsic calls "
             "for shown functions"),
    cl::sub(ShowSubcommand));
cl::opt<bool> ShowDetailedSummary("detailed-summary", cl::init(false),
                                  cl::desc("Show detailed profile summary"),
                                  cl::sub(ShowSubcommand));
// Comma-separated list of cutoffs, each a percentage multiplied by 10000.
cl::list<uint32_t> DetailedSummaryCutoffs(
    cl::CommaSeparated, "detailed-summary-cutoffs",
    cl::desc(
        "Cutoff percentages (times 10000) for generating detailed summary"),
    cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
cl::opt<bool>
    ShowHotFuncList("hot-func-list", cl::init(false),
                    cl::desc("Show profile summary of a list of hot functions"),
                    cl::sub(ShowSubcommand));
cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
                               cl::desc("Details for each and every function"),
                               cl::sub(ShowSubcommand));
cl::opt<bool> ShowCS("showcs", cl::init(false),
                     cl::desc("Show context sensitive counts"),
                     cl::sub(ShowSubcommand));
// Profile kind for show; unlike the merge/overlap kind option this one also
// offers the memory (MemProf) kind.
cl::opt<ProfileKinds> ShowProfileKind(
    cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand),
    cl::init(instr),
    cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
               clEnumVal(sample, "Sample profile"),
               clEnumVal(memory, "MemProf memory access profile")));
cl::opt<uint32_t> TopNFunctions(
    "topn", cl::init(0),
    cl::desc("Show the list of functions with the largest internal counts"),
    cl::sub(ShowSubcommand));
// Shares the flag name "value-cutoff" with the overlap subcommand's option;
// this one is registered for the show subcommand only.
cl::opt<uint32_t> ShowValueCutoff(
    "value-cutoff", cl::init(0),
    cl::desc("Set the count value cutoff. Functions with the maximum count "
             "less than this value will not be printed out. (Default is 0)"),
    cl::sub(ShowSubcommand));
cl::opt<bool> OnlyListBelow(
    "list-below-cutoff", cl::init(false),
    cl::desc("Only output names of functions whose max count values are "
             "below the cutoff value"),
    cl::sub(ShowSubcommand));
cl::opt<bool> ShowProfileSymbolList(
    "show-prof-sym-list", cl::init(false),
    cl::desc("Show profile symbol list if it exists in the profile. "),
    cl::sub(ShowSubcommand));
cl::opt<bool> ShowSectionInfoOnly(
    "show-sec-info-only", cl::init(false),
    cl::desc("Show the information of each section in the sample profile. "
             "The flag is only usable when the sample profile is in "
             "extbinary format"),
    cl::sub(ShowSubcommand));
cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
                            cl::desc("Show binary ids in the profile. "),
                            cl::sub(ShowSubcommand));
cl::opt<bool> ShowTemporalProfTraces(
    "temporal-profile-traces",
    cl::desc("Show temporal profile traces in the profile."),
    cl::sub(ShowSubcommand));

cl::opt<bool>
    ShowCovered("covered", cl::init(false),
                cl::desc("Show only the functions that have been executed."),
                cl::sub(ShowSubcommand));

cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
                                 cl::desc("Show profile version. "),
                                 cl::sub(ShowSubcommand));
417
// Placeholder string used to indicate that multiple static functions map to
// the same name.
const std::string DuplicateNameStr = "----";
421
422static void warn(Twine Message, std::string Whence = "",
423                 std::string Hint = "") {
424  WithColor::warning();
425  if (!Whence.empty())
426    errs() << Whence << ": ";
427  errs() << Message << "\n";
428  if (!Hint.empty())
429    WithColor::note() << Hint << "\n";
430}
431
432static void warn(Error E, StringRef Whence = "") {
433  if (E.isA<InstrProfError>()) {
434    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
435      warn(IPE.message(), std::string(Whence), std::string(""));
436    });
437  }
438}
439
440static void exitWithError(Twine Message, std::string Whence = "",
441                          std::string Hint = "") {
442  WithColor::error();
443  if (!Whence.empty())
444    errs() << Whence << ": ";
445  errs() << Message << "\n";
446  if (!Hint.empty())
447    WithColor::note() << Hint << "\n";
448  ::exit(1);
449}
450
451static void exitWithError(Error E, StringRef Whence = "") {
452  if (E.isA<InstrProfError>()) {
453    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
454      instrprof_error instrError = IPE.get();
455      StringRef Hint = "";
456      if (instrError == instrprof_error::unrecognized_format) {
457        // Hint in case user missed specifying the profile type.
458        Hint = "Perhaps you forgot to use the --sample or --memory option?";
459      }
460      exitWithError(IPE.message(), std::string(Whence), std::string(Hint));
461    });
462    return;
463  }
464
465  exitWithError(toString(std::move(E)), std::string(Whence));
466}
467
468static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
469  exitWithError(EC.message(), std::string(Whence));
470}
471
472static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
473                                 StringRef Whence = "") {
474  if (FailMode == failIfAnyAreInvalid)
475    exitWithErrorCode(EC, Whence);
476  else
477    warn(EC.message(), std::string(Whence));
478}
479
480static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
481                                   StringRef WhenceFunction = "",
482                                   bool ShowHint = true) {
483  if (!WhenceFile.empty())
484    errs() << WhenceFile << ": ";
485  if (!WhenceFunction.empty())
486    errs() << WhenceFunction << ": ";
487
488  auto IPE = instrprof_error::success;
489  E = handleErrors(std::move(E),
490                   [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
491                     IPE = E->get();
492                     return Error(std::move(E));
493                   });
494  errs() << toString(std::move(E)) << "\n";
495
496  if (ShowHint) {
497    StringRef Hint = "";
498    if (IPE != instrprof_error::success) {
499      switch (IPE) {
500      case instrprof_error::hash_mismatch:
501      case instrprof_error::count_mismatch:
502      case instrprof_error::value_site_count_mismatch:
503        Hint = "Make sure that all profile data to be merged is generated "
504               "from the same binary.";
505        break;
506      default:
507        break;
508      }
509    }
510
511    if (!Hint.empty())
512      errs() << Hint << "\n";
513  }
514}
515
516namespace {
517/// A remapper from original symbol names to new symbol names based on a file
518/// containing a list of mappings from old name to new name.
519class SymbolRemapper {
520  std::unique_ptr<MemoryBuffer> File;
521  DenseMap<StringRef, StringRef> RemappingTable;
522
523public:
524  /// Build a SymbolRemapper from a file containing a list of old/new symbols.
525  static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
526    auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
527    if (!BufOrError)
528      exitWithErrorCode(BufOrError.getError(), InputFile);
529
530    auto Remapper = std::make_unique<SymbolRemapper>();
531    Remapper->File = std::move(BufOrError.get());
532
533    for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
534         !LineIt.is_at_eof(); ++LineIt) {
535      std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
536      if (Parts.first.empty() || Parts.second.empty() ||
537          Parts.second.count(' ')) {
538        exitWithError("unexpected line in remapping file",
539                      (InputFile + ":" + Twine(LineIt.line_number())).str(),
540                      "expected 'old_symbol new_symbol'");
541      }
542      Remapper->RemappingTable.insert(Parts);
543    }
544    return Remapper;
545  }
546
547  /// Attempt to map the given old symbol into a new symbol.
548  ///
549  /// \return The new symbol, or \p Name if no such symbol was found.
550  StringRef operator()(StringRef Name) {
551    StringRef New = RemappingTable.lookup(Name);
552    return New.empty() ? Name : New;
553  }
554
555  FunctionId operator()(FunctionId Name) {
556    // MD5 name cannot be remapped.
557    if (!Name.isStringRef())
558      return Name;
559    StringRef New = RemappingTable.lookup(Name.stringRef());
560    return New.empty() ? Name : FunctionId(New);
561  }
562};
563}
564
565struct WeightedFile {
566  std::string Filename;
567  uint64_t Weight;
568};
569typedef SmallVector<WeightedFile, 5> WeightedFileVector;
570
/// Keep track of merged data and reported errors.
struct WriterContext {
  /// Guards this context; loadInput() holds it for the duration of a load.
  std::mutex Lock;
  /// Writer that accumulates the merged profile data.
  InstrProfWriter Writer;
  /// Errors collected while reading inputs, each paired with a string naming
  /// where it came from (the input filename at the call sites in this file).
  std::vector<std::pair<Error, std::string>> Errors;
  /// Lock owned elsewhere; taken while reporting a writer error and updating
  /// WriterErrorCodes.
  std::mutex &ErrLock;
  /// Writer error codes already reported; used so that the hint is shown only
  /// the first time a given code occurs.
  SmallSet<instrprof_error, 4> &WriterErrorCodes;

  /// \p IsSparse, \p ReservoirSize and \p MaxTraceLength are forwarded to the
  /// InstrProfWriter constructor; the two references are stored as-is.
  WriterContext(bool IsSparse, std::mutex &ErrLock,
                SmallSet<instrprof_error, 4> &WriterErrorCodes,
                uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
      : Writer(IsSparse, ReservoirSize, MaxTraceLength), ErrLock(ErrLock),
        WriterErrorCodes(WriterErrorCodes) {}
};
585
586/// Computer the overlap b/w profile BaseFilename and TestFileName,
587/// and store the program level result to Overlap.
588static void overlapInput(const std::string &BaseFilename,
589                         const std::string &TestFilename, WriterContext *WC,
590                         OverlapStats &Overlap,
591                         const OverlapFuncFilters &FuncFilter,
592                         raw_fd_ostream &OS, bool IsCS) {
593  auto FS = vfs::getRealFileSystem();
594  auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
595  if (Error E = ReaderOrErr.takeError()) {
596    // Skip the empty profiles by returning sliently.
597    auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
598    if (ErrorCode != instrprof_error::empty_raw_profile)
599      WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
600                              TestFilename);
601    return;
602  }
603
604  auto Reader = std::move(ReaderOrErr.get());
605  for (auto &I : *Reader) {
606    OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
607    FuncOverlap.setFuncInfo(I.Name, I.Hash);
608
609    WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
610    FuncOverlap.dump(OS);
611  }
612}
613
614/// Load an input into a writer context.
615static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
616                      const InstrProfCorrelator *Correlator,
617                      const StringRef ProfiledBinary, WriterContext *WC) {
618  std::unique_lock<std::mutex> CtxGuard{WC->Lock};
619
620  // Copy the filename, because llvm::ThreadPool copied the input "const
621  // WeightedFile &" by value, making a reference to the filename within it
622  // invalid outside of this packaged task.
623  std::string Filename = Input.Filename;
624
625  using ::llvm::memprof::RawMemProfReader;
626  if (RawMemProfReader::hasFormat(Input.Filename)) {
627    auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
628    if (!ReaderOrErr) {
629      exitWithError(ReaderOrErr.takeError(), Input.Filename);
630    }
631    std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
632    // Check if the profile types can be merged, e.g. clang frontend profiles
633    // should not be merged with memprof profiles.
634    if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
635      consumeError(std::move(E));
636      WC->Errors.emplace_back(
637          make_error<StringError>(
638              "Cannot merge MemProf profile with Clang generated profile.",
639              std::error_code()),
640          Filename);
641      return;
642    }
643
644    auto MemProfError = [&](Error E) {
645      auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
646      WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
647                              Filename);
648    };
649
650    // Add the frame mappings into the writer context.
651    const auto &IdToFrame = Reader->getFrameMapping();
652    for (const auto &I : IdToFrame) {
653      bool Succeeded = WC->Writer.addMemProfFrame(
654          /*Id=*/I.first, /*Frame=*/I.getSecond(), MemProfError);
655      // If we weren't able to add the frame mappings then it doesn't make sense
656      // to try to add the records from this profile.
657      if (!Succeeded)
658        return;
659    }
660    const auto &FunctionProfileData = Reader->getProfileData();
661    // Add the memprof records into the writer context.
662    for (const auto &I : FunctionProfileData) {
663      WC->Writer.addMemProfRecord(/*Id=*/I.first, /*Record=*/I.second);
664    }
665    return;
666  }
667
668  auto FS = vfs::getRealFileSystem();
669  // TODO: This only saves the first non-fatal error from InstrProfReader, and
670  // then added to WriterContext::Errors. However, this is not extensible, if
671  // we have more non-fatal errors from InstrProfReader in the future. How
672  // should this interact with different -failure-mode?
673  std::optional<std::pair<Error, std::string>> ReaderWarning;
674  auto Warn = [&](Error E) {
675    if (ReaderWarning) {
676      consumeError(std::move(E));
677      return;
678    }
679    // Only show the first time an error occurs in this file.
680    auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
681    ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
682  };
683  auto ReaderOrErr =
684      InstrProfReader::create(Input.Filename, *FS, Correlator, Warn);
685  if (Error E = ReaderOrErr.takeError()) {
686    // Skip the empty profiles by returning silently.
687    auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
688    if (ErrCode != instrprof_error::empty_raw_profile)
689      WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
690                              Filename);
691    return;
692  }
693
694  auto Reader = std::move(ReaderOrErr.get());
695  if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
696    consumeError(std::move(E));
697    WC->Errors.emplace_back(
698        make_error<StringError>(
699            "Merge IR generated profile with Clang generated profile.",
700            std::error_code()),
701        Filename);
702    return;
703  }
704
705  for (auto &I : *Reader) {
706    if (Remapper)
707      I.Name = (*Remapper)(I.Name);
708    const StringRef FuncName = I.Name;
709    bool Reported = false;
710    WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
711      if (Reported) {
712        consumeError(std::move(E));
713        return;
714      }
715      Reported = true;
716      // Only show hint the first time an error occurs.
717      auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
718      std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
719      bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
720      handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
721                             Input.Filename, FuncName, firstTime);
722    });
723  }
724
725  if (Reader->hasTemporalProfile()) {
726    auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
727    if (!Traces.empty())
728      WC->Writer.addTemporalProfileTraces(
729          Traces, Reader->getTemporalProfTraceStreamSize());
730  }
731  if (Reader->hasError()) {
732    if (Error E = Reader->getError()) {
733      WC->Errors.emplace_back(std::move(E), Filename);
734      return;
735    }
736  }
737
738  std::vector<llvm::object::BuildID> BinaryIds;
739  if (Error E = Reader->readBinaryIds(BinaryIds)) {
740    WC->Errors.emplace_back(std::move(E), Filename);
741    return;
742  }
743  WC->Writer.addBinaryIds(BinaryIds);
744
745  if (ReaderWarning) {
746    WC->Errors.emplace_back(std::move(ReaderWarning->first),
747                            ReaderWarning->second);
748  }
749}
750
751/// Merge the \p Src writer context into \p Dst.
752static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
753  for (auto &ErrorPair : Src->Errors)
754    Dst->Errors.push_back(std::move(ErrorPair));
755  Src->Errors.clear();
756
757  if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
758    exitWithError(std::move(E));
759
760  Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
761    auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
762    std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
763    bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
764    if (firstTime)
765      warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
766  });
767}
768
769static StringRef
770getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
771  return Val.first();
772}
773
774static std::string
775getFuncName(const SampleProfileMap::value_type &Val) {
776  return Val.second.getContext().toString();
777}
778
779template <typename T>
780static void filterFunctions(T &ProfileMap) {
781  bool hasFilter = !FuncNameFilter.empty();
782  bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
783  if (!hasFilter && !hasNegativeFilter)
784    return;
785
786  // If filter starts with '?' it is MSVC mangled name, not a regex.
787  llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
788  if (hasFilter && FuncNameFilter[0] == '?' &&
789      ProbablyMSVCMangledName.match(FuncNameFilter))
790    FuncNameFilter = llvm::Regex::escape(FuncNameFilter);
791  if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
792      ProbablyMSVCMangledName.match(FuncNameNegativeFilter))
793    FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter);
794
795  size_t Count = ProfileMap.size();
796  llvm::Regex Pattern(FuncNameFilter);
797  llvm::Regex NegativePattern(FuncNameNegativeFilter);
798  std::string Error;
799  if (hasFilter && !Pattern.isValid(Error))
800    exitWithError(Error);
801  if (hasNegativeFilter && !NegativePattern.isValid(Error))
802    exitWithError(Error);
803
804  // Handle MD5 profile, so it is still able to match using the original name.
805  std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter));
806  std::string NegativeMD5Name =
807      std::to_string(llvm::MD5Hash(FuncNameNegativeFilter));
808
809  for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
810    auto Tmp = I++;
811    const auto &FuncName = getFuncName(*Tmp);
812    // Negative filter has higher precedence than positive filter.
813    if ((hasNegativeFilter &&
814         (NegativePattern.match(FuncName) ||
815          (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
816        (hasFilter && !(Pattern.match(FuncName) ||
817                        (FunctionSamples::UseMD5 && MD5Name == FuncName))))
818      ProfileMap.erase(Tmp);
819  }
820
821  llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
822               << "in the original profile are filtered.\n";
823}
824
825static void writeInstrProfile(StringRef OutputFilename,
826                              ProfileFormat OutputFormat,
827                              InstrProfWriter &Writer) {
828  std::error_code EC;
829  raw_fd_ostream Output(OutputFilename.data(), EC,
830                        OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
831                                                : sys::fs::OF_None);
832  if (EC)
833    exitWithErrorCode(EC, OutputFilename);
834
835  if (OutputFormat == PF_Text) {
836    if (Error E = Writer.writeText(Output))
837      warn(std::move(E));
838  } else {
839    if (Output.is_displayed())
840      exitWithError("cannot write a non-text format profile to the terminal");
841    if (Error E = Writer.write(Output))
842      warn(std::move(E));
843  }
844}
845
846static void mergeInstrProfile(const WeightedFileVector &Inputs,
847                              SymbolRemapper *Remapper,
848                              int MaxDbgCorrelationWarnings,
849                              const StringRef ProfiledBinary) {
850  const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
851  const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
852  if (OutputFormat == PF_Compact_Binary)
853    exitWithError("Compact Binary is deprecated");
854  if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
855      OutputFormat != PF_Text)
856    exitWithError("unknown format is specified");
857
858  // TODO: Maybe we should support correlation with mixture of different
859  // correlation modes(w/wo debug-info/object correlation).
860  if (!DebugInfoFilename.empty() && !BinaryFilename.empty())
861    exitWithError("Expected only one of -debug-info, -binary-file");
862  std::string CorrelateFilename;
863  ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
864  if (!DebugInfoFilename.empty()) {
865    CorrelateFilename = DebugInfoFilename;
866    CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
867  } else if (!BinaryFilename.empty()) {
868    CorrelateFilename = BinaryFilename;
869    CorrelateKind = ProfCorrelatorKind::BINARY;
870  }
871
872  std::unique_ptr<InstrProfCorrelator> Correlator;
873  if (CorrelateKind != InstrProfCorrelator::NONE) {
874    if (auto Err = InstrProfCorrelator::get(CorrelateFilename, CorrelateKind)
875                       .moveInto(Correlator))
876      exitWithError(std::move(Err), CorrelateFilename);
877    if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
878      exitWithError(std::move(Err), CorrelateFilename);
879  }
880
881  std::mutex ErrorLock;
882  SmallSet<instrprof_error, 4> WriterErrorCodes;
883
884  // If NumThreads is not specified, auto-detect a good default.
885  if (NumThreads == 0)
886    NumThreads = std::min(hardware_concurrency().compute_thread_count(),
887                          unsigned((Inputs.size() + 1) / 2));
888
889  // Initialize the writer contexts.
890  SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
891  for (unsigned I = 0; I < NumThreads; ++I)
892    Contexts.emplace_back(std::make_unique<WriterContext>(
893        OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
894        MaxTraceLength));
895
896  if (NumThreads == 1) {
897    for (const auto &Input : Inputs)
898      loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
899                Contexts[0].get());
900  } else {
901    ThreadPool Pool(hardware_concurrency(NumThreads));
902
903    // Load the inputs in parallel (N/NumThreads serial steps).
904    unsigned Ctx = 0;
905    for (const auto &Input : Inputs) {
906      Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
907                 Contexts[Ctx].get());
908      Ctx = (Ctx + 1) % NumThreads;
909    }
910    Pool.wait();
911
912    // Merge the writer contexts together (~ lg(NumThreads) serial steps).
913    unsigned Mid = Contexts.size() / 2;
914    unsigned End = Contexts.size();
915    assert(Mid > 0 && "Expected more than one context");
916    do {
917      for (unsigned I = 0; I < Mid; ++I)
918        Pool.async(mergeWriterContexts, Contexts[I].get(),
919                   Contexts[I + Mid].get());
920      Pool.wait();
921      if (End & 1) {
922        Pool.async(mergeWriterContexts, Contexts[0].get(),
923                   Contexts[End - 1].get());
924        Pool.wait();
925      }
926      End = Mid;
927      Mid /= 2;
928    } while (Mid > 0);
929  }
930
931  // Handle deferred errors encountered during merging. If the number of errors
932  // is equal to the number of inputs the merge failed.
933  unsigned NumErrors = 0;
934  for (std::unique_ptr<WriterContext> &WC : Contexts) {
935    for (auto &ErrorPair : WC->Errors) {
936      ++NumErrors;
937      warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
938    }
939  }
940  if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
941      (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
942    exitWithError("no profile can be merged");
943
944  filterFunctions(Contexts[0]->Writer.getProfileData());
945
946  writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
947}
948
949/// The profile entry for a function in instrumentation profile.
950struct InstrProfileEntry {
951  uint64_t MaxCount = 0;
952  uint64_t NumEdgeCounters = 0;
953  float ZeroCounterRatio = 0.0;
954  InstrProfRecord *ProfRecord;
955  InstrProfileEntry(InstrProfRecord *Record);
956  InstrProfileEntry() = default;
957};
958
959InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
960  ProfRecord = Record;
961  uint64_t CntNum = Record->Counts.size();
962  uint64_t ZeroCntNum = 0;
963  for (size_t I = 0; I < CntNum; ++I) {
964    MaxCount = std::max(MaxCount, Record->Counts[I]);
965    ZeroCntNum += !Record->Counts[I];
966  }
967  ZeroCounterRatio = (float)ZeroCntNum / CntNum;
968  NumEdgeCounters = CntNum;
969}
970
971/// Either set all the counters in the instr profile entry \p IFE to
972/// -1 / -2 /in order to drop the profile or scale up the
973/// counters in \p IFP to be above hot / cold threshold. We use
974/// the ratio of zero counters in the profile of a function to
975/// decide the profile is helpful or harmful for performance,
976/// and to choose whether to scale up or drop it.
977static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
978                                    uint64_t HotInstrThreshold,
979                                    uint64_t ColdInstrThreshold,
980                                    float ZeroCounterThreshold) {
981  InstrProfRecord *ProfRecord = IFE.ProfRecord;
982  if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
983    // If all or most of the counters of the function are zero, the
984    // profile is unaccountable and should be dropped. Reset all the
985    // counters to be -1 / -2 and PGO profile-use will drop the profile.
986    // All counters being -1 also implies that the function is hot so
987    // PGO profile-use will also set the entry count metadata to be
988    // above hot threshold.
989    // All counters being -2 implies that the function is warm so
990    // PGO profile-use will also set the entry count metadata to be
991    // above cold threshold.
992    auto Kind =
993        (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
994    ProfRecord->setPseudoCount(Kind);
995    return;
996  }
997
998  // Scale up the MaxCount to be multiple times above hot / cold threshold.
999  const unsigned MultiplyFactor = 3;
1000  uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1001  uint64_t Numerator = Threshold * MultiplyFactor;
1002
1003  // Make sure Threshold for warm counters is below the HotInstrThreshold.
1004  if (!SetToHot && Threshold >= HotInstrThreshold) {
1005    Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1006  }
1007
1008  uint64_t Denominator = IFE.MaxCount;
1009  if (Numerator <= Denominator)
1010    return;
1011  ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
1012    warn(toString(make_error<InstrProfError>(E)));
1013  });
1014}
1015
/// Positions within ProfileSummaryBuilder::DefaultCutoffs of the cutoffs used
/// to derive the cold and the hot count thresholds, respectively.
constexpr uint64_t ColdPercentileIdx = 15;
constexpr uint64_t HotPercentileIdx = 11;
1018
1019using sampleprof::FSDiscriminatorPass;
1020
1021// Internal options to set FSDiscriminatorPass. Used in merge and show
1022// commands.
1023static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1024    "fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
1025    cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1026             "pass beyond this value. The enum values are defined in "
1027             "Support/Discriminator.h"),
1028    cl::values(clEnumVal(Base, "Use base discriminators only"),
1029               clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1030               clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1031               clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1032               clEnumVal(PassLast, "Use all discriminator bits (default)")));
1033
1034static unsigned getDiscriminatorMask() {
1035  return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue()));
1036}
1037
1038/// Adjust the instr profile in \p WC based on the sample profile in
1039/// \p Reader.
1040static void
1041adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1042                   std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1043                   unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1044                   unsigned InstrProfColdThreshold) {
1045  // Function to its entry in instr profile.
1046  StringMap<InstrProfileEntry> InstrProfileMap;
1047  StringMap<StringRef> StaticFuncMap;
1048  InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1049
1050  auto checkSampleProfileHasFUnique = [&Reader]() {
1051    for (const auto &PD : Reader->getProfiles()) {
1052      auto &FContext = PD.second.getContext();
1053      if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
1054          std::string::npos) {
1055        return true;
1056      }
1057    }
1058    return false;
1059  };
1060
1061  bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1062
1063  auto buildStaticFuncMap = [&StaticFuncMap,
1064                             SampleProfileHasFUnique](const StringRef Name) {
1065    std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1066    size_t PrefixPos = StringRef::npos;
1067    for (auto &FilePrefix : FilePrefixes) {
1068      std::string NamePrefix = FilePrefix + kGlobalIdentifierDelimiter;
1069      PrefixPos = Name.find_insensitive(NamePrefix);
1070      if (PrefixPos == StringRef::npos)
1071        continue;
1072      PrefixPos += NamePrefix.size();
1073      break;
1074    }
1075
1076    if (PrefixPos == StringRef::npos) {
1077      return;
1078    }
1079
1080    StringRef NewName = Name.drop_front(PrefixPos);
1081    StringRef FName = Name.substr(0, PrefixPos - 1);
1082    if (NewName.size() == 0) {
1083      return;
1084    }
1085
1086    // This name should have a static linkage.
1087    size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
1088    bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1089
1090    // If sample profile and instrumented profile do not agree on symbol
1091    // uniqification.
1092    if (SampleProfileHasFUnique != ProfileHasFUnique) {
1093      // If instrumented profile uses -funique-internal-linkage-symbols,
1094      // we need to trim the name.
1095      if (ProfileHasFUnique) {
1096        NewName = NewName.substr(0, PostfixPos);
1097      } else {
1098        // If sample profile uses -funique-internal-linkage-symbols,
1099        // we build the map.
1100        std::string NStr =
1101            NewName.str() + getUniqueInternalLinkagePostfix(FName);
1102        NewName = StringRef(NStr);
1103        StaticFuncMap[NewName] = Name;
1104        return;
1105      }
1106    }
1107
1108    if (!StaticFuncMap.contains(NewName)) {
1109      StaticFuncMap[NewName] = Name;
1110    } else {
1111      StaticFuncMap[NewName] = DuplicateNameStr;
1112    }
1113  };
1114
1115  // We need to flatten the SampleFDO profile as the InstrFDO
1116  // profile does not have inlined callsite profiles.
1117  // One caveat is the pre-inlined function -- their samples
1118  // should be collapsed into the caller function.
1119  // Here we do a DFS traversal to get the flatten profile
1120  // info: the sum of entrycount and the max of maxcount.
1121  // Here is the algorithm:
1122  //   recursive (FS, root_name) {
1123  //      name = FS->getName();
1124  //      get samples for FS;
1125  //      if (InstrProf.find(name) {
1126  //        root_name = name;
1127  //      } else {
1128  //        if (name is in static_func map) {
1129  //          root_name = static_name;
1130  //        }
1131  //      }
1132  //      update the Map entry for root_name;
1133  //      for (subfs: FS) {
1134  //        recursive(subfs, root_name);
1135  //      }
1136  //   }
1137  //
1138  // Here is an example.
1139  //
1140  // SampleProfile:
1141  // foo:12345:1000
1142  // 1: 1000
1143  // 2.1: 1000
1144  // 15: 5000
1145  // 4: bar:1000
1146  //  1: 1000
1147  //  2: goo:3000
1148  //   1: 3000
1149  // 8: bar:40000
1150  //  1: 10000
1151  //  2: goo:30000
1152  //   1: 30000
1153  //
1154  // InstrProfile has two entries:
1155  //  foo
1156  //  bar.cc;bar
1157  //
1158  // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1159  // {"foo", {1000, 5000}}
1160  // {"bar.cc;bar", {11000, 30000}}
1161  //
1162  // foo's has an entry count of 1000, and max body count of 5000.
1163  // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1164  // 10000), and max count of 30000 (from the callsite in line 8).
1165  //
1166  // Note that goo's count will remain in bar.cc;bar() as it does not have an
1167  // entry in InstrProfile.
1168  llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1169  auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1170                            &InstrProfileMap](const FunctionSamples &FS,
1171                                              const StringRef &RootName) {
1172    auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1173                                     const StringRef &RootName,
1174                                     auto &BuildImpl) -> void {
1175      std::string NameStr = FS.getFunction().str();
1176      const StringRef Name = NameStr;
1177      const StringRef *NewRootName = &RootName;
1178      uint64_t EntrySample = FS.getHeadSamplesEstimate();
1179      uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1180
1181      auto It = InstrProfileMap.find(Name);
1182      if (It != InstrProfileMap.end()) {
1183        NewRootName = &Name;
1184      } else {
1185        auto NewName = StaticFuncMap.find(Name);
1186        if (NewName != StaticFuncMap.end()) {
1187          It = InstrProfileMap.find(NewName->second.str());
1188          if (NewName->second != DuplicateNameStr) {
1189            NewRootName = &NewName->second;
1190          }
1191        } else {
1192          // Here the EntrySample is of an inlined function, so we should not
1193          // update the EntrySample in the map.
1194          EntrySample = 0;
1195        }
1196      }
1197      EntrySample += FlattenSampleMap[*NewRootName].first;
1198      MaxBodySample =
1199          std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample);
1200      FlattenSampleMap[*NewRootName] =
1201          std::make_pair(EntrySample, MaxBodySample);
1202
1203      for (const auto &C : FS.getCallsiteSamples())
1204        for (const auto &F : C.second)
1205          BuildImpl(F.second, *NewRootName, BuildImpl);
1206    };
1207    BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1208  };
1209
1210  for (auto &PD : WC->Writer.getProfileData()) {
1211    // Populate IPBuilder.
1212    for (const auto &PDV : PD.getValue()) {
1213      InstrProfRecord Record = PDV.second;
1214      IPBuilder.addRecord(Record);
1215    }
1216
1217    // If a function has multiple entries in instr profile, skip it.
1218    if (PD.getValue().size() != 1)
1219      continue;
1220
1221    // Initialize InstrProfileMap.
1222    InstrProfRecord *R = &PD.getValue().begin()->second;
1223    StringRef FullName = PD.getKey();
1224    InstrProfileMap[FullName] = InstrProfileEntry(R);
1225    buildStaticFuncMap(FullName);
1226  }
1227
1228  for (auto &PD : Reader->getProfiles()) {
1229    sampleprof::FunctionSamples &FS = PD.second;
1230    std::string Name = FS.getFunction().str();
1231    BuildMaxSampleMap(FS, Name);
1232  }
1233
1234  ProfileSummary InstrPS = *IPBuilder.getSummary();
1235  ProfileSummary SamplePS = Reader->getSummary();
1236
1237  // Compute cold thresholds for instr profile and sample profile.
1238  uint64_t HotSampleThreshold =
1239      ProfileSummaryBuilder::getEntryForPercentile(
1240          SamplePS.getDetailedSummary(),
1241          ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1242          .MinCount;
1243  uint64_t ColdSampleThreshold =
1244      ProfileSummaryBuilder::getEntryForPercentile(
1245          SamplePS.getDetailedSummary(),
1246          ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1247          .MinCount;
1248  uint64_t HotInstrThreshold =
1249      ProfileSummaryBuilder::getEntryForPercentile(
1250          InstrPS.getDetailedSummary(),
1251          ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1252          .MinCount;
1253  uint64_t ColdInstrThreshold =
1254      InstrProfColdThreshold
1255          ? InstrProfColdThreshold
1256          : ProfileSummaryBuilder::getEntryForPercentile(
1257                InstrPS.getDetailedSummary(),
1258                ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1259                .MinCount;
1260
1261  // Find hot/warm functions in sample profile which is cold in instr profile
1262  // and adjust the profiles of those functions in the instr profile.
1263  for (const auto &E : FlattenSampleMap) {
1264    uint64_t SampleMaxCount = std::max(E.second.first, E.second.second);
1265    if (SampleMaxCount < ColdSampleThreshold)
1266      continue;
1267    StringRef Name = E.first();
1268    auto It = InstrProfileMap.find(Name);
1269    if (It == InstrProfileMap.end()) {
1270      auto NewName = StaticFuncMap.find(Name);
1271      if (NewName != StaticFuncMap.end()) {
1272        It = InstrProfileMap.find(NewName->second.str());
1273        if (NewName->second == DuplicateNameStr) {
1274          WithColor::warning()
1275              << "Static function " << Name
1276              << " has multiple promoted names, cannot adjust profile.\n";
1277        }
1278      }
1279    }
1280    if (It == InstrProfileMap.end() ||
1281        It->second.MaxCount > ColdInstrThreshold ||
1282        It->second.NumEdgeCounters < SupplMinSizeThreshold)
1283      continue;
1284    bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1285    updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
1286                            ColdInstrThreshold, ZeroCounterThreshold);
1287  }
1288}
1289
1290/// The main function to supplement instr profile with sample profile.
1291/// \Inputs contains the instr profile. \p SampleFilename specifies the
1292/// sample profile. \p OutputFilename specifies the output profile name.
1293/// \p OutputFormat specifies the output profile format. \p OutputSparse
1294/// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1295/// specifies the minimal size for the functions whose profile will be
1296/// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1297/// a function contains too many zero counters and whether its profile
1298/// should be dropped. \p InstrProfColdThreshold is the user specified
1299/// cold threshold which will override the cold threshold got from the
1300/// instr profile summary.
1301static void supplementInstrProfile(const WeightedFileVector &Inputs,
1302                                   StringRef SampleFilename, bool OutputSparse,
1303                                   unsigned SupplMinSizeThreshold,
1304                                   float ZeroCounterThreshold,
1305                                   unsigned InstrProfColdThreshold) {
1306  if (OutputFilename.compare("-") == 0)
1307    exitWithError("cannot write indexed profdata format to stdout");
1308  if (Inputs.size() != 1)
1309    exitWithError("expect one input to be an instr profile");
1310  if (Inputs[0].Weight != 1)
1311    exitWithError("expect instr profile doesn't have weight");
1312
1313  StringRef InstrFilename = Inputs[0].Filename;
1314
1315  // Read sample profile.
1316  LLVMContext Context;
1317  auto FS = vfs::getRealFileSystem();
1318  auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1319      SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
1320  if (std::error_code EC = ReaderOrErr.getError())
1321    exitWithErrorCode(EC, SampleFilename);
1322  auto Reader = std::move(ReaderOrErr.get());
1323  if (std::error_code EC = Reader->read())
1324    exitWithErrorCode(EC, SampleFilename);
1325
1326  // Read instr profile.
1327  std::mutex ErrorLock;
1328  SmallSet<instrprof_error, 4> WriterErrorCodes;
1329  auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
1330                                            WriterErrorCodes);
1331  loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
1332  if (WC->Errors.size() > 0)
1333    exitWithError(std::move(WC->Errors[0].first), InstrFilename);
1334
1335  adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1336                     InstrProfColdThreshold);
1337  writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
1338}
1339
1340/// Make a copy of the given function samples with all symbol names remapped
1341/// by the provided symbol remapper.
1342static sampleprof::FunctionSamples
1343remapSamples(const sampleprof::FunctionSamples &Samples,
1344             SymbolRemapper &Remapper, sampleprof_error &Error) {
1345  sampleprof::FunctionSamples Result;
1346  Result.setFunction(Remapper(Samples.getFunction()));
1347  Result.addTotalSamples(Samples.getTotalSamples());
1348  Result.addHeadSamples(Samples.getHeadSamples());
1349  for (const auto &BodySample : Samples.getBodySamples()) {
1350    uint32_t MaskedDiscriminator =
1351        BodySample.first.Discriminator & getDiscriminatorMask();
1352    Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator,
1353                          BodySample.second.getSamples());
1354    for (const auto &Target : BodySample.second.getCallTargets()) {
1355      Result.addCalledTargetSamples(BodySample.first.LineOffset,
1356                                    MaskedDiscriminator,
1357                                    Remapper(Target.first), Target.second);
1358    }
1359  }
1360  for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1361    sampleprof::FunctionSamplesMap &Target =
1362        Result.functionSamplesAt(CallsiteSamples.first);
1363    for (const auto &Callsite : CallsiteSamples.second) {
1364      sampleprof::FunctionSamples Remapped =
1365          remapSamples(Callsite.second, Remapper, Error);
1366      MergeResult(Error, Target[Remapped.getFunction()].merge(Remapped));
1367    }
1368  }
1369  return Result;
1370}
1371
// Lookup table of sample profile formats. The position of an entry is
// significant.
// NOTE(review): presumably indexed by the tool's ProfileFormat enumerator
// value, with SPF_None standing in for formats that have no sample profile
// equivalent -- confirm against the enum definition and the use site.
static sampleprof::SampleProfileFormat FormatMap[] = {
    sampleprof::SPF_None,
    sampleprof::SPF_Text,
    sampleprof::SPF_None,
    sampleprof::SPF_Ext_Binary,
    sampleprof::SPF_GCC,
    sampleprof::SPF_Binary};
1379
1380static std::unique_ptr<MemoryBuffer>
1381getInputFileBuf(const StringRef &InputFile) {
1382  if (InputFile == "")
1383    return {};
1384
1385  auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
1386  if (!BufOrError)
1387    exitWithErrorCode(BufOrError.getError(), InputFile);
1388
1389  return std::move(*BufOrError);
1390}
1391
1392static void populateProfileSymbolList(MemoryBuffer *Buffer,
1393                                      sampleprof::ProfileSymbolList &PSL) {
1394  if (!Buffer)
1395    return;
1396
1397  SmallVector<StringRef, 32> SymbolVec;
1398  StringRef Data = Buffer->getBuffer();
1399  Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1400
1401  for (StringRef SymbolStr : SymbolVec)
1402    PSL.add(SymbolStr.trim());
1403}
1404
1405static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1406                                  ProfileFormat OutputFormat,
1407                                  MemoryBuffer *Buffer,
1408                                  sampleprof::ProfileSymbolList &WriterList,
1409                                  bool CompressAllSections, bool UseMD5,
1410                                  bool GenPartialProfile) {
1411  populateProfileSymbolList(Buffer, WriterList);
1412  if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1413    warn("Profile Symbol list is not empty but the output format is not "
1414         "ExtBinary format. The list will be lost in the output. ");
1415
1416  Writer.setProfileSymbolList(&WriterList);
1417
1418  if (CompressAllSections) {
1419    if (OutputFormat != PF_Ext_Binary)
1420      warn("-compress-all-section is ignored. Specify -extbinary to enable it");
1421    else
1422      Writer.setToCompressAllSections();
1423  }
1424  if (UseMD5) {
1425    if (OutputFormat != PF_Ext_Binary)
1426      warn("-use-md5 is ignored. Specify -extbinary to enable it");
1427    else
1428      Writer.setUseMD5();
1429  }
1430  if (GenPartialProfile) {
1431    if (OutputFormat != PF_Ext_Binary)
1432      warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1433    else
1434      Writer.setPartialProfile();
1435  }
1436}
1437
1438static void mergeSampleProfile(const WeightedFileVector &Inputs,
1439                               SymbolRemapper *Remapper,
1440                               StringRef ProfileSymbolListFile,
1441                               size_t OutputSizeLimit) {
1442  using namespace sampleprof;
1443  SampleProfileMap ProfileMap;
1444  SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1445  LLVMContext Context;
1446  sampleprof::ProfileSymbolList WriterList;
1447  std::optional<bool> ProfileIsProbeBased;
1448  std::optional<bool> ProfileIsCS;
1449  for (const auto &Input : Inputs) {
1450    auto FS = vfs::getRealFileSystem();
1451    auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
1452                                                   FSDiscriminatorPassOption);
1453    if (std::error_code EC = ReaderOrErr.getError()) {
1454      warnOrExitGivenError(FailMode, EC, Input.Filename);
1455      continue;
1456    }
1457
1458    // We need to keep the readers around until after all the files are
1459    // read so that we do not lose the function names stored in each
1460    // reader's memory. The function names are needed to write out the
1461    // merged profile map.
1462    Readers.push_back(std::move(ReaderOrErr.get()));
1463    const auto Reader = Readers.back().get();
1464    if (std::error_code EC = Reader->read()) {
1465      warnOrExitGivenError(FailMode, EC, Input.Filename);
1466      Readers.pop_back();
1467      continue;
1468    }
1469
1470    SampleProfileMap &Profiles = Reader->getProfiles();
1471    if (ProfileIsProbeBased &&
1472        ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1473      exitWithError(
1474          "cannot merge probe-based profile with non-probe-based profile");
1475    ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1476    if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1477      exitWithError("cannot merge CS profile with non-CS profile");
1478    ProfileIsCS = FunctionSamples::ProfileIsCS;
1479    for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1480         I != E; ++I) {
1481      sampleprof_error Result = sampleprof_error::success;
1482      FunctionSamples Remapped =
1483          Remapper ? remapSamples(I->second, *Remapper, Result)
1484                   : FunctionSamples();
1485      FunctionSamples &Samples = Remapper ? Remapped : I->second;
1486      SampleContext FContext = Samples.getContext();
1487      MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight));
1488      if (Result != sampleprof_error::success) {
1489        std::error_code EC = make_error_code(Result);
1490        handleMergeWriterError(errorCodeToError(EC), Input.Filename,
1491                               FContext.toString());
1492      }
1493    }
1494
1495    if (!DropProfileSymbolList) {
1496      std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1497          Reader->getProfileSymbolList();
1498      if (ReaderList)
1499        WriterList.merge(*ReaderList);
1500    }
1501  }
1502
1503  if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1504    // Use threshold calculated from profile summary unless specified.
1505    SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1506    auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
1507    uint64_t SampleProfColdThreshold =
1508        ProfileSummaryBuilder::getColdCountThreshold(
1509            (Summary->getDetailedSummary()));
1510
1511    // Trim and merge cold context profile using cold threshold above;
1512    SampleContextTrimmer(ProfileMap)
1513        .trimAndMergeColdContextProfiles(
1514            SampleProfColdThreshold, SampleTrimColdContext,
1515            SampleMergeColdContext, SampleColdContextFrameDepth, false);
1516  }
1517
1518  if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1519    ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
1520    ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1521  } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1522    ProfileConverter CSConverter(ProfileMap);
1523    CSConverter.convertCSProfiles();
1524    ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1525  }
1526
1527  filterFunctions(ProfileMap);
1528
1529  auto WriterOrErr =
1530      SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
1531  if (std::error_code EC = WriterOrErr.getError())
1532    exitWithErrorCode(EC, OutputFilename);
1533
1534  auto Writer = std::move(WriterOrErr.get());
1535  // WriterList will have StringRef refering to string in Buffer.
1536  // Make sure Buffer lives as long as WriterList.
1537  auto Buffer = getInputFileBuf(ProfileSymbolListFile);
1538  handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
1539                        CompressAllSections, UseMD5, GenPartialProfile);
1540
1541  // If OutputSizeLimit is 0 (default), it is the same as write().
1542  if (std::error_code EC =
1543          Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1544    exitWithErrorCode(std::move(EC));
1545}
1546
1547static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1548  StringRef WeightStr, FileName;
1549  std::tie(WeightStr, FileName) = WeightedFilename.split(',');
1550
1551  uint64_t Weight;
1552  if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
1553    exitWithError("input weight must be a positive integer");
1554
1555  return {std::string(FileName), Weight};
1556}
1557
1558static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1559  StringRef Filename = WF.Filename;
1560  uint64_t Weight = WF.Weight;
1561
1562  // If it's STDIN just pass it on.
1563  if (Filename == "-") {
1564    WNI.push_back({std::string(Filename), Weight});
1565    return;
1566  }
1567
1568  llvm::sys::fs::file_status Status;
1569  llvm::sys::fs::status(Filename, Status);
1570  if (!llvm::sys::fs::exists(Status))
1571    exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
1572                      Filename);
1573  // If it's a source file, collect it.
1574  if (llvm::sys::fs::is_regular_file(Status)) {
1575    WNI.push_back({std::string(Filename), Weight});
1576    return;
1577  }
1578
1579  if (llvm::sys::fs::is_directory(Status)) {
1580    std::error_code EC;
1581    for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1582         F != E && !EC; F.increment(EC)) {
1583      if (llvm::sys::fs::is_regular_file(F->path())) {
1584        addWeightedInput(WNI, {F->path(), Weight});
1585      }
1586    }
1587    if (EC)
1588      exitWithErrorCode(EC, Filename);
1589  }
1590}
1591
1592static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1593                                    WeightedFileVector &WFV) {
1594  if (!Buffer)
1595    return;
1596
1597  SmallVector<StringRef, 8> Entries;
1598  StringRef Data = Buffer->getBuffer();
1599  Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1600  for (const StringRef &FileWeightEntry : Entries) {
1601    StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
1602    // Skip comments.
1603    if (SanitizedEntry.starts_with("#"))
1604      continue;
1605    // If there's no comma, it's an unweighted profile.
1606    else if (!SanitizedEntry.contains(','))
1607      addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
1608    else
1609      addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
1610  }
1611}
1612
1613static int merge_main(int argc, const char *argv[]) {
1614  WeightedFileVector WeightedInputs;
1615  for (StringRef Filename : InputFilenames)
1616    addWeightedInput(WeightedInputs, {std::string(Filename), 1});
1617  for (StringRef WeightedFilename : WeightedInputFilenames)
1618    addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));
1619
1620  // Make sure that the file buffer stays alive for the duration of the
1621  // weighted input vector's lifetime.
1622  auto Buffer = getInputFileBuf(InputFilenamesFile);
1623  parseInputFilenamesFile(Buffer.get(), WeightedInputs);
1624
1625  if (WeightedInputs.empty())
1626    exitWithError("no input files specified. See " +
1627                  sys::path::filename(argv[0]) + " " + argv[1] + " -help");
1628
1629  if (DumpInputFileList) {
1630    for (auto &WF : WeightedInputs)
1631      outs() << WF.Weight << "," << WF.Filename << "\n";
1632    return 0;
1633  }
1634
1635  std::unique_ptr<SymbolRemapper> Remapper;
1636  if (!RemappingFile.empty())
1637    Remapper = SymbolRemapper::create(RemappingFile);
1638
1639  if (!SupplInstrWithSample.empty()) {
1640    if (ProfileKind != instr)
1641      exitWithError(
1642          "-supplement-instr-with-sample can only work with -instr. ");
1643
1644    supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputSparse,
1645                           SupplMinSizeThreshold, ZeroCounterThreshold,
1646                           InstrProfColdThreshold);
1647    return 0;
1648  }
1649
1650  if (ProfileKind == instr)
1651    mergeInstrProfile(WeightedInputs, Remapper.get(), MaxDbgCorrelationWarnings,
1652                      ProfiledBinary);
1653  else
1654    mergeSampleProfile(WeightedInputs, Remapper.get(), ProfileSymbolListFile,
1655                       OutputSizeLimit);
1656  return 0;
1657}
1658
1659/// Computer the overlap b/w profile BaseFilename and profile TestFilename.
1660static void overlapInstrProfile(const std::string &BaseFilename,
1661                                const std::string &TestFilename,
1662                                const OverlapFuncFilters &FuncFilter,
1663                                raw_fd_ostream &OS, bool IsCS) {
1664  std::mutex ErrorLock;
1665  SmallSet<instrprof_error, 4> WriterErrorCodes;
1666  WriterContext Context(false, ErrorLock, WriterErrorCodes);
1667  WeightedFile WeightedInput{BaseFilename, 1};
1668  OverlapStats Overlap;
1669  Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1670  if (E)
1671    exitWithError(std::move(E), "error in getting profile count sums");
1672  if (Overlap.Base.CountSum < 1.0f) {
1673    OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1674    exit(0);
1675  }
1676  if (Overlap.Test.CountSum < 1.0f) {
1677    OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1678    exit(0);
1679  }
1680  loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
1681  overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
1682               IsCS);
1683  Overlap.dump(OS);
1684}
1685
1686namespace {
1687struct SampleOverlapStats {
1688  SampleContext BaseName;
1689  SampleContext TestName;
1690  // Number of overlap units
1691  uint64_t OverlapCount = 0;
1692  // Total samples of overlap units
1693  uint64_t OverlapSample = 0;
1694  // Number of and total samples of units that only present in base or test
1695  // profile
1696  uint64_t BaseUniqueCount = 0;
1697  uint64_t BaseUniqueSample = 0;
1698  uint64_t TestUniqueCount = 0;
1699  uint64_t TestUniqueSample = 0;
1700  // Number of units and total samples in base or test profile
1701  uint64_t BaseCount = 0;
1702  uint64_t BaseSample = 0;
1703  uint64_t TestCount = 0;
1704  uint64_t TestSample = 0;
1705  // Number of and total samples of units that present in at least one profile
1706  uint64_t UnionCount = 0;
1707  uint64_t UnionSample = 0;
1708  // Weighted similarity
1709  double Similarity = 0.0;
1710  // For SampleOverlapStats instances representing functions, weights of the
1711  // function in base and test profiles
1712  double BaseWeight = 0.0;
1713  double TestWeight = 0.0;
1714
1715  SampleOverlapStats() = default;
1716};
1717} // end anonymous namespace
1718
namespace {
/// Per-function sample statistics, as accumulated by getFuncSampleStats().
struct FuncSampleStats {
  /// Sum of the line/block sample counts.
  uint64_t SampleSum = 0;
  /// Largest single line/block sample count.
  uint64_t MaxSample = 0;
  /// Number of line/block samples that reached the hot threshold.
  uint64_t HotBlockCount = 0;

  /// All statistics start at zero.
  FuncSampleStats() = default;

  /// Build the statistics from already known values.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum(SampleSum), MaxSample(MaxSample),
        HotBlockCount(HotBlockCount) {}
};
} // end anonymous namespace
1731
namespace {
/// Result of comparing the current elements of two sorted sequences: both keys
/// are equal, the key is only in the first sequence, the key is only in the
/// second sequence, or no comparison has been made yet.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Tracks the state of a sort-merge walk over two sorted maps. \p T is the
/// map iterator type.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  /// Start a walk over [FirstIter, FirstEnd) and [SecondIter, SecondEnd). The
  /// status is MS_None until updateOneStep() is called for the first time.
  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  /// True once the first sequence is exhausted.
  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  /// True once the second sequence is exhausted.
  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// True once both sequences are exhausted.
  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Move forward according to the previous match status (no movement when it
  /// is MS_None, i.e. when nothing has been compared yet), then recompute the
  /// status from the elements the two iterators now point at.
  void updateOneStep();

  /// Current position in the first sequence.
  T getFirstIter() const { return FirstIter; }

  /// Current position in the second sequence.
  T getSecondIter() const { return SecondIter; }

  /// Match status of the current step.
  MatchStatus getMatchStatus() const { return Status; }

private:
  // Position and end of the first container.
  T FirstIter;
  T FirstEnd;
  // Position and end of the second container.
  T SecondIter;
  T SecondEnd;
  // Outcome of the comparison at the current step.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace
1777
1778template <class T> void MatchStep<T>::updateOneStep() {
1779  switch (Status) {
1780  case MS_Match:
1781    ++FirstIter;
1782    ++SecondIter;
1783    break;
1784  case MS_FirstUnique:
1785    ++FirstIter;
1786    break;
1787  case MS_SecondUnique:
1788    ++SecondIter;
1789    break;
1790  case MS_None:
1791    break;
1792  }
1793
1794  // Update Status according to iterators at the current step.
1795  if (areBothFinished())
1796    return;
1797  if (FirstIter != FirstEnd &&
1798      (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
1799    Status = MS_FirstUnique;
1800  else if (SecondIter != SecondEnd &&
1801           (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
1802    Status = MS_SecondUnique;
1803  else
1804    Status = MS_Match;
1805}
1806
1807// Return the sum of line/block samples, the max line/block sample, and the
1808// number of line/block samples above the given threshold in a function
1809// including its inlinees.
1810static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1811                               FuncSampleStats &FuncStats,
1812                               uint64_t HotThreshold) {
1813  for (const auto &L : Func.getBodySamples()) {
1814    uint64_t Sample = L.second.getSamples();
1815    FuncStats.SampleSum += Sample;
1816    FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
1817    if (Sample >= HotThreshold)
1818      ++FuncStats.HotBlockCount;
1819  }
1820
1821  for (const auto &C : Func.getCallsiteSamples()) {
1822    for (const auto &F : C.second)
1823      getFuncSampleStats(F.second, FuncStats, HotThreshold);
1824  }
1825}
1826
1827/// Predicate that determines if a function is hot with a given threshold. We
1828/// keep it separate from its callsites for possible extension in the future.
1829static bool isFunctionHot(const FuncSampleStats &FuncStats,
1830                          uint64_t HotThreshold) {
1831  // We intentionally compare the maximum sample count in a function with the
1832  // HotThreshold to get an approximate determination on hot functions.
1833  return (FuncStats.MaxSample >= HotThreshold);
1834}
1835
1836namespace {
1837class SampleOverlapAggregator {
1838public:
1839  SampleOverlapAggregator(const std::string &BaseFilename,
1840                          const std::string &TestFilename,
1841                          double LowSimilarityThreshold, double Epsilon,
1842                          const OverlapFuncFilters &FuncFilter)
1843      : BaseFilename(BaseFilename), TestFilename(TestFilename),
1844        LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
1845        FuncFilter(FuncFilter) {}
1846
1847  /// Detect 0-sample input profile and report to output stream. This interface
1848  /// should be called after loadProfiles().
1849  bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
1850
1851  /// Write out function-level similarity statistics for functions specified by
1852  /// options --function, --value-cutoff, and --similarity-cutoff.
1853  void dumpFuncSimilarity(raw_fd_ostream &OS) const;
1854
1855  /// Write out program-level similarity and overlap statistics.
1856  void dumpProgramSummary(raw_fd_ostream &OS) const;
1857
1858  /// Write out hot-function and hot-block statistics for base_profile,
1859  /// test_profile, and their overlap. For both cases, the overlap HO is
1860  /// calculated as follows:
1861  ///    Given the number of functions (or blocks) that are hot in both profiles
1862  ///    HCommon and the number of functions (or blocks) that are hot in at
1863  ///    least one profile HUnion, HO = HCommon / HUnion.
1864  void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
1865
1866  /// This function tries matching functions in base and test profiles. For each
1867  /// pair of matched functions, it aggregates the function-level
1868  /// similarity into a profile-level similarity. It also dump function-level
1869  /// similarity information of functions specified by --function,
1870  /// --value-cutoff, and --similarity-cutoff options. The program-level
1871  /// similarity PS is computed as follows:
1872  ///     Given function-level similarity FS(A) for all function A, the
1873  ///     weight of function A in base profile WB(A), and the weight of function
1874  ///     A in test profile WT(A), compute PS(base_profile, test_profile) =
1875  ///     sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
1876  ///     meaning no-overlap.
1877  void computeSampleProfileOverlap(raw_fd_ostream &OS);
1878
1879  /// Initialize ProfOverlap with the sum of samples in base and test
1880  /// profiles. This function also computes and keeps the sum of samples and
1881  /// max sample counts of each function in BaseStats and TestStats for later
1882  /// use to avoid re-computations.
1883  void initializeSampleProfileOverlap();
1884
1885  /// Load profiles specified by BaseFilename and TestFilename.
1886  std::error_code loadProfiles();
1887
1888  using FuncSampleStatsMap =
1889      std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
1890
1891private:
1892  SampleOverlapStats ProfOverlap;
1893  SampleOverlapStats HotFuncOverlap;
1894  SampleOverlapStats HotBlockOverlap;
1895  std::string BaseFilename;
1896  std::string TestFilename;
1897  std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
1898  std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
1899  // BaseStats and TestStats hold FuncSampleStats for each function, with
1900  // function name as the key.
1901  FuncSampleStatsMap BaseStats;
1902  FuncSampleStatsMap TestStats;
1903  // Low similarity threshold in floating point number
1904  double LowSimilarityThreshold;
1905  // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
1906  // for tracking hot blocks.
1907  uint64_t BaseHotThreshold;
1908  uint64_t TestHotThreshold;
1909  // A small threshold used to round the results of floating point accumulations
1910  // to resolve imprecision.
1911  const double Epsilon;
1912  std::multimap<double, SampleOverlapStats, std::greater<double>>
1913      FuncSimilarityDump;
1914  // FuncFilter carries specifications in options --value-cutoff and
1915  // --function.
1916  OverlapFuncFilters FuncFilter;
1917  // Column offsets for printing the function-level details table.
1918  static const unsigned int TestWeightCol = 15;
1919  static const unsigned int SimilarityCol = 30;
1920  static const unsigned int OverlapCol = 43;
1921  static const unsigned int BaseUniqueCol = 53;
1922  static const unsigned int TestUniqueCol = 67;
1923  static const unsigned int BaseSampleCol = 81;
1924  static const unsigned int TestSampleCol = 96;
1925  static const unsigned int FuncNameCol = 111;
1926
1927  /// Return a similarity of two line/block sample counters in the same
1928  /// function in base and test profiles. The line/block-similarity BS(i) is
1929  /// computed as follows:
1930  ///    For an offsets i, given the sample count at i in base profile BB(i),
1931  ///    the sample count at i in test profile BT(i), the sum of sample counts
1932  ///    in this function in base profile SB, and the sum of sample counts in
1933  ///    this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
1934  ///    BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
1935  double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
1936                                const SampleOverlapStats &FuncOverlap) const;
1937
1938  void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
1939                             uint64_t HotBlockCount);
1940
1941  void getHotFunctions(const FuncSampleStatsMap &ProfStats,
1942                       FuncSampleStatsMap &HotFunc,
1943                       uint64_t HotThreshold) const;
1944
1945  void computeHotFuncOverlap();
1946
1947  /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1948  /// Difference for two sample units in a matched function according to the
1949  /// given match status.
1950  void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
1951                                     uint64_t HotBlockCount,
1952                                     SampleOverlapStats &FuncOverlap,
1953                                     double &Difference, MatchStatus Status);
1954
1955  /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
1956  /// Difference for unmatched callees that only present in one profile in a
1957  /// matched caller function.
1958  void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
1959                                SampleOverlapStats &FuncOverlap,
1960                                double &Difference, MatchStatus Status);
1961
1962  /// This function updates sample overlap statistics of an overlap function in
1963  /// base and test profile. It also calculates a function-internal similarity
1964  /// FIS as follows:
1965  ///    For offsets i that have samples in at least one profile in this
1966  ///    function A, given BS(i) returned by computeBlockSimilarity(), compute
1967  ///    FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
1968  ///    0.0 meaning no overlap.
1969  double computeSampleFunctionInternalOverlap(
1970      const sampleprof::FunctionSamples &BaseFunc,
1971      const sampleprof::FunctionSamples &TestFunc,
1972      SampleOverlapStats &FuncOverlap);
1973
1974  /// Function-level similarity (FS) is a weighted value over function internal
1975  /// similarity (FIS). This function computes a function's FS from its FIS by
1976  /// applying the weight.
1977  double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
1978                                 uint64_t TestFuncSample) const;
1979
1980  /// The function-level similarity FS(A) for a function A is computed as
1981  /// follows:
1982  ///     Compute a function-internal similarity FIS(A) by
1983  ///     computeSampleFunctionInternalOverlap(). Then, with the weight of
1984  ///     function A in base profile WB(A), and the weight of function A in test
1985  ///     profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
1986  ///     ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
1987  double
1988  computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
1989                               const sampleprof::FunctionSamples *TestFunc,
1990                               SampleOverlapStats *FuncOverlap,
1991                               uint64_t BaseFuncSample,
1992                               uint64_t TestFuncSample);
1993
1994  /// Profile-level similarity (PS) is a weighted aggregate over function-level
1995  /// similarities (FS). This method weights the FS value by the function
1996  /// weights in the base and test profiles for the aggregation.
1997  double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
1998                            uint64_t TestFuncSample) const;
1999};
2000} // end anonymous namespace
2001
2002bool SampleOverlapAggregator::detectZeroSampleProfile(
2003    raw_fd_ostream &OS) const {
2004  bool HaveZeroSample = false;
2005  if (ProfOverlap.BaseSample == 0) {
2006    OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2007    HaveZeroSample = true;
2008  }
2009  if (ProfOverlap.TestSample == 0) {
2010    OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2011    HaveZeroSample = true;
2012  }
2013  return HaveZeroSample;
2014}
2015
2016double SampleOverlapAggregator::computeBlockSimilarity(
2017    uint64_t BaseSample, uint64_t TestSample,
2018    const SampleOverlapStats &FuncOverlap) const {
2019  double BaseFrac = 0.0;
2020  double TestFrac = 0.0;
2021  if (FuncOverlap.BaseSample > 0)
2022    BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2023  if (FuncOverlap.TestSample > 0)
2024    TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2025  return 1.0 - std::fabs(BaseFrac - TestFrac);
2026}
2027
2028void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2029                                                    uint64_t TestSample,
2030                                                    uint64_t HotBlockCount) {
2031  bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2032  bool IsTestHot = (TestSample >= TestHotThreshold);
2033  if (!IsBaseHot && !IsTestHot)
2034    return;
2035
2036  HotBlockOverlap.UnionCount += HotBlockCount;
2037  if (IsBaseHot)
2038    HotBlockOverlap.BaseCount += HotBlockCount;
2039  if (IsTestHot)
2040    HotBlockOverlap.TestCount += HotBlockCount;
2041  if (IsBaseHot && IsTestHot)
2042    HotBlockOverlap.OverlapCount += HotBlockCount;
2043}
2044
2045void SampleOverlapAggregator::getHotFunctions(
2046    const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2047    uint64_t HotThreshold) const {
2048  for (const auto &F : ProfStats) {
2049    if (isFunctionHot(F.second, HotThreshold))
2050      HotFunc.emplace(F.first, F.second);
2051  }
2052}
2053
2054void SampleOverlapAggregator::computeHotFuncOverlap() {
2055  FuncSampleStatsMap BaseHotFunc;
2056  getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
2057  HotFuncOverlap.BaseCount = BaseHotFunc.size();
2058
2059  FuncSampleStatsMap TestHotFunc;
2060  getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
2061  HotFuncOverlap.TestCount = TestHotFunc.size();
2062  HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2063
2064  for (const auto &F : BaseHotFunc) {
2065    if (TestHotFunc.count(F.first))
2066      ++HotFuncOverlap.OverlapCount;
2067    else
2068      ++HotFuncOverlap.UnionCount;
2069  }
2070}
2071
2072void SampleOverlapAggregator::updateOverlapStatsForFunction(
2073    uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2074    SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2075  assert(Status != MS_None &&
2076         "Match status should be updated before updating overlap statistics");
2077  if (Status == MS_FirstUnique) {
2078    TestSample = 0;
2079    FuncOverlap.BaseUniqueSample += BaseSample;
2080  } else if (Status == MS_SecondUnique) {
2081    BaseSample = 0;
2082    FuncOverlap.TestUniqueSample += TestSample;
2083  } else {
2084    ++FuncOverlap.OverlapCount;
2085  }
2086
2087  FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
2088  FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
2089  Difference +=
2090      1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2091  updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2092}
2093
2094void SampleOverlapAggregator::updateForUnmatchedCallee(
2095    const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2096    double &Difference, MatchStatus Status) {
2097  assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2098         "Status must be either of the two unmatched cases");
2099  FuncSampleStats FuncStats;
2100  if (Status == MS_FirstUnique) {
2101    getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
2102    updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
2103                                  FuncStats.HotBlockCount, FuncOverlap,
2104                                  Difference, Status);
2105  } else {
2106    getFuncSampleStats(Func, FuncStats, TestHotThreshold);
2107    updateOverlapStatsForFunction(0, FuncStats.SampleSum,
2108                                  FuncStats.HotBlockCount, FuncOverlap,
2109                                  Difference, Status);
2110  }
2111}
2112
2113double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2114    const sampleprof::FunctionSamples &BaseFunc,
2115    const sampleprof::FunctionSamples &TestFunc,
2116    SampleOverlapStats &FuncOverlap) {
2117
2118  using namespace sampleprof;
2119
2120  double Difference = 0;
2121
2122  // Accumulate Difference for regular line/block samples in the function.
2123  // We match them through sort-merge join algorithm because
2124  // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2125  // by their offsets.
2126  MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2127      BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2128      TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2129  BlockIterStep.updateOneStep();
2130  while (!BlockIterStep.areBothFinished()) {
2131    uint64_t BaseSample =
2132        BlockIterStep.isFirstFinished()
2133            ? 0
2134            : BlockIterStep.getFirstIter()->second.getSamples();
2135    uint64_t TestSample =
2136        BlockIterStep.isSecondFinished()
2137            ? 0
2138            : BlockIterStep.getSecondIter()->second.getSamples();
2139    updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
2140                                  Difference, BlockIterStep.getMatchStatus());
2141
2142    BlockIterStep.updateOneStep();
2143  }
2144
2145  // Accumulate Difference for callsite lines in the function. We match
2146  // them through sort-merge algorithm because
2147  // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2148  // ordered by their offsets.
2149  MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2150      BaseFunc.getCallsiteSamples().cbegin(),
2151      BaseFunc.getCallsiteSamples().cend(),
2152      TestFunc.getCallsiteSamples().cbegin(),
2153      TestFunc.getCallsiteSamples().cend());
2154  CallsiteIterStep.updateOneStep();
2155  while (!CallsiteIterStep.areBothFinished()) {
2156    MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2157    assert(CallsiteStepStatus != MS_None &&
2158           "Match status should be updated before entering loop body");
2159
2160    if (CallsiteStepStatus != MS_Match) {
2161      auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2162                          ? CallsiteIterStep.getFirstIter()
2163                          : CallsiteIterStep.getSecondIter();
2164      for (const auto &F : Callsite->second)
2165        updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
2166                                 CallsiteStepStatus);
2167    } else {
2168      // There may be multiple inlinees at the same offset, so we need to try
2169      // matching all of them. This match is implemented through sort-merge
2170      // algorithm because callsite records at the same offset are ordered by
2171      // function names.
2172      MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2173          CallsiteIterStep.getFirstIter()->second.cbegin(),
2174          CallsiteIterStep.getFirstIter()->second.cend(),
2175          CallsiteIterStep.getSecondIter()->second.cbegin(),
2176          CallsiteIterStep.getSecondIter()->second.cend());
2177      CalleeIterStep.updateOneStep();
2178      while (!CalleeIterStep.areBothFinished()) {
2179        MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2180        if (CalleeStepStatus != MS_Match) {
2181          auto Callee = (CalleeStepStatus == MS_FirstUnique)
2182                            ? CalleeIterStep.getFirstIter()
2183                            : CalleeIterStep.getSecondIter();
2184          updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
2185                                   CalleeStepStatus);
2186        } else {
2187          // An inlined function can contain other inlinees inside, so compute
2188          // the Difference recursively.
2189          Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2190                                      CalleeIterStep.getFirstIter()->second,
2191                                      CalleeIterStep.getSecondIter()->second,
2192                                      FuncOverlap);
2193        }
2194        CalleeIterStep.updateOneStep();
2195      }
2196    }
2197    CallsiteIterStep.updateOneStep();
2198  }
2199
2200  // Difference reflects the total differences of line/block samples in this
2201  // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2202  // reflect the similarity between function profiles in [0.0f to 1.0f].
2203  return (2.0 - Difference) / 2;
2204}
2205
2206double SampleOverlapAggregator::weightForFuncSimilarity(
2207    double FuncInternalSimilarity, uint64_t BaseFuncSample,
2208    uint64_t TestFuncSample) const {
2209  // Compute the weight as the distance between the function weights in two
2210  // profiles.
2211  double BaseFrac = 0.0;
2212  double TestFrac = 0.0;
2213  assert(ProfOverlap.BaseSample > 0 &&
2214         "Total samples in base profile should be greater than 0");
2215  BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2216  assert(ProfOverlap.TestSample > 0 &&
2217         "Total samples in test profile should be greater than 0");
2218  TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2219  double WeightDistance = std::fabs(BaseFrac - TestFrac);
2220
2221  // Take WeightDistance into the similarity.
2222  return FuncInternalSimilarity * (1 - WeightDistance);
2223}
2224
2225double
2226SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2227                                            uint64_t BaseFuncSample,
2228                                            uint64_t TestFuncSample) const {
2229
2230  double BaseFrac = 0.0;
2231  double TestFrac = 0.0;
2232  assert(ProfOverlap.BaseSample > 0 &&
2233         "Total samples in base profile should be greater than 0");
2234  BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2235  assert(ProfOverlap.TestSample > 0 &&
2236         "Total samples in test profile should be greater than 0");
2237  TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2238  return FuncSimilarity * (BaseFrac + TestFrac);
2239}
2240
2241double SampleOverlapAggregator::computeSampleFunctionOverlap(
2242    const sampleprof::FunctionSamples *BaseFunc,
2243    const sampleprof::FunctionSamples *TestFunc,
2244    SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2245    uint64_t TestFuncSample) {
2246  // Default function internal similarity before weighted, meaning two functions
2247  // has no overlap.
2248  const double DefaultFuncInternalSimilarity = 0;
2249  double FuncSimilarity;
2250  double FuncInternalSimilarity;
2251
2252  // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2253  // In this case, we use DefaultFuncInternalSimilarity as the function internal
2254  // similarity.
2255  if (!BaseFunc || !TestFunc) {
2256    FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2257  } else {
2258    assert(FuncOverlap != nullptr &&
2259           "FuncOverlap should be provided in this case");
2260    FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2261        *BaseFunc, *TestFunc, *FuncOverlap);
2262    // Now, FuncInternalSimilarity may be a little less than 0 due to
2263    // imprecision of floating point accumulations. Make it zero if the
2264    // difference is below Epsilon.
2265    FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
2266                                 ? 0
2267                                 : FuncInternalSimilarity;
2268  }
2269  FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2270                                           BaseFuncSample, TestFuncSample);
2271  return FuncSimilarity;
2272}
2273
2274void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2275  using namespace sampleprof;
2276
2277  std::unordered_map<SampleContext, const FunctionSamples *,
2278                     SampleContext::Hash>
2279      BaseFuncProf;
2280  const auto &BaseProfiles = BaseReader->getProfiles();
2281  for (const auto &BaseFunc : BaseProfiles) {
2282    BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
2283  }
2284  ProfOverlap.UnionCount = BaseFuncProf.size();
2285
2286  const auto &TestProfiles = TestReader->getProfiles();
2287  for (const auto &TestFunc : TestProfiles) {
2288    SampleOverlapStats FuncOverlap;
2289    FuncOverlap.TestName = TestFunc.second.getContext();
2290    assert(TestStats.count(FuncOverlap.TestName) &&
2291           "TestStats should have records for all functions in test profile "
2292           "except inlinees");
2293    FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2294
2295    bool Matched = false;
2296    const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
2297    if (Match == BaseFuncProf.end()) {
2298      const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2299      ++ProfOverlap.TestUniqueCount;
2300      ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2301      FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2302
2303      updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
2304
2305      double FuncSimilarity = computeSampleFunctionOverlap(
2306          nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
2307      ProfOverlap.Similarity +=
2308          weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
2309
2310      ++ProfOverlap.UnionCount;
2311      ProfOverlap.UnionSample += FuncStats.SampleSum;
2312    } else {
2313      ++ProfOverlap.OverlapCount;
2314
2315      // Two functions match with each other. Compute function-level overlap and
2316      // aggregate them into profile-level overlap.
2317      FuncOverlap.BaseName = Match->second->getContext();
2318      assert(BaseStats.count(FuncOverlap.BaseName) &&
2319             "BaseStats should have records for all functions in base profile "
2320             "except inlinees");
2321      FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2322
2323      FuncOverlap.Similarity = computeSampleFunctionOverlap(
2324          Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
2325          FuncOverlap.TestSample);
2326      ProfOverlap.Similarity +=
2327          weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
2328                             FuncOverlap.TestSample);
2329      ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2330      ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2331
2332      // Accumulate the percentage of base unique and test unique samples into
2333      // ProfOverlap.
2334      ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2335      ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2336
2337      // Remove matched base functions for later reporting functions not found
2338      // in test profile.
2339      BaseFuncProf.erase(Match);
2340      Matched = true;
2341    }
2342
2343    // Print function-level similarity information if specified by options.
2344    assert(TestStats.count(FuncOverlap.TestName) &&
2345           "TestStats should have records for all functions in test profile "
2346           "except inlinees");
2347    if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2348        (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2349        (Matched && !FuncFilter.NameFilter.empty() &&
2350         FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
2351             std::string::npos)) {
2352      assert(ProfOverlap.BaseSample > 0 &&
2353             "Total samples in base profile should be greater than 0");
2354      FuncOverlap.BaseWeight =
2355          static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2356      assert(ProfOverlap.TestSample > 0 &&
2357             "Total samples in test profile should be greater than 0");
2358      FuncOverlap.TestWeight =
2359          static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2360      FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
2361    }
2362  }
2363
2364  // Traverse through functions in base profile but not in test profile.
2365  for (const auto &F : BaseFuncProf) {
2366    assert(BaseStats.count(F.second->getContext()) &&
2367           "BaseStats should have records for all functions in base profile "
2368           "except inlinees");
2369    const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2370    ++ProfOverlap.BaseUniqueCount;
2371    ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2372
2373    updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
2374
2375    double FuncSimilarity = computeSampleFunctionOverlap(
2376        nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
2377    ProfOverlap.Similarity +=
2378        weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
2379
2380    ProfOverlap.UnionSample += FuncStats.SampleSum;
2381  }
2382
2383  // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2384  // of floating point accumulations. Make it 1.0 if the difference is below
2385  // Epsilon.
2386  ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
2387                               ? 1
2388                               : ProfOverlap.Similarity;
2389
2390  computeHotFuncOverlap();
2391}
2392
2393void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2394  const auto &BaseProf = BaseReader->getProfiles();
2395  for (const auto &I : BaseProf) {
2396    ++ProfOverlap.BaseCount;
2397    FuncSampleStats FuncStats;
2398    getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
2399    ProfOverlap.BaseSample += FuncStats.SampleSum;
2400    BaseStats.emplace(I.second.getContext(), FuncStats);
2401  }
2402
2403  const auto &TestProf = TestReader->getProfiles();
2404  for (const auto &I : TestProf) {
2405    ++ProfOverlap.TestCount;
2406    FuncSampleStats FuncStats;
2407    getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
2408    ProfOverlap.TestSample += FuncStats.SampleSum;
2409    TestStats.emplace(I.second.getContext(), FuncStats);
2410  }
2411
2412  ProfOverlap.BaseName = StringRef(BaseFilename);
2413  ProfOverlap.TestName = StringRef(TestFilename);
2414}
2415
2416void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2417  using namespace sampleprof;
2418
2419  if (FuncSimilarityDump.empty())
2420    return;
2421
2422  formatted_raw_ostream FOS(OS);
2423  FOS << "Function-level details:\n";
2424  FOS << "Base weight";
2425  FOS.PadToColumn(TestWeightCol);
2426  FOS << "Test weight";
2427  FOS.PadToColumn(SimilarityCol);
2428  FOS << "Similarity";
2429  FOS.PadToColumn(OverlapCol);
2430  FOS << "Overlap";
2431  FOS.PadToColumn(BaseUniqueCol);
2432  FOS << "Base unique";
2433  FOS.PadToColumn(TestUniqueCol);
2434  FOS << "Test unique";
2435  FOS.PadToColumn(BaseSampleCol);
2436  FOS << "Base samples";
2437  FOS.PadToColumn(TestSampleCol);
2438  FOS << "Test samples";
2439  FOS.PadToColumn(FuncNameCol);
2440  FOS << "Function name\n";
2441  for (const auto &F : FuncSimilarityDump) {
2442    double OverlapPercent =
2443        F.second.UnionSample > 0
2444            ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2445            : 0;
2446    double BaseUniquePercent =
2447        F.second.BaseSample > 0
2448            ? static_cast<double>(F.second.BaseUniqueSample) /
2449                  F.second.BaseSample
2450            : 0;
2451    double TestUniquePercent =
2452        F.second.TestSample > 0
2453            ? static_cast<double>(F.second.TestUniqueSample) /
2454                  F.second.TestSample
2455            : 0;
2456
2457    FOS << format("%.2f%%", F.second.BaseWeight * 100);
2458    FOS.PadToColumn(TestWeightCol);
2459    FOS << format("%.2f%%", F.second.TestWeight * 100);
2460    FOS.PadToColumn(SimilarityCol);
2461    FOS << format("%.2f%%", F.second.Similarity * 100);
2462    FOS.PadToColumn(OverlapCol);
2463    FOS << format("%.2f%%", OverlapPercent * 100);
2464    FOS.PadToColumn(BaseUniqueCol);
2465    FOS << format("%.2f%%", BaseUniquePercent * 100);
2466    FOS.PadToColumn(TestUniqueCol);
2467    FOS << format("%.2f%%", TestUniquePercent * 100);
2468    FOS.PadToColumn(BaseSampleCol);
2469    FOS << F.second.BaseSample;
2470    FOS.PadToColumn(TestSampleCol);
2471    FOS << F.second.TestSample;
2472    FOS.PadToColumn(FuncNameCol);
2473    FOS << F.second.TestName.toString() << "\n";
2474  }
2475}
2476
2477void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2478  OS << "Profile overlap infomation for base_profile: "
2479     << ProfOverlap.BaseName.toString()
2480     << " and test_profile: " << ProfOverlap.TestName.toString()
2481     << "\nProgram level:\n";
2482
2483  OS << "  Whole program profile similarity: "
2484     << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
2485
2486  assert(ProfOverlap.UnionSample > 0 &&
2487         "Total samples in two profile should be greater than 0");
2488  double OverlapPercent =
2489      static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2490  assert(ProfOverlap.BaseSample > 0 &&
2491         "Total samples in base profile should be greater than 0");
2492  double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2493                             ProfOverlap.BaseSample;
2494  assert(ProfOverlap.TestSample > 0 &&
2495         "Total samples in test profile should be greater than 0");
2496  double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2497                             ProfOverlap.TestSample;
2498
2499  OS << "  Whole program sample overlap: "
2500     << format("%.3f%%", OverlapPercent * 100) << "\n";
2501  OS << "    percentage of samples unique in base profile: "
2502     << format("%.3f%%", BaseUniquePercent * 100) << "\n";
2503  OS << "    percentage of samples unique in test profile: "
2504     << format("%.3f%%", TestUniquePercent * 100) << "\n";
2505  OS << "    total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2506     << "    total samples in test profile: " << ProfOverlap.TestSample << "\n";
2507
2508  assert(ProfOverlap.UnionCount > 0 &&
2509         "There should be at least one function in two input profiles");
2510  double FuncOverlapPercent =
2511      static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2512  OS << "  Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
2513     << "\n";
2514  OS << "    overlap functions: " << ProfOverlap.OverlapCount << "\n";
2515  OS << "    functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2516     << "\n";
2517  OS << "    functions unique in test profile: " << ProfOverlap.TestUniqueCount
2518     << "\n";
2519}
2520
2521void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2522    raw_fd_ostream &OS) const {
2523  assert(HotFuncOverlap.UnionCount > 0 &&
2524         "There should be at least one hot function in two input profiles");
2525  OS << "  Hot-function overlap: "
2526     << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
2527                             HotFuncOverlap.UnionCount * 100)
2528     << "\n";
2529  OS << "    overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2530  OS << "    hot functions unique in base profile: "
2531     << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2532  OS << "    hot functions unique in test profile: "
2533     << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2534
2535  assert(HotBlockOverlap.UnionCount > 0 &&
2536         "There should be at least one hot block in two input profiles");
2537  OS << "  Hot-block overlap: "
2538     << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
2539                             HotBlockOverlap.UnionCount * 100)
2540     << "\n";
2541  OS << "    overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2542  OS << "    hot blocks unique in base profile: "
2543     << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2544  OS << "    hot blocks unique in test profile: "
2545     << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2546}
2547
2548std::error_code SampleOverlapAggregator::loadProfiles() {
2549  using namespace sampleprof;
2550
2551  LLVMContext Context;
2552  auto FS = vfs::getRealFileSystem();
2553  auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
2554                                                     FSDiscriminatorPassOption);
2555  if (std::error_code EC = BaseReaderOrErr.getError())
2556    exitWithErrorCode(EC, BaseFilename);
2557
2558  auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
2559                                                     FSDiscriminatorPassOption);
2560  if (std::error_code EC = TestReaderOrErr.getError())
2561    exitWithErrorCode(EC, TestFilename);
2562
2563  BaseReader = std::move(BaseReaderOrErr.get());
2564  TestReader = std::move(TestReaderOrErr.get());
2565
2566  if (std::error_code EC = BaseReader->read())
2567    exitWithErrorCode(EC, BaseFilename);
2568  if (std::error_code EC = TestReader->read())
2569    exitWithErrorCode(EC, TestFilename);
2570  if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2571    exitWithError(
2572        "cannot compare probe-based profile with non-probe-based profile");
2573  if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2574    exitWithError("cannot compare CS profile with non-CS profile");
2575
2576  // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2577  // profile summary.
2578  ProfileSummary &BasePS = BaseReader->getSummary();
2579  ProfileSummary &TestPS = TestReader->getSummary();
2580  BaseHotThreshold =
2581      ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
2582  TestHotThreshold =
2583      ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
2584
2585  return std::error_code();
2586}
2587
2588void overlapSampleProfile(const std::string &BaseFilename,
2589                          const std::string &TestFilename,
2590                          const OverlapFuncFilters &FuncFilter,
2591                          uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2592  using namespace sampleprof;
2593
2594  // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2595  // report 2--3 places after decimal point in percentage numbers.
2596  SampleOverlapAggregator OverlapAggr(
2597      BaseFilename, TestFilename,
2598      static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2599  if (std::error_code EC = OverlapAggr.loadProfiles())
2600    exitWithErrorCode(EC);
2601
2602  OverlapAggr.initializeSampleProfileOverlap();
2603  if (OverlapAggr.detectZeroSampleProfile(OS))
2604    return;
2605
2606  OverlapAggr.computeSampleProfileOverlap(OS);
2607
2608  OverlapAggr.dumpProgramSummary(OS);
2609  OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2610  OverlapAggr.dumpFuncSimilarity(OS);
2611}
2612
2613static int overlap_main(int argc, const char *argv[]) {
2614  std::error_code EC;
2615  raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2616  if (EC)
2617    exitWithErrorCode(EC, OutputFilename);
2618
2619  if (ProfileKind == instr)
2620    overlapInstrProfile(BaseFilename, TestFilename,
2621                        OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2622                        OS, IsCS);
2623  else
2624    overlapSampleProfile(BaseFilename, TestFilename,
2625                         OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2626                         SimilarityCutoff, OS);
2627
2628  return 0;
2629}
2630
namespace {
// Running totals collected while traversing the value sites of one value
// kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  // Number of value sites seen.
  uint64_t TotalNumValueSites{0};
  // Number of those sites that had at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  // Number of profiled values summed over all sites.
  uint64_t TotalNumValues{0};
  // Entry K-1 counts the sites that had exactly K profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2640
2641static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2642                                  ValueSitesStats &Stats, raw_fd_ostream &OS,
2643                                  InstrProfSymtab *Symtab) {
2644  uint32_t NS = Func.getNumValueSites(VK);
2645  Stats.TotalNumValueSites += NS;
2646  for (size_t I = 0; I < NS; ++I) {
2647    uint32_t NV = Func.getNumValueDataForSite(VK, I);
2648    std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, I);
2649    Stats.TotalNumValues += NV;
2650    if (NV) {
2651      Stats.TotalNumValueSitesWithValueProfile++;
2652      if (NV > Stats.ValueSitesHistogram.size())
2653        Stats.ValueSitesHistogram.resize(NV, 0);
2654      Stats.ValueSitesHistogram[NV - 1]++;
2655    }
2656
2657    uint64_t SiteSum = 0;
2658    for (uint32_t V = 0; V < NV; V++)
2659      SiteSum += VD[V].Count;
2660    if (SiteSum == 0)
2661      SiteSum = 1;
2662
2663    for (uint32_t V = 0; V < NV; V++) {
2664      OS << "\t[ " << format("%2u", I) << ", ";
2665      if (Symtab == nullptr)
2666        OS << format("%4" PRIu64, VD[V].Value);
2667      else
2668        OS << Symtab->getFuncOrVarName(VD[V].Value);
2669      OS << ", " << format("%10" PRId64, VD[V].Count) << " ] ("
2670         << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n";
2671    }
2672  }
2673}
2674
2675static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2676                                ValueSitesStats &Stats) {
2677  OS << "  Total number of sites: " << Stats.TotalNumValueSites << "\n";
2678  OS << "  Total number of sites with values: "
2679     << Stats.TotalNumValueSitesWithValueProfile << "\n";
2680  OS << "  Total number of profiled values: " << Stats.TotalNumValues << "\n";
2681
2682  OS << "  Value sites histogram:\n\tNumTargets, SiteCount\n";
2683  for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2684    if (Stats.ValueSitesHistogram[I] > 0)
2685      OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2686  }
2687}
2688
2689static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
2690  if (SFormat == ShowFormat::Json)
2691    exitWithError("JSON output is not supported for instr profiles");
2692  if (SFormat == ShowFormat::Yaml)
2693    exitWithError("YAML output is not supported for instr profiles");
2694  auto FS = vfs::getRealFileSystem();
2695  auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
2696  std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2697  if (ShowDetailedSummary && Cutoffs.empty()) {
2698    Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2699  }
2700  InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2701  if (Error E = ReaderOrErr.takeError())
2702    exitWithError(std::move(E), Filename);
2703
2704  auto Reader = std::move(ReaderOrErr.get());
2705  bool IsIRInstr = Reader->isIRLevelProfile();
2706  size_t ShownFunctions = 0;
2707  size_t BelowCutoffFunctions = 0;
2708  int NumVPKind = IPVK_Last - IPVK_First + 1;
2709  std::vector<ValueSitesStats> VPStats(NumVPKind);
2710
2711  auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2712                   const std::pair<std::string, uint64_t> &v2) {
2713    return v1.second > v2.second;
2714  };
2715
2716  std::priority_queue<std::pair<std::string, uint64_t>,
2717                      std::vector<std::pair<std::string, uint64_t>>,
2718                      decltype(MinCmp)>
2719      HottestFuncs(MinCmp);
2720
2721  if (!TextFormat && OnlyListBelow) {
2722    OS << "The list of functions with the maximum counter less than "
2723       << ShowValueCutoff << ":\n";
2724  }
2725
2726  // Add marker so that IR-level instrumentation round-trips properly.
2727  if (TextFormat && IsIRInstr)
2728    OS << ":ir\n";
2729
2730  for (const auto &Func : *Reader) {
2731    if (Reader->isIRLevelProfile()) {
2732      bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
2733      if (FuncIsCS != ShowCS)
2734        continue;
2735    }
2736    bool Show = ShowAllFunctions ||
2737                (!FuncNameFilter.empty() && Func.Name.contains(FuncNameFilter));
2738
2739    bool doTextFormatDump = (Show && TextFormat);
2740
2741    if (doTextFormatDump) {
2742      InstrProfSymtab &Symtab = Reader->getSymtab();
2743      InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
2744                                         OS);
2745      continue;
2746    }
2747
2748    assert(Func.Counts.size() > 0 && "function missing entry counter");
2749    Builder.addRecord(Func);
2750
2751    if (ShowCovered) {
2752      if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; }))
2753        OS << Func.Name << "\n";
2754      continue;
2755    }
2756
2757    uint64_t FuncMax = 0;
2758    uint64_t FuncSum = 0;
2759
2760    auto PseudoKind = Func.getCountPseudoKind();
2761    if (PseudoKind != InstrProfRecord::NotPseudo) {
2762      if (Show) {
2763        if (!ShownFunctions)
2764          OS << "Counters:\n";
2765        ++ShownFunctions;
2766        OS << "  " << Func.Name << ":\n"
2767           << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2768           << "    Counters: " << Func.Counts.size();
2769        if (PseudoKind == InstrProfRecord::PseudoHot)
2770          OS << "    <PseudoHot>\n";
2771        else if (PseudoKind == InstrProfRecord::PseudoWarm)
2772          OS << "    <PseudoWarm>\n";
2773        else
2774          llvm_unreachable("Unknown PseudoKind");
2775      }
2776      continue;
2777    }
2778
2779    for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
2780      FuncMax = std::max(FuncMax, Func.Counts[I]);
2781      FuncSum += Func.Counts[I];
2782    }
2783
2784    if (FuncMax < ShowValueCutoff) {
2785      ++BelowCutoffFunctions;
2786      if (OnlyListBelow) {
2787        OS << "  " << Func.Name << ": (Max = " << FuncMax
2788           << " Sum = " << FuncSum << ")\n";
2789      }
2790      continue;
2791    } else if (OnlyListBelow)
2792      continue;
2793
2794    if (TopNFunctions) {
2795      if (HottestFuncs.size() == TopNFunctions) {
2796        if (HottestFuncs.top().second < FuncMax) {
2797          HottestFuncs.pop();
2798          HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2799        }
2800      } else
2801        HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2802    }
2803
2804    if (Show) {
2805      if (!ShownFunctions)
2806        OS << "Counters:\n";
2807
2808      ++ShownFunctions;
2809
2810      OS << "  " << Func.Name << ":\n"
2811         << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2812         << "    Counters: " << Func.Counts.size() << "\n";
2813      if (!IsIRInstr)
2814        OS << "    Function count: " << Func.Counts[0] << "\n";
2815
2816      if (ShowIndirectCallTargets)
2817        OS << "    Indirect Call Site Count: "
2818           << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
2819
2820      uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
2821      if (ShowMemOPSizes && NumMemOPCalls > 0)
2822        OS << "    Number of Memory Intrinsics Calls: " << NumMemOPCalls
2823           << "\n";
2824
2825      if (ShowCounts) {
2826        OS << "    Block counts: [";
2827        size_t Start = (IsIRInstr ? 0 : 1);
2828        for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2829          OS << (I == Start ? "" : ", ") << Func.Counts[I];
2830        }
2831        OS << "]\n";
2832      }
2833
2834      if (ShowIndirectCallTargets) {
2835        OS << "    Indirect Target Results:\n";
2836        traverseAllValueSites(Func, IPVK_IndirectCallTarget,
2837                              VPStats[IPVK_IndirectCallTarget], OS,
2838                              &(Reader->getSymtab()));
2839      }
2840
2841      if (ShowMemOPSizes && NumMemOPCalls > 0) {
2842        OS << "    Memory Intrinsic Size Results:\n";
2843        traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
2844                              nullptr);
2845      }
2846    }
2847  }
2848  if (Reader->hasError())
2849    exitWithError(Reader->getError(), Filename);
2850
2851  if (TextFormat || ShowCovered)
2852    return 0;
2853  std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
2854  bool IsIR = Reader->isIRLevelProfile();
2855  OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
2856  if (IsIR)
2857    OS << "  entry_first = " << Reader->instrEntryBBEnabled();
2858  OS << "\n";
2859  if (ShowAllFunctions || !FuncNameFilter.empty())
2860    OS << "Functions shown: " << ShownFunctions << "\n";
2861  OS << "Total functions: " << PS->getNumFunctions() << "\n";
2862  if (ShowValueCutoff > 0) {
2863    OS << "Number of functions with maximum count (< " << ShowValueCutoff
2864       << "): " << BelowCutoffFunctions << "\n";
2865    OS << "Number of functions with maximum count (>= " << ShowValueCutoff
2866       << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
2867  }
2868  OS << "Maximum function count: " << PS->getMaxFunctionCount() << "\n";
2869  OS << "Maximum internal block count: " << PS->getMaxInternalCount() << "\n";
2870
2871  if (TopNFunctions) {
2872    std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
2873    while (!HottestFuncs.empty()) {
2874      SortedHottestFuncs.emplace_back(HottestFuncs.top());
2875      HottestFuncs.pop();
2876    }
2877    OS << "Top " << TopNFunctions
2878       << " functions with the largest internal block counts: \n";
2879    for (auto &hotfunc : llvm::reverse(SortedHottestFuncs))
2880      OS << "  " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
2881  }
2882
2883  if (ShownFunctions && ShowIndirectCallTargets) {
2884    OS << "Statistics for indirect call sites profile:\n";
2885    showValueSitesStats(OS, IPVK_IndirectCallTarget,
2886                        VPStats[IPVK_IndirectCallTarget]);
2887  }
2888
2889  if (ShownFunctions && ShowMemOPSizes) {
2890    OS << "Statistics for memory intrinsic calls sizes profile:\n";
2891    showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
2892  }
2893
2894  if (ShowDetailedSummary) {
2895    OS << "Total number of blocks: " << PS->getNumCounts() << "\n";
2896    OS << "Total count: " << PS->getTotalCount() << "\n";
2897    PS->printDetailedSummary(OS);
2898  }
2899
2900  if (ShowBinaryIds)
2901    if (Error E = Reader->printBinaryIds(OS))
2902      exitWithError(std::move(E), Filename);
2903
2904  if (ShowProfileVersion)
2905    OS << "Profile version: " << Reader->getVersion() << "\n";
2906
2907  if (ShowTemporalProfTraces) {
2908    auto &Traces = Reader->getTemporalProfTraces();
2909    OS << "Temporal Profile Traces (samples=" << Traces.size()
2910       << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
2911    for (unsigned i = 0; i < Traces.size(); i++) {
2912      OS << "  Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
2913         << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
2914      for (auto &NameRef : Traces[i].FunctionNameRefs)
2915        OS << "    " << Reader->getSymtab().getFuncOrVarName(NameRef) << "\n";
2916    }
2917  }
2918
2919  return 0;
2920}
2921
2922static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
2923                            raw_fd_ostream &OS) {
2924  if (!Reader->dumpSectionInfo(OS)) {
2925    WithColor::warning() << "-show-sec-info-only is only supported for "
2926                         << "sample profile in extbinary format and is "
2927                         << "ignored for other formats.\n";
2928    return;
2929  }
2930}
2931
2932namespace {
2933struct HotFuncInfo {
2934  std::string FuncName;
2935  uint64_t TotalCount = 0;
2936  double TotalCountPercent = 0.0f;
2937  uint64_t MaxCount = 0;
2938  uint64_t EntryCount = 0;
2939
2940  HotFuncInfo() = default;
2941
2942  HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
2943      : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
2944        MaxCount(MS), EntryCount(ES) {}
2945};
2946} // namespace
2947
2948// Print out detailed information about hot functions in PrintValues vector.
2949// Users specify titles and offset of every columns through ColumnTitle and
2950// ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
2951// and at least 4. Besides, users can optionally give a HotFuncMetric string to
2952// print out or let it be an empty string.
2953static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
2954                                const std::vector<int> &ColumnOffset,
2955                                const std::vector<HotFuncInfo> &PrintValues,
2956                                uint64_t HotFuncCount, uint64_t TotalFuncCount,
2957                                uint64_t HotProfCount, uint64_t TotalProfCount,
2958                                const std::string &HotFuncMetric,
2959                                uint32_t TopNFunctions, raw_fd_ostream &OS) {
2960  assert(ColumnOffset.size() == ColumnTitle.size() &&
2961         "ColumnOffset and ColumnTitle should have the same size");
2962  assert(ColumnTitle.size() >= 4 &&
2963         "ColumnTitle should have at least 4 elements");
2964  assert(TotalFuncCount > 0 &&
2965         "There should be at least one function in the profile");
2966  double TotalProfPercent = 0;
2967  if (TotalProfCount > 0)
2968    TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
2969
2970  formatted_raw_ostream FOS(OS);
2971  FOS << HotFuncCount << " out of " << TotalFuncCount
2972      << " functions with profile ("
2973      << format("%.2f%%",
2974                (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
2975      << ") are considered hot functions";
2976  if (!HotFuncMetric.empty())
2977    FOS << " (" << HotFuncMetric << ")";
2978  FOS << ".\n";
2979  FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
2980      << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
2981
2982  for (size_t I = 0; I < ColumnTitle.size(); ++I) {
2983    FOS.PadToColumn(ColumnOffset[I]);
2984    FOS << ColumnTitle[I];
2985  }
2986  FOS << "\n";
2987
2988  uint32_t Count = 0;
2989  for (const auto &R : PrintValues) {
2990    if (TopNFunctions && (Count++ == TopNFunctions))
2991      break;
2992    FOS.PadToColumn(ColumnOffset[0]);
2993    FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
2994    FOS.PadToColumn(ColumnOffset[1]);
2995    FOS << R.MaxCount;
2996    FOS.PadToColumn(ColumnOffset[2]);
2997    FOS << R.EntryCount;
2998    FOS.PadToColumn(ColumnOffset[3]);
2999    FOS << R.FuncName << "\n";
3000  }
3001}
3002
3003static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3004                               ProfileSummary &PS, uint32_t TopN,
3005                               raw_fd_ostream &OS) {
3006  using namespace sampleprof;
3007
3008  const uint32_t HotFuncCutoff = 990000;
3009  auto &SummaryVector = PS.getDetailedSummary();
3010  uint64_t MinCountThreshold = 0;
3011  for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3012    if (SummaryEntry.Cutoff == HotFuncCutoff) {
3013      MinCountThreshold = SummaryEntry.MinCount;
3014      break;
3015    }
3016  }
3017
3018  // Traverse all functions in the profile and keep only hot functions.
3019  // The following loop also calculates the sum of total samples of all
3020  // functions.
3021  std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
3022                std::greater<uint64_t>>
3023      HotFunc;
3024  uint64_t ProfileTotalSample = 0;
3025  uint64_t HotFuncSample = 0;
3026  uint64_t HotFuncCount = 0;
3027
3028  for (const auto &I : Profiles) {
3029    FuncSampleStats FuncStats;
3030    const FunctionSamples &FuncProf = I.second;
3031    ProfileTotalSample += FuncProf.getTotalSamples();
3032    getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
3033
3034    if (isFunctionHot(FuncStats, MinCountThreshold)) {
3035      HotFunc.emplace(FuncProf.getTotalSamples(),
3036                      std::make_pair(&(I.second), FuncStats.MaxSample));
3037      HotFuncSample += FuncProf.getTotalSamples();
3038      ++HotFuncCount;
3039    }
3040  }
3041
3042  std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3043                                       "Entry sample", "Function name"};
3044  std::vector<int> ColumnOffset{0, 24, 42, 58};
3045  std::string Metric =
3046      std::string("max sample >= ") + std::to_string(MinCountThreshold);
3047  std::vector<HotFuncInfo> PrintValues;
3048  for (const auto &FuncPair : HotFunc) {
3049    const FunctionSamples &Func = *FuncPair.second.first;
3050    double TotalSamplePercent =
3051        (ProfileTotalSample > 0)
3052            ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
3053            : 0;
3054    PrintValues.emplace_back(
3055        HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
3056                    TotalSamplePercent, FuncPair.second.second,
3057                    Func.getHeadSamplesEstimate()));
3058  }
3059  dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3060                      Profiles.size(), HotFuncSample, ProfileTotalSample,
3061                      Metric, TopN, OS);
3062
3063  return 0;
3064}
3065
3066static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3067  if (SFormat == ShowFormat::Yaml)
3068    exitWithError("YAML output is not supported for sample profiles");
3069  using namespace sampleprof;
3070  LLVMContext Context;
3071  auto FS = vfs::getRealFileSystem();
3072  auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
3073                                                 FSDiscriminatorPassOption);
3074  if (std::error_code EC = ReaderOrErr.getError())
3075    exitWithErrorCode(EC, Filename);
3076
3077  auto Reader = std::move(ReaderOrErr.get());
3078  if (ShowSectionInfoOnly) {
3079    showSectionInfo(Reader.get(), OS);
3080    return 0;
3081  }
3082
3083  if (std::error_code EC = Reader->read())
3084    exitWithErrorCode(EC, Filename);
3085
3086  if (ShowAllFunctions || FuncNameFilter.empty()) {
3087    if (SFormat == ShowFormat::Json)
3088      Reader->dumpJson(OS);
3089    else
3090      Reader->dump(OS);
3091  } else {
3092    if (SFormat == ShowFormat::Json)
3093      exitWithError(
3094          "the JSON format is supported only when all functions are to "
3095          "be printed");
3096
3097    // TODO: parse context string to support filtering by contexts.
3098    FunctionSamples *FS = Reader->getSamplesFor(StringRef(FuncNameFilter));
3099    Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
3100  }
3101
3102  if (ShowProfileSymbolList) {
3103    std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3104        Reader->getProfileSymbolList();
3105    ReaderList->dump(OS);
3106  }
3107
3108  if (ShowDetailedSummary) {
3109    auto &PS = Reader->getSummary();
3110    PS.printSummary(OS);
3111    PS.printDetailedSummary(OS);
3112  }
3113
3114  if (ShowHotFuncList || TopNFunctions)
3115    showHotFunctionList(Reader->getProfiles(), Reader->getSummary(),
3116                        TopNFunctions, OS);
3117
3118  return 0;
3119}
3120
3121static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3122  if (SFormat == ShowFormat::Json)
3123    exitWithError("JSON output is not supported for MemProf");
3124  auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3125      Filename, ProfiledBinary, /*KeepNames=*/true);
3126  if (Error E = ReaderOr.takeError())
3127    // Since the error can be related to the profile or the binary we do not
3128    // pass whence. Instead additional context is provided where necessary in
3129    // the error message.
3130    exitWithError(std::move(E), /*Whence*/ "");
3131
3132  std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3133      ReaderOr.get().release());
3134
3135  Reader->printYAML(OS);
3136  return 0;
3137}
3138
3139static int showDebugInfoCorrelation(const std::string &Filename,
3140                                    ShowFormat SFormat, raw_fd_ostream &OS) {
3141  if (SFormat == ShowFormat::Json)
3142    exitWithError("JSON output is not supported for debug info correlation");
3143  std::unique_ptr<InstrProfCorrelator> Correlator;
3144  if (auto Err =
3145          InstrProfCorrelator::get(Filename, InstrProfCorrelator::DEBUG_INFO)
3146              .moveInto(Correlator))
3147    exitWithError(std::move(Err), Filename);
3148  if (SFormat == ShowFormat::Yaml) {
3149    if (auto Err = Correlator->dumpYaml(MaxDbgCorrelationWarnings, OS))
3150      exitWithError(std::move(Err), Filename);
3151    return 0;
3152  }
3153
3154  if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
3155    exitWithError(std::move(Err), Filename);
3156
3157  InstrProfSymtab Symtab;
3158  if (auto Err = Symtab.create(
3159          StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3160    exitWithError(std::move(Err), Filename);
3161
3162  if (ShowProfileSymbolList)
3163    Symtab.dumpNames(OS);
3164  // TODO: Read "Profile Data Type" from debug info to compute and show how many
3165  // counters the section holds.
3166  if (ShowDetailedSummary)
3167    OS << "Counters section size: 0x"
3168       << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
3169  OS << "Found " << Correlator->getDataSize() << " functions\n";
3170
3171  return 0;
3172}
3173
3174static int show_main(int argc, const char *argv[]) {
3175  if (Filename.empty() && DebugInfoFilename.empty())
3176    exitWithError(
3177        "the positional argument '<profdata-file>' is required unless '--" +
3178        DebugInfoFilename.ArgStr + "' is provided");
3179
3180  if (Filename == OutputFilename) {
3181    errs() << sys::path::filename(argv[0]) << " " << argv[1]
3182           << ": Input file name cannot be the same as the output file name!\n";
3183    return 1;
3184  }
3185  if (JsonFormat)
3186    SFormat = ShowFormat::Json;
3187
3188  std::error_code EC;
3189  raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3190  if (EC)
3191    exitWithErrorCode(EC, OutputFilename);
3192
3193  if (ShowAllFunctions && !FuncNameFilter.empty())
3194    WithColor::warning() << "-function argument ignored: showing all functions\n";
3195
3196  if (!DebugInfoFilename.empty())
3197    return showDebugInfoCorrelation(DebugInfoFilename, SFormat, OS);
3198
3199  if (ShowProfileKind == instr)
3200    return showInstrProfile(SFormat, OS);
3201  if (ShowProfileKind == sample)
3202    return showSampleProfile(SFormat, OS);
3203  return showMemProfProfile(SFormat, OS);
3204}
3205
3206static int order_main(int argc, const char *argv[]) {
3207  std::error_code EC;
3208  raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3209  if (EC)
3210    exitWithErrorCode(EC, OutputFilename);
3211  auto FS = vfs::getRealFileSystem();
3212  auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
3213  if (Error E = ReaderOrErr.takeError())
3214    exitWithError(std::move(E), Filename);
3215
3216  auto Reader = std::move(ReaderOrErr.get());
3217  for (auto &I : *Reader) {
3218    // Read all entries
3219    (void)I;
3220  }
3221  auto &Traces = Reader->getTemporalProfTraces();
3222  auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces);
3223  BalancedPartitioningConfig Config;
3224  BalancedPartitioning BP(Config);
3225  BP.run(Nodes);
3226
3227  OS << "# Ordered " << Nodes.size() << " functions\n";
3228  OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3229        "linkage and this output does not take that into account. Some "
3230        "post-processing may be required before passing to the linker via "
3231        "-order_file.\n";
3232  for (auto &N : Nodes) {
3233    auto [Filename, ParsedFuncName] =
3234        getParsedIRPGOFuncName(Reader->getSymtab().getFuncOrVarName(N.Id));
3235    if (!Filename.empty())
3236      OS << "# " << Filename << "\n";
3237    OS << ParsedFuncName << "\n";
3238  }
3239  return 0;
3240}
3241
3242int llvm_profdata_main(int argc, char **argvNonConst,
3243                       const llvm::ToolContext &) {
3244  const char **argv = const_cast<const char **>(argvNonConst);
3245
3246  StringRef ProgName(sys::path::filename(argv[0]));
3247
3248  if (argc < 2) {
3249    errs() << ProgName
3250           << ": No subcommand specified! Run llvm-profata --help for usage.\n";
3251    return 1;
3252  }
3253
3254  cl::ParseCommandLineOptions(argc, argv, "LLVM profile data\n");
3255
3256  if (ShowSubcommand)
3257    return show_main(argc, argv);
3258
3259  if (OrderSubcommand)
3260    return order_main(argc, argv);
3261
3262  if (OverlapSubcommand)
3263    return overlap_main(argc, argv);
3264
3265  if (MergeSubcommand)
3266    return merge_main(argc, argv);
3267
3268  errs() << ProgName
3269         << ": Unknown command. Run llvm-profdata --help for usage.\n";
3270  return 1;
3271}
3272