1//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the class that reads LLVM sample profiles. It
10// supports three file formats: text, binary and gcov.
11//
12// The textual representation is useful for debugging and testing purposes. The
13// binary representation is more compact, resulting in smaller file sizes.
14//
15// The gcov encoding is the one generated by GCC's AutoFDO profile creation
16// tool (https://github.com/google/autofdo)
17//
18// All three encodings can be used interchangeably as an input sample profile.
19//
20//===----------------------------------------------------------------------===//
21
22#include "llvm/ProfileData/SampleProfReader.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/StringRef.h"
26#include "llvm/IR/Module.h"
27#include "llvm/IR/ProfileSummary.h"
28#include "llvm/ProfileData/ProfileCommon.h"
29#include "llvm/ProfileData/SampleProf.h"
30#include "llvm/Support/CommandLine.h"
31#include "llvm/Support/Compression.h"
32#include "llvm/Support/ErrorOr.h"
33#include "llvm/Support/JSON.h"
34#include "llvm/Support/LEB128.h"
35#include "llvm/Support/LineIterator.h"
36#include "llvm/Support/MD5.h"
37#include "llvm/Support/MemoryBuffer.h"
38#include "llvm/Support/raw_ostream.h"
39#include <algorithm>
40#include <cstddef>
41#include <cstdint>
42#include <limits>
43#include <memory>
44#include <system_error>
45#include <vector>
46
47using namespace llvm;
48using namespace sampleprof;
49
50#define DEBUG_TYPE "samplepgo-reader"
51
// This internal option specifies if the profile uses FS discriminators.
// It only applies to text, binary and compact binary format profiles.
// For ext-binary format profiles, the flag is set in the summary.
//
// The text and plain binary readImpl() functions in this file copy the
// option's value into ProfileIsFS and FunctionSamples::ProfileIsFS before
// reading any profile data. The option is hidden and defaults to false.
static cl::opt<bool> ProfileIsFSDisciminator(
    "profile-isfs", cl::Hidden, cl::init(false),
    cl::desc("Profile uses flow sensitive discriminators"));
58
59/// Dump the function profile for \p FName.
60///
61/// \param FContext Name + context of the function to print.
62/// \param OS Stream to emit the output to.
63void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
64                                              raw_ostream &OS) {
65  OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
66}
67
68/// Dump all the function profiles found on stream \p OS.
69void SampleProfileReader::dump(raw_ostream &OS) {
70  std::vector<NameFunctionSamples> V;
71  sortFuncProfiles(Profiles, V);
72  for (const auto &I : V)
73    dumpFunctionProfile(I.first, OS);
74}
75
76static void dumpFunctionProfileJson(const FunctionSamples &S,
77                                    json::OStream &JOS, bool TopLevel = false) {
78  auto DumpBody = [&](const BodySampleMap &BodySamples) {
79    for (const auto &I : BodySamples) {
80      const LineLocation &Loc = I.first;
81      const SampleRecord &Sample = I.second;
82      JOS.object([&] {
83        JOS.attribute("line", Loc.LineOffset);
84        if (Loc.Discriminator)
85          JOS.attribute("discriminator", Loc.Discriminator);
86        JOS.attribute("samples", Sample.getSamples());
87
88        auto CallTargets = Sample.getSortedCallTargets();
89        if (!CallTargets.empty()) {
90          JOS.attributeArray("calls", [&] {
91            for (const auto &J : CallTargets) {
92              JOS.object([&] {
93                JOS.attribute("function", J.first);
94                JOS.attribute("samples", J.second);
95              });
96            }
97          });
98        }
99      });
100    }
101  };
102
103  auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) {
104    for (const auto &I : CallsiteSamples)
105      for (const auto &FS : I.second) {
106        const LineLocation &Loc = I.first;
107        const FunctionSamples &CalleeSamples = FS.second;
108        JOS.object([&] {
109          JOS.attribute("line", Loc.LineOffset);
110          if (Loc.Discriminator)
111            JOS.attribute("discriminator", Loc.Discriminator);
112          JOS.attributeArray(
113              "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); });
114        });
115      }
116  };
117
118  JOS.object([&] {
119    JOS.attribute("name", S.getName());
120    JOS.attribute("total", S.getTotalSamples());
121    if (TopLevel)
122      JOS.attribute("head", S.getHeadSamples());
123
124    const auto &BodySamples = S.getBodySamples();
125    if (!BodySamples.empty())
126      JOS.attributeArray("body", [&] { DumpBody(BodySamples); });
127
128    const auto &CallsiteSamples = S.getCallsiteSamples();
129    if (!CallsiteSamples.empty())
130      JOS.attributeArray("callsites",
131                         [&] { DumpCallsiteSamples(CallsiteSamples); });
132  });
133}
134
135/// Dump all the function profiles found on stream \p OS in the JSON format.
136void SampleProfileReader::dumpJson(raw_ostream &OS) {
137  std::vector<NameFunctionSamples> V;
138  sortFuncProfiles(Profiles, V);
139  json::OStream JOS(OS, 2);
140  JOS.arrayBegin();
141  for (const auto &F : V)
142    dumpFunctionProfileJson(*F.second, JOS, true);
143  JOS.arrayEnd();
144
145  // Emit a newline character at the end as json::OStream doesn't emit one.
146  OS << "\n";
147}
148
149/// Parse \p Input as function head.
150///
151/// Parse one line of \p Input, and update function name in \p FName,
152/// function's total sample count in \p NumSamples, function's entry
153/// count in \p NumHeadSamples.
154///
155/// \returns true if parsing is successful.
156static bool ParseHead(const StringRef &Input, StringRef &FName,
157                      uint64_t &NumSamples, uint64_t &NumHeadSamples) {
158  if (Input[0] == ' ')
159    return false;
160  size_t n2 = Input.rfind(':');
161  size_t n1 = Input.rfind(':', n2 - 1);
162  FName = Input.substr(0, n1);
163  if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
164    return false;
165  if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
166    return false;
167  return true;
168}
169
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLineOffset = 0xffff;
  return L <= MaxLineOffset;
}
172
173/// Parse \p Input that contains metadata.
174/// Possible metadata:
175/// - CFG Checksum information:
176///     !CFGChecksum: 12345
177/// - CFG Checksum information:
178///     !Attributes: 1
179/// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
180static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
181                          uint32_t &Attributes) {
182  if (Input.startswith("!CFGChecksum:")) {
183    StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
184    return !CFGInfo.getAsInteger(10, FunctionHash);
185  }
186
187  if (Input.startswith("!Attributes:")) {
188    StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
189    return !Attrib.getAsInteger(10, Attributes);
190  }
191
192  return false;
193}
194
/// Kind of an indented (non-header) line in a text profile, as classified by
/// ParseLine.
enum class LineType {
  // Line whose text after "LOC: " does not start with a digit; parsed as
  // 'callee:count'.
  CallSiteProfile,
  // Line whose text after "LOC: " starts with a digit; parsed as a sample
  // count optionally followed by 'target:count' pairs.
  BodyProfile,
  // Line whose first non-space character is '!'.
  Metadata,
};
200
201/// Parse \p Input as line sample.
202///
203/// \param Input input line.
204/// \param LineTy Type of this line.
205/// \param Depth the depth of the inline stack.
206/// \param NumSamples total samples of the line/inlined callsite.
207/// \param LineOffset line offset to the start of the function.
208/// \param Discriminator discriminator of the line.
209/// \param TargetCountMap map from indirect call target to count.
210/// \param FunctionHash the function's CFG hash, used by pseudo probe.
211///
212/// returns true if parsing is successful.
213static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
214                      uint64_t &NumSamples, uint32_t &LineOffset,
215                      uint32_t &Discriminator, StringRef &CalleeName,
216                      DenseMap<StringRef, uint64_t> &TargetCountMap,
217                      uint64_t &FunctionHash, uint32_t &Attributes) {
218  for (Depth = 0; Input[Depth] == ' '; Depth++)
219    ;
220  if (Depth == 0)
221    return false;
222
223  if (Input[Depth] == '!') {
224    LineTy = LineType::Metadata;
225    return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
226  }
227
228  size_t n1 = Input.find(':');
229  StringRef Loc = Input.substr(Depth, n1 - Depth);
230  size_t n2 = Loc.find('.');
231  if (n2 == StringRef::npos) {
232    if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
233      return false;
234    Discriminator = 0;
235  } else {
236    if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
237      return false;
238    if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
239      return false;
240  }
241
242  StringRef Rest = Input.substr(n1 + 2);
243  if (isDigit(Rest[0])) {
244    LineTy = LineType::BodyProfile;
245    size_t n3 = Rest.find(' ');
246    if (n3 == StringRef::npos) {
247      if (Rest.getAsInteger(10, NumSamples))
248        return false;
249    } else {
250      if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
251        return false;
252    }
253    // Find call targets and their sample counts.
254    // Note: In some cases, there are symbols in the profile which are not
255    // mangled. To accommodate such cases, use colon + integer pairs as the
256    // anchor points.
257    // An example:
258    // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
259    // ":1000" and ":437" are used as anchor points so the string above will
260    // be interpreted as
261    // target: _M_construct<char *>
262    // count: 1000
263    // target: string_view<std::allocator<char> >
264    // count: 437
265    while (n3 != StringRef::npos) {
266      n3 += Rest.substr(n3).find_first_not_of(' ');
267      Rest = Rest.substr(n3);
268      n3 = Rest.find_first_of(':');
269      if (n3 == StringRef::npos || n3 == 0)
270        return false;
271
272      StringRef Target;
273      uint64_t count, n4;
274      while (true) {
275        // Get the segment after the current colon.
276        StringRef AfterColon = Rest.substr(n3 + 1);
277        // Get the target symbol before the current colon.
278        Target = Rest.substr(0, n3);
279        // Check if the word after the current colon is an integer.
280        n4 = AfterColon.find_first_of(' ');
281        n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
282        StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
283        if (!WordAfterColon.getAsInteger(10, count))
284          break;
285
286        // Try to find the next colon.
287        uint64_t n5 = AfterColon.find_first_of(':');
288        if (n5 == StringRef::npos)
289          return false;
290        n3 += n5 + 1;
291      }
292
293      // An anchor point is found. Save the {target, count} pair
294      TargetCountMap[Target] = count;
295      if (n4 == Rest.size())
296        break;
297      // Change n3 to the next blank space after colon + integer pair.
298      n3 = n4;
299    }
300  } else {
301    LineTy = LineType::CallSiteProfile;
302    size_t n3 = Rest.find_last_of(':');
303    CalleeName = Rest.substr(0, n3);
304    if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
305      return false;
306  }
307  return true;
308}
309
310/// Load samples from a text file.
311///
312/// See the documentation at the top of the file for an explanation of
313/// the expected format.
314///
315/// \returns true if the file was loaded successfully, false otherwise.
316std::error_code SampleProfileReaderText::readImpl() {
317  line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
318  sampleprof_error Result = sampleprof_error::success;
319
320  InlineCallStack InlineStack;
321  uint32_t TopLevelProbeProfileCount = 0;
322
323  // DepthMetadata tracks whether we have processed metadata for the current
324  // top-level or nested function profile.
325  uint32_t DepthMetadata = 0;
326
327  ProfileIsFS = ProfileIsFSDisciminator;
328  FunctionSamples::ProfileIsFS = ProfileIsFS;
329  for (; !LineIt.is_at_eof(); ++LineIt) {
330    if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
331      continue;
332    // Read the header of each function.
333    //
334    // Note that for function identifiers we are actually expecting
335    // mangled names, but we may not always get them. This happens when
336    // the compiler decides not to emit the function (e.g., it was inlined
337    // and removed). In this case, the binary will not have the linkage
338    // name for the function, so the profiler will emit the function's
339    // unmangled name, which may contain characters like ':' and '>' in its
340    // name (member functions, templates, etc).
341    //
342    // The only requirement we place on the identifier, then, is that it
343    // should not begin with a number.
344    if ((*LineIt)[0] != ' ') {
345      uint64_t NumSamples, NumHeadSamples;
346      StringRef FName;
347      if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
348        reportError(LineIt.line_number(),
349                    "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
350        return sampleprof_error::malformed;
351      }
352      DepthMetadata = 0;
353      SampleContext FContext(FName, CSNameTable);
354      if (FContext.hasContext())
355        ++CSProfileCount;
356      Profiles[FContext] = FunctionSamples();
357      FunctionSamples &FProfile = Profiles[FContext];
358      FProfile.setContext(FContext);
359      MergeResult(Result, FProfile.addTotalSamples(NumSamples));
360      MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
361      InlineStack.clear();
362      InlineStack.push_back(&FProfile);
363    } else {
364      uint64_t NumSamples;
365      StringRef FName;
366      DenseMap<StringRef, uint64_t> TargetCountMap;
367      uint32_t Depth, LineOffset, Discriminator;
368      LineType LineTy;
369      uint64_t FunctionHash = 0;
370      uint32_t Attributes = 0;
371      if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
372                     Discriminator, FName, TargetCountMap, FunctionHash,
373                     Attributes)) {
374        reportError(LineIt.line_number(),
375                    "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
376                        *LineIt);
377        return sampleprof_error::malformed;
378      }
379      if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
380        // Metadata must be put at the end of a function profile.
381        reportError(LineIt.line_number(),
382                    "Found non-metadata after metadata: " + *LineIt);
383        return sampleprof_error::malformed;
384      }
385
386      // Here we handle FS discriminators.
387      Discriminator &= getDiscriminatorMask();
388
389      while (InlineStack.size() > Depth) {
390        InlineStack.pop_back();
391      }
392      switch (LineTy) {
393      case LineType::CallSiteProfile: {
394        FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
395            LineLocation(LineOffset, Discriminator))[std::string(FName)];
396        FSamples.setName(FName);
397        MergeResult(Result, FSamples.addTotalSamples(NumSamples));
398        InlineStack.push_back(&FSamples);
399        DepthMetadata = 0;
400        break;
401      }
402      case LineType::BodyProfile: {
403        while (InlineStack.size() > Depth) {
404          InlineStack.pop_back();
405        }
406        FunctionSamples &FProfile = *InlineStack.back();
407        for (const auto &name_count : TargetCountMap) {
408          MergeResult(Result, FProfile.addCalledTargetSamples(
409                                  LineOffset, Discriminator, name_count.first,
410                                  name_count.second));
411        }
412        MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
413                                                    NumSamples));
414        break;
415      }
416      case LineType::Metadata: {
417        FunctionSamples &FProfile = *InlineStack.back();
418        if (FunctionHash) {
419          FProfile.setFunctionHash(FunctionHash);
420          if (Depth == 1)
421            ++TopLevelProbeProfileCount;
422        }
423        FProfile.getContext().setAllAttributes(Attributes);
424        if (Attributes & (uint32_t)ContextShouldBeInlined)
425          ProfileIsPreInlined = true;
426        DepthMetadata = Depth;
427        break;
428      }
429      }
430    }
431  }
432
433  assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
434         "Cannot have both context-sensitive and regular profile");
435  ProfileIsCS = (CSProfileCount > 0);
436  assert((TopLevelProbeProfileCount == 0 ||
437          TopLevelProbeProfileCount == Profiles.size()) &&
438         "Cannot have both probe-based profiles and regular profiles");
439  ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
440  FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
441  FunctionSamples::ProfileIsCS = ProfileIsCS;
442  FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined;
443
444  if (Result == sampleprof_error::success)
445    computeSummary();
446
447  return Result;
448}
449
450bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
451  bool result = false;
452
453  // Check that the first non-comment line is a valid function header.
454  line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
455  if (!LineIt.is_at_eof()) {
456    if ((*LineIt)[0] != ' ') {
457      uint64_t NumSamples, NumHeadSamples;
458      StringRef FName;
459      result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
460    }
461  }
462
463  return result;
464}
465
466template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
467  unsigned NumBytesRead = 0;
468  std::error_code EC;
469  uint64_t Val = decodeULEB128(Data, &NumBytesRead);
470
471  if (Val > std::numeric_limits<T>::max())
472    EC = sampleprof_error::malformed;
473  else if (Data + NumBytesRead > End)
474    EC = sampleprof_error::truncated;
475  else
476    EC = sampleprof_error::success;
477
478  if (EC) {
479    reportError(0, EC.message());
480    return EC;
481  }
482
483  Data += NumBytesRead;
484  return static_cast<T>(Val);
485}
486
487ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
488  std::error_code EC;
489  StringRef Str(reinterpret_cast<const char *>(Data));
490  if (Data + Str.size() + 1 > End) {
491    EC = sampleprof_error::truncated;
492    reportError(0, EC.message());
493    return EC;
494  }
495
496  Data += Str.size() + 1;
497  return Str;
498}
499
500template <typename T>
501ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
502  std::error_code EC;
503
504  if (Data + sizeof(T) > End) {
505    EC = sampleprof_error::truncated;
506    reportError(0, EC.message());
507    return EC;
508  }
509
510  using namespace support;
511  T Val = endian::readNext<T, little, unaligned>(Data);
512  return Val;
513}
514
515template <typename T>
516inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
517  std::error_code EC;
518  auto Idx = readNumber<uint32_t>();
519  if (std::error_code EC = Idx.getError())
520    return EC;
521  if (*Idx >= Table.size())
522    return sampleprof_error::truncated_name_table;
523  return *Idx;
524}
525
526ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
527  auto Idx = readStringIndex(NameTable);
528  if (std::error_code EC = Idx.getError())
529    return EC;
530
531  return NameTable[*Idx];
532}
533
534ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
535  auto FName(readStringFromTable());
536  if (std::error_code EC = FName.getError())
537    return EC;
538  return SampleContext(*FName);
539}
540
541ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
542  if (!FixedLengthMD5)
543    return SampleProfileReaderBinary::readStringFromTable();
544
545  // read NameTable index.
546  auto Idx = readStringIndex(NameTable);
547  if (std::error_code EC = Idx.getError())
548    return EC;
549
550  // Check whether the name to be accessed has been accessed before,
551  // if not, read it from memory directly.
552  StringRef &SR = NameTable[*Idx];
553  if (SR.empty()) {
554    const uint8_t *SavedData = Data;
555    Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
556    auto FID = readUnencodedNumber<uint64_t>();
557    if (std::error_code EC = FID.getError())
558      return EC;
559    // Save the string converted from uint64_t in MD5StringBuf. All the
560    // references to the name are all StringRefs refering to the string
561    // in MD5StringBuf.
562    MD5StringBuf->push_back(std::to_string(*FID));
563    SR = MD5StringBuf->back();
564    Data = SavedData;
565  }
566  return SR;
567}
568
569ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
570  auto Idx = readStringIndex(NameTable);
571  if (std::error_code EC = Idx.getError())
572    return EC;
573
574  return StringRef(NameTable[*Idx]);
575}
576
577std::error_code
578SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
579  auto NumSamples = readNumber<uint64_t>();
580  if (std::error_code EC = NumSamples.getError())
581    return EC;
582  FProfile.addTotalSamples(*NumSamples);
583
584  // Read the samples in the body.
585  auto NumRecords = readNumber<uint32_t>();
586  if (std::error_code EC = NumRecords.getError())
587    return EC;
588
589  for (uint32_t I = 0; I < *NumRecords; ++I) {
590    auto LineOffset = readNumber<uint64_t>();
591    if (std::error_code EC = LineOffset.getError())
592      return EC;
593
594    if (!isOffsetLegal(*LineOffset)) {
595      return std::error_code();
596    }
597
598    auto Discriminator = readNumber<uint64_t>();
599    if (std::error_code EC = Discriminator.getError())
600      return EC;
601
602    auto NumSamples = readNumber<uint64_t>();
603    if (std::error_code EC = NumSamples.getError())
604      return EC;
605
606    auto NumCalls = readNumber<uint32_t>();
607    if (std::error_code EC = NumCalls.getError())
608      return EC;
609
610    // Here we handle FS discriminators:
611    uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
612
613    for (uint32_t J = 0; J < *NumCalls; ++J) {
614      auto CalledFunction(readStringFromTable());
615      if (std::error_code EC = CalledFunction.getError())
616        return EC;
617
618      auto CalledFunctionSamples = readNumber<uint64_t>();
619      if (std::error_code EC = CalledFunctionSamples.getError())
620        return EC;
621
622      FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
623                                      *CalledFunction, *CalledFunctionSamples);
624    }
625
626    FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
627  }
628
629  // Read all the samples for inlined function calls.
630  auto NumCallsites = readNumber<uint32_t>();
631  if (std::error_code EC = NumCallsites.getError())
632    return EC;
633
634  for (uint32_t J = 0; J < *NumCallsites; ++J) {
635    auto LineOffset = readNumber<uint64_t>();
636    if (std::error_code EC = LineOffset.getError())
637      return EC;
638
639    auto Discriminator = readNumber<uint64_t>();
640    if (std::error_code EC = Discriminator.getError())
641      return EC;
642
643    auto FName(readStringFromTable());
644    if (std::error_code EC = FName.getError())
645      return EC;
646
647    // Here we handle FS discriminators:
648    uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
649
650    FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
651        LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
652    CalleeProfile.setName(*FName);
653    if (std::error_code EC = readProfile(CalleeProfile))
654      return EC;
655  }
656
657  return sampleprof_error::success;
658}
659
660std::error_code
661SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
662  Data = Start;
663  auto NumHeadSamples = readNumber<uint64_t>();
664  if (std::error_code EC = NumHeadSamples.getError())
665    return EC;
666
667  ErrorOr<SampleContext> FContext(readSampleContextFromTable());
668  if (std::error_code EC = FContext.getError())
669    return EC;
670
671  Profiles[*FContext] = FunctionSamples();
672  FunctionSamples &FProfile = Profiles[*FContext];
673  FProfile.setContext(*FContext);
674  FProfile.addHeadSamples(*NumHeadSamples);
675
676  if (FContext->hasContext())
677    CSProfileCount++;
678
679  if (std::error_code EC = readProfile(FProfile))
680    return EC;
681  return sampleprof_error::success;
682}
683
684std::error_code SampleProfileReaderBinary::readImpl() {
685  ProfileIsFS = ProfileIsFSDisciminator;
686  FunctionSamples::ProfileIsFS = ProfileIsFS;
687  while (!at_eof()) {
688    if (std::error_code EC = readFuncProfile(Data))
689      return EC;
690  }
691
692  return sampleprof_error::success;
693}
694
695ErrorOr<SampleContextFrames>
696SampleProfileReaderExtBinaryBase::readContextFromTable() {
697  auto ContextIdx = readNumber<uint32_t>();
698  if (std::error_code EC = ContextIdx.getError())
699    return EC;
700  if (*ContextIdx >= CSNameTable->size())
701    return sampleprof_error::truncated_name_table;
702  return (*CSNameTable)[*ContextIdx];
703}
704
705ErrorOr<SampleContext>
706SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
707  if (ProfileIsCS) {
708    auto FContext(readContextFromTable());
709    if (std::error_code EC = FContext.getError())
710      return EC;
711    return SampleContext(*FContext);
712  } else {
713    auto FName(readStringFromTable());
714    if (std::error_code EC = FName.getError())
715      return EC;
716    return SampleContext(*FName);
717  }
718}
719
720std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
721    const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
722  Data = Start;
723  End = Start + Size;
724  switch (Entry.Type) {
725  case SecProfSummary:
726    if (std::error_code EC = readSummary())
727      return EC;
728    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
729      Summary->setPartialProfile(true);
730    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
731      FunctionSamples::ProfileIsCS = ProfileIsCS = true;
732    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
733      FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
734    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
735      FunctionSamples::ProfileIsFS = ProfileIsFS = true;
736    break;
737  case SecNameTable: {
738    FixedLengthMD5 =
739        hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
740    bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
741    assert((!FixedLengthMD5 || UseMD5) &&
742           "If FixedLengthMD5 is true, UseMD5 has to be true");
743    FunctionSamples::HasUniqSuffix =
744        hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
745    if (std::error_code EC = readNameTableSec(UseMD5))
746      return EC;
747    break;
748  }
749  case SecCSNameTable: {
750    if (std::error_code EC = readCSNameTableSec())
751      return EC;
752    break;
753  }
754  case SecLBRProfile:
755    if (std::error_code EC = readFuncProfiles())
756      return EC;
757    break;
758  case SecFuncOffsetTable:
759    FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
760    if (std::error_code EC = readFuncOffsetTable())
761      return EC;
762    break;
763  case SecFuncMetadata: {
764    ProfileIsProbeBased =
765        hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
766    FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
767    bool HasAttribute =
768        hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
769    if (std::error_code EC = readFuncMetadata(HasAttribute))
770      return EC;
771    break;
772  }
773  case SecProfileSymbolList:
774    if (std::error_code EC = readProfileSymbolList())
775      return EC;
776    break;
777  default:
778    if (std::error_code EC = readCustomSection(Entry))
779      return EC;
780    break;
781  }
782  return sampleprof_error::success;
783}
784
785bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
786  if (!M)
787    return false;
788  FuncsToUse.clear();
789  for (auto &F : *M)
790    FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
791  return true;
792}
793
794std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
795  // If there are more than one FuncOffsetTable, the profile read associated
796  // with previous FuncOffsetTable has to be done before next FuncOffsetTable
797  // is read.
798  FuncOffsetTable.clear();
799
800  auto Size = readNumber<uint64_t>();
801  if (std::error_code EC = Size.getError())
802    return EC;
803
804  FuncOffsetTable.reserve(*Size);
805
806  if (FuncOffsetsOrdered) {
807    OrderedFuncOffsets =
808        std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
809    OrderedFuncOffsets->reserve(*Size);
810  }
811
812  for (uint64_t I = 0; I < *Size; ++I) {
813    auto FContext(readSampleContextFromTable());
814    if (std::error_code EC = FContext.getError())
815      return EC;
816
817    auto Offset = readNumber<uint64_t>();
818    if (std::error_code EC = Offset.getError())
819      return EC;
820
821    FuncOffsetTable[*FContext] = *Offset;
822    if (FuncOffsetsOrdered)
823      OrderedFuncOffsets->emplace_back(*FContext, *Offset);
824  }
825
826  return sampleprof_error::success;
827}
828
/// Read the function profiles stored in the current section, i.e. the bytes
/// between Data and End.
///
/// If collectFuncsFromModule() reports that there is no set of functions to
/// restrict loading to, every profile in the section is read sequentially.
/// Otherwise only the selected profiles are read, each one located through an
/// offset that is relative to the value Data had on entry, and Data is then
/// moved to End.
///
/// \returns the first error reported by readFuncProfile, or
/// sampleprof_error::success.
std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
  // Collect functions used by current module if the Reader has been
  // given a module.
  // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
  // which will query FunctionSamples::HasUniqSuffix, so it has to be
  // called after FunctionSamples::HasUniqSuffix is set, i.e. after
  // NameTable section is read.
  bool LoadFuncsToBeUsed = collectFuncsFromModule();

  // When LoadFuncsToBeUsed is false, load all the function profiles.
  // Start is the base that the offsets in the offset tables are added to.
  const uint8_t *Start = Data;
  if (!LoadFuncsToBeUsed) {
    while (Data < End) {
      if (std::error_code EC = readFuncProfile(Data))
        return EC;
    }
    assert(Data == End && "More data is read than expected");
  } else {
    // Load function profiles on demand.
    if (Remapper) {
      for (auto Name : FuncsToUse) {
        Remapper->insert(Name);
      }
    }

    if (ProfileIsCS) {
      // With MD5 names, the functions to use are matched by GUID.
      DenseSet<uint64_t> FuncGuidsToUse;
      if (useMD5()) {
        for (auto Name : FuncsToUse)
          FuncGuidsToUse.insert(Function::getGUID(Name));
      }

      // For each function in current module, load all context profiles for
      // the function as well as their callee contexts which can help profile
      // guided importing for ThinLTO. This can be achieved by walking
      // through an ordered context container, where contexts are laid out
      // as if they were walked in preorder of a context trie. While
      // traversing the trie, a link to the highest common ancestor node is
      // kept so that all of its descendants will be loaded.
      assert(OrderedFuncOffsets.get() &&
             "func offset table should always be sorted in CS profile");
      const SampleContext *CommonContext = nullptr;
      for (const auto &NameOffset : *OrderedFuncOffsets) {
        const auto &FContext = NameOffset.first;
        auto FName = FContext.getName();
        // For function in the current module, keep its farthest ancestor
        // context. This can be used to load itself and its child and
        // sibling contexts.
        if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
            (!useMD5() && (FuncsToUse.count(FName) ||
                           (Remapper && Remapper->exist(FName))))) {
          if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
            CommonContext = &FContext;
        }

        if (CommonContext == &FContext ||
            (CommonContext && CommonContext->IsPrefixOf(FContext))) {
          // Load profile for the current context which originated from
          // the common ancestor.
          const uint8_t *FuncProfileAddr = Start + NameOffset.second;
          assert(FuncProfileAddr < End && "out of LBRProfile section");
          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
            return EC;
        }
      }
    } else {
      if (useMD5()) {
        // The offset table is keyed by the decimal text of the MD5 hash, so
        // look each wanted function up directly.
        for (auto Name : FuncsToUse) {
          auto GUID = std::to_string(MD5Hash(Name));
          auto iter = FuncOffsetTable.find(StringRef(GUID));
          if (iter == FuncOffsetTable.end())
            continue;
          const uint8_t *FuncProfileAddr = Start + iter->second;
          assert(FuncProfileAddr < End && "out of LBRProfile section");
          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
            return EC;
        }
      } else {
        // Walk the whole offset table and keep the entries whose name is
        // either wanted directly or known to the remapper.
        for (auto NameOffset : FuncOffsetTable) {
          SampleContext FContext(NameOffset.first);
          auto FuncName = FContext.getName();
          if (!FuncsToUse.count(FuncName) &&
              (!Remapper || !Remapper->exist(FuncName)))
            continue;
          const uint8_t *FuncProfileAddr = Start + NameOffset.second;
          assert(FuncProfileAddr < End && "out of LBRProfile section");
          if (std::error_code EC = readFuncProfile(FuncProfileAddr))
            return EC;
        }
      }
    }
    // On-demand loading jumps around inside the section; mark the whole
    // section as consumed once it is done.
    Data = End;
  }
  assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
         "Cannot have both context-sensitive and regular profile");
  assert((!CSProfileCount || ProfileIsCS) &&
         "Section flag should be consistent with actual profile");
  return sampleprof_error::success;
}
928
929std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
930  if (!ProfSymList)
931    ProfSymList = std::make_unique<ProfileSymbolList>();
932
933  if (std::error_code EC = ProfSymList->read(Data, End - Data))
934    return EC;
935
936  Data = End;
937  return sampleprof_error::success;
938}
939
940std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
941    const uint8_t *SecStart, const uint64_t SecSize,
942    const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
943  Data = SecStart;
944  End = SecStart + SecSize;
945  auto DecompressSize = readNumber<uint64_t>();
946  if (std::error_code EC = DecompressSize.getError())
947    return EC;
948  DecompressBufSize = *DecompressSize;
949
950  auto CompressSize = readNumber<uint64_t>();
951  if (std::error_code EC = CompressSize.getError())
952    return EC;
953
954  if (!llvm::compression::zlib::isAvailable())
955    return sampleprof_error::zlib_unavailable;
956
957  uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize);
958  size_t UCSize = DecompressBufSize;
959  llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize),
960                                                Buffer, UCSize);
961  if (E)
962    return sampleprof_error::uncompress_failed;
963  DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
964  return sampleprof_error::success;
965}
966
967std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
968  const uint8_t *BufStart =
969      reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
970
971  for (auto &Entry : SecHdrTable) {
972    // Skip empty section.
973    if (!Entry.Size)
974      continue;
975
976    // Skip sections without context when SkipFlatProf is true.
977    if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
978      continue;
979
980    const uint8_t *SecStart = BufStart + Entry.Offset;
981    uint64_t SecSize = Entry.Size;
982
983    // If the section is compressed, decompress it into a buffer
984    // DecompressBuf before reading the actual data. The pointee of
985    // 'Data' will be changed to buffer hold by DecompressBuf
986    // temporarily when reading the actual data.
987    bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
988    if (isCompressed) {
989      const uint8_t *DecompressBuf;
990      uint64_t DecompressBufSize;
991      if (std::error_code EC = decompressSection(
992              SecStart, SecSize, DecompressBuf, DecompressBufSize))
993        return EC;
994      SecStart = DecompressBuf;
995      SecSize = DecompressBufSize;
996    }
997
998    if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
999      return EC;
1000    if (Data != SecStart + SecSize)
1001      return sampleprof_error::malformed;
1002
1003    // Change the pointee of 'Data' from DecompressBuf to original Buffer.
1004    if (isCompressed) {
1005      Data = BufStart + Entry.Offset;
1006      End = BufStart + Buffer->getBufferSize();
1007    }
1008  }
1009
1010  return sampleprof_error::success;
1011}
1012
1013std::error_code SampleProfileReaderCompactBinary::readImpl() {
1014  // Collect functions used by current module if the Reader has been
1015  // given a module.
1016  bool LoadFuncsToBeUsed = collectFuncsFromModule();
1017  ProfileIsFS = ProfileIsFSDisciminator;
1018  FunctionSamples::ProfileIsFS = ProfileIsFS;
1019  std::vector<uint64_t> OffsetsToUse;
1020  if (!LoadFuncsToBeUsed) {
1021    // load all the function profiles.
1022    for (auto FuncEntry : FuncOffsetTable) {
1023      OffsetsToUse.push_back(FuncEntry.second);
1024    }
1025  } else {
1026    // load function profiles on demand.
1027    for (auto Name : FuncsToUse) {
1028      auto GUID = std::to_string(MD5Hash(Name));
1029      auto iter = FuncOffsetTable.find(StringRef(GUID));
1030      if (iter == FuncOffsetTable.end())
1031        continue;
1032      OffsetsToUse.push_back(iter->second);
1033    }
1034  }
1035
1036  for (auto Offset : OffsetsToUse) {
1037    const uint8_t *SavedData = Data;
1038    if (std::error_code EC = readFuncProfile(
1039            reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1040            Offset))
1041      return EC;
1042    Data = SavedData;
1043  }
1044  return sampleprof_error::success;
1045}
1046
1047std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
1048  if (Magic == SPMagic())
1049    return sampleprof_error::success;
1050  return sampleprof_error::bad_magic;
1051}
1052
1053std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
1054  if (Magic == SPMagic(SPF_Ext_Binary))
1055    return sampleprof_error::success;
1056  return sampleprof_error::bad_magic;
1057}
1058
1059std::error_code
1060SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
1061  if (Magic == SPMagic(SPF_Compact_Binary))
1062    return sampleprof_error::success;
1063  return sampleprof_error::bad_magic;
1064}
1065
1066std::error_code SampleProfileReaderBinary::readNameTable() {
1067  auto Size = readNumber<uint32_t>();
1068  if (std::error_code EC = Size.getError())
1069    return EC;
1070  NameTable.reserve(*Size + NameTable.size());
1071  for (uint32_t I = 0; I < *Size; ++I) {
1072    auto Name(readString());
1073    if (std::error_code EC = Name.getError())
1074      return EC;
1075    NameTable.push_back(*Name);
1076  }
1077
1078  return sampleprof_error::success;
1079}
1080
1081std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
1082  auto Size = readNumber<uint64_t>();
1083  if (std::error_code EC = Size.getError())
1084    return EC;
1085  MD5StringBuf = std::make_unique<std::vector<std::string>>();
1086  MD5StringBuf->reserve(*Size);
1087  if (FixedLengthMD5) {
1088    // Preallocate and initialize NameTable so we can check whether a name
1089    // index has been read before by checking whether the element in the
1090    // NameTable is empty, meanwhile readStringIndex can do the boundary
1091    // check using the size of NameTable.
1092    NameTable.resize(*Size + NameTable.size());
1093
1094    MD5NameMemStart = Data;
1095    Data = Data + (*Size) * sizeof(uint64_t);
1096    return sampleprof_error::success;
1097  }
1098  NameTable.reserve(*Size);
1099  for (uint64_t I = 0; I < *Size; ++I) {
1100    auto FID = readNumber<uint64_t>();
1101    if (std::error_code EC = FID.getError())
1102      return EC;
1103    MD5StringBuf->push_back(std::to_string(*FID));
1104    // NameTable is a vector of StringRef. Here it is pushing back a
1105    // StringRef initialized with the last string in MD5stringBuf.
1106    NameTable.push_back(MD5StringBuf->back());
1107  }
1108  return sampleprof_error::success;
1109}
1110
1111std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
1112  if (IsMD5)
1113    return readMD5NameTable();
1114  return SampleProfileReaderBinary::readNameTable();
1115}
1116
1117// Read in the CS name table section, which basically contains a list of context
1118// vectors. Each element of a context vector, aka a frame, refers to the
1119// underlying raw function names that are stored in the name table, as well as
1120// a callsite identifier that only makes sense for non-leaf frames.
1121std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1122  auto Size = readNumber<uint32_t>();
1123  if (std::error_code EC = Size.getError())
1124    return EC;
1125
1126  std::vector<SampleContextFrameVector> *PNameVec =
1127      new std::vector<SampleContextFrameVector>();
1128  PNameVec->reserve(*Size);
1129  for (uint32_t I = 0; I < *Size; ++I) {
1130    PNameVec->emplace_back(SampleContextFrameVector());
1131    auto ContextSize = readNumber<uint32_t>();
1132    if (std::error_code EC = ContextSize.getError())
1133      return EC;
1134    for (uint32_t J = 0; J < *ContextSize; ++J) {
1135      auto FName(readStringFromTable());
1136      if (std::error_code EC = FName.getError())
1137        return EC;
1138      auto LineOffset = readNumber<uint64_t>();
1139      if (std::error_code EC = LineOffset.getError())
1140        return EC;
1141
1142      if (!isOffsetLegal(*LineOffset))
1143        return std::error_code();
1144
1145      auto Discriminator = readNumber<uint64_t>();
1146      if (std::error_code EC = Discriminator.getError())
1147        return EC;
1148
1149      PNameVec->back().emplace_back(
1150          FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1151    }
1152  }
1153
1154  // From this point the underlying object of CSNameTable should be immutable.
1155  CSNameTable.reset(PNameVec);
1156  return sampleprof_error::success;
1157}
1158
1159std::error_code
1160
1161SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
1162                                                   FunctionSamples *FProfile) {
1163  if (Data < End) {
1164    if (ProfileIsProbeBased) {
1165      auto Checksum = readNumber<uint64_t>();
1166      if (std::error_code EC = Checksum.getError())
1167        return EC;
1168      if (FProfile)
1169        FProfile->setFunctionHash(*Checksum);
1170    }
1171
1172    if (ProfileHasAttribute) {
1173      auto Attributes = readNumber<uint32_t>();
1174      if (std::error_code EC = Attributes.getError())
1175        return EC;
1176      if (FProfile)
1177        FProfile->getContext().setAllAttributes(*Attributes);
1178    }
1179
1180    if (!ProfileIsCS) {
1181      // Read all the attributes for inlined function calls.
1182      auto NumCallsites = readNumber<uint32_t>();
1183      if (std::error_code EC = NumCallsites.getError())
1184        return EC;
1185
1186      for (uint32_t J = 0; J < *NumCallsites; ++J) {
1187        auto LineOffset = readNumber<uint64_t>();
1188        if (std::error_code EC = LineOffset.getError())
1189          return EC;
1190
1191        auto Discriminator = readNumber<uint64_t>();
1192        if (std::error_code EC = Discriminator.getError())
1193          return EC;
1194
1195        auto FContext(readSampleContextFromTable());
1196        if (std::error_code EC = FContext.getError())
1197          return EC;
1198
1199        FunctionSamples *CalleeProfile = nullptr;
1200        if (FProfile) {
1201          CalleeProfile = const_cast<FunctionSamples *>(
1202              &FProfile->functionSamplesAt(LineLocation(
1203                  *LineOffset,
1204                  *Discriminator))[std::string(FContext.get().getName())]);
1205        }
1206        if (std::error_code EC =
1207                readFuncMetadata(ProfileHasAttribute, CalleeProfile))
1208          return EC;
1209      }
1210    }
1211  }
1212
1213  return sampleprof_error::success;
1214}
1215
1216std::error_code
1217SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1218  while (Data < End) {
1219    auto FContext(readSampleContextFromTable());
1220    if (std::error_code EC = FContext.getError())
1221      return EC;
1222    FunctionSamples *FProfile = nullptr;
1223    auto It = Profiles.find(*FContext);
1224    if (It != Profiles.end())
1225      FProfile = &It->second;
1226
1227    if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1228      return EC;
1229  }
1230
1231  assert(Data == End && "More data is read than expected");
1232  return sampleprof_error::success;
1233}
1234
1235std::error_code SampleProfileReaderCompactBinary::readNameTable() {
1236  auto Size = readNumber<uint64_t>();
1237  if (std::error_code EC = Size.getError())
1238    return EC;
1239  NameTable.reserve(*Size);
1240  for (uint64_t I = 0; I < *Size; ++I) {
1241    auto FID = readNumber<uint64_t>();
1242    if (std::error_code EC = FID.getError())
1243      return EC;
1244    NameTable.push_back(std::to_string(*FID));
1245  }
1246  return sampleprof_error::success;
1247}
1248
1249std::error_code
1250SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1251  SecHdrTableEntry Entry;
1252  auto Type = readUnencodedNumber<uint64_t>();
1253  if (std::error_code EC = Type.getError())
1254    return EC;
1255  Entry.Type = static_cast<SecType>(*Type);
1256
1257  auto Flags = readUnencodedNumber<uint64_t>();
1258  if (std::error_code EC = Flags.getError())
1259    return EC;
1260  Entry.Flags = *Flags;
1261
1262  auto Offset = readUnencodedNumber<uint64_t>();
1263  if (std::error_code EC = Offset.getError())
1264    return EC;
1265  Entry.Offset = *Offset;
1266
1267  auto Size = readUnencodedNumber<uint64_t>();
1268  if (std::error_code EC = Size.getError())
1269    return EC;
1270  Entry.Size = *Size;
1271
1272  Entry.LayoutIndex = Idx;
1273  SecHdrTable.push_back(std::move(Entry));
1274  return sampleprof_error::success;
1275}
1276
1277std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1278  auto EntryNum = readUnencodedNumber<uint64_t>();
1279  if (std::error_code EC = EntryNum.getError())
1280    return EC;
1281
1282  for (uint64_t i = 0; i < (*EntryNum); i++)
1283    if (std::error_code EC = readSecHdrTableEntry(i))
1284      return EC;
1285
1286  return sampleprof_error::success;
1287}
1288
1289std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1290  const uint8_t *BufStart =
1291      reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1292  Data = BufStart;
1293  End = BufStart + Buffer->getBufferSize();
1294
1295  if (std::error_code EC = readMagicIdent())
1296    return EC;
1297
1298  if (std::error_code EC = readSecHdrTable())
1299    return EC;
1300
1301  return sampleprof_error::success;
1302}
1303
1304uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1305  uint64_t Size = 0;
1306  for (auto &Entry : SecHdrTable) {
1307    if (Entry.Type == Type)
1308      Size += Entry.Size;
1309  }
1310  return Size;
1311}
1312
1313uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1314  // Sections in SecHdrTable is not necessarily in the same order as
1315  // sections in the profile because section like FuncOffsetTable needs
1316  // to be written after section LBRProfile but needs to be read before
1317  // section LBRProfile, so we cannot simply use the last entry in
1318  // SecHdrTable to calculate the file size.
1319  uint64_t FileSize = 0;
1320  for (auto &Entry : SecHdrTable) {
1321    FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1322  }
1323  return FileSize;
1324}
1325
1326static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1327  std::string Flags;
1328  if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1329    Flags.append("{compressed,");
1330  else
1331    Flags.append("{");
1332
1333  if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1334    Flags.append("flat,");
1335
1336  switch (Entry.Type) {
1337  case SecNameTable:
1338    if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1339      Flags.append("fixlenmd5,");
1340    else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1341      Flags.append("md5,");
1342    if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1343      Flags.append("uniq,");
1344    break;
1345  case SecProfSummary:
1346    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1347      Flags.append("partial,");
1348    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1349      Flags.append("context,");
1350    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined))
1351      Flags.append("preInlined,");
1352    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1353      Flags.append("fs-discriminator,");
1354    break;
1355  case SecFuncOffsetTable:
1356    if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1357      Flags.append("ordered,");
1358    break;
1359  case SecFuncMetadata:
1360    if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1361      Flags.append("probe,");
1362    if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1363      Flags.append("attr,");
1364    break;
1365  default:
1366    break;
1367  }
1368  char &last = Flags.back();
1369  if (last == ',')
1370    last = '}';
1371  else
1372    Flags.append("}");
1373  return Flags;
1374}
1375
1376bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1377  uint64_t TotalSecsSize = 0;
1378  for (auto &Entry : SecHdrTable) {
1379    OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1380       << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1381       << "\n";
1382    ;
1383    TotalSecsSize += Entry.Size;
1384  }
1385  uint64_t HeaderSize = SecHdrTable.front().Offset;
1386  assert(HeaderSize + TotalSecsSize == getFileSize() &&
1387         "Size of 'header + sections' doesn't match the total size of profile");
1388
1389  OS << "Header Size: " << HeaderSize << "\n";
1390  OS << "Total Sections Size: " << TotalSecsSize << "\n";
1391  OS << "File Size: " << getFileSize() << "\n";
1392  return true;
1393}
1394
1395std::error_code SampleProfileReaderBinary::readMagicIdent() {
1396  // Read and check the magic identifier.
1397  auto Magic = readNumber<uint64_t>();
1398  if (std::error_code EC = Magic.getError())
1399    return EC;
1400  else if (std::error_code EC = verifySPMagic(*Magic))
1401    return EC;
1402
1403  // Read the version number.
1404  auto Version = readNumber<uint64_t>();
1405  if (std::error_code EC = Version.getError())
1406    return EC;
1407  else if (*Version != SPVersion())
1408    return sampleprof_error::unsupported_version;
1409
1410  return sampleprof_error::success;
1411}
1412
1413std::error_code SampleProfileReaderBinary::readHeader() {
1414  Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1415  End = Data + Buffer->getBufferSize();
1416
1417  if (std::error_code EC = readMagicIdent())
1418    return EC;
1419
1420  if (std::error_code EC = readSummary())
1421    return EC;
1422
1423  if (std::error_code EC = readNameTable())
1424    return EC;
1425  return sampleprof_error::success;
1426}
1427
1428std::error_code SampleProfileReaderCompactBinary::readHeader() {
1429  SampleProfileReaderBinary::readHeader();
1430  if (std::error_code EC = readFuncOffsetTable())
1431    return EC;
1432  return sampleprof_error::success;
1433}
1434
1435std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1436  auto TableOffset = readUnencodedNumber<uint64_t>();
1437  if (std::error_code EC = TableOffset.getError())
1438    return EC;
1439
1440  const uint8_t *SavedData = Data;
1441  const uint8_t *TableStart =
1442      reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1443      *TableOffset;
1444  Data = TableStart;
1445
1446  auto Size = readNumber<uint64_t>();
1447  if (std::error_code EC = Size.getError())
1448    return EC;
1449
1450  FuncOffsetTable.reserve(*Size);
1451  for (uint64_t I = 0; I < *Size; ++I) {
1452    auto FName(readStringFromTable());
1453    if (std::error_code EC = FName.getError())
1454      return EC;
1455
1456    auto Offset = readNumber<uint64_t>();
1457    if (std::error_code EC = Offset.getError())
1458      return EC;
1459
1460    FuncOffsetTable[*FName] = *Offset;
1461  }
1462  End = TableStart;
1463  Data = SavedData;
1464  return sampleprof_error::success;
1465}
1466
1467bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1468  if (!M)
1469    return false;
1470  FuncsToUse.clear();
1471  for (auto &F : *M)
1472    FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1473  return true;
1474}
1475
1476std::error_code SampleProfileReaderBinary::readSummaryEntry(
1477    std::vector<ProfileSummaryEntry> &Entries) {
1478  auto Cutoff = readNumber<uint64_t>();
1479  if (std::error_code EC = Cutoff.getError())
1480    return EC;
1481
1482  auto MinBlockCount = readNumber<uint64_t>();
1483  if (std::error_code EC = MinBlockCount.getError())
1484    return EC;
1485
1486  auto NumBlocks = readNumber<uint64_t>();
1487  if (std::error_code EC = NumBlocks.getError())
1488    return EC;
1489
1490  Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1491  return sampleprof_error::success;
1492}
1493
1494std::error_code SampleProfileReaderBinary::readSummary() {
1495  auto TotalCount = readNumber<uint64_t>();
1496  if (std::error_code EC = TotalCount.getError())
1497    return EC;
1498
1499  auto MaxBlockCount = readNumber<uint64_t>();
1500  if (std::error_code EC = MaxBlockCount.getError())
1501    return EC;
1502
1503  auto MaxFunctionCount = readNumber<uint64_t>();
1504  if (std::error_code EC = MaxFunctionCount.getError())
1505    return EC;
1506
1507  auto NumBlocks = readNumber<uint64_t>();
1508  if (std::error_code EC = NumBlocks.getError())
1509    return EC;
1510
1511  auto NumFunctions = readNumber<uint64_t>();
1512  if (std::error_code EC = NumFunctions.getError())
1513    return EC;
1514
1515  auto NumSummaryEntries = readNumber<uint64_t>();
1516  if (std::error_code EC = NumSummaryEntries.getError())
1517    return EC;
1518
1519  std::vector<ProfileSummaryEntry> Entries;
1520  for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1521    std::error_code EC = readSummaryEntry(Entries);
1522    if (EC != sampleprof_error::success)
1523      return EC;
1524  }
1525  Summary = std::make_unique<ProfileSummary>(
1526      ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1527      *MaxFunctionCount, *NumBlocks, *NumFunctions);
1528
1529  return sampleprof_error::success;
1530}
1531
1532bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1533  const uint8_t *Data =
1534      reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1535  uint64_t Magic = decodeULEB128(Data);
1536  return Magic == SPMagic();
1537}
1538
1539bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1540  const uint8_t *Data =
1541      reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1542  uint64_t Magic = decodeULEB128(Data);
1543  return Magic == SPMagic(SPF_Ext_Binary);
1544}
1545
1546bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1547  const uint8_t *Data =
1548      reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1549  uint64_t Magic = decodeULEB128(Data);
1550  return Magic == SPMagic(SPF_Compact_Binary);
1551}
1552
1553std::error_code SampleProfileReaderGCC::skipNextWord() {
1554  uint32_t dummy;
1555  if (!GcovBuffer.readInt(dummy))
1556    return sampleprof_error::truncated;
1557  return sampleprof_error::success;
1558}
1559
1560template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1561  if (sizeof(T) <= sizeof(uint32_t)) {
1562    uint32_t Val;
1563    if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1564      return static_cast<T>(Val);
1565  } else if (sizeof(T) <= sizeof(uint64_t)) {
1566    uint64_t Val;
1567    if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1568      return static_cast<T>(Val);
1569  }
1570
1571  std::error_code EC = sampleprof_error::malformed;
1572  reportError(0, EC.message());
1573  return EC;
1574}
1575
1576ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1577  StringRef Str;
1578  if (!GcovBuffer.readString(Str))
1579    return sampleprof_error::truncated;
1580  return Str;
1581}
1582
1583std::error_code SampleProfileReaderGCC::readHeader() {
1584  // Read the magic identifier.
1585  if (!GcovBuffer.readGCDAFormat())
1586    return sampleprof_error::unrecognized_format;
1587
1588  // Read the version number. Note - the GCC reader does not validate this
1589  // version, but the profile creator generates v704.
1590  GCOV::GCOVVersion version;
1591  if (!GcovBuffer.readGCOVVersion(version))
1592    return sampleprof_error::unrecognized_format;
1593
1594  if (version != GCOV::V407)
1595    return sampleprof_error::unsupported_version;
1596
1597  // Skip the empty integer.
1598  if (std::error_code EC = skipNextWord())
1599    return EC;
1600
1601  return sampleprof_error::success;
1602}
1603
1604std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1605  uint32_t Tag;
1606  if (!GcovBuffer.readInt(Tag))
1607    return sampleprof_error::truncated;
1608
1609  if (Tag != Expected)
1610    return sampleprof_error::malformed;
1611
1612  if (std::error_code EC = skipNextWord())
1613    return EC;
1614
1615  return sampleprof_error::success;
1616}
1617
1618std::error_code SampleProfileReaderGCC::readNameTable() {
1619  if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1620    return EC;
1621
1622  uint32_t Size;
1623  if (!GcovBuffer.readInt(Size))
1624    return sampleprof_error::truncated;
1625
1626  for (uint32_t I = 0; I < Size; ++I) {
1627    StringRef Str;
1628    if (!GcovBuffer.readString(Str))
1629      return sampleprof_error::truncated;
1630    Names.push_back(std::string(Str));
1631  }
1632
1633  return sampleprof_error::success;
1634}
1635
1636std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1637  if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1638    return EC;
1639
1640  uint32_t NumFunctions;
1641  if (!GcovBuffer.readInt(NumFunctions))
1642    return sampleprof_error::truncated;
1643
1644  InlineCallStack Stack;
1645  for (uint32_t I = 0; I < NumFunctions; ++I)
1646    if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1647      return EC;
1648
1649  computeSummary();
1650  return sampleprof_error::success;
1651}
1652
1653std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1654    const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1655  uint64_t HeadCount = 0;
1656  if (InlineStack.size() == 0)
1657    if (!GcovBuffer.readInt64(HeadCount))
1658      return sampleprof_error::truncated;
1659
1660  uint32_t NameIdx;
1661  if (!GcovBuffer.readInt(NameIdx))
1662    return sampleprof_error::truncated;
1663
1664  StringRef Name(Names[NameIdx]);
1665
1666  uint32_t NumPosCounts;
1667  if (!GcovBuffer.readInt(NumPosCounts))
1668    return sampleprof_error::truncated;
1669
1670  uint32_t NumCallsites;
1671  if (!GcovBuffer.readInt(NumCallsites))
1672    return sampleprof_error::truncated;
1673
1674  FunctionSamples *FProfile = nullptr;
1675  if (InlineStack.size() == 0) {
1676    // If this is a top function that we have already processed, do not
1677    // update its profile again.  This happens in the presence of
1678    // function aliases.  Since these aliases share the same function
1679    // body, there will be identical replicated profiles for the
1680    // original function.  In this case, we simply not bother updating
1681    // the profile of the original function.
1682    FProfile = &Profiles[Name];
1683    FProfile->addHeadSamples(HeadCount);
1684    if (FProfile->getTotalSamples() > 0)
1685      Update = false;
1686  } else {
1687    // Otherwise, we are reading an inlined instance. The top of the
1688    // inline stack contains the profile of the caller. Insert this
1689    // callee in the caller's CallsiteMap.
1690    FunctionSamples *CallerProfile = InlineStack.front();
1691    uint32_t LineOffset = Offset >> 16;
1692    uint32_t Discriminator = Offset & 0xffff;
1693    FProfile = &CallerProfile->functionSamplesAt(
1694        LineLocation(LineOffset, Discriminator))[std::string(Name)];
1695  }
1696  FProfile->setName(Name);
1697
1698  for (uint32_t I = 0; I < NumPosCounts; ++I) {
1699    uint32_t Offset;
1700    if (!GcovBuffer.readInt(Offset))
1701      return sampleprof_error::truncated;
1702
1703    uint32_t NumTargets;
1704    if (!GcovBuffer.readInt(NumTargets))
1705      return sampleprof_error::truncated;
1706
1707    uint64_t Count;
1708    if (!GcovBuffer.readInt64(Count))
1709      return sampleprof_error::truncated;
1710
1711    // The line location is encoded in the offset as:
1712    //   high 16 bits: line offset to the start of the function.
1713    //   low 16 bits: discriminator.
1714    uint32_t LineOffset = Offset >> 16;
1715    uint32_t Discriminator = Offset & 0xffff;
1716
1717    InlineCallStack NewStack;
1718    NewStack.push_back(FProfile);
1719    llvm::append_range(NewStack, InlineStack);
1720    if (Update) {
1721      // Walk up the inline stack, adding the samples on this line to
1722      // the total sample count of the callers in the chain.
1723      for (auto *CallerProfile : NewStack)
1724        CallerProfile->addTotalSamples(Count);
1725
1726      // Update the body samples for the current profile.
1727      FProfile->addBodySamples(LineOffset, Discriminator, Count);
1728    }
1729
1730    // Process the list of functions called at an indirect call site.
1731    // These are all the targets that a function pointer (or virtual
1732    // function) resolved at runtime.
1733    for (uint32_t J = 0; J < NumTargets; J++) {
1734      uint32_t HistVal;
1735      if (!GcovBuffer.readInt(HistVal))
1736        return sampleprof_error::truncated;
1737
1738      if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1739        return sampleprof_error::malformed;
1740
1741      uint64_t TargetIdx;
1742      if (!GcovBuffer.readInt64(TargetIdx))
1743        return sampleprof_error::truncated;
1744      StringRef TargetName(Names[TargetIdx]);
1745
1746      uint64_t TargetCount;
1747      if (!GcovBuffer.readInt64(TargetCount))
1748        return sampleprof_error::truncated;
1749
1750      if (Update)
1751        FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1752                                         TargetName, TargetCount);
1753    }
1754  }
1755
1756  // Process all the inlined callers into the current function. These
1757  // are all the callsites that were inlined into this function.
1758  for (uint32_t I = 0; I < NumCallsites; I++) {
1759    // The offset is encoded as:
1760    //   high 16 bits: line offset to the start of the function.
1761    //   low 16 bits: discriminator.
1762    uint32_t Offset;
1763    if (!GcovBuffer.readInt(Offset))
1764      return sampleprof_error::truncated;
1765    InlineCallStack NewStack;
1766    NewStack.push_back(FProfile);
1767    llvm::append_range(NewStack, InlineStack);
1768    if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1769      return EC;
1770  }
1771
1772  return sampleprof_error::success;
1773}
1774
1775/// Read a GCC AutoFDO profile.
1776///
1777/// This format is generated by the Linux Perf conversion tool at
1778/// https://github.com/google/autofdo.
1779std::error_code SampleProfileReaderGCC::readImpl() {
1780  assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1781  // Read the string table.
1782  if (std::error_code EC = readNameTable())
1783    return EC;
1784
1785  // Read the source profile.
1786  if (std::error_code EC = readFunctionProfiles())
1787    return EC;
1788
1789  return sampleprof_error::success;
1790}
1791
1792bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1793  StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1794  return Magic == "adcg*704";
1795}
1796
1797void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1798  // If the reader uses MD5 to represent string, we can't remap it because
1799  // we don't know what the original function names were.
1800  if (Reader.useMD5()) {
1801    Ctx.diagnose(DiagnosticInfoSampleProfile(
1802        Reader.getBuffer()->getBufferIdentifier(),
1803        "Profile data remapping cannot be applied to profile data "
1804        "in compact format (original mangled names are not available).",
1805        DS_Warning));
1806    return;
1807  }
1808
1809  // CSSPGO-TODO: Remapper is not yet supported.
1810  // We will need to remap the entire context string.
1811  assert(Remappings && "should be initialized while creating remapper");
1812  for (auto &Sample : Reader.getProfiles()) {
1813    DenseSet<StringRef> NamesInSample;
1814    Sample.second.findAllNames(NamesInSample);
1815    for (auto &Name : NamesInSample)
1816      if (auto Key = Remappings->insert(Name))
1817        NameMap.insert({Key, Name});
1818  }
1819
1820  RemappingApplied = true;
1821}
1822
1823std::optional<StringRef>
1824SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1825  if (auto Key = Remappings->lookup(Fname))
1826    return NameMap.lookup(Key);
1827  return std::nullopt;
1828}
1829
1830/// Prepare a memory buffer for the contents of \p Filename.
1831///
1832/// \returns an error code indicating the status of the buffer.
1833static ErrorOr<std::unique_ptr<MemoryBuffer>>
1834setupMemoryBuffer(const Twine &Filename) {
1835  auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1836  if (std::error_code EC = BufferOrErr.getError())
1837    return EC;
1838  auto Buffer = std::move(BufferOrErr.get());
1839
1840  return std::move(Buffer);
1841}
1842
1843/// Create a sample profile reader based on the format of the input file.
1844///
1845/// \param Filename The file to open.
1846///
1847/// \param C The LLVM context to use to emit diagnostics.
1848///
1849/// \param P The FSDiscriminatorPass.
1850///
1851/// \param RemapFilename The file used for profile remapping.
1852///
1853/// \returns an error code indicating the status of the created reader.
1854ErrorOr<std::unique_ptr<SampleProfileReader>>
1855SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1856                            FSDiscriminatorPass P,
1857                            const std::string RemapFilename) {
1858  auto BufferOrError = setupMemoryBuffer(Filename);
1859  if (std::error_code EC = BufferOrError.getError())
1860    return EC;
1861  return create(BufferOrError.get(), C, P, RemapFilename);
1862}
1863
1864/// Create a sample profile remapper from the given input, to remap the
1865/// function names in the given profile data.
1866///
1867/// \param Filename The file to open.
1868///
1869/// \param Reader The profile reader the remapper is going to be applied to.
1870///
1871/// \param C The LLVM context to use to emit diagnostics.
1872///
1873/// \returns an error code indicating the status of the created reader.
1874ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1875SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1876                                           SampleProfileReader &Reader,
1877                                           LLVMContext &C) {
1878  auto BufferOrError = setupMemoryBuffer(Filename);
1879  if (std::error_code EC = BufferOrError.getError())
1880    return EC;
1881  return create(BufferOrError.get(), Reader, C);
1882}
1883
1884/// Create a sample profile remapper from the given input, to remap the
1885/// function names in the given profile data.
1886///
1887/// \param B The memory buffer to create the reader from (assumes ownership).
1888///
1889/// \param C The LLVM context to use to emit diagnostics.
1890///
1891/// \param Reader The profile reader the remapper is going to be applied to.
1892///
1893/// \returns an error code indicating the status of the created reader.
1894ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1895SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1896                                           SampleProfileReader &Reader,
1897                                           LLVMContext &C) {
1898  auto Remappings = std::make_unique<SymbolRemappingReader>();
1899  if (Error E = Remappings->read(*B)) {
1900    handleAllErrors(
1901        std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1902          C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1903                                                 ParseError.getLineNum(),
1904                                                 ParseError.getMessage()));
1905        });
1906    return sampleprof_error::malformed;
1907  }
1908
1909  return std::make_unique<SampleProfileReaderItaniumRemapper>(
1910      std::move(B), std::move(Remappings), Reader);
1911}
1912
1913/// Create a sample profile reader based on the format of the input data.
1914///
1915/// \param B The memory buffer to create the reader from (assumes ownership).
1916///
1917/// \param C The LLVM context to use to emit diagnostics.
1918///
1919/// \param P The FSDiscriminatorPass.
1920///
1921/// \param RemapFilename The file used for profile remapping.
1922///
1923/// \returns an error code indicating the status of the created reader.
1924ErrorOr<std::unique_ptr<SampleProfileReader>>
1925SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1926                            FSDiscriminatorPass P,
1927                            const std::string RemapFilename) {
1928  std::unique_ptr<SampleProfileReader> Reader;
1929  if (SampleProfileReaderRawBinary::hasFormat(*B))
1930    Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1931  else if (SampleProfileReaderExtBinary::hasFormat(*B))
1932    Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1933  else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1934    Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1935  else if (SampleProfileReaderGCC::hasFormat(*B))
1936    Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1937  else if (SampleProfileReaderText::hasFormat(*B))
1938    Reader.reset(new SampleProfileReaderText(std::move(B), C));
1939  else
1940    return sampleprof_error::unrecognized_format;
1941
1942  if (!RemapFilename.empty()) {
1943    auto ReaderOrErr =
1944        SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1945    if (std::error_code EC = ReaderOrErr.getError()) {
1946      std::string Msg = "Could not create remapper: " + EC.message();
1947      C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1948      return EC;
1949    }
1950    Reader->Remapper = std::move(ReaderOrErr.get());
1951  }
1952
1953  if (std::error_code EC = Reader->readHeader()) {
1954    return EC;
1955  }
1956
1957  Reader->setDiscriminatorMaskedBitFrom(P);
1958
1959  return std::move(Reader);
1960}
1961
1962// For text and GCC file formats, we compute the summary after reading the
1963// profile. Binary format has the profile summary in its header.
1964void SampleProfileReader::computeSummary() {
1965  SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1966  Summary = Builder.computeSummaryForProfiles(Profiles);
1967}
1968