1//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Defines the XRay Profile class representing the latency profile generated by
10// XRay's profiling mode.
11//
12//===----------------------------------------------------------------------===//
13#include "llvm/XRay/Profile.h"
14
15#include "llvm/Support/DataExtractor.h"
16#include "llvm/Support/Error.h"
17#include "llvm/Support/FileSystem.h"
18#include "llvm/XRay/Trace.h"
19#include <deque>
20#include <memory>
21
22namespace llvm {
23namespace xray {
24
25Profile::Profile(const Profile &O) {
26  // We need to re-create all the tries from the original (O), into the current
27  // Profile being initialized, through the Block instances we see.
28  for (const auto &Block : O) {
29    Blocks.push_back({Block.Thread, {}});
30    auto &B = Blocks.back();
31    for (const auto &PathData : Block.PathData)
32      B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
33                            PathData.second});
34  }
35}
36
37Profile &Profile::operator=(const Profile &O) {
38  Profile P = O;
39  *this = std::move(P);
40  return *this;
41}
42
43namespace {
44
45struct BlockHeader {
46  uint32_t Size;
47  uint32_t Number;
48  uint64_t Thread;
49};
50
51static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
52                                             uint64_t &Offset) {
53  BlockHeader H;
54  uint64_t CurrentOffset = Offset;
55  H.Size = Extractor.getU32(&Offset);
56  if (Offset == CurrentOffset)
57    return make_error<StringError>(
58        Twine("Error parsing block header size at offset '") +
59            Twine(CurrentOffset) + "'",
60        std::make_error_code(std::errc::invalid_argument));
61  CurrentOffset = Offset;
62  H.Number = Extractor.getU32(&Offset);
63  if (Offset == CurrentOffset)
64    return make_error<StringError>(
65        Twine("Error parsing block header number at offset '") +
66            Twine(CurrentOffset) + "'",
67        std::make_error_code(std::errc::invalid_argument));
68  CurrentOffset = Offset;
69  H.Thread = Extractor.getU64(&Offset);
70  if (Offset == CurrentOffset)
71    return make_error<StringError>(
72        Twine("Error parsing block header thread id at offset '") +
73            Twine(CurrentOffset) + "'",
74        std::make_error_code(std::errc::invalid_argument));
75  return H;
76}
77
78static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
79                                                       uint64_t &Offset) {
80  // We're reading a sequence of int32_t's until we find a 0.
81  std::vector<Profile::FuncID> Path;
82  auto CurrentOffset = Offset;
83  int32_t FuncId;
84  do {
85    FuncId = Extractor.getSigned(&Offset, 4);
86    if (CurrentOffset == Offset)
87      return make_error<StringError>(
88          Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
89          std::make_error_code(std::errc::invalid_argument));
90    CurrentOffset = Offset;
91    Path.push_back(FuncId);
92  } while (FuncId != 0);
93  return std::move(Path);
94}
95
96static Expected<Profile::Data> readData(DataExtractor &Extractor,
97                                        uint64_t &Offset) {
98  // We expect a certain number of elements for Data:
99  //   - A 64-bit CallCount
100  //   - A 64-bit CumulativeLocalTime counter
101  Profile::Data D;
102  auto CurrentOffset = Offset;
103  D.CallCount = Extractor.getU64(&Offset);
104  if (CurrentOffset == Offset)
105    return make_error<StringError>(
106        Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
107            "'",
108        std::make_error_code(std::errc::invalid_argument));
109  CurrentOffset = Offset;
110  D.CumulativeLocalTime = Extractor.getU64(&Offset);
111  if (CurrentOffset == Offset)
112    return make_error<StringError>(
113        Twine("Error parsing cumulative local time at offset '") +
114            Twine(CurrentOffset) + "'",
115        std::make_error_code(std::errc::invalid_argument));
116  return D;
117}
118
119} // namespace
120
121Error Profile::addBlock(Block &&B) {
122  if (B.PathData.empty())
123    return make_error<StringError>(
124        "Block may not have empty path data.",
125        std::make_error_code(std::errc::invalid_argument));
126
127  Blocks.emplace_back(std::move(B));
128  return Error::success();
129}
130
131Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
132  auto It = PathIDMap.find(P);
133  if (It == PathIDMap.end())
134    return make_error<StringError>(
135        Twine("PathID not found: ") + Twine(P),
136        std::make_error_code(std::errc::invalid_argument));
137  std::vector<Profile::FuncID> Path;
138  for (auto Node = It->second; Node; Node = Node->Caller)
139    Path.push_back(Node->Func);
140  return std::move(Path);
141}
142
143Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
144  if (P.empty())
145    return 0;
146
147  auto RootToLeafPath = reverse(P);
148
149  // Find the root.
150  auto It = RootToLeafPath.begin();
151  auto PathRoot = *It++;
152  auto RootIt =
153      find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
154
155  // If we've not seen this root before, remember it.
156  TrieNode *Node = nullptr;
157  if (RootIt == Roots.end()) {
158    NodeStorage.emplace_back();
159    Node = &NodeStorage.back();
160    Node->Func = PathRoot;
161    Roots.push_back(Node);
162  } else {
163    Node = *RootIt;
164  }
165
166  // Now traverse the path, re-creating if necessary.
167  while (It != RootToLeafPath.end()) {
168    auto NodeFuncID = *It++;
169    auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
170      return N->Func == NodeFuncID;
171    });
172    if (CalleeIt == Node->Callees.end()) {
173      NodeStorage.emplace_back();
174      auto NewNode = &NodeStorage.back();
175      NewNode->Func = NodeFuncID;
176      NewNode->Caller = Node;
177      Node->Callees.push_back(NewNode);
178      Node = NewNode;
179    } else {
180      Node = *CalleeIt;
181    }
182  }
183
184  // At this point, Node *must* be pointing at the leaf.
185  assert(Node->Func == P.front());
186  if (Node->ID == 0) {
187    Node->ID = NextID++;
188    PathIDMap.insert({Node->ID, Node});
189  }
190  return Node->ID;
191}
192
193Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
194  Profile Merged;
195  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
196  using PathDataMapPtr = std::unique_ptr<PathDataMap>;
197  using PathDataVector = decltype(Profile::Block::PathData);
198  using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
199  ThreadProfileIndexMap ThreadProfileIndex;
200
201  for (const auto &P : {std::ref(L), std::ref(R)})
202    for (const auto &Block : P.get()) {
203      ThreadProfileIndexMap::iterator It;
204      std::tie(It, std::ignore) = ThreadProfileIndex.insert(
205          {Block.Thread, PathDataMapPtr{new PathDataMap()}});
206      for (const auto &PathAndData : Block.PathData) {
207        auto &PathID = PathAndData.first;
208        auto &Data = PathAndData.second;
209        auto NewPathID =
210            Merged.internPath(cantFail(P.get().expandPath(PathID)));
211        PathDataMap::iterator PathDataIt;
212        bool Inserted;
213        std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
214        if (!Inserted) {
215          auto &ExistingData = PathDataIt->second;
216          ExistingData.CallCount += Data.CallCount;
217          ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
218        }
219      }
220    }
221
222  for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
223    PathDataVector PathAndData;
224    PathAndData.reserve(IndexedThreadBlock.second->size());
225    copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
226    cantFail(
227        Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
228  }
229  return Merged;
230}
231
232Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
233  Profile Merged;
234  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
235  PathDataMap PathData;
236  using PathDataVector = decltype(Profile::Block::PathData);
237  for (const auto &P : {std::ref(L), std::ref(R)})
238    for (const auto &Block : P.get())
239      for (const auto &PathAndData : Block.PathData) {
240        auto &PathId = PathAndData.first;
241        auto &Data = PathAndData.second;
242        auto NewPathID =
243            Merged.internPath(cantFail(P.get().expandPath(PathId)));
244        PathDataMap::iterator PathDataIt;
245        bool Inserted;
246        std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
247        if (!Inserted) {
248          auto &ExistingData = PathDataIt->second;
249          ExistingData.CallCount += Data.CallCount;
250          ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
251        }
252      }
253
254  // In the end there's a single Block, for thread 0.
255  PathDataVector Block;
256  Block.reserve(PathData.size());
257  copy(PathData, std::back_inserter(Block));
258  cantFail(Merged.addBlock({0, std::move(Block)}));
259  return Merged;
260}
261
262Expected<Profile> loadProfile(StringRef Filename) {
263  Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
264  if (!FdOrErr)
265    return FdOrErr.takeError();
266
267  uint64_t FileSize;
268  if (auto EC = sys::fs::file_size(Filename, FileSize))
269    return make_error<StringError>(
270        Twine("Cannot get filesize of '") + Filename + "'", EC);
271
272  std::error_code EC;
273  sys::fs::mapped_file_region MappedFile(
274      *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
275      EC);
276  sys::fs::closeFile(*FdOrErr);
277  if (EC)
278    return make_error<StringError>(
279        Twine("Cannot mmap profile '") + Filename + "'", EC);
280  StringRef Data(MappedFile.data(), MappedFile.size());
281
282  Profile P;
283  uint64_t Offset = 0;
284  DataExtractor Extractor(Data, true, 8);
285
286  // For each block we get from the file:
287  while (Offset != MappedFile.size()) {
288    auto HeaderOrError = readBlockHeader(Extractor, Offset);
289    if (!HeaderOrError)
290      return HeaderOrError.takeError();
291
292    // TODO: Maybe store this header information for each block, even just for
293    // debugging?
294    const auto &Header = HeaderOrError.get();
295
296    // Read in the path data.
297    auto PathOrError = readPath(Extractor, Offset);
298    if (!PathOrError)
299      return PathOrError.takeError();
300    const auto &Path = PathOrError.get();
301
302    // For each path we encounter, we should intern it to get a PathID.
303    auto DataOrError = readData(Extractor, Offset);
304    if (!DataOrError)
305      return DataOrError.takeError();
306    auto &Data = DataOrError.get();
307
308    if (auto E =
309            P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
310                                      {{P.internPath(Path), std::move(Data)}}}))
311      return std::move(E);
312  }
313
314  return P;
315}
316
317namespace {
318
319struct StackEntry {
320  uint64_t Timestamp;
321  Profile::FuncID FuncId;
322};
323
324} // namespace
325
326Expected<Profile> profileFromTrace(const Trace &T) {
327  Profile P;
328
329  // The implementation of the algorithm re-creates the execution of
330  // the functions based on the trace data. To do this, we set up a number of
331  // data structures to track the execution context of every thread in the
332  // Trace.
333  DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
334  DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
335      ThreadPathData;
336
337  //  We then do a pass through the Trace to account data on a per-thread-basis.
338  for (const auto &E : T) {
339    auto &TSD = ThreadStacks[E.TId];
340    switch (E.Type) {
341    case RecordTypes::ENTER:
342    case RecordTypes::ENTER_ARG:
343
344      // Push entries into the function call stack.
345      TSD.push_back({E.TSC, E.FuncId});
346      break;
347
348    case RecordTypes::EXIT:
349    case RecordTypes::TAIL_EXIT:
350
351      // Exits cause some accounting to happen, based on the state of the stack.
352      // For each function we pop off the stack, we take note of the path and
353      // record the cumulative state for this path. As we're doing this, we
354      // intern the path into the Profile.
355      while (!TSD.empty()) {
356        auto Top = TSD.back();
357        auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
358        SmallVector<Profile::FuncID, 16> Path;
359        transform(reverse(TSD), std::back_inserter(Path),
360                  std::mem_fn(&StackEntry::FuncId));
361        auto InternedPath = P.internPath(Path);
362        auto &TPD = ThreadPathData[E.TId][InternedPath];
363        ++TPD.CallCount;
364        TPD.CumulativeLocalTime += FunctionLocalTime;
365        TSD.pop_back();
366
367        // If we've matched the corresponding entry event for this function,
368        // then we exit the loop.
369        if (Top.FuncId == E.FuncId)
370          break;
371
372        // FIXME: Consider the intermediate times and the cumulative tree time
373        // as well.
374      }
375
376      break;
377
378    case RecordTypes::CUSTOM_EVENT:
379    case RecordTypes::TYPED_EVENT:
380      // TODO: Support an extension point to allow handling of custom and typed
381      // events in profiles.
382      break;
383    }
384  }
385
386  // Once we've gone through the Trace, we now create one Block per thread in
387  // the Profile.
388  for (const auto &ThreadPaths : ThreadPathData) {
389    const auto &TID = ThreadPaths.first;
390    const auto &PathsData = ThreadPaths.second;
391    if (auto E = P.addBlock({
392            TID,
393            std::vector<std::pair<Profile::PathID, Profile::Data>>(
394                PathsData.begin(), PathsData.end()),
395        }))
396      return std::move(E);
397  }
398
399  return P;
400}
401
402} // namespace xray
403} // namespace llvm
404