1//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements hierarchical time profiler.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Support/TimeProfiler.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/ADT/StringMap.h"
16#include "llvm/Support/CommandLine.h"
17#include "llvm/Support/JSON.h"
18#include "llvm/Support/Path.h"
19#include "llvm/Support/Process.h"
20#include "llvm/Support/Threading.h"
21#include <algorithm>
22#include <cassert>
23#include <chrono>
24#include <mutex>
25#include <string>
26#include <vector>
27
28using namespace std::chrono;
29using namespace llvm;
30
31static std::mutex Mu;
32// List of all instances
33static std::vector<TimeTraceProfiler *>
34    ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
35// Per Thread instance
36static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
37
38TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
39  return TimeTraceProfilerInstance;
40}
41
42typedef duration<steady_clock::rep, steady_clock::period> DurationType;
43typedef time_point<steady_clock> TimePointType;
44typedef std::pair<size_t, DurationType> CountAndDurationType;
45typedef std::pair<std::string, CountAndDurationType>
46    NameAndCountAndDurationType;
47
48namespace {
49struct Entry {
50  const TimePointType Start;
51  TimePointType End;
52  const std::string Name;
53  const std::string Detail;
54
55  Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
56      : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
57        Detail(std::move(Dt)) {}
58
59  // Calculate timings for FlameGraph. Cast time points to microsecond precision
60  // rather than casting duration. This avoid truncation issues causing inner
61  // scopes overruning outer scopes.
62  steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
63    return (time_point_cast<microseconds>(Start) -
64            time_point_cast<microseconds>(StartTime))
65        .count();
66  }
67
68  steady_clock::rep getFlameGraphDurUs() const {
69    return (time_point_cast<microseconds>(End) -
70            time_point_cast<microseconds>(Start))
71        .count();
72  }
73};
74} // namespace
75
76struct llvm::TimeTraceProfiler {
77  TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
78      : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
79        ProcName(ProcName), Pid(sys::Process::getProcessId()),
80        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
81    llvm::get_thread_name(ThreadName);
82  }
83
84  void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
85    Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
86                       Detail());
87  }
88
89  void end() {
90    assert(!Stack.empty() && "Must call begin() first");
91    Entry &E = Stack.back();
92    E.End = steady_clock::now();
93
94    // Check that end times monotonically increase.
95    assert((Entries.empty() ||
96            (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
97             Entries.back().getFlameGraphStartUs(StartTime) +
98                 Entries.back().getFlameGraphDurUs())) &&
99           "TimeProfiler scope ended earlier than previous scope");
100
101    // Calculate duration at full precision for overall counts.
102    DurationType Duration = E.End - E.Start;
103
104    // Only include sections longer or equal to TimeTraceGranularity msec.
105    if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
106      Entries.emplace_back(E);
107
108    // Track total time taken by each "name", but only the topmost levels of
109    // them; e.g. if there's a template instantiation that instantiates other
110    // templates from within, we only want to add the topmost one. "topmost"
111    // happens to be the ones that don't have any currently open entries above
112    // itself.
113    if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
114          return Val.Name == E.Name;
115        }) == Stack.rend()) {
116      auto &CountAndTotal = CountAndTotalPerName[E.Name];
117      CountAndTotal.first++;
118      CountAndTotal.second += Duration;
119    }
120
121    Stack.pop_back();
122  }
123
124  // Write events from this TimeTraceProfilerInstance and
125  // ThreadTimeTraceProfilerInstances.
126  void write(raw_pwrite_stream &OS) {
127    // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
128    std::lock_guard<std::mutex> Lock(Mu);
129    assert(Stack.empty() &&
130           "All profiler sections should be ended when calling write");
131    assert(llvm::all_of(ThreadTimeTraceProfilerInstances,
132                        [](const auto &TTP) { return TTP->Stack.empty(); }) &&
133           "All profiler sections should be ended when calling write");
134
135    json::OStream J(OS);
136    J.objectBegin();
137    J.attributeBegin("traceEvents");
138    J.arrayBegin();
139
140    // Emit all events for the main flame graph.
141    auto writeEvent = [&](const auto &E, uint64_t Tid) {
142      auto StartUs = E.getFlameGraphStartUs(StartTime);
143      auto DurUs = E.getFlameGraphDurUs();
144
145      J.object([&] {
146        J.attribute("pid", Pid);
147        J.attribute("tid", int64_t(Tid));
148        J.attribute("ph", "X");
149        J.attribute("ts", StartUs);
150        J.attribute("dur", DurUs);
151        J.attribute("name", E.Name);
152        if (!E.Detail.empty()) {
153          J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
154        }
155      });
156    };
157    for (const Entry &E : Entries)
158      writeEvent(E, this->Tid);
159    for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
160      for (const Entry &E : TTP->Entries)
161        writeEvent(E, TTP->Tid);
162
163    // Emit totals by section name as additional "thread" events, sorted from
164    // longest one.
165    // Find highest used thread id.
166    uint64_t MaxTid = this->Tid;
167    for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
168      MaxTid = std::max(MaxTid, TTP->Tid);
169
170    // Combine all CountAndTotalPerName from threads into one.
171    StringMap<CountAndDurationType> AllCountAndTotalPerName;
172    auto combineStat = [&](const auto &Stat) {
173      StringRef Key = Stat.getKey();
174      auto Value = Stat.getValue();
175      auto &CountAndTotal = AllCountAndTotalPerName[Key];
176      CountAndTotal.first += Value.first;
177      CountAndTotal.second += Value.second;
178    };
179    for (const auto &Stat : CountAndTotalPerName)
180      combineStat(Stat);
181    for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
182      for (const auto &Stat : TTP->CountAndTotalPerName)
183        combineStat(Stat);
184
185    std::vector<NameAndCountAndDurationType> SortedTotals;
186    SortedTotals.reserve(AllCountAndTotalPerName.size());
187    for (const auto &Total : AllCountAndTotalPerName)
188      SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
189
190    llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
191                                const NameAndCountAndDurationType &B) {
192      return A.second.second > B.second.second;
193    });
194
195    // Report totals on separate threads of tracing file.
196    uint64_t TotalTid = MaxTid + 1;
197    for (const NameAndCountAndDurationType &Total : SortedTotals) {
198      auto DurUs = duration_cast<microseconds>(Total.second.second).count();
199      auto Count = AllCountAndTotalPerName[Total.first].first;
200
201      J.object([&] {
202        J.attribute("pid", Pid);
203        J.attribute("tid", int64_t(TotalTid));
204        J.attribute("ph", "X");
205        J.attribute("ts", 0);
206        J.attribute("dur", DurUs);
207        J.attribute("name", "Total " + Total.first);
208        J.attributeObject("args", [&] {
209          J.attribute("count", int64_t(Count));
210          J.attribute("avg ms", int64_t(DurUs / Count / 1000));
211        });
212      });
213
214      ++TotalTid;
215    }
216
217    auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
218                                  StringRef arg) {
219      J.object([&] {
220        J.attribute("cat", "");
221        J.attribute("pid", Pid);
222        J.attribute("tid", int64_t(Tid));
223        J.attribute("ts", 0);
224        J.attribute("ph", "M");
225        J.attribute("name", Name);
226        J.attributeObject("args", [&] { J.attribute("name", arg); });
227      });
228    };
229
230    writeMetadataEvent("process_name", Tid, ProcName);
231    writeMetadataEvent("thread_name", Tid, ThreadName);
232    for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
233      writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
234
235    J.arrayEnd();
236    J.attributeEnd();
237
238    // Emit the absolute time when this TimeProfiler started.
239    // This can be used to combine the profiling data from
240    // multiple processes and preserve actual time intervals.
241    J.attribute("beginningOfTime",
242                time_point_cast<microseconds>(BeginningOfTime)
243                    .time_since_epoch()
244                    .count());
245
246    J.objectEnd();
247  }
248
249  SmallVector<Entry, 16> Stack;
250  SmallVector<Entry, 128> Entries;
251  StringMap<CountAndDurationType> CountAndTotalPerName;
252  const time_point<system_clock> BeginningOfTime;
253  const TimePointType StartTime;
254  const std::string ProcName;
255  const sys::Process::Pid Pid;
256  SmallString<0> ThreadName;
257  const uint64_t Tid;
258
259  // Minimum time granularity (in microseconds)
260  const unsigned TimeTraceGranularity;
261};
262
263void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
264                                       StringRef ProcName) {
265  assert(TimeTraceProfilerInstance == nullptr &&
266         "Profiler should not be initialized");
267  TimeTraceProfilerInstance = new TimeTraceProfiler(
268      TimeTraceGranularity, llvm::sys::path::filename(ProcName));
269}
270
271// Removes all TimeTraceProfilerInstances.
272// Called from main thread.
273void llvm::timeTraceProfilerCleanup() {
274  delete TimeTraceProfilerInstance;
275  std::lock_guard<std::mutex> Lock(Mu);
276  for (auto TTP : ThreadTimeTraceProfilerInstances)
277    delete TTP;
278  ThreadTimeTraceProfilerInstances.clear();
279}
280
281// Finish TimeTraceProfilerInstance on a worker thread.
282// This doesn't remove the instance, just moves the pointer to global vector.
283void llvm::timeTraceProfilerFinishThread() {
284  std::lock_guard<std::mutex> Lock(Mu);
285  ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance);
286  TimeTraceProfilerInstance = nullptr;
287}
288
289void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
290  assert(TimeTraceProfilerInstance != nullptr &&
291         "Profiler object can't be null");
292  TimeTraceProfilerInstance->write(OS);
293}
294
295Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
296                                   StringRef FallbackFileName) {
297  assert(TimeTraceProfilerInstance != nullptr &&
298         "Profiler object can't be null");
299
300  std::string Path = PreferredFileName.str();
301  if (Path.empty()) {
302    Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
303    Path += ".time-trace";
304  }
305
306  std::error_code EC;
307  raw_fd_ostream OS(Path, EC, sys::fs::OF_Text);
308  if (EC)
309    return createStringError(EC, "Could not open " + Path);
310
311  timeTraceProfilerWrite(OS);
312  return Error::success();
313}
314
315void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
316  if (TimeTraceProfilerInstance != nullptr)
317    TimeTraceProfilerInstance->begin(std::string(Name),
318                                     [&]() { return std::string(Detail); });
319}
320
321void llvm::timeTraceProfilerBegin(StringRef Name,
322                                  llvm::function_ref<std::string()> Detail) {
323  if (TimeTraceProfilerInstance != nullptr)
324    TimeTraceProfilerInstance->begin(std::string(Name), Detail);
325}
326
327void llvm::timeTraceProfilerEnd() {
328  if (TimeTraceProfilerInstance != nullptr)
329    TimeTraceProfilerInstance->end();
330}
331