1345153Sdim#ifndef KMP_STATS_H
2345153Sdim#define KMP_STATS_H
3345153Sdim
4345153Sdim/** @file kmp_stats.h
5345153Sdim * Functions for collecting statistics.
6345153Sdim */
7345153Sdim
8345153Sdim//===----------------------------------------------------------------------===//
9345153Sdim//
10353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
11353358Sdim// See https://llvm.org/LICENSE.txt for license information.
12353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
13345153Sdim//
14345153Sdim//===----------------------------------------------------------------------===//
15345153Sdim
16345153Sdim#include "kmp_config.h"
17345153Sdim#include "kmp_debug.h"
18345153Sdim
19345153Sdim#if KMP_STATS_ENABLED
20345153Sdim/* Statistics accumulator.
21345153Sdim   Accumulates number of samples and computes min, max, mean, standard deviation
22345153Sdim   on the fly.
23345153Sdim
24345153Sdim   Online variance calculation algorithm from
25345153Sdim   http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
26345153Sdim */
27345153Sdim
28345153Sdim#include "kmp_stats_timing.h"
29345153Sdim#include <limits>
30345153Sdim#include <math.h>
31345153Sdim#include <new> // placement new
32345153Sdim#include <stdint.h>
33345153Sdim#include <string>
34345153Sdim#include <vector>
35345153Sdim
36345153Sdim/* Enable developer statistics here if you want them. They are more detailed
37345153Sdim   than is useful for application characterisation and are intended for the
38345153Sdim   runtime library developer. */
39345153Sdim#define KMP_DEVELOPER_STATS 0
40345153Sdim
41345153Sdim/* Enable/Disable histogram output */
42345153Sdim#define KMP_STATS_HIST 0
43345153Sdim
44345153Sdim/*!
45345153Sdim * @ingroup STATS_GATHERING
46345153Sdim * \brief flags to describe the statistic (timer or counter)
47345153Sdim *
48345153Sdim */
enum stats_flags_e {
  //! do not show a TOTAL_aggregation for this statistic
  noTotal = 0x01,
  //! statistic is valid only for master
  onlyInMaster = 0x02,
  //! statistic doesn't need units printed next to it
  noUnits = 0x04,
  //! statistic is valid only for non-master threads
  notInMaster = 0x08,
  //! statistic can be logged on the event timeline when KMP_STATS_EVENTS is
  //! on (valid only for timers)
  logEvent = 0x10
};
57345153Sdim
58345153Sdim/*!
59345153Sdim * @ingroup STATS_GATHERING
60345153Sdim * \brief the states which a thread can be in
61345153Sdim *
62345153Sdim */
// Enumerators are numbered consecutively from zero in declaration order.
enum stats_state_e {
  IDLE = 0,
  SERIAL_REGION = 1,
  FORK_JOIN_BARRIER = 2,
  PLAIN_BARRIER = 3,
  TASKWAIT = 4,
  TASKYIELD = 5,
  TASKGROUP = 6,
  IMPLICIT_TASK = 7,
  EXPLICIT_TASK = 8,
  TEAMS_REGION = 9
};
75345153Sdim
76345153Sdim/*!
77345153Sdim * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
78345153Sdim *
79345153Sdim * @param macro a user defined macro that takes three arguments -
80345153Sdim * macro(COUNTER_NAME, flags, arg)
81345153Sdim * @param arg a user defined argument to send to the user defined macro
82345153Sdim *
83345153Sdim * \details A counter counts the occurrence of some event. Each thread
84345153Sdim * accumulates its own count, at the end of execution the counts are aggregated
85345153Sdim * treating each thread as a separate measurement. (Unless onlyInMaster is set,
86345153Sdim * in which case there's only a single measurement). The min,mean,max are
87345153Sdim * therefore the values for the threads. Adding the counter here and then
88345153Sdim * putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you
89345153Sdim * need to do. All of the tables and printing is generated from this macro.
90345153Sdim * Format is "macro(name, flags, arg)"
91345153Sdim *
92345153Sdim * @ingroup STATS_GATHERING
93345153Sdim */
94345153Sdim// clang-format off
#define KMP_FOREACH_COUNTER(macro, arg)                                        \
  macro(OMP_PARALLEL,                                                          \
        stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg)             \
  macro(OMP_NESTED_PARALLEL, 0, arg)                                           \
  macro(OMP_LOOP_STATIC, 0, arg)                                               \
  macro(OMP_LOOP_STATIC_STEAL, 0, arg)                                         \
  macro(OMP_LOOP_DYNAMIC, 0, arg)                                              \
  macro(OMP_DISTRIBUTE, 0, arg)                                                \
  macro(OMP_BARRIER, 0, arg)                                                   \
  macro(OMP_CRITICAL, 0, arg)                                                  \
  macro(OMP_SINGLE, 0, arg)                                                    \
  macro(OMP_MASTER, 0, arg)                                                    \
  macro(OMP_TEAMS, 0, arg)                                                     \
  macro(OMP_set_lock, 0, arg)                                                  \
  macro(OMP_test_lock, 0, arg)                                                 \
  macro(REDUCE_wait, 0, arg)                                                   \
  macro(REDUCE_nowait, 0, arg)                                                 \
  macro(OMP_TASKYIELD, 0, arg)                                                 \
  macro(OMP_TASKLOOP, 0, arg)                                                  \
  macro(TASK_executed, 0, arg)                                                 \
  macro(TASK_cancelled, 0, arg)                                                \
  macro(TASK_stolen, 0, arg)
116345153Sdim// clang-format on
117345153Sdim
118345153Sdim/*!
119345153Sdim * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
120345153Sdim *
121345153Sdim * @param macro a user defined macro that takes three arguments -
122345153Sdim * macro(TIMER_NAME, flags, arg)
123345153Sdim * @param arg a user defined argument to send to the user defined macro
124345153Sdim *
125345153Sdim * \details A timer collects multiple samples of some count in each thread and
126345153Sdim * then finally aggregates all of the samples from all of the threads. For most
127345153Sdim * timers the printing code also provides an aggregation over the thread totals.
128345153Sdim * These are printed as TOTAL_foo. The count is normally a time (in ticks),
129345153Sdim * hence the name "timer". (But can be any value, so we use this for "number of
130345153Sdim * arguments passed to fork" as well). For timers the threads are not
131345153Sdim * significant, it's the individual observations that count, so the statistics
132345153Sdim * are at that level. Format is "macro(name, flags, arg)"
133345153Sdim *
134345153Sdim * @ingroup STATS_GATHERING2
135345153Sdim */
136345153Sdim// clang-format off
#define KMP_FOREACH_TIMER(macro, arg)                                          \
  macro(OMP_worker_thread_life, stats_flags_e::logEvent, arg)                  \
  macro(OMP_parallel, stats_flags_e::logEvent, arg)                            \
  macro(OMP_parallel_overhead, stats_flags_e::logEvent, arg)                   \
  macro(OMP_teams, stats_flags_e::logEvent, arg)                               \
  macro(OMP_teams_overhead, stats_flags_e::logEvent, arg)                      \
  macro(OMP_loop_static, 0, arg)                                               \
  macro(OMP_loop_static_scheduling, 0, arg)                                    \
  macro(OMP_loop_dynamic, 0, arg)                                              \
  macro(OMP_loop_dynamic_scheduling, 0, arg)                                   \
  macro(OMP_distribute, 0, arg)                                                \
  macro(OMP_distribute_scheduling, 0, arg)                                     \
  macro(OMP_critical, 0, arg)                                                  \
  macro(OMP_critical_wait, 0, arg)                                             \
  macro(OMP_single, 0, arg)                                                    \
  macro(OMP_master, 0, arg)                                                    \
  macro(OMP_task_immediate, 0, arg)                                            \
  macro(OMP_task_taskwait, 0, arg)                                             \
  macro(OMP_task_taskyield, 0, arg)                                            \
  macro(OMP_task_taskgroup, 0, arg)                                            \
  macro(OMP_task_join_bar, 0, arg)                                             \
  macro(OMP_task_plain_bar, 0, arg)                                            \
  macro(OMP_taskloop_scheduling, 0, arg)                                       \
  macro(OMP_plain_barrier, stats_flags_e::logEvent, arg)                       \
  macro(OMP_idle, stats_flags_e::logEvent, arg)                                \
  macro(OMP_fork_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_join_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_serial, stats_flags_e::logEvent, arg)                              \
  macro(OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,   \
        arg)                                                                   \
  macro(OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal,    \
        arg)                                                                   \
  macro(OMP_loop_static_iterations,                                            \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_loop_static_total_iterations,                                      \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_loop_dynamic_iterations,                                           \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_loop_dynamic_total_iterations,                                     \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_distribute_iterations,                                             \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
180345153Sdim// clang-format on
181345153Sdim
182345153Sdim// OMP_worker_thread_life -- Time from thread becoming an OpenMP thread (either
183345153Sdim//                           initializing OpenMP or being created by a master)
184345153Sdim//                           until the thread is destroyed
185345153Sdim// OMP_parallel           -- Time thread spends executing work directly
186345153Sdim//                           within a #pragma omp parallel
187345153Sdim// OMP_parallel_overhead  -- Time thread spends setting up a parallel region
188345153Sdim// OMP_loop_static        -- Time thread spends executing loop iterations from
189345153Sdim//                           a statically scheduled loop
190345153Sdim// OMP_loop_static_scheduling -- Time thread spends scheduling loop iterations
191345153Sdim//                               from a statically scheduled loop
192345153Sdim// OMP_loop_dynamic       -- Time thread spends executing loop iterations from
193345153Sdim//                           a dynamically scheduled loop
194345153Sdim// OMP_loop_dynamic_scheduling -- Time thread spends scheduling loop iterations
195345153Sdim//                                from a dynamically scheduled loop
196345153Sdim// OMP_critical           -- Time thread spends executing critical section
197345153Sdim// OMP_critical_wait      -- Time thread spends waiting to enter
//                           a critical section
199345153Sdim// OMP_single             -- Time spent executing a "single" region
200345153Sdim// OMP_master             -- Time spent executing a "master" region
201345153Sdim// OMP_task_immediate     -- Time spent executing non-deferred tasks
202345153Sdim// OMP_task_taskwait      -- Time spent executing tasks inside a taskwait
203345153Sdim//                           construct
204345153Sdim// OMP_task_taskyield     -- Time spent executing tasks inside a taskyield
205345153Sdim//                           construct
// OMP_task_taskgroup     -- Time spent executing tasks inside a taskgroup
207345153Sdim//                           construct
208345153Sdim// OMP_task_join_bar      -- Time spent executing tasks inside a join barrier
209345153Sdim// OMP_task_plain_bar     -- Time spent executing tasks inside a barrier
210345153Sdim//                           construct
211345153Sdim// OMP_taskloop_scheduling -- Time spent scheduling tasks inside a taskloop
212345153Sdim//                            construct
213345153Sdim// OMP_plain_barrier      -- Time spent in a #pragma omp barrier construct or
214345153Sdim//                           inside implicit barrier at end of worksharing
215345153Sdim//                           construct
216345153Sdim// OMP_idle               -- Time worker threads spend waiting for next
217345153Sdim//                           parallel region
// OMP_fork_barrier       -- Time spent in the fork barrier surrounding a
219345153Sdim//                           parallel region
// OMP_join_barrier       -- Time spent in the join barrier surrounding a
221345153Sdim//                           parallel region
222345153Sdim// OMP_serial             -- Time thread zero spends executing serial code
223345153Sdim// OMP_set_numthreads     -- Values passed to omp_set_num_threads
224345153Sdim// OMP_PARALLEL_args      -- Number of arguments passed to a parallel region
225345153Sdim// OMP_loop_static_iterations -- Number of iterations thread is assigned for
226345153Sdim//                               statically scheduled loops
227345153Sdim// OMP_loop_dynamic_iterations -- Number of iterations thread is assigned for
228345153Sdim//                                dynamically scheduled loops
229345153Sdim
#if KMP_DEVELOPER_STATS
// Timers which are of interest to runtime library developers, not end users.
// These have to be explicitly enabled in addition to the other stats.
//
// NOTE(review): the descriptions below and the table that follows are not in
// sync: some described names (KMP_fork_barrier, KMP_join_barrier, KMP_barrier,
// KMP_icv_copy) have no table entry, and several table entries have no
// description here.

// KMP_fork_barrier       -- time in __kmp_fork_barrier
// KMP_join_barrier       -- time in __kmp_join_barrier
// KMP_barrier            -- time in __kmp_barrier
// KMP_end_split_barrier  -- time in __kmp_end_split_barrier
// KMP_setup_icv_copy     -- time in __kmp_setup_icv_copy
// KMP_icv_copy           -- start/stop timer for any ICV copying
// KMP_linear_gather      -- time in __kmp_linear_barrier_gather
// KMP_linear_release     -- time in __kmp_linear_barrier_release
// KMP_tree_gather        -- time in __kmp_tree_barrier_gather
// KMP_tree_release       -- time in __kmp_tree_barrier_release
// KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release      -- time in __kmp_hyper_barrier_release
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                \
  macro(KMP_fork_call, 0, arg)                                                 \
  macro(KMP_join_call, 0, arg)                                                 \
  macro(KMP_end_split_barrier, 0, arg)                                         \
  macro(KMP_hier_gather, 0, arg)                                               \
  macro(KMP_hier_release, 0, arg)                                              \
  macro(KMP_hyper_gather, 0, arg)                                              \
  macro(KMP_hyper_release, 0, arg)                                             \
  macro(KMP_linear_gather, 0, arg)                                             \
  macro(KMP_linear_release, 0, arg)                                            \
  macro(KMP_tree_gather, 0, arg)                                               \
  macro(KMP_tree_release, 0, arg)                                              \
  macro(USER_resume, 0, arg)                                                   \
  macro(USER_suspend, 0, arg)                                                  \
  macro(KMP_allocate_team, 0, arg)                                             \
  macro(KMP_setup_icv_copy, 0, arg)                                            \
  macro(USER_icv_copy, 0, arg)                                                 \
  macro(FOR_static_steal_stolen,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_steal_chunks,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)
#else // !KMP_DEVELOPER_STATS
// Developer statistics disabled: the table contributes no entries.
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif // KMP_DEVELOPER_STATS
271345153Sdim// clang-format on
272345153Sdim
273345153Sdim/*!
274345153Sdim * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
275345153Sdim *
276345153Sdim * @param macro a user defined macro that takes three arguments -
277345153Sdim * macro(TIMER_NAME, flags, arg)
278345153Sdim * @param arg a user defined argument to send to the user defined macro
279345153Sdim *
280345153Sdim * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE
281345153Sdim * BAD THINGS WILL HAPPEN!
282345153Sdim *
283345153Sdim * \details Explicit timers are ones where we need to allocate a timer itself
284345153Sdim * (as well as the accumulated timing statistics). We allocate these on a
285345153Sdim * per-thread basis, and explicitly start and stop them. Block timers just
286345153Sdim * allocate the timer itself on the stack, and use the destructor to notice
287345153Sdim * block exit; they don't need to be defined here. The name here should be the
288345153Sdim * same as that of a timer above.
289345153Sdim *
290345153Sdim * @ingroup STATS_GATHERING
291345153Sdim*/
292345153Sdim#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg)
293345153Sdim
294345153Sdim#define ENUMERATE(name, ignore, prefix) prefix##name,
295345153Sdimenum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
296345153Sdim
297345153Sdimenum explicit_timer_e {
298345153Sdim  KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST
299345153Sdim};
300345153Sdim
301345153Sdimenum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
302345153Sdim#undef ENUMERATE
303345153Sdim
304345153Sdim/*
305345153Sdim * A logarithmic histogram. It accumulates the number of values in each power of
306345153Sdim * ten bin.  So 1<=x<10, 10<=x<100, ...
307345153Sdim * Mostly useful where we have some big outliers and want to see information
308345153Sdim * about them.
309345153Sdim */
310345153Sdimclass logHistogram {
311345153Sdim  enum {
312345153Sdim    numBins = 31, /* Number of powers of 10. If this changes you need to change
313345153Sdim                   * the initializer for binMax */
314345153Sdim
315345153Sdim    /*
316345153Sdim     * If you want to use this to analyse values that may be less than 1, (for
317345153Sdim     * instance times in s), then the logOffset gives you negative powers.
318345153Sdim     * In our case here, we're just looking at times in ticks, or counts, so we
319345153Sdim     * can never see values with magnitude < 1 (other than zero), so we can set
320345153Sdim     * it to 0.  As above change the initializer if you change this.
321345153Sdim     */
322345153Sdim    logOffset = 0
323345153Sdim  };
324345153Sdim  uint32_t KMP_ALIGN_CACHE zeroCount;
325345153Sdim  struct {
326345153Sdim    uint32_t count;
327345153Sdim    double total;
328345153Sdim  } bins[numBins];
329345153Sdim
330345153Sdim  static double binMax[numBins];
331345153Sdim
332345153Sdim#ifdef KMP_DEBUG
333345153Sdim  uint64_t _total;
334345153Sdim
335345153Sdim  void check() const {
336345153Sdim    uint64_t t = zeroCount;
337345153Sdim    for (int i = 0; i < numBins; i++)
338345153Sdim      t += bins[i].count;
339345153Sdim    KMP_DEBUG_ASSERT(t == _total);
340345153Sdim  }
341345153Sdim#else
342345153Sdim  void check() const {}
343345153Sdim#endif
344345153Sdim
345345153Sdimpublic:
346345153Sdim  logHistogram() { reset(); }
347345153Sdim
348345153Sdim  logHistogram(logHistogram const &o) {
349345153Sdim    for (int i = 0; i < numBins; i++)
350345153Sdim      bins[i] = o.bins[i];
351345153Sdim#ifdef KMP_DEBUG
352345153Sdim    _total = o._total;
353345153Sdim#endif
354345153Sdim  }
355345153Sdim
356345153Sdim  void reset() {
357345153Sdim    zeroCount = 0;
358345153Sdim    for (int i = 0; i < numBins; i++) {
359345153Sdim      bins[i].count = 0;
360345153Sdim      bins[i].total = 0;
361345153Sdim    }
362345153Sdim
363345153Sdim#ifdef KMP_DEBUG
364345153Sdim    _total = 0;
365345153Sdim#endif
366345153Sdim  }
367345153Sdim  uint32_t count(int b) const { return bins[b + logOffset].count; }
368345153Sdim  double total(int b) const { return bins[b + logOffset].total; }
369345153Sdim  static uint32_t findBin(double sample);
370345153Sdim
371345153Sdim  logHistogram &operator+=(logHistogram const &o) {
372345153Sdim    zeroCount += o.zeroCount;
373345153Sdim    for (int i = 0; i < numBins; i++) {
374345153Sdim      bins[i].count += o.bins[i].count;
375345153Sdim      bins[i].total += o.bins[i].total;
376345153Sdim    }
377345153Sdim#ifdef KMP_DEBUG
378345153Sdim    _total += o._total;
379345153Sdim    check();
380345153Sdim#endif
381345153Sdim
382345153Sdim    return *this;
383345153Sdim  }
384345153Sdim
385345153Sdim  void addSample(double sample);
386345153Sdim  int minBin() const;
387345153Sdim  int maxBin() const;
388345153Sdim
389345153Sdim  std::string format(char) const;
390345153Sdim};
391345153Sdim
392345153Sdimclass statistic {
393345153Sdim  double KMP_ALIGN_CACHE minVal;
394345153Sdim  double maxVal;
395345153Sdim  double meanVal;
396345153Sdim  double m2;
397345153Sdim  uint64_t sampleCount;
398345153Sdim  double offset;
399345153Sdim  bool collectingHist;
400345153Sdim  logHistogram hist;
401345153Sdim
402345153Sdimpublic:
403345153Sdim  statistic(bool doHist = bool(KMP_STATS_HIST)) {
404345153Sdim    reset();
405345153Sdim    collectingHist = doHist;
406345153Sdim  }
407345153Sdim  statistic(statistic const &o)
408345153Sdim      : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
409345153Sdim        sampleCount(o.sampleCount), offset(o.offset),
410345153Sdim        collectingHist(o.collectingHist), hist(o.hist) {}
411345153Sdim  statistic(double minv, double maxv, double meanv, uint64_t sc, double sd)
412345153Sdim      : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc),
413345153Sdim        sampleCount(sc), offset(0.0), collectingHist(false) {}
414345153Sdim  bool haveHist() const { return collectingHist; }
415345153Sdim  double getMin() const { return minVal; }
416345153Sdim  double getMean() const { return meanVal; }
417345153Sdim  double getMax() const { return maxVal; }
418345153Sdim  uint64_t getCount() const { return sampleCount; }
419345153Sdim  double getSD() const { return sqrt(m2 / sampleCount); }
420345153Sdim  double getTotal() const { return sampleCount * meanVal; }
421345153Sdim  logHistogram const *getHist() const { return &hist; }
422345153Sdim  void setOffset(double d) { offset = d; }
423345153Sdim
424345153Sdim  void reset() {
425345153Sdim    minVal = std::numeric_limits<double>::max();
426345153Sdim    maxVal = -minVal;
427345153Sdim    meanVal = 0.0;
428345153Sdim    m2 = 0.0;
429345153Sdim    sampleCount = 0;
430345153Sdim    offset = 0.0;
431345153Sdim    hist.reset();
432345153Sdim  }
433345153Sdim  void addSample(double sample);
434345153Sdim  void scale(double factor);
435345153Sdim  void scaleDown(double f) { scale(1. / f); }
436345153Sdim  void forceCount(uint64_t count) { sampleCount = count; }
437345153Sdim  statistic &operator+=(statistic const &other);
438345153Sdim
439345153Sdim  std::string format(char unit, bool total = false) const;
440345153Sdim  std::string formatHist(char unit) const { return hist.format(unit); }
441345153Sdim};
442345153Sdim
// Name and flag bits describing one table entry (timer or counter).
struct statInfo {
  char const *name; // Name of the statistic.
  uint32_t flags; // Tested against stats_flags_e bits by timeStat/counter.
};
447345153Sdim
448345153Sdimclass timeStat : public statistic {
449345153Sdim  static statInfo timerInfo[];
450345153Sdim
451345153Sdimpublic:
452345153Sdim  timeStat() : statistic() {}
453345153Sdim  static const char *name(timer_e e) { return timerInfo[e].name; }
454345153Sdim  static bool noTotal(timer_e e) {
455345153Sdim    return timerInfo[e].flags & stats_flags_e::noTotal;
456345153Sdim  }
457345153Sdim  static bool masterOnly(timer_e e) {
458345153Sdim    return timerInfo[e].flags & stats_flags_e::onlyInMaster;
459345153Sdim  }
460345153Sdim  static bool workerOnly(timer_e e) {
461345153Sdim    return timerInfo[e].flags & stats_flags_e::notInMaster;
462345153Sdim  }
463345153Sdim  static bool noUnits(timer_e e) {
464345153Sdim    return timerInfo[e].flags & stats_flags_e::noUnits;
465345153Sdim  }
466345153Sdim  static bool logEvent(timer_e e) {
467345153Sdim    return timerInfo[e].flags & stats_flags_e::logEvent;
468345153Sdim  }
469345153Sdim  static void clearEventFlags() {
470345153Sdim    for (int i = 0; i < TIMER_LAST; i++) {
471345153Sdim      timerInfo[i].flags &= (~(stats_flags_e::logEvent));
472345153Sdim    }
473345153Sdim  }
474345153Sdim};
475345153Sdim
// Where we need to start and end the timer explicitly, this version can be
// used. Since these timers normally aren't nicely scoped, and so don't have a
// good place to live on the stack of the thread, they're more work to use.
479345153Sdimclass explicitTimer {
480345153Sdim  timeStat *stat;
481345153Sdim  timer_e timerEnumValue;
482345153Sdim  tsc_tick_count startTime;
483345153Sdim  tsc_tick_count pauseStartTime;
484345153Sdim  tsc_tick_count::tsc_interval_t totalPauseTime;
485345153Sdim
486345153Sdimpublic:
487345153Sdim  explicitTimer(timeStat *s, timer_e te)
488345153Sdim      : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0),
489345153Sdim        totalPauseTime() {}
490345153Sdim
491345153Sdim  // void setStat(timeStat *s) { stat = s; }
492345153Sdim  void start(tsc_tick_count tick);
493345153Sdim  void pause(tsc_tick_count tick) { pauseStartTime = tick; }
494345153Sdim  void resume(tsc_tick_count tick) {
495345153Sdim    totalPauseTime += (tick - pauseStartTime);
496345153Sdim  }
497345153Sdim  void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr = nullptr);
498345153Sdim  void reset() {
499345153Sdim    startTime = 0;
500345153Sdim    pauseStartTime = 0;
501345153Sdim    totalPauseTime = 0;
502345153Sdim  }
503345153Sdim  timer_e get_type() const { return timerEnumValue; }
504345153Sdim};
505345153Sdim
// Where you need to partition a thread's clock ticks into separate states
507345153Sdim// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
508345153Sdim// DOING_NOTHING would render these conditions:
509345153Sdim// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
510345153Sdim// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
511345153Sdim// versa
512345153Sdimclass partitionedTimers {
513345153Sdimprivate:
514345153Sdim  std::vector<explicitTimer> timer_stack;
515345153Sdim
516345153Sdimpublic:
517345153Sdim  partitionedTimers();
518345153Sdim  void init(explicitTimer timer);
519345153Sdim  void exchange(explicitTimer timer);
520345153Sdim  void push(explicitTimer timer);
521345153Sdim  void pop();
522345153Sdim  void windup();
523345153Sdim};
524345153Sdim
// Special wrapper around the partitioned timers to aid timing code blocks
526345153Sdim// It avoids the need to have an explicit end, leaving the scope suffices.
527345153Sdimclass blockPartitionedTimer {
528345153Sdim  partitionedTimers *part_timers;
529345153Sdim
530345153Sdimpublic:
531345153Sdim  blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer)
532345153Sdim      : part_timers(pt) {
533345153Sdim    part_timers->push(timer);
534345153Sdim  }
535345153Sdim  ~blockPartitionedTimer() { part_timers->pop(); }
536345153Sdim};
537345153Sdim
// Special wrapper around the thread state to aid in keeping state in code
// blocks. It avoids the need to have an explicit end, leaving the scope
// suffices.
541345153Sdimclass blockThreadState {
542345153Sdim  stats_state_e *state_pointer;
543345153Sdim  stats_state_e old_state;
544345153Sdim
545345153Sdimpublic:
546345153Sdim  blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state)
547345153Sdim      : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
548345153Sdim    *state_pointer = new_state;
549345153Sdim  }
550345153Sdim  ~blockThreadState() { *state_pointer = old_state; }
551345153Sdim};
552345153Sdim
553345153Sdim// If all you want is a count, then you can use this...
554345153Sdim// The individual per-thread counts will be aggregated into a statistic at
555345153Sdim// program exit.
556345153Sdimclass counter {
557345153Sdim  uint64_t value;
558345153Sdim  static const statInfo counterInfo[];
559345153Sdim
560345153Sdimpublic:
561345153Sdim  counter() : value(0) {}
562345153Sdim  void increment() { value++; }
563345153Sdim  uint64_t getValue() const { return value; }
564345153Sdim  void reset() { value = 0; }
565345153Sdim  static const char *name(counter_e e) { return counterInfo[e].name; }
566345153Sdim  static bool masterOnly(counter_e e) {
567345153Sdim    return counterInfo[e].flags & stats_flags_e::onlyInMaster;
568345153Sdim  }
569345153Sdim};
570345153Sdim
571345153Sdim/* ****************************************************************
572345153Sdim    Class to implement an event
573345153Sdim
574345153Sdim    There are four components to an event: start time, stop time
575345153Sdim    nest_level, and timer_name.
576345153Sdim    The start and stop time should be obvious (recorded in clock ticks).
577345153Sdim    The nest_level relates to the bar width in the timeline graph.
578345153Sdim    The timer_name is used to determine which timer event triggered this event.
579345153Sdim
580345153Sdim    the interface to this class is through four read-only operations:
581345153Sdim    1) getStart()     -- returns the start time as 64 bit integer
582345153Sdim    2) getStop()      -- returns the stop time as 64 bit integer
583345153Sdim    3) getNestLevel() -- returns the nest level of the event
584345153Sdim    4) getTimerName() -- returns the timer name that triggered event
585345153Sdim
586345153Sdim    *MORE ON NEST_LEVEL*
587345153Sdim    The nest level is used in the bar graph that represents the timeline.
588345153Sdim    Its main purpose is for showing how events are nested inside eachother.
589345153Sdim    For example, say events, A, B, and C are recorded.  If the timeline
590345153Sdim    looks like this:
591345153Sdim
592345153SdimBegin -------------------------------------------------------------> Time
593345153Sdim         |    |          |        |          |              |
594345153Sdim         A    B          C        C          B              A
595345153Sdim       start start     start     end        end            end
596345153Sdim
597345153Sdim       Then A, B, C will have a nest level of 1, 2, 3 respectively.
598345153Sdim       These values are then used to calculate the barwidth so you can
599345153Sdim       see that inside A, B has occurred, and inside B, C has occurred.
600345153Sdim       Currently, this is shown with A's bar width being larger than B's
601345153Sdim       bar width, and B's bar width being larger than C's bar width.
602345153Sdim
603345153Sdim**************************************************************** */
604345153Sdimclass kmp_stats_event {
605345153Sdim  uint64_t start;
606345153Sdim  uint64_t stop;
607345153Sdim  int nest_level;
608345153Sdim  timer_e timer_name;
609345153Sdim
610345153Sdimpublic:
611345153Sdim  kmp_stats_event()
612345153Sdim      : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
613345153Sdim  kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme)
614345153Sdim      : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
615345153Sdim  inline uint64_t getStart() const { return start; }
616345153Sdim  inline uint64_t getStop() const { return stop; }
617345153Sdim  inline int getNestLevel() const { return nest_level; }
618345153Sdim  inline timer_e getTimerName() const { return timer_name; }
619345153Sdim};
620345153Sdim
621345153Sdim/* ****************************************************************
622345153Sdim    Class to implement a dynamically expandable array of events
623345153Sdim
624345153Sdim    ---------------------------------------------------------
625345153Sdim    | event 1 | event 2 | event 3 | event 4 | ... | event N |
626345153Sdim    ---------------------------------------------------------
627345153Sdim
628345153Sdim    An event is pushed onto the back of this array at every
629345153Sdim    explicitTimer->stop() call.  The event records the thread #,
630345153Sdim    start time, stop time, and nest level related to the bar width.
631345153Sdim
632345153Sdim    The event vector starts at size INIT_SIZE and grows (doubles in size)
633345153Sdim    if needed.  An implication of this behavior is that log(N)
634345153Sdim    reallocations are needed (where N is number of events).  If you want
635345153Sdim    to avoid reallocations, then set INIT_SIZE to a large value.
636345153Sdim
637345153Sdim    the interface to this class is through six operations:
638345153Sdim    1) reset() -- sets the internal_size back to 0 but does not deallocate any
639345153Sdim       memory
640345153Sdim    2) size()  -- returns the number of valid elements in the vector
641345153Sdim    3) push_back(start, stop, nest, timer_name) -- pushes an event onto
642345153Sdim       the back of the array
643345153Sdim    4) deallocate() -- frees all memory associated with the vector
644345153Sdim    5) sort() -- sorts the vector by start time
645345153Sdim    6) operator[index] or at(index) -- returns event reference at that index
646345153Sdim**************************************************************** */
647345153Sdimclass kmp_stats_event_vector {
648345153Sdim  kmp_stats_event *events;
649345153Sdim  int internal_size;
650345153Sdim  int allocated_size;
651345153Sdim  static const int INIT_SIZE = 1024;
652345153Sdim
653345153Sdimpublic:
654345153Sdim  kmp_stats_event_vector() {
655345153Sdim    events =
656345153Sdim        (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE);
657345153Sdim    internal_size = 0;
658345153Sdim    allocated_size = INIT_SIZE;
659345153Sdim  }
660345153Sdim  ~kmp_stats_event_vector() {}
661345153Sdim  inline void reset() { internal_size = 0; }
662345153Sdim  inline int size() const { return internal_size; }
663345153Sdim  void push_back(uint64_t start_time, uint64_t stop_time, int nest_level,
664345153Sdim                 timer_e name) {
665345153Sdim    int i;
666345153Sdim    if (internal_size == allocated_size) {
667345153Sdim      kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
668345153Sdim          sizeof(kmp_stats_event) * allocated_size * 2);
669345153Sdim      for (i = 0; i < internal_size; i++)
670345153Sdim        tmp[i] = events[i];
671345153Sdim      __kmp_free(events);
672345153Sdim      events = tmp;
673345153Sdim      allocated_size *= 2;
674345153Sdim    }
675345153Sdim    events[internal_size] =
676345153Sdim        kmp_stats_event(start_time, stop_time, nest_level, name);
677345153Sdim    internal_size++;
678345153Sdim    return;
679345153Sdim  }
680345153Sdim  void deallocate();
681345153Sdim  void sort();
682345153Sdim  const kmp_stats_event &operator[](int index) const { return events[index]; }
683345153Sdim  kmp_stats_event &operator[](int index) { return events[index]; }
684345153Sdim  const kmp_stats_event &at(int index) const { return events[index]; }
685345153Sdim  kmp_stats_event &at(int index) { return events[index]; }
686345153Sdim};
687345153Sdim
688345153Sdim/* ****************************************************************
689345153Sdim    Class to implement a doubly-linked, circular, statistics list
690345153Sdim
691345153Sdim    |---| ---> |---| ---> |---| ---> |---| ---> ... next
692345153Sdim    |   |      |   |      |   |      |   |
693345153Sdim    |---| <--- |---| <--- |---| <--- |---| <--- ... prev
694345153Sdim    Sentinel   first      second     third
695345153Sdim    Node       node       node       node
696345153Sdim
697345153Sdim    The Sentinel Node is the user handle on the list.
698345153Sdim    The first node corresponds to thread 0's statistics.
699345153Sdim    The second node corresponds to thread 1's statistics and so on...
700345153Sdim
701345153Sdim    Each node has a _timers, _counters, and _explicitTimers array to hold that
702345153Sdim    thread's statistics. The _explicitTimers point to the correct _timer and
703345153Sdim    update its statistics at every stop() call. The explicitTimers' pointers are
704345153Sdim    set up in the constructor. Each node also has an event vector to hold that
705345153Sdim    thread's timing events. The event vector expands as necessary and records
706345153Sdim    the start-stop times for each timer.
707345153Sdim
708345153Sdim    The nestLevel variable is for plotting events and is related
709345153Sdim    to the bar width in the timeline graph.
710345153Sdim
711345153Sdim    Every thread will have a thread local pointer to its node in
712345153Sdim    the list.  The sentinel node is used by the master thread to
713345153Sdim    store "dummy" statistics before __kmp_create_worker() is called.
714345153Sdim**************************************************************** */
715345153Sdimclass kmp_stats_list {
716345153Sdim  int gtid;
717345153Sdim  timeStat _timers[TIMER_LAST + 1];
718345153Sdim  counter _counters[COUNTER_LAST + 1];
719345153Sdim  explicitTimer thread_life_timer;
720345153Sdim  partitionedTimers _partitionedTimers;
721345153Sdim  int _nestLevel; // one per thread
722345153Sdim  kmp_stats_event_vector _event_vector;
723345153Sdim  kmp_stats_list *next;
724345153Sdim  kmp_stats_list *prev;
725345153Sdim  stats_state_e state;
726345153Sdim  int thread_is_idle_flag;
727345153Sdim
728345153Sdimpublic:
729345153Sdim  kmp_stats_list()
730345153Sdim      : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life],
731345153Sdim                          TIMER_OMP_worker_thread_life),
732345153Sdim        _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
733345153Sdim        thread_is_idle_flag(0) {}
734345153Sdim  ~kmp_stats_list() {}
735345153Sdim  inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
736345153Sdim  inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
737345153Sdim  inline partitionedTimers *getPartitionedTimers() {
738345153Sdim    return &_partitionedTimers;
739345153Sdim  }
740345153Sdim  inline timeStat *getTimers() { return _timers; }
741345153Sdim  inline counter *getCounters() { return _counters; }
742345153Sdim  inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
743345153Sdim  inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); }
744345153Sdim  inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(), this); }
745345153Sdim  inline void resetEventVector() { _event_vector.reset(); }
746345153Sdim  inline void incrementNestValue() { _nestLevel++; }
747345153Sdim  inline int getNestValue() { return _nestLevel; }
748345153Sdim  inline void decrementNestValue() { _nestLevel--; }
749345153Sdim  inline int getGtid() const { return gtid; }
750345153Sdim  inline void setGtid(int newgtid) { gtid = newgtid; }
751345153Sdim  inline void setState(stats_state_e newstate) { state = newstate; }
752345153Sdim  inline stats_state_e getState() const { return state; }
753345153Sdim  inline stats_state_e *getStatePointer() { return &state; }
754345153Sdim  inline bool isIdle() { return thread_is_idle_flag == 1; }
755345153Sdim  inline void setIdleFlag() { thread_is_idle_flag = 1; }
756345153Sdim  inline void resetIdleFlag() { thread_is_idle_flag = 0; }
757345153Sdim  kmp_stats_list *push_back(int gtid); // returns newly created list node
758345153Sdim  inline void push_event(uint64_t start_time, uint64_t stop_time,
759345153Sdim                         int nest_level, timer_e name) {
760345153Sdim    _event_vector.push_back(start_time, stop_time, nest_level, name);
761345153Sdim  }
762345153Sdim  void deallocate();
763345153Sdim  class iterator;
764345153Sdim  kmp_stats_list::iterator begin();
765345153Sdim  kmp_stats_list::iterator end();
766345153Sdim  int size();
767345153Sdim  class iterator {
768345153Sdim    kmp_stats_list *ptr;
769345153Sdim    friend kmp_stats_list::iterator kmp_stats_list::begin();
770345153Sdim    friend kmp_stats_list::iterator kmp_stats_list::end();
771345153Sdim
772345153Sdim  public:
773345153Sdim    iterator();
774345153Sdim    ~iterator();
775345153Sdim    iterator operator++();
776345153Sdim    iterator operator++(int dummy);
777345153Sdim    iterator operator--();
778345153Sdim    iterator operator--(int dummy);
779345153Sdim    bool operator!=(const iterator &rhs);
780345153Sdim    bool operator==(const iterator &rhs);
781345153Sdim    kmp_stats_list *operator*() const; // dereference operator
782345153Sdim  };
783345153Sdim};
784345153Sdim
785345153Sdim/* ****************************************************************
786345153Sdim   Class to encapsulate all output functions and the environment variables
787345153Sdim
788345153Sdim   This module holds filenames for various outputs (normal stats, events, plot
789345153Sdim   file), as well as coloring information for the plot file.
790345153Sdim
791345153Sdim   The filenames and flags variables are read from environment variables.
792345153Sdim   These are read once by the constructor of the global variable
793345153Sdim   __kmp_stats_output which calls init().
794345153Sdim
795345153Sdim   During this init() call, event flags for the timeStat::timerInfo[] global
796345153Sdim   array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
797345153Sdim
798345153Sdim   The only interface function that is public is outputStats(heading).  This
799345153Sdim   function should print out everything it needs to, either to files or stderr,
800345153Sdim   depending on the environment variables described below
801345153Sdim
802345153Sdim   ENVIRONMENT VARIABLES:
803345153Sdim   KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this
804345153Sdim                     file, otherwise, print to stderr
805345153Sdim   KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to
806345153Sdim                        either KMP_STATS_FILE or stderr
807345153Sdim   KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
808345153Sdim                          otherwise, the plot file is sent to "events.plt"
809345153Sdim   KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log
810345153Sdim                       events
811345153Sdim   KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
812345153Sdim                            otherwise, output is sent to "events.dat"
813345153Sdim**************************************************************** */
814345153Sdimclass kmp_stats_output_module {
815345153Sdim
816345153Sdimpublic:
817345153Sdim  struct rgb_color {
818345153Sdim    float r;
819345153Sdim    float g;
820345153Sdim    float b;
821345153Sdim  };
822345153Sdim
823345153Sdimprivate:
824345153Sdim  std::string outputFileName;
825345153Sdim  static const char *eventsFileName;
826345153Sdim  static const char *plotFileName;
827345153Sdim  static int printPerThreadFlag;
828345153Sdim  static int printPerThreadEventsFlag;
829345153Sdim  static const rgb_color globalColorArray[];
830345153Sdim  static rgb_color timerColorInfo[];
831345153Sdim
832345153Sdim  void init();
833345153Sdim  static void setupEventColors();
834345153Sdim  static void printPloticusFile();
835345153Sdim  static void printHeaderInfo(FILE *statsOut);
836345153Sdim  static void printTimerStats(FILE *statsOut, statistic const *theStats,
837345153Sdim                              statistic const *totalStats);
838345153Sdim  static void printCounterStats(FILE *statsOut, statistic const *theStats);
839345153Sdim  static void printCounters(FILE *statsOut, counter const *theCounters);
840345153Sdim  static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
841345153Sdim                          int gtid);
842345153Sdim  static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
843345153Sdim  static void windupExplicitTimers();
844345153Sdim  bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
845345153Sdim
846345153Sdimpublic:
847345153Sdim  kmp_stats_output_module() { init(); }
848345153Sdim  void outputStats(const char *heading);
849345153Sdim};
850345153Sdim
851345153Sdim#ifdef __cplusplus
852345153Sdimextern "C" {
853345153Sdim#endif
854345153Sdimvoid __kmp_stats_init();
855345153Sdimvoid __kmp_stats_fini();
856345153Sdimvoid __kmp_reset_stats();
857345153Sdimvoid __kmp_output_stats(const char *);
858345153Sdimvoid __kmp_accumulate_stats_at_exit(void);
859345153Sdim// thread local pointer to stats node within list
860345153Sdimextern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
861345153Sdim// head to stats list.
862345153Sdimextern kmp_stats_list *__kmp_stats_list;
863345153Sdim// lock for __kmp_stats_list
864345153Sdimextern kmp_tas_lock_t __kmp_stats_lock;
865345153Sdim// reference start time
866345153Sdimextern tsc_tick_count __kmp_stats_start_time;
867345153Sdim// interface to output
868345153Sdimextern kmp_stats_output_module __kmp_stats_output;
869345153Sdim
870345153Sdim#ifdef __cplusplus
871345153Sdim}
872345153Sdim#endif
873345153Sdim
874345153Sdim// Simple, standard interfaces that drop out completely if stats aren't enabled
875345153Sdim
/*!
 * \brief Adds a sample value to the specified timer (name).
 *
 * @param name timer name as specified under the KMP_FOREACH_TIMER() macro;
 * it is pasted onto the TIMER_ prefix to select the timer
 * @param value double precision sample value to add to the statistics for
 * the timer
 *
 * \details Use the KMP_COUNT_VALUE(name, value) macro to add a particular
 * value to the executing thread's statistics for a timer. The sample is
 * recorded through the thread-local __kmp_stats_thread_ptr.
 *
 * @ingroup STATS_GATHERING
*/
#define KMP_COUNT_VALUE(name, value)                                           \
  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
889345153Sdim
/*!
 * \brief Increments specified counter (name).
 *
 * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro;
 * it is pasted onto the COUNTER_ prefix to select the counter
 *
 * \details Use the KMP_COUNT_BLOCK(name) macro to increment a statistics
 * counter for the executing thread. The macro takes the counter name only;
 * each use adds exactly one to the counter.
 *
 * @ingroup STATS_GATHERING
*/
#define KMP_COUNT_BLOCK(name)                                                  \
  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
902345153Sdim
/*!
 * \brief Outputs the current thread statistics and reset them.
 *
 * @param heading_string heading put above the final stats output
 *
 * \details Explicitly stops all timers and outputs all stats. Where the
 * output goes and how detailed it is are controlled by the environment
 * variables documented on kmp_stats_output_module in this header:
 * `KMP_STATS_FILE=filename` sends the statistics to that file instead of
 * stderr, and `KMP_STATS_THREADS=on` additionally prints thread specific
 * stats. When `KMP_STATS_THREADS` is not set, thread specific stats are not
 * printed. It should be noted that all statistics are reset when this macro
 * is called.
 *
 * @ingroup STATS_GATHERING
*/
#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)
921345153Sdim
/*!
 * \brief Initializes the partitioned timers to begin with name.
 *
 * @param name timer which you want this thread to begin with
 *
 * \details Calls init() on the executing thread's partitionedTimers with an
 * explicitTimer built for TIMER_<name>.
 *
 * @ingroup STATS_GATHERING
*/
#define KMP_INIT_PARTITIONED_TIMERS(name)                                      \
  __kmp_stats_thread_ptr->getPartitionedTimers()->init(explicitTimer(          \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))

// Declares a local blockPartitionedTimer named __PBLOCKTIME__ for
// TIMER_<name>. Because it declares a variable, use it at most once per
// scope. NOTE(review): presumably the timer is active until the enclosing
// scope ends -- confirm against blockPartitionedTimer.
#define KMP_TIME_PARTITIONED_BLOCK(name)                                       \
  blockPartitionedTimer __PBLOCKTIME__(                                        \
      __kmp_stats_thread_ptr->getPartitionedTimers(),                          \
      explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name),            \
                    TIMER_##name))

// Calls push() on the executing thread's partitionedTimers with an
// explicitTimer built for TIMER_<name>.
#define KMP_PUSH_PARTITIONED_TIMER(name)                                       \
  __kmp_stats_thread_ptr->getPartitionedTimers()->push(explicitTimer(          \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))

// Calls pop() on the executing thread's partitionedTimers. Takes no argument.
#define KMP_POP_PARTITIONED_TIMER()                                            \
  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()

// Calls exchange() on the executing thread's partitionedTimers with an
// explicitTimer built for TIMER_<name>.
#define KMP_EXCHANGE_PARTITIONED_TIMER(name)                                   \
  __kmp_stats_thread_ptr->getPartitionedTimers()->exchange(explicitTimer(      \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
949345153Sdim
// Sets the executing thread's stats state to state_name.
#define KMP_SET_THREAD_STATE(state_name)                                       \
  __kmp_stats_thread_ptr->setState(state_name)

// Yields the executing thread's current stats state.
#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()

// Declares a local blockThreadState named __BTHREADSTATE__ that switches the
// executing thread's state to state_name and puts the previous state back
// when the object is destroyed. Because it declares a variable, use it at
// most once per scope.
#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                 \
  blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \
                                    state_name)
958345153Sdim
/*!
 * \brief Resets all stats (counters to 0, timers to 0 elapsed ticks).
 *
 * \details Resets all stats for all threads by calling __kmp_reset_stats().
 *
 * @ingroup STATS_GATHERING
*/
#define KMP_RESET_STATS() __kmp_reset_stats()
967345153Sdim
// Developer-only variants of the statistics macros. They forward to the
// regular macros when KMP_DEVELOPER_STATS is non-zero and expand to a no-op
// expression otherwise, so call sites compile the same way in both modes.
#if (KMP_DEVELOPER_STATS)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) KMP_PUSH_PARTITIONED_TIMER(n)
// KMP_POP_PARTITIONED_TIMER() takes no parameters, so n must not be forwarded:
// doing so makes any call that passes a timer name fail to preprocess. The
// argument is accepted (it may also be left empty) and ignored.
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) KMP_POP_PARTITIONED_TIMER()
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n)                            \
  KMP_EXCHANGE_PARTITIONED_TIMER(n)
#else
// Null definitions
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#endif
985345153Sdim
986345153Sdim#else // KMP_STATS_ENABLED
987345153Sdim
// Null definitions
// With statistics disabled every public macro expands to a no-op expression
// that does not evaluate its arguments, so call sites need no #if guards.
#define KMP_COUNT_VALUE(n, v) ((void)0)
#define KMP_COUNT_BLOCK(n) ((void)0)

#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)

#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)
// Counterpart of the enabled-stats KMP_EXCHANGE_PARTITIONED_TIMER, so that
// unguarded uses still compile when statistics are disabled.
#define KMP_EXCHANGE_PARTITIONED_TIMER(name) ((void)0)
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
// NOTE: expands to a void expression, so its result cannot be used as a value
// when statistics are disabled.
#define KMP_GET_THREAD_STATE() ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
1008345153Sdim#endif // KMP_STATS_ENABLED
1009345153Sdim
1010345153Sdim#endif // KMP_STATS_H
1011