1345153Sdim#ifndef KMP_STATS_H 2345153Sdim#define KMP_STATS_H 3345153Sdim 4345153Sdim/** @file kmp_stats.h 5345153Sdim * Functions for collecting statistics. 6345153Sdim */ 7345153Sdim 8345153Sdim//===----------------------------------------------------------------------===// 9345153Sdim// 10353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 11353358Sdim// See https://llvm.org/LICENSE.txt for license information. 12353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 13345153Sdim// 14345153Sdim//===----------------------------------------------------------------------===// 15345153Sdim 16345153Sdim#include "kmp_config.h" 17345153Sdim#include "kmp_debug.h" 18345153Sdim 19345153Sdim#if KMP_STATS_ENABLED 20345153Sdim/* Statistics accumulator. 21345153Sdim Accumulates number of samples and computes min, max, mean, standard deviation 22345153Sdim on the fly. 23345153Sdim 24345153Sdim Online variance calculation algorithm from 25345153Sdim http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm 26345153Sdim */ 27345153Sdim 28345153Sdim#include "kmp_stats_timing.h" 29345153Sdim#include <limits> 30345153Sdim#include <math.h> 31345153Sdim#include <new> // placement new 32345153Sdim#include <stdint.h> 33345153Sdim#include <string> 34345153Sdim#include <vector> 35345153Sdim 36345153Sdim/* Enable developer statistics here if you want them. They are more detailed 37345153Sdim than is useful for application characterisation and are intended for the 38345153Sdim runtime library developer. */ 39345153Sdim#define KMP_DEVELOPER_STATS 0 40345153Sdim 41345153Sdim/* Enable/Disable histogram output */ 42345153Sdim#define KMP_STATS_HIST 0 43345153Sdim 44345153Sdim/*! 
* @ingroup STATS_GATHERING
* \brief flags to describe the statistic (timer or counter)
*
* Each enumerator occupies its own bit, so several flags can be OR-ed together
* into a single flags word (the KMP_FOREACH_COUNTER / KMP_FOREACH_TIMER tables
* in this file do exactly that) and tested one at a time with a bitwise AND.
*/
enum stats_flags_e {
  noTotal = 1 << 0, //!< do not show a TOTAL_aggregation for this statistic
  onlyInMaster = 1 << 1, //!< statistic is valid only for master
  noUnits = 1 << 2, //!< statistic doesn't need units printed next to it
  notInMaster = 1 << 3, //!< statistic is valid only for non-master threads
  logEvent = 1 << 4 //!< statistic can be logged on the event timeline when
  //! KMP_STATS_EVENTS is on (valid only for timers)
};

/*!
 * @ingroup STATS_GATHERING
 * \brief the states which a thread can be in
 *
 * The enumerators take the default consecutive values starting at 0 (IDLE).
 * kmp_stats_list keeps one value of this type per list node and initialises
 * it to IDLE; blockThreadState saves and restores it around a scope.
 */
enum stats_state_e {
  IDLE,
  SERIAL_REGION,
  FORK_JOIN_BARRIER,
  PLAIN_BARRIER,
  TASKWAIT,
  TASKYIELD,
  TASKGROUP,
  IMPLICIT_TASK,
  EXPLICIT_TASK,
  TEAMS_REGION
};

/*!
 * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
 *
 * @param macro a user defined macro that takes three arguments -
 * macro(COUNTER_NAME, flags, arg)
 * @param arg a user defined argument to send to the user defined macro
 *
 * \details A counter counts the occurrence of some event. Each thread
 * accumulates its own count, at the end of execution the counts are aggregated
 * treating each thread as a separate measurement. (Unless onlyInMaster is set,
 * in which case there's only a single measurement). The min,mean,max are
 * therefore the values for the threads.
* Adding the counter here and then
* putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you
* need to do. All of the tables and printing are generated from this macro.
* Format is "macro(name, flags, arg)"
*
* The list is an X-macro: every entry is handed to the caller-supplied
* `macro`. This header itself expands it with ENUMERATE to build counter_e,
* which yields one COUNTER_<name> enumerator per entry followed by
* COUNTER_LAST. The flags argument is 0 or an OR of stats_flags_e values.
*
* @ingroup STATS_GATHERING
*/
// clang-format off
#define KMP_FOREACH_COUNTER(macro, arg)                                        \
  macro(OMP_PARALLEL,stats_flags_e::onlyInMaster|stats_flags_e::noTotal,arg)   \
  macro(OMP_NESTED_PARALLEL, 0, arg)                                           \
  macro(OMP_LOOP_STATIC, 0, arg)                                               \
  macro(OMP_LOOP_STATIC_STEAL, 0, arg)                                         \
  macro(OMP_LOOP_DYNAMIC, 0, arg)                                              \
  macro(OMP_DISTRIBUTE, 0, arg)                                                \
  macro(OMP_BARRIER, 0, arg)                                                   \
  macro(OMP_CRITICAL, 0, arg)                                                  \
  macro(OMP_SINGLE, 0, arg)                                                    \
  macro(OMP_MASTER, 0, arg)                                                    \
  macro(OMP_TEAMS, 0, arg)                                                     \
  macro(OMP_set_lock, 0, arg)                                                  \
  macro(OMP_test_lock, 0, arg)                                                 \
  macro(REDUCE_wait, 0, arg)                                                   \
  macro(REDUCE_nowait, 0, arg)                                                 \
  macro(OMP_TASKYIELD, 0, arg)                                                 \
  macro(OMP_TASKLOOP, 0, arg)                                                  \
  macro(TASK_executed, 0, arg)                                                 \
  macro(TASK_cancelled, 0, arg)                                                \
  macro(TASK_stolen, 0, arg)
// clang-format on

/*!
 * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
 *
 * @param macro a user defined macro that takes three arguments -
 * macro(TIMER_NAME, flags, arg)
 * @param arg a user defined argument to send to the user defined macro
 *
 * \details A timer collects multiple samples of some count in each thread and
 * then finally aggregates all of the samples from all of the threads.
* For most
* timers the printing code also provides an aggregation over the thread totals.
* These are printed as TOTAL_foo. The count is normally a time (in ticks),
* hence the name "timer". (But can be any value, so we use this for "number of
* arguments passed to fork" as well). For timers the threads are not
* significant, it's the individual observations that count, so the statistics
* are at that level. Format is "macro(name, flags, arg)"
*
* The list is an X-macro: this header expands it with ENUMERATE to build
* timer_e (one TIMER_<name> enumerator per entry, then TIMER_LAST). The
* developer-only timers are appended through KMP_FOREACH_DEVELOPER_TIMER,
* which expands to nothing unless KMP_DEVELOPER_STATS is non-zero.
*
* @ingroup STATS_GATHERING2
*/
// NOTE(review): every other statistic in this file uses
// "@ingroup STATS_GATHERING"; confirm that STATS_GATHERING2 is intended.
// clang-format off
#define KMP_FOREACH_TIMER(macro, arg)                                          \
  macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg)                 \
  macro (OMP_parallel, stats_flags_e::logEvent, arg)                           \
  macro (OMP_parallel_overhead, stats_flags_e::logEvent, arg)                  \
  macro (OMP_teams, stats_flags_e::logEvent, arg)                              \
  macro (OMP_teams_overhead, stats_flags_e::logEvent, arg)                     \
  macro (OMP_loop_static, 0, arg)                                              \
  macro (OMP_loop_static_scheduling, 0, arg)                                   \
  macro (OMP_loop_dynamic, 0, arg)                                             \
  macro (OMP_loop_dynamic_scheduling, 0, arg)                                  \
  macro (OMP_distribute, 0, arg)                                               \
  macro (OMP_distribute_scheduling, 0, arg)                                    \
  macro (OMP_critical, 0, arg)                                                 \
  macro (OMP_critical_wait, 0, arg)                                            \
  macro (OMP_single, 0, arg)                                                   \
  macro (OMP_master, 0, arg)                                                   \
  macro (OMP_task_immediate, 0, arg)                                           \
  macro (OMP_task_taskwait, 0, arg)                                            \
  macro (OMP_task_taskyield, 0, arg)                                           \
  macro (OMP_task_taskgroup, 0, arg)                                           \
  macro (OMP_task_join_bar, 0, arg)                                            \
  macro (OMP_task_plain_bar, 0, arg)                                           \
  macro (OMP_taskloop_scheduling, 0, arg)                                      \
  macro (OMP_plain_barrier, stats_flags_e::logEvent, arg)                      \
  macro (OMP_idle, stats_flags_e::logEvent, arg)                               \
  macro (OMP_fork_barrier, stats_flags_e::logEvent, arg)                       \
  macro (OMP_join_barrier, stats_flags_e::logEvent, arg)                       \
  macro (OMP_serial, stats_flags_e::logEvent, arg)                             \
  macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,  \
         arg)                                                                  \
  macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal,   \
         arg)                                                                  \
  macro (OMP_loop_static_iterations,                                           \
         stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                 \
  macro (OMP_loop_static_total_iterations,                                     \
         stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                 \
  macro (OMP_loop_dynamic_iterations,                                          \
         stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                 \
  macro (OMP_loop_dynamic_total_iterations,                                    \
         stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                 \
  macro (OMP_distribute_iterations,                                            \
         stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                 \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
// clang-format on

// OMP_worker_thread_life -- Time from thread becoming an OpenMP thread (either
//                           initializing OpenMP or being created by a master)
//                           until the thread is destroyed
// OMP_parallel           -- Time thread spends executing work directly
//                           within a #pragma omp parallel
// OMP_parallel_overhead  -- Time thread spends setting up a parallel region
// OMP_loop_static        -- Time thread spends executing loop iterations from
//                           a statically scheduled loop
// OMP_loop_static_scheduling -- Time thread spends scheduling loop iterations
//                               from a statically scheduled loop
// OMP_loop_dynamic       -- Time thread spends executing loop iterations from
//                           a dynamically
//                           scheduled loop
// OMP_loop_dynamic_scheduling -- Time thread spends scheduling loop iterations
//                                from a dynamically scheduled loop
// OMP_critical           -- Time thread spends executing critical section
// OMP_critical_wait      -- Time thread spends waiting to enter
//                           a critical section
// OMP_single             -- Time spent executing a "single" region
// OMP_master             -- Time spent executing a "master" region
// OMP_task_immediate     -- Time spent executing non-deferred tasks
// OMP_task_taskwait      -- Time spent executing tasks inside a taskwait
//                           construct
// OMP_task_taskyield     -- Time spent executing tasks inside a taskyield
//                           construct
// OMP_task_taskgroup     -- Time spent executing tasks inside a taskgroup
//                           construct
// OMP_task_join_bar      -- Time spent executing tasks inside a join barrier
// OMP_task_plain_bar     -- Time spent executing tasks inside a barrier
//                           construct
// OMP_taskloop_scheduling -- Time spent scheduling tasks inside a taskloop
//                            construct
// OMP_plain_barrier      -- Time spent in a #pragma omp barrier construct or
//                           inside implicit barrier at end of worksharing
//                           construct
// OMP_idle               -- Time worker threads spend waiting for next
//                           parallel region
// OMP_fork_barrier       -- Time spent in the fork barrier surrounding a
//                           parallel region
// OMP_join_barrier       -- Time spent in the join barrier surrounding a
//                           parallel region
// OMP_serial             -- Time thread zero spends executing serial code
// OMP_set_numthreads     -- Values passed to omp_set_num_threads
// OMP_PARALLEL_args      -- Number of arguments passed to a parallel region
// OMP_loop_static_iterations -- Number of
//                               iterations thread is assigned for
//                               statically scheduled loops
// OMP_loop_dynamic_iterations -- Number of iterations thread is assigned for
//                                dynamically scheduled loops

#if (KMP_DEVELOPER_STATS)
// Timers which are of interest to runtime library developers, not end users.
// These have to be explicitly enabled in addition to the other stats.

// KMP_fork_barrier       -- time in __kmp_fork_barrier
// KMP_join_barrier       -- time in __kmp_join_barrier
// KMP_barrier            -- time in __kmp_barrier
// KMP_end_split_barrier  -- time in __kmp_end_split_barrier
// KMP_setup_icv_copy     -- time in __kmp_setup_icv_copy
// KMP_icv_copy           -- start/stop timer for any ICV copying
// KMP_linear_gather      -- time in __kmp_linear_barrier_gather
// KMP_linear_release     -- time in __kmp_linear_barrier_release
// KMP_tree_gather        -- time in __kmp_tree_barrier_gather
// KMP_tree_release       -- time in __kmp_tree_barrier_release
// KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release      -- time in __kmp_hyper_barrier_release
//
// NOTE(review): the list above is out of sync with the macro below.
// KMP_fork_barrier, KMP_join_barrier, KMP_barrier and KMP_icv_copy are
// described but not defined, while KMP_fork_call, KMP_join_call,
// KMP_hier_gather, KMP_hier_release, USER_resume, USER_suspend,
// KMP_allocate_team, USER_icv_copy, FOR_static_steal_stolen and
// FOR_static_steal_chunks are defined but not described.
// clang-format off
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                \
  macro(KMP_fork_call, 0, arg)                                                 \
  macro(KMP_join_call, 0, arg)                                                 \
  macro(KMP_end_split_barrier, 0, arg)                                         \
  macro(KMP_hier_gather, 0, arg)                                               \
  macro(KMP_hier_release, 0, arg)                                              \
  macro(KMP_hyper_gather, 0, arg)                                              \
  macro(KMP_hyper_release, 0, arg)                                             \
  macro(KMP_linear_gather, 0, arg)                                             \
  macro(KMP_linear_release, 0, arg)                                            \
  macro(KMP_tree_gather, 0, arg)                                               \
  macro(KMP_tree_release, 0, arg)                                              \
  macro(USER_resume, 0, arg)                                                   \
  macro(USER_suspend, 0, arg)                                                  \
  macro(KMP_allocate_team, 0, arg)                                             \
  macro(KMP_setup_icv_copy, 0, arg)                                            \
  macro(USER_icv_copy, 0, arg)                                                 \
  macro (FOR_static_steal_stolen,                                              \
         stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                 \
  macro (FOR_static_steal_chunks,                                              \
         stats_flags_e::noUnits | stats_flags_e::noTotal, arg)
#else
// Developer statistics disabled: the list contributes no timers.
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
// clang-format on

/*!
 * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
 *
 * @param macro a user defined macro that takes three arguments -
 * macro(TIMER_NAME, flags, arg)
 * @param arg a user defined argument to send to the user defined macro
 *
 * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE
 * BAD THINGS WILL HAPPEN!
 *
 * \details Explicit timers are ones where we need to allocate a timer itself
 * (as well as the accumulated timing statistics). We allocate these on a
 * per-thread basis, and explicitly start and stop them. Block timers just
 * allocate the timer itself on the stack, and use the destructor to notice
 * block exit; they don't need to be defined here. The name here should be the
 * same as that of a timer above.
289345153Sdim * 290345153Sdim * @ingroup STATS_GATHERING 291345153Sdim*/ 292345153Sdim#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg) 293345153Sdim 294345153Sdim#define ENUMERATE(name, ignore, prefix) prefix##name, 295345153Sdimenum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST }; 296345153Sdim 297345153Sdimenum explicit_timer_e { 298345153Sdim KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST 299345153Sdim}; 300345153Sdim 301345153Sdimenum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST }; 302345153Sdim#undef ENUMERATE 303345153Sdim 304345153Sdim/* 305345153Sdim * A logarithmic histogram. It accumulates the number of values in each power of 306345153Sdim * ten bin. So 1<=x<10, 10<=x<100, ... 307345153Sdim * Mostly useful where we have some big outliers and want to see information 308345153Sdim * about them. 309345153Sdim */ 310345153Sdimclass logHistogram { 311345153Sdim enum { 312345153Sdim numBins = 31, /* Number of powers of 10. If this changes you need to change 313345153Sdim * the initializer for binMax */ 314345153Sdim 315345153Sdim /* 316345153Sdim * If you want to use this to analyse values that may be less than 1, (for 317345153Sdim * instance times in s), then the logOffset gives you negative powers. 318345153Sdim * In our case here, we're just looking at times in ticks, or counts, so we 319345153Sdim * can never see values with magnitude < 1 (other than zero), so we can set 320345153Sdim * it to 0. As above change the initializer if you change this. 
321345153Sdim */ 322345153Sdim logOffset = 0 323345153Sdim }; 324345153Sdim uint32_t KMP_ALIGN_CACHE zeroCount; 325345153Sdim struct { 326345153Sdim uint32_t count; 327345153Sdim double total; 328345153Sdim } bins[numBins]; 329345153Sdim 330345153Sdim static double binMax[numBins]; 331345153Sdim 332345153Sdim#ifdef KMP_DEBUG 333345153Sdim uint64_t _total; 334345153Sdim 335345153Sdim void check() const { 336345153Sdim uint64_t t = zeroCount; 337345153Sdim for (int i = 0; i < numBins; i++) 338345153Sdim t += bins[i].count; 339345153Sdim KMP_DEBUG_ASSERT(t == _total); 340345153Sdim } 341345153Sdim#else 342345153Sdim void check() const {} 343345153Sdim#endif 344345153Sdim 345345153Sdimpublic: 346345153Sdim logHistogram() { reset(); } 347345153Sdim 348345153Sdim logHistogram(logHistogram const &o) { 349345153Sdim for (int i = 0; i < numBins; i++) 350345153Sdim bins[i] = o.bins[i]; 351345153Sdim#ifdef KMP_DEBUG 352345153Sdim _total = o._total; 353345153Sdim#endif 354345153Sdim } 355345153Sdim 356345153Sdim void reset() { 357345153Sdim zeroCount = 0; 358345153Sdim for (int i = 0; i < numBins; i++) { 359345153Sdim bins[i].count = 0; 360345153Sdim bins[i].total = 0; 361345153Sdim } 362345153Sdim 363345153Sdim#ifdef KMP_DEBUG 364345153Sdim _total = 0; 365345153Sdim#endif 366345153Sdim } 367345153Sdim uint32_t count(int b) const { return bins[b + logOffset].count; } 368345153Sdim double total(int b) const { return bins[b + logOffset].total; } 369345153Sdim static uint32_t findBin(double sample); 370345153Sdim 371345153Sdim logHistogram &operator+=(logHistogram const &o) { 372345153Sdim zeroCount += o.zeroCount; 373345153Sdim for (int i = 0; i < numBins; i++) { 374345153Sdim bins[i].count += o.bins[i].count; 375345153Sdim bins[i].total += o.bins[i].total; 376345153Sdim } 377345153Sdim#ifdef KMP_DEBUG 378345153Sdim _total += o._total; 379345153Sdim check(); 380345153Sdim#endif 381345153Sdim 382345153Sdim return *this; 383345153Sdim } 384345153Sdim 385345153Sdim void 
addSample(double sample); 386345153Sdim int minBin() const; 387345153Sdim int maxBin() const; 388345153Sdim 389345153Sdim std::string format(char) const; 390345153Sdim}; 391345153Sdim 392345153Sdimclass statistic { 393345153Sdim double KMP_ALIGN_CACHE minVal; 394345153Sdim double maxVal; 395345153Sdim double meanVal; 396345153Sdim double m2; 397345153Sdim uint64_t sampleCount; 398345153Sdim double offset; 399345153Sdim bool collectingHist; 400345153Sdim logHistogram hist; 401345153Sdim 402345153Sdimpublic: 403345153Sdim statistic(bool doHist = bool(KMP_STATS_HIST)) { 404345153Sdim reset(); 405345153Sdim collectingHist = doHist; 406345153Sdim } 407345153Sdim statistic(statistic const &o) 408345153Sdim : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), 409345153Sdim sampleCount(o.sampleCount), offset(o.offset), 410345153Sdim collectingHist(o.collectingHist), hist(o.hist) {} 411345153Sdim statistic(double minv, double maxv, double meanv, uint64_t sc, double sd) 412345153Sdim : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc), 413345153Sdim sampleCount(sc), offset(0.0), collectingHist(false) {} 414345153Sdim bool haveHist() const { return collectingHist; } 415345153Sdim double getMin() const { return minVal; } 416345153Sdim double getMean() const { return meanVal; } 417345153Sdim double getMax() const { return maxVal; } 418345153Sdim uint64_t getCount() const { return sampleCount; } 419345153Sdim double getSD() const { return sqrt(m2 / sampleCount); } 420345153Sdim double getTotal() const { return sampleCount * meanVal; } 421345153Sdim logHistogram const *getHist() const { return &hist; } 422345153Sdim void setOffset(double d) { offset = d; } 423345153Sdim 424345153Sdim void reset() { 425345153Sdim minVal = std::numeric_limits<double>::max(); 426345153Sdim maxVal = -minVal; 427345153Sdim meanVal = 0.0; 428345153Sdim m2 = 0.0; 429345153Sdim sampleCount = 0; 430345153Sdim offset = 0.0; 431345153Sdim hist.reset(); 432345153Sdim } 433345153Sdim 
void addSample(double sample); 434345153Sdim void scale(double factor); 435345153Sdim void scaleDown(double f) { scale(1. / f); } 436345153Sdim void forceCount(uint64_t count) { sampleCount = count; } 437345153Sdim statistic &operator+=(statistic const &other); 438345153Sdim 439345153Sdim std::string format(char unit, bool total = false) const; 440345153Sdim std::string formatHist(char unit) const { return hist.format(unit); } 441345153Sdim}; 442345153Sdim 443345153Sdimstruct statInfo { 444345153Sdim const char *name; 445345153Sdim uint32_t flags; 446345153Sdim}; 447345153Sdim 448345153Sdimclass timeStat : public statistic { 449345153Sdim static statInfo timerInfo[]; 450345153Sdim 451345153Sdimpublic: 452345153Sdim timeStat() : statistic() {} 453345153Sdim static const char *name(timer_e e) { return timerInfo[e].name; } 454345153Sdim static bool noTotal(timer_e e) { 455345153Sdim return timerInfo[e].flags & stats_flags_e::noTotal; 456345153Sdim } 457345153Sdim static bool masterOnly(timer_e e) { 458345153Sdim return timerInfo[e].flags & stats_flags_e::onlyInMaster; 459345153Sdim } 460345153Sdim static bool workerOnly(timer_e e) { 461345153Sdim return timerInfo[e].flags & stats_flags_e::notInMaster; 462345153Sdim } 463345153Sdim static bool noUnits(timer_e e) { 464345153Sdim return timerInfo[e].flags & stats_flags_e::noUnits; 465345153Sdim } 466345153Sdim static bool logEvent(timer_e e) { 467345153Sdim return timerInfo[e].flags & stats_flags_e::logEvent; 468345153Sdim } 469345153Sdim static void clearEventFlags() { 470345153Sdim for (int i = 0; i < TIMER_LAST; i++) { 471345153Sdim timerInfo[i].flags &= (~(stats_flags_e::logEvent)); 472345153Sdim } 473345153Sdim } 474345153Sdim}; 475345153Sdim 476345153Sdim// Where we need explicitly to start and end the timer, this version can be used 477345153Sdim// Since these timers normally aren't nicely scoped, so don't have a good place 478345153Sdim// to live on the stack of the thread, they're more work to use. 
// Pairs a timeStat with the timer_e value identifying it and tracks the
// start tick plus the total time spent paused. start() and stop() are defined
// out of line.
class explicitTimer {
  timeStat *stat; // statistic this timer is bound to
  timer_e timerEnumValue; // which timer this is; returned by get_type()
  tsc_tick_count startTime;
  tsc_tick_count pauseStartTime; // tick recorded by the most recent pause()
  tsc_tick_count::tsc_interval_t totalPauseTime; // sum of paused intervals

public:
  explicitTimer(timeStat *s, timer_e te)
      : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0),
        totalPauseTime() {}

  // void setStat(timeStat *s) { stat = s; }
  void start(tsc_tick_count tick);
  // Remembers when the pause began; resume() uses it to measure the pause.
  void pause(tsc_tick_count tick) { pauseStartTime = tick; }
  // Adds the interval since the matching pause() to the paused total.
  void resume(tsc_tick_count tick) {
    totalPauseTime += (tick - pauseStartTime);
  }
  void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr = nullptr);
  // Zeroes the start tick, the pause tick and the accumulated pause time; the
  // bound statistic and timer type are left as they are.
  void reset() {
    startTime = 0;
    pauseStartTime = 0;
    totalPauseTime = 0;
  }
  timer_e get_type() const { return timerEnumValue; }
};

// Where you need to partition a thread's clock ticks into separate states
// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
// DOING_NOTHING would render these conditions:
// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
// versa
//
// The timers are held by value in a std::vector; all operations are defined
// out of line.
class partitionedTimers {
private:
  std::vector<explicitTimer> timer_stack;

public:
  partitionedTimers();
  void init(explicitTimer timer);
  void exchange(explicitTimer timer);
  void push(explicitTimer timer);
  void pop();
  void windup();
};

// Special wrapper around the partitioned timers to aid
timing code blocks 526345153Sdim// It avoids the need to have an explicit end, leaving the scope suffices. 527345153Sdimclass blockPartitionedTimer { 528345153Sdim partitionedTimers *part_timers; 529345153Sdim 530345153Sdimpublic: 531345153Sdim blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer) 532345153Sdim : part_timers(pt) { 533345153Sdim part_timers->push(timer); 534345153Sdim } 535345153Sdim ~blockPartitionedTimer() { part_timers->pop(); } 536345153Sdim}; 537345153Sdim 538345153Sdim// Special wrapper around the thread state to aid in keeping state in code 539345153Sdim// blocks It avoids the need to have an explicit end, leaving the scope 540345153Sdim// suffices. 541345153Sdimclass blockThreadState { 542345153Sdim stats_state_e *state_pointer; 543345153Sdim stats_state_e old_state; 544345153Sdim 545345153Sdimpublic: 546345153Sdim blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state) 547345153Sdim : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) { 548345153Sdim *state_pointer = new_state; 549345153Sdim } 550345153Sdim ~blockThreadState() { *state_pointer = old_state; } 551345153Sdim}; 552345153Sdim 553345153Sdim// If all you want is a count, then you can use this... 554345153Sdim// The individual per-thread counts will be aggregated into a statistic at 555345153Sdim// program exit. 
556345153Sdimclass counter { 557345153Sdim uint64_t value; 558345153Sdim static const statInfo counterInfo[]; 559345153Sdim 560345153Sdimpublic: 561345153Sdim counter() : value(0) {} 562345153Sdim void increment() { value++; } 563345153Sdim uint64_t getValue() const { return value; } 564345153Sdim void reset() { value = 0; } 565345153Sdim static const char *name(counter_e e) { return counterInfo[e].name; } 566345153Sdim static bool masterOnly(counter_e e) { 567345153Sdim return counterInfo[e].flags & stats_flags_e::onlyInMaster; 568345153Sdim } 569345153Sdim}; 570345153Sdim 571345153Sdim/* **************************************************************** 572345153Sdim Class to implement an event 573345153Sdim 574345153Sdim There are four components to an event: start time, stop time 575345153Sdim nest_level, and timer_name. 576345153Sdim The start and stop time should be obvious (recorded in clock ticks). 577345153Sdim The nest_level relates to the bar width in the timeline graph. 578345153Sdim The timer_name is used to determine which timer event triggered this event. 579345153Sdim 580345153Sdim the interface to this class is through four read-only operations: 581345153Sdim 1) getStart() -- returns the start time as 64 bit integer 582345153Sdim 2) getStop() -- returns the stop time as 64 bit integer 583345153Sdim 3) getNestLevel() -- returns the nest level of the event 584345153Sdim 4) getTimerName() -- returns the timer name that triggered event 585345153Sdim 586345153Sdim *MORE ON NEST_LEVEL* 587345153Sdim The nest level is used in the bar graph that represents the timeline. 588345153Sdim Its main purpose is for showing how events are nested inside eachother. 589345153Sdim For example, say events, A, B, and C are recorded. 
If the timeline 590345153Sdim looks like this: 591345153Sdim 592345153SdimBegin -------------------------------------------------------------> Time 593345153Sdim | | | | | | 594345153Sdim A B C C B A 595345153Sdim start start start end end end 596345153Sdim 597345153Sdim Then A, B, C will have a nest level of 1, 2, 3 respectively. 598345153Sdim These values are then used to calculate the barwidth so you can 599345153Sdim see that inside A, B has occurred, and inside B, C has occurred. 600345153Sdim Currently, this is shown with A's bar width being larger than B's 601345153Sdim bar width, and B's bar width being larger than C's bar width. 602345153Sdim 603345153Sdim**************************************************************** */ 604345153Sdimclass kmp_stats_event { 605345153Sdim uint64_t start; 606345153Sdim uint64_t stop; 607345153Sdim int nest_level; 608345153Sdim timer_e timer_name; 609345153Sdim 610345153Sdimpublic: 611345153Sdim kmp_stats_event() 612345153Sdim : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} 613345153Sdim kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) 614345153Sdim : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} 615345153Sdim inline uint64_t getStart() const { return start; } 616345153Sdim inline uint64_t getStop() const { return stop; } 617345153Sdim inline int getNestLevel() const { return nest_level; } 618345153Sdim inline timer_e getTimerName() const { return timer_name; } 619345153Sdim}; 620345153Sdim 621345153Sdim/* **************************************************************** 622345153Sdim Class to implement a dynamically expandable array of events 623345153Sdim 624345153Sdim --------------------------------------------------------- 625345153Sdim | event 1 | event 2 | event 3 | event 4 | ... 
| event N | 626345153Sdim --------------------------------------------------------- 627345153Sdim 628345153Sdim An event is pushed onto the back of this array at every 629345153Sdim explicitTimer->stop() call. The event records the thread #, 630345153Sdim start time, stop time, and nest level related to the bar width. 631345153Sdim 632345153Sdim The event vector starts at size INIT_SIZE and grows (doubles in size) 633345153Sdim if needed. An implication of this behavior is that log(N) 634345153Sdim reallocations are needed (where N is number of events). If you want 635345153Sdim to avoid reallocations, then set INIT_SIZE to a large value. 636345153Sdim 637345153Sdim the interface to this class is through six operations: 638345153Sdim 1) reset() -- sets the internal_size back to 0 but does not deallocate any 639345153Sdim memory 640345153Sdim 2) size() -- returns the number of valid elements in the vector 641345153Sdim 3) push_back(start, stop, nest, timer_name) -- pushes an event onto 642345153Sdim the back of the array 643345153Sdim 4) deallocate() -- frees all memory associated with the vector 644345153Sdim 5) sort() -- sorts the vector by start time 645345153Sdim 6) operator[index] or at(index) -- returns event reference at that index 646345153Sdim**************************************************************** */ 647345153Sdimclass kmp_stats_event_vector { 648345153Sdim kmp_stats_event *events; 649345153Sdim int internal_size; 650345153Sdim int allocated_size; 651345153Sdim static const int INIT_SIZE = 1024; 652345153Sdim 653345153Sdimpublic: 654345153Sdim kmp_stats_event_vector() { 655345153Sdim events = 656345153Sdim (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE); 657345153Sdim internal_size = 0; 658345153Sdim allocated_size = INIT_SIZE; 659345153Sdim } 660345153Sdim ~kmp_stats_event_vector() {} 661345153Sdim inline void reset() { internal_size = 0; } 662345153Sdim inline int size() const { return internal_size; } 663345153Sdim void 
push_back(uint64_t start_time, uint64_t stop_time, int nest_level, 664345153Sdim timer_e name) { 665345153Sdim int i; 666345153Sdim if (internal_size == allocated_size) { 667345153Sdim kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate( 668345153Sdim sizeof(kmp_stats_event) * allocated_size * 2); 669345153Sdim for (i = 0; i < internal_size; i++) 670345153Sdim tmp[i] = events[i]; 671345153Sdim __kmp_free(events); 672345153Sdim events = tmp; 673345153Sdim allocated_size *= 2; 674345153Sdim } 675345153Sdim events[internal_size] = 676345153Sdim kmp_stats_event(start_time, stop_time, nest_level, name); 677345153Sdim internal_size++; 678345153Sdim return; 679345153Sdim } 680345153Sdim void deallocate(); 681345153Sdim void sort(); 682345153Sdim const kmp_stats_event &operator[](int index) const { return events[index]; } 683345153Sdim kmp_stats_event &operator[](int index) { return events[index]; } 684345153Sdim const kmp_stats_event &at(int index) const { return events[index]; } 685345153Sdim kmp_stats_event &at(int index) { return events[index]; } 686345153Sdim}; 687345153Sdim 688345153Sdim/* **************************************************************** 689345153Sdim Class to implement a doubly-linked, circular, statistics list 690345153Sdim 691345153Sdim |---| ---> |---| ---> |---| ---> |---| ---> ... next 692345153Sdim | | | | | | | | 693345153Sdim |---| <--- |---| <--- |---| <--- |---| <--- ... prev 694345153Sdim Sentinel first second third 695345153Sdim Node node node node 696345153Sdim 697345153Sdim The Sentinel Node is the user handle on the list. 698345153Sdim The first node corresponds to thread 0's statistics. 699345153Sdim The second node corresponds to thread 1's statistics and so on... 700345153Sdim 701345153Sdim Each node has a _timers, _counters, and _explicitTimers array to hold that 702345153Sdim thread's statistics. The _explicitTimers point to the correct _timer and 703345153Sdim update its statistics at every stop() call. 
The explicitTimers' pointers are 704345153Sdim set up in the constructor. Each node also has an event vector to hold that 705345153Sdim thread's timing events. The event vector expands as necessary and records 706345153Sdim the start-stop times for each timer. 707345153Sdim 708345153Sdim The nestLevel variable is for plotting events and is related 709345153Sdim to the bar width in the timeline graph. 710345153Sdim 711345153Sdim Every thread will have a thread local pointer to its node in 712345153Sdim the list. The sentinel node is used by the master thread to 713345153Sdim store "dummy" statistics before __kmp_create_worker() is called. 714345153Sdim**************************************************************** */ 715345153Sdimclass kmp_stats_list { 716345153Sdim int gtid; 717345153Sdim timeStat _timers[TIMER_LAST + 1]; 718345153Sdim counter _counters[COUNTER_LAST + 1]; 719345153Sdim explicitTimer thread_life_timer; 720345153Sdim partitionedTimers _partitionedTimers; 721345153Sdim int _nestLevel; // one per thread 722345153Sdim kmp_stats_event_vector _event_vector; 723345153Sdim kmp_stats_list *next; 724345153Sdim kmp_stats_list *prev; 725345153Sdim stats_state_e state; 726345153Sdim int thread_is_idle_flag; 727345153Sdim 728345153Sdimpublic: 729345153Sdim kmp_stats_list() 730345153Sdim : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life], 731345153Sdim TIMER_OMP_worker_thread_life), 732345153Sdim _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE), 733345153Sdim thread_is_idle_flag(0) {} 734345153Sdim ~kmp_stats_list() {} 735345153Sdim inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; } 736345153Sdim inline counter *getCounter(counter_e idx) { return &_counters[idx]; } 737345153Sdim inline partitionedTimers *getPartitionedTimers() { 738345153Sdim return &_partitionedTimers; 739345153Sdim } 740345153Sdim inline timeStat *getTimers() { return _timers; } 741345153Sdim inline counter *getCounters() { return _counters; } 
742345153Sdim inline kmp_stats_event_vector &getEventVector() { return _event_vector; } 743345153Sdim inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); } 744345153Sdim inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(), this); } 745345153Sdim inline void resetEventVector() { _event_vector.reset(); } 746345153Sdim inline void incrementNestValue() { _nestLevel++; } 747345153Sdim inline int getNestValue() { return _nestLevel; } 748345153Sdim inline void decrementNestValue() { _nestLevel--; } 749345153Sdim inline int getGtid() const { return gtid; } 750345153Sdim inline void setGtid(int newgtid) { gtid = newgtid; } 751345153Sdim inline void setState(stats_state_e newstate) { state = newstate; } 752345153Sdim inline stats_state_e getState() const { return state; } 753345153Sdim inline stats_state_e *getStatePointer() { return &state; } 754345153Sdim inline bool isIdle() { return thread_is_idle_flag == 1; } 755345153Sdim inline void setIdleFlag() { thread_is_idle_flag = 1; } 756345153Sdim inline void resetIdleFlag() { thread_is_idle_flag = 0; } 757345153Sdim kmp_stats_list *push_back(int gtid); // returns newly created list node 758345153Sdim inline void push_event(uint64_t start_time, uint64_t stop_time, 759345153Sdim int nest_level, timer_e name) { 760345153Sdim _event_vector.push_back(start_time, stop_time, nest_level, name); 761345153Sdim } 762345153Sdim void deallocate(); 763345153Sdim class iterator; 764345153Sdim kmp_stats_list::iterator begin(); 765345153Sdim kmp_stats_list::iterator end(); 766345153Sdim int size(); 767345153Sdim class iterator { 768345153Sdim kmp_stats_list *ptr; 769345153Sdim friend kmp_stats_list::iterator kmp_stats_list::begin(); 770345153Sdim friend kmp_stats_list::iterator kmp_stats_list::end(); 771345153Sdim 772345153Sdim public: 773345153Sdim iterator(); 774345153Sdim ~iterator(); 775345153Sdim iterator operator++(); 776345153Sdim iterator operator++(int dummy); 777345153Sdim iterator 
operator--(); 778345153Sdim iterator operator--(int dummy); 779345153Sdim bool operator!=(const iterator &rhs); 780345153Sdim bool operator==(const iterator &rhs); 781345153Sdim kmp_stats_list *operator*() const; // dereference operator 782345153Sdim }; 783345153Sdim}; 784345153Sdim 785345153Sdim/* **************************************************************** 786345153Sdim Class to encapsulate all output functions and the environment variables 787345153Sdim 788345153Sdim This module holds filenames for various outputs (normal stats, events, plot 789345153Sdim file), as well as coloring information for the plot file. 790345153Sdim 791345153Sdim The filenames and flags variables are read from environment variables. 792345153Sdim These are read once by the constructor of the global variable 793345153Sdim __kmp_stats_output which calls init(). 794345153Sdim 795345153Sdim During this init() call, event flags for the timeStat::timerInfo[] global 796345153Sdim array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). 797345153Sdim 798345153Sdim The only interface function that is public is outputStats(heading). 
This 799345153Sdim function should print out everything it needs to, either to files or stderr, 800345153Sdim depending on the environment variables described below 801345153Sdim 802345153Sdim ENVIRONMENT VARIABLES: 803345153Sdim KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this 804345153Sdim file, otherwise, print to stderr 805345153Sdim KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to 806345153Sdim either KMP_STATS_FILE or stderr 807345153Sdim KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, 808345153Sdim otherwise, the plot file is sent to "events.plt" 809345153Sdim KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log 810345153Sdim events 811345153Sdim KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, 812345153Sdim otherwise, output is sent to "events.dat" 813345153Sdim**************************************************************** */ 814345153Sdimclass kmp_stats_output_module { 815345153Sdim 816345153Sdimpublic: 817345153Sdim struct rgb_color { 818345153Sdim float r; 819345153Sdim float g; 820345153Sdim float b; 821345153Sdim }; 822345153Sdim 823345153Sdimprivate: 824345153Sdim std::string outputFileName; 825345153Sdim static const char *eventsFileName; 826345153Sdim static const char *plotFileName; 827345153Sdim static int printPerThreadFlag; 828345153Sdim static int printPerThreadEventsFlag; 829345153Sdim static const rgb_color globalColorArray[]; 830345153Sdim static rgb_color timerColorInfo[]; 831345153Sdim 832345153Sdim void init(); 833345153Sdim static void setupEventColors(); 834345153Sdim static void printPloticusFile(); 835345153Sdim static void printHeaderInfo(FILE *statsOut); 836345153Sdim static void printTimerStats(FILE *statsOut, statistic const *theStats, 837345153Sdim statistic const *totalStats); 838345153Sdim static void printCounterStats(FILE *statsOut, statistic const *theStats); 839345153Sdim static void 
printCounters(FILE *statsOut, counter const *theCounters); 840345153Sdim static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents, 841345153Sdim int gtid); 842345153Sdim static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } 843345153Sdim static void windupExplicitTimers(); 844345153Sdim bool eventPrintingEnabled() const { return printPerThreadEventsFlag; } 845345153Sdim 846345153Sdimpublic: 847345153Sdim kmp_stats_output_module() { init(); } 848345153Sdim void outputStats(const char *heading); 849345153Sdim}; 850345153Sdim 851345153Sdim#ifdef __cplusplus 852345153Sdimextern "C" { 853345153Sdim#endif 854345153Sdimvoid __kmp_stats_init(); 855345153Sdimvoid __kmp_stats_fini(); 856345153Sdimvoid __kmp_reset_stats(); 857345153Sdimvoid __kmp_output_stats(const char *); 858345153Sdimvoid __kmp_accumulate_stats_at_exit(void); 859345153Sdim// thread local pointer to stats node within list 860345153Sdimextern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr; 861345153Sdim// head to stats list. 862345153Sdimextern kmp_stats_list *__kmp_stats_list; 863345153Sdim// lock for __kmp_stats_list 864345153Sdimextern kmp_tas_lock_t __kmp_stats_lock; 865345153Sdim// reference start time 866345153Sdimextern tsc_tick_count __kmp_stats_start_time; 867345153Sdim// interface to output 868345153Sdimextern kmp_stats_output_module __kmp_stats_output; 869345153Sdim 870345153Sdim#ifdef __cplusplus 871345153Sdim} 872345153Sdim#endif 873345153Sdim 874345153Sdim// Simple, standard interfaces that drop out completely if stats aren't enabled 875345153Sdim 876345153Sdim/*! 877345153Sdim * \brief Adds value to specified timer (name). 
878345153Sdim * 879345153Sdim * @param name timer name as specified under the KMP_FOREACH_TIMER() macro 880345153Sdim * @param value double precision sample value to add to statistics for the timer 881345153Sdim * 882345153Sdim * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to 883345153Sdim * a timer statistics. 884345153Sdim * 885345153Sdim * @ingroup STATS_GATHERING 886345153Sdim*/ 887345153Sdim#define KMP_COUNT_VALUE(name, value) \ 888345153Sdim __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) 889345153Sdim 890345153Sdim/*! 891345153Sdim * \brief Increments specified counter (name). 892345153Sdim * 893345153Sdim * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro 894345153Sdim * 895345153Sdim * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics 896345153Sdim * counter for the executing thread. 897345153Sdim * 898345153Sdim * @ingroup STATS_GATHERING 899345153Sdim*/ 900345153Sdim#define KMP_COUNT_BLOCK(name) \ 901345153Sdim __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() 902345153Sdim 903345153Sdim/*! 904345153Sdim * \brief Outputs the current thread statistics and reset them. 905345153Sdim * 906345153Sdim * @param heading_string heading put above the final stats output 907345153Sdim * 908345153Sdim * \details Explicitly stops all timers and outputs all stats. Environment 909345153Sdim * variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a 910345153Sdim * filename instead of stderr. Environment variable, 911345153Sdim * `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific 912345153Sdim * stats. For now the `OMPTB_STATSTHREADS` environment variable can either be 913345153Sdim * defined with any value, which will print out thread specific stats, or it can 914345153Sdim * be undefined (not specified in the environment) and thread specific stats 915345153Sdim * won't be printed. 
It should be noted that all statistics are reset when this 916345153Sdim * macro is called. 917345153Sdim * 918345153Sdim * @ingroup STATS_GATHERING 919345153Sdim*/ 920345153Sdim#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string) 921345153Sdim 922345153Sdim/*! 923345153Sdim * \brief Initializes the paritioned timers to begin with name. 924345153Sdim * 925345153Sdim * @param name timer which you want this thread to begin with 926345153Sdim * 927345153Sdim * @ingroup STATS_GATHERING 928345153Sdim*/ 929345153Sdim#define KMP_INIT_PARTITIONED_TIMERS(name) \ 930345153Sdim __kmp_stats_thread_ptr->getPartitionedTimers()->init(explicitTimer( \ 931345153Sdim __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)) 932345153Sdim 933345153Sdim#define KMP_TIME_PARTITIONED_BLOCK(name) \ 934345153Sdim blockPartitionedTimer __PBLOCKTIME__( \ 935345153Sdim __kmp_stats_thread_ptr->getPartitionedTimers(), \ 936345153Sdim explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \ 937345153Sdim TIMER_##name)) 938345153Sdim 939345153Sdim#define KMP_PUSH_PARTITIONED_TIMER(name) \ 940345153Sdim __kmp_stats_thread_ptr->getPartitionedTimers()->push(explicitTimer( \ 941345153Sdim __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)) 942345153Sdim 943345153Sdim#define KMP_POP_PARTITIONED_TIMER() \ 944345153Sdim __kmp_stats_thread_ptr->getPartitionedTimers()->pop() 945345153Sdim 946345153Sdim#define KMP_EXCHANGE_PARTITIONED_TIMER(name) \ 947345153Sdim __kmp_stats_thread_ptr->getPartitionedTimers()->exchange(explicitTimer( \ 948345153Sdim __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)) 949345153Sdim 950345153Sdim#define KMP_SET_THREAD_STATE(state_name) \ 951345153Sdim __kmp_stats_thread_ptr->setState(state_name) 952345153Sdim 953345153Sdim#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState() 954345153Sdim 955345153Sdim#define KMP_SET_THREAD_STATE_BLOCK(state_name) \ 956345153Sdim blockThreadState 
__BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \ 957345153Sdim state_name) 958345153Sdim 959345153Sdim/*! 960345153Sdim * \brief resets all stats (counters to 0, timers to 0 elapsed ticks) 961345153Sdim * 962345153Sdim * \details Reset all stats for all threads. 963345153Sdim * 964345153Sdim * @ingroup STATS_GATHERING 965345153Sdim*/ 966345153Sdim#define KMP_RESET_STATS() __kmp_reset_stats() 967345153Sdim 968345153Sdim#if (KMP_DEVELOPER_STATS) 969345153Sdim#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v) 970345153Sdim#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) 971345153Sdim#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) 972353358Sdim#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) KMP_PUSH_PARTITIONED_TIMER(n) 973353358Sdim#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) KMP_POP_PARTITIONED_TIMER(n) 974353358Sdim#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) \ 975353358Sdim KMP_EXCHANGE_PARTITIONED_TIMER(n) 976345153Sdim#else 977345153Sdim// Null definitions 978345153Sdim#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 979345153Sdim#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 980345153Sdim#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 981353358Sdim#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0) 982353358Sdim#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0) 983353358Sdim#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0) 984345153Sdim#endif 985345153Sdim 986345153Sdim#else // KMP_STATS_ENABLED 987345153Sdim 988345153Sdim// Null definitions 989345153Sdim#define KMP_COUNT_VALUE(n, v) ((void)0) 990345153Sdim#define KMP_COUNT_BLOCK(n) ((void)0) 991345153Sdim 992345153Sdim#define KMP_OUTPUT_STATS(heading_string) ((void)0) 993345153Sdim#define KMP_RESET_STATS() ((void)0) 994345153Sdim 995345153Sdim#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) 996345153Sdim#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) 997353358Sdim#define 
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) 998353358Sdim#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0) 999353358Sdim#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0) 1000353358Sdim#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0) 1001345153Sdim#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) 1002345153Sdim#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) 1003345153Sdim#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) 1004345153Sdim#define KMP_POP_PARTITIONED_TIMER() ((void)0) 1005345153Sdim#define KMP_SET_THREAD_STATE(state_name) ((void)0) 1006345153Sdim#define KMP_GET_THREAD_STATE() ((void)0) 1007345153Sdim#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) 1008345153Sdim#endif // KMP_STATS_ENABLED 1009345153Sdim 1010345153Sdim#endif // KMP_STATS_H 1011