/*
 * kmp_wait_release.h -- Wait/Release implementation (revision 360784)
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest-level thread
synchronization operations of suspending a thread and awakening it. They are
used to build higher-level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_type t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in   set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return t; }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};
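
/* Illustrative sketch (not part of this header's API surface): a derived flag
   is driven as a wait/release pair. The waiter constructs a flag over a shared
   location together with the value it expects (the "checker"), then blocks;
   the releasing thread constructs a flag over the same location and releases
   it. Names below mirror the concrete classes defined later in this file; the
   barrier fields are only an assumed example location.
   @code
   // waiting side: block until b_go is bumped to KMP_BARRIER_STATE_BUMP
   kmp_flag_64 wait_flag(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
   wait_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
   // releasing side, on the same location, naming the waiter so resume works
   kmp_flag_64 go_flag(&other_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                       other_thr);
   go_flag.release();
   @endcode
*/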

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point.  */
template <class C, int final_spin, bool cancellable = false,
          bool sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
   This function is called from
     __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
       these have join / fork behavior

   In these cases, we don't change the state or trigger events in THIS
   function. Events are triggered in the calling code (__kmp_barrier):

       state := ompt_state_overhead
       barrier-begin
       barrier-wait-begin
       state := ompt_state_wait_barrier
       call join-barrier-implementation (finally arrive here)
       {}
       call fork-barrier-implementation (finally arrive here)
       {}
       state := ompt_state_overhead
       barrier-wait-end
       barrier-end
       state := ompt_state_work_parallel

     __kmp_fork_barrier (after thread creation, before executing implicit task)
       call fork-barrier-implementation (finally arrive here)
       {} // worker arrives here with state = ompt_state_idle

     __kmp_join_barrier (implicit barrier at end of parallel region)
       state := ompt_state_barrier_implicit
       barrier-begin
       barrier-wait-begin
       call join-barrier-implementation (finally arrive here, final_spin=FALSE)
       {}
     __kmp_fork_barrier (implicit barrier at end of parallel region)
       call fork-barrier-implementation (finally arrive here, final_spin=TRUE)

   Worker after task-team is finished:
       barrier-wait-end
       barrier-end
       implicit-task-end
       idle-begin
       state := ompt_state_idle

   Before leaving, if state = ompt_state_idle
       idle-end
       state := ompt_state_overhead
*/
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon.  */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region.  This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0).  */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now; other cases should have been caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}
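
/* Instantiation sketch (illustrative): the template parameters select the
   loop's behavior at compile time. final_spin marks the last wait of the
   implicit task, cancellable enables the cancel_parallel checks, and
   sleepable=false forbids flag->suspend() entirely.
   @code
   // spin-only, cancellation-aware wait on a 64-bit flag; mirrors
   // kmp_flag_64::wait_cancellable_nosleep defined later in this file
   bool cancelled = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
       this_thr, flag USE_ITT_BUILD_ARG(itt_sync_obj));
   @endcode
*/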

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks!  */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
                 flag->get(), flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
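
/* Minimal sketch of the sleep-bit protocol that suspend and release rely on
   (illustrative only; the real handshake lives in the platform-specific
   __kmp_suspend_* / __kmp_resume_* routines). The waiter advertises that it is
   about to sleep by setting the sleep bit, then re-checks the flag so a
   release that raced in is not missed; the releaser bumps the flag and checks
   the bit to decide whether a resume is needed.
   @code
   // waiter, about to block (hypothetical 64-bit flag)
   kmp_uint64 old_spin = flag->set_sleeping();
   if (flag->done_check_val(old_spin)) {
     flag->unset_sleeping(); // released in the meantime; don't sleep
   } else {
     // ... block on the per-thread condition variable ...
   }
   // releaser
   flag->internal_release();
   if (flag->is_any_sleeping())
     flag->resume(wait_gtid); // wake the suspended waiter
   @endcode
*/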

template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
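
/* Sketch of how the traits are consumed (illustrative; it mirrors the generic
   code in kmp_basic_flag_native below). A template parameterized only on the
   integer type picks the matching 32/64-bit primitive through the traits at
   compile time, with no runtime dispatch. example_release is a hypothetical
   name, not a function in this runtime.
   @code
   template <typename FlagType>
   static void example_release(volatile FlagType *f) {
     typedef flag_traits<FlagType> traits_type;
     (void)traits_type::test_then_add4(f); // bump flag past the sleep bits
   }
   @endcode
*/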

// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check whether it
                       has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check whether it
                       has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this flag. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in   index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in   the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() { return this->load() == checker; }
  /*!
   * @param old_loc in   old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in   old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};
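
/* Illustrative use of kmp_flag_32 (a sketch, not code from this runtime):
   waiting until a 32-bit atomic counter reaches an expected value. done_count
   and nproc are hypothetical names for this example.
   @code
   std::atomic<kmp_uint32> done_count; // assumed shared counter
   // waiter: block until all nproc workers have checked in
   kmp_flag_32 flag(&done_count, (kmp_uint32)nproc);
   flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(NULL));
   @endcode
*/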

class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
                                int final_spin
                                    USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    bool retval = false;
    if (final_spin)
      retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    return retval;
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
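
/* Sketch of the cancellable wait variant (illustrative): when OpenMP
   cancellation of the enclosing parallel region is possible, callers can use
   wait_cancellable_nosleep(), which spins without ever suspending and returns
   true if the wait ended by cancellation rather than release. The b_arrived
   location and expected value here are assumed for the example.
   @code
   kmp_flag_64 flag(&this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived,
                    expected_arrived_value); // hypothetical checker value
   bool cancelled = flag.wait_cancellable_nosleep(
       this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
   if (cancelled) {
     // unwind: the parallel region was cancelled while we waited
   }
   @endcode
*/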

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }
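
  /* The 64-bit flag is treated as an array of 8 one-byte sub-flags, one per
     participant; byteref() selects the byte at the given offset. Sketch
     (illustrative only):

       kmp_uint64 f = 0;
       byteref(&f, 2) = 1; // mark participant 2 done
       // f == 0x0000000000010000 on a little-endian machine
  */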

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}
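
/* Illustrative call site (a sketch; th_sleep_loc is the per-thread field this
   wrapper is normally given, and the read pattern below is an assumption for
   the example):
   @code
   volatile void *sleep_loc = TCR_PTR(thr->th.th_sleep_loc);
   if (sleep_loc != NULL)
     __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thr), sleep_loc);
   @endcode
*/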

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H