/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems
// on different Linux* OS distributions that either require you to include (or
// break when you try to include) <pci/types.h>. Since all we need are the two
// macros below (which are part of the kernel ABI, so can't change), we just
// define the constants here and don't include <futex.h>.
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use.             */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}
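
// Added commentary (illustrative only, not used by the runtime): the ticket
// counters used by the bakery/ticket locks below rely on exactly this
// modular-arithmetic property, so they keep working after the counters wrap:
//
//   kmp_uint32 next_ticket = 1;           // has wrapped past 0xFFFFFFFF
//   kmp_uint32 now_serving = 0xFFFFFFFFu; // two tickets behind
//   // next_ticket - now_serving == 2 still holds under modulo-2^32 math.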

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture.  On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.

static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  do {
    __kmp_spin_backoff(&backoff);
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  ANNOTATE_TAS_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}

int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_TAS_RELEASED(lck);
  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  lck->lk.poll = KMP_LOCK_FREE(tas);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}
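
// Illustrative sketch (added commentary, not compiled into the runtime): how
// an internal caller drives the plain TAS lock API above. gtid is assumed to
// be the caller's global thread id.
#if 0
static void __kmp_example_tas_usage(kmp_int32 gtid) {
  kmp_tas_lock_t lck;
  __kmp_init_tas_lock(&lck);
  __kmp_acquire_tas_lock(&lck, gtid); // spins with backoff until acquired
  // ... critical section ...
  __kmp_release_tas_lock(&lck, gtid);
  if (__kmp_test_tas_lock(&lck, gtid)) { // non-blocking attempt
    __kmp_release_tas_lock(&lck, gtid);
  }
  __kmp_destroy_tas_lock(&lck);
}
#endif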

// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    ANNOTATE_TAS_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}
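
// Illustrative sketch (added commentary, not compiled into the runtime):
// nesting depth and return-value semantics when the owning thread re-acquires
// a nested TAS lock it already holds. gtid is assumed to be the caller's
// global thread id.
#if 0
static void __kmp_example_nested_tas_usage(kmp_int32 gtid) {
  kmp_tas_lock_t lck;
  __kmp_init_nested_tas_lock(&lck);          // depth_locked == 0
  __kmp_acquire_nested_tas_lock(&lck, gtid); // KMP_LOCK_ACQUIRED_FIRST, depth 1
  __kmp_acquire_nested_tas_lock(&lck, gtid); // KMP_LOCK_ACQUIRED_NEXT, depth 2
  __kmp_release_nested_tas_lock(&lck, gtid); // KMP_LOCK_STILL_HELD, depth 1
  __kmp_release_nested_tas_lock(&lck, gtid); // KMP_LOCK_RELEASED, depth 0
  __kmp_destroy_nested_tas_lock(&lck);
}
#endif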

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention.  They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).
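//
// Added commentary (informal sketch of how the poll word is used below, in
// terms of its KMP_LOCK_STRIP'ed value):
//   KMP_LOCK_FREE(futex)    -> lock is free
//   (gtid + 1) << 1         -> held by gtid, no sleeping waiters known
//   ((gtid + 1) << 1) | 1   -> held by gtid, and at least one thread is parked
//                              (or about to park) in futex_wait on the poll
//                              word, so the releasing thread must futex_wake.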

static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    kmp_int32 rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%d errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue.  We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  ANNOTATE_FUTEX_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_FUTEX_RELEASED(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates.  */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    ANNOTATE_FUTEX_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}

__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}
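
// Minimal standalone sketch (added for illustration only; assumes nothing but
// plain std::atomic, no runtime services) of the same ticket/bakery protocol
// that the code above implements with the runtime's atomics and KMP_WAIT_PTR.
#if 0
struct example_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};
  void lock() {
    unsigned my_ticket = next_ticket.fetch_add(1, std::memory_order_relaxed);
    while (now_serving.load(std::memory_order_acquire) != my_ticket) {
      // The runtime spins via KMP_WAIT_PTR/__kmp_bakery_check instead of a
      // bare busy-wait so it can yield and honor blocktime settings.
    }
  }
  void unlock() { now_serving.fetch_add(1, std::memory_order_release); }
};
#endif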

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  ANNOTATE_TICKET_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}

static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}

int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  ANNOTATE_TICKET_RELEASED(lck);
  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}
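
// Added commentary: "distance" above is the number of outstanding tickets at
// release time (the holder plus all waiters), so the yield hint fires only
// when that count exceeds the number of available processors, i.e. when some
// waiters cannot each be spinning on a core of their own.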

static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  return __kmp_release_ticket_lock(lck, gtid);
}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_ticket_lock(lck);
}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                   std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
    ANNOTATE_TICKET_ACQUIRED(lck);
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_ticket_lock(lck, gtid);
}

int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                            std::memory_order_relaxed) +
             1;
  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
    retval = 0;
  } else {
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    retval = 1;
  }
  return retval;
}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_ticket_lock(lck, gtid);
}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
                                      std::memory_order_relaxed) -
       1) == 0) {
    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
    __kmp_release_ticket_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_ticket_lock(lck, gtid);
}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
 */
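
/* Added commentary: an illustrative trace of the transitions above with three
   threads T1/T2/T3 (gtids 0/1/2, so queue ids 1/2/3):
      T1 acquire: (0,0)  -> (-1,0)   T1 holds, queue empty
      T2 acquire: (-1,0) -> (2,2)    T2 enqueues itself and spins
      T3 acquire: (2,2)  -> (2,3)    T3 appended; T2's th_next_waiting = 3
      T1 release: (2,3)  -> (3,3)    T2 dequeued and handed the lock
      T2 release: (3,3)  -> (-1,0)   T3 dequeued and handed the lock
      T3 release: (-1,0) -> (0,0)    lock is free again */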

#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock(";  tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
1087345153Sdim__forceinline static int
1088345153Sdim__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1089345153Sdim                                          kmp_int32 gtid) {
1090345153Sdim  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1091345153Sdim  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1092345153Sdim  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1093345153Sdim  volatile kmp_uint32 *spin_here_p;
1094345153Sdim  kmp_int32 need_mf = 1;
1095345153Sdim
1096345153Sdim#if OMPT_SUPPORT
1097345153Sdim  ompt_state_t prev_state = ompt_state_undefined;
1098345153Sdim#endif
1099345153Sdim
1100345153Sdim  KA_TRACE(1000,
1101345153Sdim           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1102345153Sdim
1103345153Sdim  KMP_FSYNC_PREPARE(lck);
1104345153Sdim  KMP_DEBUG_ASSERT(this_thr != NULL);
1105345153Sdim  spin_here_p = &this_thr->th.th_spin_here;
1106345153Sdim
1107345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1108345153Sdim  TRACE_LOCK(gtid + 1, "acq ent");
1109345153Sdim  if (*spin_here_p)
1110345153Sdim    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1111345153Sdim  if (this_thr->th.th_next_waiting != 0)
1112345153Sdim    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1113345153Sdim#endif
1114345153Sdim  KMP_DEBUG_ASSERT(!*spin_here_p);
1115345153Sdim  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1116345153Sdim
1117345153Sdim  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1118345153Sdim     head_id_p that may follow, not just in execution order, but also in
1119345153Sdim     visibility order. This way, when a releasing thread observes the changes to
1120345153Sdim     the queue by this thread, it can rightly assume that spin_here_p has
1121345153Sdim     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1122345153Sdim     not premature.  If the releasing thread sets spin_here_p to FALSE before
1123345153Sdim     this thread sets it to TRUE, this thread will hang. */
1124345153Sdim  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1125345153Sdim
1126345153Sdim  while (1) {
1127345153Sdim    kmp_int32 enqueued;
1128345153Sdim    kmp_int32 head;
1129345153Sdim    kmp_int32 tail;
1130345153Sdim
1131345153Sdim    head = *head_id_p;
1132345153Sdim
1133345153Sdim    switch (head) {
1134345153Sdim
1135345153Sdim    case -1: {
1136345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1137345153Sdim      tail = *tail_id_p;
1138345153Sdim      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1139345153Sdim#endif
1140345153Sdim      tail = 0; /* to make sure next link asynchronously read is not set
1141345153Sdim                accidentally; this assignment prevents us from entering the
1142345153Sdim                if ( t > 0 ) condition in the enqueued case below, which is not
1143345153Sdim                necessary for this state transition */
1144345153Sdim
1145345153Sdim      need_mf = 0;
1146345153Sdim      /* try (-1,0)->(tid,tid) */
1147345153Sdim      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1148345153Sdim                                             KMP_PACK_64(-1, 0),
1149345153Sdim                                             KMP_PACK_64(gtid + 1, gtid + 1));
1150345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1151345153Sdim      if (enqueued)
1152345153Sdim        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1153345153Sdim#endif
1154345153Sdim    } break;
1155345153Sdim
1156345153Sdim    default: {
1157345153Sdim      tail = *tail_id_p;
1158345153Sdim      KMP_DEBUG_ASSERT(tail != gtid + 1);
1159345153Sdim
1160345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1161345153Sdim      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1162345153Sdim#endif
1163345153Sdim
1164345153Sdim      if (tail == 0) {
1165345153Sdim        enqueued = FALSE;
1166345153Sdim      } else {
1167345153Sdim        need_mf = 0;
1168345153Sdim        /* try (h,t) or (h,h)->(h,tid) */
1169345153Sdim        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1170345153Sdim
1171345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1172345153Sdim        if (enqueued)
1173345153Sdim          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1174345153Sdim#endif
1175345153Sdim      }
1176345153Sdim    } break;
1177345153Sdim
1178345153Sdim    case 0: /* empty queue */
1179345153Sdim    {
1180345153Sdim      kmp_int32 grabbed_lock;
1181345153Sdim
1182345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1183345153Sdim      tail = *tail_id_p;
1184345153Sdim      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1185345153Sdim#endif
1186345153Sdim      /* try (0,0)->(-1,0) */
1187345153Sdim
1188345153Sdim      /* only legal transition out of head = 0 is head = -1 with no change to
1189345153Sdim       * tail */
1190345153Sdim      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1191345153Sdim
1192345153Sdim      if (grabbed_lock) {
1193345153Sdim
1194345153Sdim        *spin_here_p = FALSE;
1195345153Sdim
1196345153Sdim        KA_TRACE(
1197345153Sdim            1000,
1198345153Sdim            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1199345153Sdim             lck, gtid));
1200345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1201345153Sdim        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1202345153Sdim#endif
1203345153Sdim
1204345153Sdim#if OMPT_SUPPORT
1205345153Sdim        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1206345153Sdim          /* change the state before clearing wait_id */
1207345153Sdim          this_thr->th.ompt_thread_info.state = prev_state;
1208345153Sdim          this_thr->th.ompt_thread_info.wait_id = 0;
1209345153Sdim        }
1210345153Sdim#endif
1211345153Sdim
1212345153Sdim        KMP_FSYNC_ACQUIRED(lck);
1213345153Sdim        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1214345153Sdim      }
1215345153Sdim      enqueued = FALSE;
1216345153Sdim    } break;
1217345153Sdim    }
1218345153Sdim
1219345153Sdim#if OMPT_SUPPORT
1220345153Sdim    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1221345153Sdim      /* this thread will spin; set wait_id before entering wait state */
1222345153Sdim      prev_state = this_thr->th.ompt_thread_info.state;
1223345153Sdim      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1224345153Sdim      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1225345153Sdim    }
1226345153Sdim#endif
1227345153Sdim
1228345153Sdim    if (enqueued) {
1229345153Sdim      if (tail > 0) {
1230345153Sdim        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1231345153Sdim        KMP_ASSERT(tail_thr != NULL);
1232345153Sdim        tail_thr->th.th_next_waiting = gtid + 1;
1233345153Sdim        /* corresponding wait for this write in release code */
1234345153Sdim      }
1235345153Sdim      KA_TRACE(1000,
1236345153Sdim               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1237345153Sdim                lck, gtid));
1238345153Sdim
1239345153Sdim      KMP_MB();
1240353358Sdim      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1241353358Sdim      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1242345153Sdim
1243345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1244345153Sdim      TRACE_LOCK(gtid + 1, "acq spin");
1245345153Sdim
1246345153Sdim      if (this_thr->th.th_next_waiting != 0)
1247345153Sdim        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1248345153Sdim#endif
1249345153Sdim      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1250345153Sdim      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1251345153Sdim                      "waiting on queue\n",
1252345153Sdim                      lck, gtid));
1253345153Sdim
1254345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1255345153Sdim      TRACE_LOCK(gtid + 1, "acq exit 2");
1256345153Sdim#endif
1257345153Sdim
1258345153Sdim#if OMPT_SUPPORT
1259345153Sdim      /* change the state before clearing wait_id */
1260345153Sdim      this_thr->th.ompt_thread_info.state = prev_state;
1261345153Sdim      this_thr->th.ompt_thread_info.wait_id = 0;
1262345153Sdim#endif
1263345153Sdim
1264345153Sdim      /* got lock, we were dequeued by the thread that released lock */
1265345153Sdim      return KMP_LOCK_ACQUIRED_FIRST;
1266345153Sdim    }
1267345153Sdim
1268345153Sdim    /* Yield if number of threads > number of logical processors */
1269345153Sdim    /* ToDo: Not sure why this should only be in oversubscription case,
1270345153Sdim       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1271353358Sdim    KMP_YIELD_OVERSUB();
1272353358Sdim
1273345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1274345153Sdim    TRACE_LOCK(gtid + 1, "acq retry");
1275345153Sdim#endif
1276345153Sdim  }
1277345153Sdim  KMP_ASSERT2(0, "should not get here");
1278345153Sdim  return KMP_LOCK_ACQUIRED_FIRST;
1279345153Sdim}
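
// Summary of the queuing lock state encoding used above and in the test /
// release routines below (values stored in head_id / tail_id are gtid+1):
//   head == 0               lock free, no waiting threads
//   head == -1, tail == 0   lock held, wait queue empty
//   head == h > 0           lock held; h identifies the first waiter and tail
//                           the last, chained through th.th_next_waiting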
1280345153Sdim
1281345153Sdimint __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1282345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
1283345153Sdim
1284345153Sdim  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1285345153Sdim  ANNOTATE_QUEUING_ACQUIRED(lck);
1286345153Sdim  return retval;
1287345153Sdim}
1288345153Sdim
1289345153Sdimstatic int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1290345153Sdim                                                  kmp_int32 gtid) {
1291345153Sdim  char const *const func = "omp_set_lock";
1292345153Sdim  if (lck->lk.initialized != lck) {
1293345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1294345153Sdim  }
1295345153Sdim  if (__kmp_is_queuing_lock_nestable(lck)) {
1296345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
1297345153Sdim  }
1298345153Sdim  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1299345153Sdim    KMP_FATAL(LockIsAlreadyOwned, func);
1300345153Sdim  }
1301345153Sdim
1302345153Sdim  __kmp_acquire_queuing_lock(lck, gtid);
1303345153Sdim
1304345153Sdim  lck->lk.owner_id = gtid + 1;
1305345153Sdim  return KMP_LOCK_ACQUIRED_FIRST;
1306345153Sdim}
1307345153Sdim
1308345153Sdimint __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1309345153Sdim  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1310345153Sdim  kmp_int32 head;
1311345153Sdim#ifdef KMP_DEBUG
1312345153Sdim  kmp_info_t *this_thr;
1313345153Sdim#endif
1314345153Sdim
1315345153Sdim  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1316345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
1317345153Sdim#ifdef KMP_DEBUG
1318345153Sdim  this_thr = __kmp_thread_from_gtid(gtid);
1319345153Sdim  KMP_DEBUG_ASSERT(this_thr != NULL);
1320345153Sdim  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1321345153Sdim#endif
1322345153Sdim
1323345153Sdim  head = *head_id_p;
1324345153Sdim
1325345153Sdim  if (head == 0) { /* nobody on queue, nobody holding */
1326345153Sdim    /* try (0,0)->(-1,0) */
1327345153Sdim    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1328345153Sdim      KA_TRACE(1000,
1329345153Sdim               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1330345153Sdim      KMP_FSYNC_ACQUIRED(lck);
1331345153Sdim      ANNOTATE_QUEUING_ACQUIRED(lck);
1332345153Sdim      return TRUE;
1333345153Sdim    }
1334345153Sdim  }
1335345153Sdim
1336345153Sdim  KA_TRACE(1000,
1337345153Sdim           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1338345153Sdim  return FALSE;
1339345153Sdim}
1340345153Sdim
1341345153Sdimstatic int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1342345153Sdim                                               kmp_int32 gtid) {
1343345153Sdim  char const *const func = "omp_test_lock";
1344345153Sdim  if (lck->lk.initialized != lck) {
1345345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1346345153Sdim  }
1347345153Sdim  if (__kmp_is_queuing_lock_nestable(lck)) {
1348345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
1349345153Sdim  }
1350345153Sdim
1351345153Sdim  int retval = __kmp_test_queuing_lock(lck, gtid);
1352345153Sdim
1353345153Sdim  if (retval) {
1354345153Sdim    lck->lk.owner_id = gtid + 1;
1355345153Sdim  }
1356345153Sdim  return retval;
1357345153Sdim}
1358345153Sdim
1359345153Sdimint __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1360345153Sdim  kmp_info_t *this_thr;
1361345153Sdim  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1362345153Sdim  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1363345153Sdim
1364345153Sdim  KA_TRACE(1000,
1365345153Sdim           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1366345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
1367345153Sdim  this_thr = __kmp_thread_from_gtid(gtid);
1368345153Sdim  KMP_DEBUG_ASSERT(this_thr != NULL);
1369345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1370345153Sdim  TRACE_LOCK(gtid + 1, "rel ent");
1371345153Sdim
1372345153Sdim  if (this_thr->th.th_spin_here)
1373345153Sdim    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1374345153Sdim  if (this_thr->th.th_next_waiting != 0)
1375345153Sdim    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1376345153Sdim#endif
1377345153Sdim  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1378345153Sdim  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1379345153Sdim
1380345153Sdim  KMP_FSYNC_RELEASING(lck);
1381345153Sdim  ANNOTATE_QUEUING_RELEASED(lck);
1382345153Sdim
1383345153Sdim  while (1) {
1384345153Sdim    kmp_int32 dequeued;
1385345153Sdim    kmp_int32 head;
1386345153Sdim    kmp_int32 tail;
1387345153Sdim
1388345153Sdim    head = *head_id_p;
1389345153Sdim
1390345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1391345153Sdim    tail = *tail_id_p;
1392345153Sdim    TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1393345153Sdim    if (head == 0)
1394345153Sdim      __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1395345153Sdim#endif
1396345153Sdim    KMP_DEBUG_ASSERT(head !=
1397345153Sdim                     0); /* holding the lock, head must be -1 or queue head */
1398345153Sdim
1399345153Sdim    if (head == -1) { /* nobody on queue */
1400345153Sdim      /* try (-1,0)->(0,0) */
1401345153Sdim      if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1402345153Sdim        KA_TRACE(
1403345153Sdim            1000,
1404345153Sdim            ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1405345153Sdim             lck, gtid));
1406345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1407345153Sdim        TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1408345153Sdim#endif
1409345153Sdim
1410345153Sdim#if OMPT_SUPPORT
1411345153Sdim/* nothing to do - no other thread is trying to shift blame */
1412345153Sdim#endif
1413345153Sdim        return KMP_LOCK_RELEASED;
1414345153Sdim      }
1415345153Sdim      dequeued = FALSE;
1416345153Sdim    } else {
1417345153Sdim      KMP_MB();
1418345153Sdim      tail = *tail_id_p;
1419345153Sdim      if (head == tail) { /* only one thread on the queue */
1420345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1421345153Sdim        if (head <= 0)
1422345153Sdim          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1423345153Sdim#endif
1424345153Sdim        KMP_DEBUG_ASSERT(head > 0);
1425345153Sdim
1426345153Sdim        /* try (h,h)->(-1,0) */
1427345153Sdim        dequeued = KMP_COMPARE_AND_STORE_REL64(
1428345153Sdim            RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1429345153Sdim            KMP_PACK_64(-1, 0));
1430345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1431345153Sdim        TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1432345153Sdim#endif
1433345153Sdim
1434345153Sdim      } else {
1435345153Sdim        volatile kmp_int32 *waiting_id_p;
1436345153Sdim        kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1437345153Sdim        KMP_DEBUG_ASSERT(head_thr != NULL);
1438345153Sdim        waiting_id_p = &head_thr->th.th_next_waiting;
1439345153Sdim
1440345153Sdim/* Does this require synchronous reads? */
1441345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1442345153Sdim        if (head <= 0 || tail <= 0)
1443345153Sdim          __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1444345153Sdim#endif
1445345153Sdim        KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1446345153Sdim
1447345153Sdim        /* try (h,t)->(h',t) or (t,t) */
1448345153Sdim        KMP_MB();
1449345153Sdim        /* wait until the enqueuing thread has updated the head thread's
1450345153Sdim         * next-waiting field, then advance the head to that value */
1451353358Sdim        *head_id_p =
1452353358Sdim            KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1453345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1454345153Sdim        TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1455345153Sdim#endif
1456345153Sdim        dequeued = TRUE;
1457345153Sdim      }
1458345153Sdim    }
1459345153Sdim
1460345153Sdim    if (dequeued) {
1461345153Sdim      kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1462345153Sdim      KMP_DEBUG_ASSERT(head_thr != NULL);
1463345153Sdim
1464345153Sdim/* Does this require synchronous reads? */
1465345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1466345153Sdim      if (head <= 0 || tail <= 0)
1467345153Sdim        __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1468345153Sdim#endif
1469345153Sdim      KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1470345153Sdim
1471345153Sdim      /* For cleanliness only. The thread is not actually released until the
1472345153Sdim         next statement, which prevents a race with the acquire code. */
1473345153Sdim      head_thr->th.th_next_waiting = 0;
1474345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1475345153Sdim      TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1476345153Sdim#endif
1477345153Sdim
1478345153Sdim      KMP_MB();
1479345153Sdim      /* reset spin value */
1480345153Sdim      head_thr->th.th_spin_here = FALSE;
1481345153Sdim
1482345153Sdim      KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1483345153Sdim                      "dequeuing\n",
1484345153Sdim                      lck, gtid));
1485345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1486345153Sdim      TRACE_LOCK(gtid + 1, "rel exit 2");
1487345153Sdim#endif
1488345153Sdim      return KMP_LOCK_RELEASED;
1489345153Sdim    }
1490345153Sdim/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1491345153Sdim   threads */
1492345153Sdim
1493345153Sdim#ifdef DEBUG_QUEUING_LOCKS
1494345153Sdim    TRACE_LOCK(gtid + 1, "rel retry");
1495345153Sdim#endif
1496345153Sdim
1497345153Sdim  } /* while */
1498345153Sdim  KMP_ASSERT2(0, "should not get here");
1499345153Sdim  return KMP_LOCK_RELEASED;
1500345153Sdim}
1501345153Sdim
1502345153Sdimstatic int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1503345153Sdim                                                  kmp_int32 gtid) {
1504345153Sdim  char const *const func = "omp_unset_lock";
1505345153Sdim  KMP_MB(); /* in case another processor initialized lock */
1506345153Sdim  if (lck->lk.initialized != lck) {
1507345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1508345153Sdim  }
1509345153Sdim  if (__kmp_is_queuing_lock_nestable(lck)) {
1510345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
1511345153Sdim  }
1512345153Sdim  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1513345153Sdim    KMP_FATAL(LockUnsettingFree, func);
1514345153Sdim  }
1515345153Sdim  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1516345153Sdim    KMP_FATAL(LockUnsettingSetByAnother, func);
1517345153Sdim  }
1518345153Sdim  lck->lk.owner_id = 0;
1519345153Sdim  return __kmp_release_queuing_lock(lck, gtid);
1520345153Sdim}
1521345153Sdim
1522345153Sdimvoid __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1523345153Sdim  lck->lk.location = NULL;
1524345153Sdim  lck->lk.head_id = 0;
1525345153Sdim  lck->lk.tail_id = 0;
1526345153Sdim  lck->lk.next_ticket = 0;
1527345153Sdim  lck->lk.now_serving = 0;
1528345153Sdim  lck->lk.owner_id = 0; // no thread owns the lock.
1529345153Sdim  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1530345153Sdim  lck->lk.initialized = lck;
1531345153Sdim
1532345153Sdim  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1533345153Sdim}
1534345153Sdim
1535345153Sdimvoid __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1536345153Sdim  lck->lk.initialized = NULL;
1537345153Sdim  lck->lk.location = NULL;
1538345153Sdim  lck->lk.head_id = 0;
1539345153Sdim  lck->lk.tail_id = 0;
1540345153Sdim  lck->lk.next_ticket = 0;
1541345153Sdim  lck->lk.now_serving = 0;
1542345153Sdim  lck->lk.owner_id = 0;
1543345153Sdim  lck->lk.depth_locked = -1;
1544345153Sdim}
1545345153Sdim
1546345153Sdimstatic void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1547345153Sdim  char const *const func = "omp_destroy_lock";
1548345153Sdim  if (lck->lk.initialized != lck) {
1549345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1550345153Sdim  }
1551345153Sdim  if (__kmp_is_queuing_lock_nestable(lck)) {
1552345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
1553345153Sdim  }
1554345153Sdim  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1555345153Sdim    KMP_FATAL(LockStillOwned, func);
1556345153Sdim  }
1557345153Sdim  __kmp_destroy_queuing_lock(lck);
1558345153Sdim}
1559345153Sdim
1560345153Sdim// nested queuing locks
1561345153Sdim
1562345153Sdimint __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1563345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
1564345153Sdim
1565345153Sdim  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1566345153Sdim    lck->lk.depth_locked += 1;
1567345153Sdim    return KMP_LOCK_ACQUIRED_NEXT;
1568345153Sdim  } else {
1569345153Sdim    __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1570345153Sdim    ANNOTATE_QUEUING_ACQUIRED(lck);
1571345153Sdim    KMP_MB();
1572345153Sdim    lck->lk.depth_locked = 1;
1573345153Sdim    KMP_MB();
1574345153Sdim    lck->lk.owner_id = gtid + 1;
1575345153Sdim    return KMP_LOCK_ACQUIRED_FIRST;
1576345153Sdim  }
1577345153Sdim}
1578345153Sdim
1579345153Sdimstatic int
1580345153Sdim__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1581345153Sdim                                              kmp_int32 gtid) {
1582345153Sdim  char const *const func = "omp_set_nest_lock";
1583345153Sdim  if (lck->lk.initialized != lck) {
1584345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1585345153Sdim  }
1586345153Sdim  if (!__kmp_is_queuing_lock_nestable(lck)) {
1587345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
1588345153Sdim  }
1589345153Sdim  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1590345153Sdim}
1591345153Sdim
1592345153Sdimint __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1593345153Sdim  int retval;
1594345153Sdim
1595345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
1596345153Sdim
1597345153Sdim  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1598345153Sdim    retval = ++lck->lk.depth_locked;
1599345153Sdim  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1600345153Sdim    retval = 0;
1601345153Sdim  } else {
1602345153Sdim    KMP_MB();
1603345153Sdim    retval = lck->lk.depth_locked = 1;
1604345153Sdim    KMP_MB();
1605345153Sdim    lck->lk.owner_id = gtid + 1;
1606345153Sdim  }
1607345153Sdim  return retval;
1608345153Sdim}
1609345153Sdim
1610345153Sdimstatic int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1611345153Sdim                                                      kmp_int32 gtid) {
1612345153Sdim  char const *const func = "omp_test_nest_lock";
1613345153Sdim  if (lck->lk.initialized != lck) {
1614345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1615345153Sdim  }
1616345153Sdim  if (!__kmp_is_queuing_lock_nestable(lck)) {
1617345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
1618345153Sdim  }
1619345153Sdim  return __kmp_test_nested_queuing_lock(lck, gtid);
1620345153Sdim}
1621345153Sdim
1622345153Sdimint __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1623345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
1624345153Sdim
1625345153Sdim  KMP_MB();
1626345153Sdim  if (--(lck->lk.depth_locked) == 0) {
1627345153Sdim    KMP_MB();
1628345153Sdim    lck->lk.owner_id = 0;
1629345153Sdim    __kmp_release_queuing_lock(lck, gtid);
1630345153Sdim    return KMP_LOCK_RELEASED;
1631345153Sdim  }
1632345153Sdim  return KMP_LOCK_STILL_HELD;
1633345153Sdim}
1634345153Sdim
1635345153Sdimstatic int
1636345153Sdim__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1637345153Sdim                                              kmp_int32 gtid) {
1638345153Sdim  char const *const func = "omp_unset_nest_lock";
1639345153Sdim  KMP_MB(); /* in case another processor initialized lock */
1640345153Sdim  if (lck->lk.initialized != lck) {
1641345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1642345153Sdim  }
1643345153Sdim  if (!__kmp_is_queuing_lock_nestable(lck)) {
1644345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
1645345153Sdim  }
1646345153Sdim  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1647345153Sdim    KMP_FATAL(LockUnsettingFree, func);
1648345153Sdim  }
1649345153Sdim  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1650345153Sdim    KMP_FATAL(LockUnsettingSetByAnother, func);
1651345153Sdim  }
1652345153Sdim  return __kmp_release_nested_queuing_lock(lck, gtid);
1653345153Sdim}
1654345153Sdim
1655345153Sdimvoid __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1656345153Sdim  __kmp_init_queuing_lock(lck);
1657345153Sdim  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1658345153Sdim}
1659345153Sdim
1660345153Sdimvoid __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1661345153Sdim  __kmp_destroy_queuing_lock(lck);
1662345153Sdim  lck->lk.depth_locked = 0;
1663345153Sdim}
1664345153Sdim
1665345153Sdimstatic void
1666345153Sdim__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1667345153Sdim  char const *const func = "omp_destroy_nest_lock";
1668345153Sdim  if (lck->lk.initialized != lck) {
1669345153Sdim    KMP_FATAL(LockIsUninitialized, func);
1670345153Sdim  }
1671345153Sdim  if (!__kmp_is_queuing_lock_nestable(lck)) {
1672345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
1673345153Sdim  }
1674345153Sdim  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1675345153Sdim    KMP_FATAL(LockStillOwned, func);
1676345153Sdim  }
1677345153Sdim  __kmp_destroy_nested_queuing_lock(lck);
1678345153Sdim}
1679345153Sdim
1680345153Sdim// access functions to fields which don't exist for all lock kinds.
1681345153Sdim
1682345153Sdimstatic const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1683345153Sdim  return lck->lk.location;
1684345153Sdim}
1685345153Sdim
1686345153Sdimstatic void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1687345153Sdim                                            const ident_t *loc) {
1688345153Sdim  lck->lk.location = loc;
1689345153Sdim}
1690345153Sdim
1691345153Sdimstatic kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1692345153Sdim  return lck->lk.flags;
1693345153Sdim}
1694345153Sdim
1695345153Sdimstatic void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1696345153Sdim                                         kmp_lock_flags_t flags) {
1697345153Sdim  lck->lk.flags = flags;
1698345153Sdim}
1699345153Sdim
1700345153Sdim#if KMP_USE_ADAPTIVE_LOCKS
1701345153Sdim
1702345153Sdim/* RTM Adaptive locks */
1703345153Sdim
1704345153Sdim#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
1705345153Sdim    (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
1706345153Sdim    (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)
1707345153Sdim
1708345153Sdim#include <immintrin.h>
1709345153Sdim#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1710345153Sdim
1711345153Sdim#else
1712345153Sdim
1713345153Sdim// Values from the status register after failed speculation.
1714345153Sdim#define _XBEGIN_STARTED (~0u)
1715345153Sdim#define _XABORT_EXPLICIT (1 << 0)
1716345153Sdim#define _XABORT_RETRY (1 << 1)
1717345153Sdim#define _XABORT_CONFLICT (1 << 2)
1718345153Sdim#define _XABORT_CAPACITY (1 << 3)
1719345153Sdim#define _XABORT_DEBUG (1 << 4)
1720345153Sdim#define _XABORT_NESTED (1 << 5)
1721345153Sdim#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1722345153Sdim
1723345153Sdim// Aborts for which it's worth trying again immediately
1724345153Sdim#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
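
// For example, the explicit _xabort(0x01) issued by the adaptive lock code
// below (when the lock turns out to be held) yields a status with
// _XABORT_EXPLICIT set and _XABORT_CODE(status) == 0x01; such aborts fall
// under SOFT_ABORT_MASK and may be retried immediately.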
1725345153Sdim
1726345153Sdim#define STRINGIZE_INTERNAL(arg) #arg
1727345153Sdim#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1728345153Sdim
1729345153Sdim// Access to RTM instructions
1730345153Sdim/* A version of XBegin which returns -1 when speculation starts successfully,
1731345153Sdim   and the value of EAX on an abort. This is the same definition as the
1732345153Sdim   compiler intrinsic that will be supported at some point. */
1733345153Sdimstatic __inline int _xbegin() {
1734345153Sdim  int res = -1;
1735345153Sdim
1736345153Sdim#if KMP_OS_WINDOWS
1737345153Sdim#if KMP_ARCH_X86_64
1738345153Sdim  _asm {
1739345153Sdim        _emit 0xC7
1740345153Sdim        _emit 0xF8
1741345153Sdim        _emit 2
1742345153Sdim        _emit 0
1743345153Sdim        _emit 0
1744345153Sdim        _emit 0
1745345153Sdim        jmp   L2
1746345153Sdim        mov   res, eax
1747345153Sdim    L2:
1748345153Sdim  }
1749345153Sdim#else /* IA32 */
1750345153Sdim  _asm {
1751345153Sdim        _emit 0xC7
1752345153Sdim        _emit 0xF8
1753345153Sdim        _emit 2
1754345153Sdim        _emit 0
1755345153Sdim        _emit 0
1756345153Sdim        _emit 0
1757345153Sdim        jmp   L2
1758345153Sdim        mov   res, eax
1759345153Sdim    L2:
1760345153Sdim  }
1761345153Sdim#endif // KMP_ARCH_X86_64
1762345153Sdim#else
1763345153Sdim  /* Note that %eax must be noted as killed (clobbered), because the XSR is
1764345153Sdim     returned in %eax(%rax) on abort.  Other register values are restored, so
1765345153Sdim     don't need to be killed.
1766345153Sdim
1767345153Sdim     We must also mark 'res' as an input and an output, since otherwise
1768345153Sdim     'res=-1' may be dropped as being dead, whereas we do need the assignment on
1769345153Sdim     the successful (i.e., non-abort) path. */
1770345153Sdim  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
1771345153Sdim                   "   .long  1f-1b-6\n"
1772345153Sdim                   "    jmp   2f\n"
1773345153Sdim                   "1:  movl  %%eax,%0\n"
1774345153Sdim                   "2:"
1775345153Sdim                   : "+r"(res)::"memory", "%eax");
1776345153Sdim#endif // KMP_OS_WINDOWS
1777345153Sdim  return res;
1778345153Sdim}
1779345153Sdim
1780345153Sdim/* Transaction end */
1781345153Sdimstatic __inline void _xend() {
1782345153Sdim#if KMP_OS_WINDOWS
1783345153Sdim  __asm {
1784345153Sdim        _emit 0x0f
1785345153Sdim        _emit 0x01
1786345153Sdim        _emit 0xd5
1787345153Sdim  }
1788345153Sdim#else
1789345153Sdim  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1790345153Sdim#endif
1791345153Sdim}
1792345153Sdim
1793345153Sdim/* This is a macro; the argument must be a single-byte constant that can be
1794345153Sdim   evaluated by the inline assembler, since it is emitted as a byte into the
1795345153Sdim   assembly code. */
1796345153Sdim// clang-format off
1797345153Sdim#if KMP_OS_WINDOWS
1798345153Sdim#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1799345153Sdim#else
1800345153Sdim#define _xabort(ARG)                                                           \
1801345153Sdim  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1802345153Sdim#endif
1803345153Sdim// clang-format on
1804345153Sdim#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
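
// Illustrative only: the canonical usage pattern for the RTM primitives above,
// as exercised by __kmp_test_adaptive_lock_only() and
// __kmp_release_adaptive_lock() below. A minimal sketch that is not compiled
// into the library (the function name here is hypothetical).
#if 0
static int example_rtm_critical_section() {
  kmp_uint32 status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    // Speculative execution started; do the critical work, then commit.
    _xend();
    return 1;
  }
  // The transaction aborted; SOFT_ABORT_MASK indicates whether an immediate
  // retry is worthwhile.
  return (status & SOFT_ABORT_MASK) ? 0 /* retry */ : -1 /* give up */;
}
#endif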
1805345153Sdim
1806345153Sdim// Statistics are collected for testing purposes.
1807345153Sdim#if KMP_DEBUG_ADAPTIVE_LOCKS
1808345153Sdim
1809345153Sdim// We accumulate speculative lock statistics when the lock is destroyed. We
1810345153Sdim// keep locks that haven't been destroyed in the liveLocks list so that we can
1811345153Sdim// grab their statistics too.
1812345153Sdimstatic kmp_adaptive_lock_statistics_t destroyedStats;
1813345153Sdim
1814345153Sdim// To hold the list of live locks.
1815345153Sdimstatic kmp_adaptive_lock_info_t liveLocks;
1816345153Sdim
1817345153Sdim// A lock so we can safely update the list of locks.
1818345153Sdimstatic kmp_bootstrap_lock_t chain_lock =
1819345153Sdim    KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1820345153Sdim
1821345153Sdim// Initialize the list of stats.
1822345153Sdimvoid __kmp_init_speculative_stats() {
1823345153Sdim  kmp_adaptive_lock_info_t *lck = &liveLocks;
1824345153Sdim
1825345153Sdim  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1826345153Sdim         sizeof(lck->stats));
1827345153Sdim  lck->stats.next = lck;
1828345153Sdim  lck->stats.prev = lck;
1829345153Sdim
1830345153Sdim  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1831345153Sdim  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1832345153Sdim
1833345153Sdim  __kmp_init_bootstrap_lock(&chain_lock);
1834345153Sdim}
1835345153Sdim
1836345153Sdim// Insert the lock into the circular list
1837345153Sdimstatic void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1838345153Sdim  __kmp_acquire_bootstrap_lock(&chain_lock);
1839345153Sdim
1840345153Sdim  lck->stats.next = liveLocks.stats.next;
1841345153Sdim  lck->stats.prev = &liveLocks;
1842345153Sdim
1843345153Sdim  liveLocks.stats.next = lck;
1844345153Sdim  lck->stats.next->stats.prev = lck;
1845345153Sdim
1846345153Sdim  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1847345153Sdim  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1848345153Sdim
1849345153Sdim  __kmp_release_bootstrap_lock(&chain_lock);
1850345153Sdim}
1851345153Sdim
1852345153Sdimstatic void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1853345153Sdim  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1854345153Sdim  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1855345153Sdim
1856345153Sdim  kmp_adaptive_lock_info_t *n = lck->stats.next;
1857345153Sdim  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1858345153Sdim
1859345153Sdim  n->stats.prev = p;
1860345153Sdim  p->stats.next = n;
1861345153Sdim}
1862345153Sdim
1863345153Sdimstatic void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1864345153Sdim  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1865345153Sdim         sizeof(lck->stats));
1866345153Sdim  __kmp_remember_lock(lck);
1867345153Sdim}
1868345153Sdim
1869345153Sdimstatic void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1870345153Sdim                            kmp_adaptive_lock_info_t *lck) {
1871345153Sdim  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1872345153Sdim
1873345153Sdim  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1874345153Sdim  t->successfulSpeculations += s->successfulSpeculations;
1875345153Sdim  t->hardFailedSpeculations += s->hardFailedSpeculations;
1876345153Sdim  t->softFailedSpeculations += s->softFailedSpeculations;
1877345153Sdim  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1878345153Sdim  t->lemmingYields += s->lemmingYields;
1879345153Sdim}
1880345153Sdim
1881345153Sdimstatic void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1882345153Sdim  __kmp_acquire_bootstrap_lock(&chain_lock);
1883345153Sdim
1884345153Sdim  __kmp_add_stats(&destroyedStats, lck);
1885345153Sdim  __kmp_forget_lock(lck);
1886345153Sdim
1887345153Sdim  __kmp_release_bootstrap_lock(&chain_lock);
1888345153Sdim}
1889345153Sdim
1890345153Sdimstatic float percent(kmp_uint32 count, kmp_uint32 total) {
1891345153Sdim  return (total == 0) ? 0.0 : (100.0 * count) / total;
1892345153Sdim}
1893345153Sdim
1894345153Sdimstatic FILE *__kmp_open_stats_file() {
1895345153Sdim  if (strcmp(__kmp_speculative_statsfile, "-") == 0)
1896345153Sdim    return stdout;
1897345153Sdim
1898345153Sdim  size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1899345153Sdim  char buffer[buffLen];
1900345153Sdim  KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1901345153Sdim               (kmp_int32)getpid());
1902345153Sdim  FILE *result = fopen(&buffer[0], "w");
1903345153Sdim
1904345153Sdim  // Maybe we should issue a warning here...
1905345153Sdim  return result ? result : stdout;
1906345153Sdim}
1907345153Sdim
1908345153Sdimvoid __kmp_print_speculative_stats() {
1909345153Sdim  kmp_adaptive_lock_statistics_t total = destroyedStats;
1910345153Sdim  kmp_adaptive_lock_info_t *lck;
1911345153Sdim
1912345153Sdim  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1913345153Sdim    __kmp_add_stats(&total, lck);
1914345153Sdim  }
1915345153Sdim  kmp_adaptive_lock_statistics_t *t = &total;
1916345153Sdim  kmp_uint32 totalSections =
1917345153Sdim      t->nonSpeculativeAcquires + t->successfulSpeculations;
1918345153Sdim  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1919345153Sdim                                 t->hardFailedSpeculations +
1920345153Sdim                                 t->softFailedSpeculations;
1921345153Sdim  if (totalSections <= 0)
1922345153Sdim    return;
1923345153Sdim
1924345153Sdim  FILE *statsFile = __kmp_open_stats_file();
1925345153Sdim
1926345153Sdim  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1927345153Sdim  fprintf(statsFile, " Lock parameters: \n"
1928345153Sdim                     "   max_soft_retries               : %10d\n"
1929345153Sdim                     "   max_badness                    : %10d\n",
1930345153Sdim          __kmp_adaptive_backoff_params.max_soft_retries,
1931345153Sdim          __kmp_adaptive_backoff_params.max_badness);
1932345153Sdim  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1933345153Sdim          t->nonSpeculativeAcquireAttempts);
1934345153Sdim  fprintf(statsFile, " Total critical sections          : %10d\n",
1935345153Sdim          totalSections);
1936345153Sdim  fprintf(statsFile, " Successful speculations          : %10d (%5.1f%%)\n",
1937345153Sdim          t->successfulSpeculations,
1938345153Sdim          percent(t->successfulSpeculations, totalSections));
1939345153Sdim  fprintf(statsFile, " Non-speculative acquires         : %10d (%5.1f%%)\n",
1940345153Sdim          t->nonSpeculativeAcquires,
1941345153Sdim          percent(t->nonSpeculativeAcquires, totalSections));
1942345153Sdim  fprintf(statsFile, " Lemming yields                   : %10d\n\n",
1943345153Sdim          t->lemmingYields);
1944345153Sdim
1945345153Sdim  fprintf(statsFile, " Speculative acquire attempts     : %10d\n",
1946345153Sdim          totalSpeculations);
1947345153Sdim  fprintf(statsFile, " Successes                        : %10d (%5.1f%%)\n",
1948345153Sdim          t->successfulSpeculations,
1949345153Sdim          percent(t->successfulSpeculations, totalSpeculations));
1950345153Sdim  fprintf(statsFile, " Soft failures                    : %10d (%5.1f%%)\n",
1951345153Sdim          t->softFailedSpeculations,
1952345153Sdim          percent(t->softFailedSpeculations, totalSpeculations));
1953345153Sdim  fprintf(statsFile, " Hard failures                    : %10d (%5.1f%%)\n",
1954345153Sdim          t->hardFailedSpeculations,
1955345153Sdim          percent(t->hardFailedSpeculations, totalSpeculations));
1956345153Sdim
1957345153Sdim  if (statsFile != stdout)
1958345153Sdim    fclose(statsFile);
1959345153Sdim}
1960345153Sdim
1961345153Sdim#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1962345153Sdim#else
1963345153Sdim#define KMP_INC_STAT(lck, stat)
1964345153Sdim
1965345153Sdim#endif // KMP_DEBUG_ADAPTIVE_LOCKS
1966345153Sdim
1967345153Sdimstatic inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1968345153Sdim  // It is enough to check that the head_id is zero.
1969345153Sdim  // We don't also need to check the tail.
1970345153Sdim  bool res = lck->lk.head_id == 0;
1971345153Sdim
1972345153Sdim// We need a fence here, since we must ensure that no memory operations
1973345153Sdim// from later in this thread float above that read.
1974345153Sdim#if KMP_COMPILER_ICC
1975345153Sdim  _mm_mfence();
1976345153Sdim#else
1977345153Sdim  __sync_synchronize();
1978345153Sdim#endif
1979345153Sdim
1980345153Sdim  return res;
1981345153Sdim}
1982345153Sdim
1983345153Sdim// Functions for manipulating the badness
1984345153Sdimstatic __inline void
1985345153Sdim__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1986345153Sdim  // Reset the badness to zero so we eagerly try to speculate again
1987345153Sdim  lck->lk.adaptive.badness = 0;
1988345153Sdim  KMP_INC_STAT(lck, successfulSpeculations);
1989345153Sdim}
1990345153Sdim
1991345153Sdim// Create a bit mask with one more set bit.
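// e.g. the badness progresses 0 -> 1 -> 3 -> 7 -> ..., saturating once the
// next value would exceed max_badness.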
1992345153Sdimstatic __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1993345153Sdim  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1994345153Sdim  if (newBadness > lck->lk.adaptive.max_badness) {
1995345153Sdim    return;
1996345153Sdim  } else {
1997345153Sdim    lck->lk.adaptive.badness = newBadness;
1998345153Sdim  }
1999345153Sdim}
2000345153Sdim
2001345153Sdim// Check whether speculation should be attempted.
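// Since badness always has the form 2^k - 1 (see __kmp_step_badness), the
// (attempts & badness) == 0 test passes once every badness + 1 acquire
// attempts, so a lock that keeps aborting is probed for speculation
// progressively less often.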
2002345153Sdimstatic __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
2003345153Sdim                                           kmp_int32 gtid) {
2004345153Sdim  kmp_uint32 badness = lck->lk.adaptive.badness;
2005345153Sdim  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
2006345153Sdim  int res = (attempts & badness) == 0;
2007345153Sdim  return res;
2008345153Sdim}
2009345153Sdim
2010345153Sdim// Attempt to acquire only the speculative lock.
2011345153Sdim// Does not back off to the non-speculative lock.
2012345153Sdimstatic int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
2013345153Sdim                                         kmp_int32 gtid) {
2014345153Sdim  int retries = lck->lk.adaptive.max_soft_retries;
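  // The do/while loop below therefore makes at most max_soft_retries + 1
  // speculative attempts before giving up and stepping up the badness.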
2015345153Sdim
2016345153Sdim  // We don't explicitly count the start of speculation; rather, we record the
2017345153Sdim  // results (success, hard fail, soft fail). The sum of all of those is the
2018345153Sdim  // total number of times we started speculation, since all speculations must
2019345153Sdim  // end in one of those ways.
2020345153Sdim  do {
2021345153Sdim    kmp_uint32 status = _xbegin();
2022345153Sdim    // Switch this in to disable actual speculation but exercise at least some
2023345153Sdim    // of the rest of the code. Useful for debugging...
2024345153Sdim    // kmp_uint32 status = _XABORT_NESTED;
2025345153Sdim
2026345153Sdim    if (status == _XBEGIN_STARTED) {
2027345153Sdim      /* We have successfully started speculation. Check that no-one acquired
2028345153Sdim         the lock for real between when we last looked and now. This also gets
2029345153Sdim         the lock cache line into our read-set, which we need so that we'll
2030345153Sdim         abort if anyone later claims it for real. */
2031345153Sdim      if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2032345153Sdim        // Lock is now visibly acquired, so someone beat us to it. Abort the
2033345153Sdim        // transaction so we'll restart from _xbegin with the failure status.
2034345153Sdim        _xabort(0x01);
2035345153Sdim        KMP_ASSERT2(0, "should not get here");
2036345153Sdim      }
2037345153Sdim      return 1; // Lock has been acquired (speculatively)
2038345153Sdim    } else {
2039345153Sdim      // We have aborted, update the statistics
2040345153Sdim      if (status & SOFT_ABORT_MASK) {
2041345153Sdim        KMP_INC_STAT(lck, softFailedSpeculations);
2042345153Sdim        // and loop round to retry.
2043345153Sdim      } else {
2044345153Sdim        KMP_INC_STAT(lck, hardFailedSpeculations);
2045345153Sdim        // Give up if we had a hard failure.
2046345153Sdim        break;
2047345153Sdim      }
2048345153Sdim    }
2049345153Sdim  } while (retries--); // Loop while we have retries, and didn't fail hard.
2050345153Sdim
2051345153Sdim  // Either we had a hard failure or we didn't succeed softly after
2052345153Sdim  // the full set of attempts, so back off the badness.
2053345153Sdim  __kmp_step_badness(lck);
2054345153Sdim  return 0;
2055345153Sdim}
2056345153Sdim
2057345153Sdim// Attempt to acquire the speculative lock, or back off to the non-speculative
2058345153Sdim// one if the speculative lock cannot be acquired.
2059345153Sdim// We can succeed speculatively, non-speculatively, or fail.
2060345153Sdimstatic int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2061345153Sdim  // First try to acquire the lock speculatively
2062345153Sdim  if (__kmp_should_speculate(lck, gtid) &&
2063345153Sdim      __kmp_test_adaptive_lock_only(lck, gtid))
2064345153Sdim    return 1;
2065345153Sdim
2066345153Sdim  // Speculative acquisition failed, so try to acquire it non-speculatively.
2067345153Sdim  // Count the non-speculative acquire attempt
2068345153Sdim  lck->lk.adaptive.acquire_attempts++;
2069345153Sdim
2070345153Sdim  // Use base, non-speculative lock.
2071345153Sdim  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2072345153Sdim    KMP_INC_STAT(lck, nonSpeculativeAcquires);
2073345153Sdim    return 1; // Lock is acquired (non-speculatively)
2074345153Sdim  } else {
2075345153Sdim    return 0; // Failed to acquire the lock, it's already visibly locked.
2076345153Sdim  }
2077345153Sdim}
2078345153Sdim
2079345153Sdimstatic int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2080345153Sdim                                                kmp_int32 gtid) {
2081345153Sdim  char const *const func = "omp_test_lock";
2082345153Sdim  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2083345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2084345153Sdim  }
2085345153Sdim
2086345153Sdim  int retval = __kmp_test_adaptive_lock(lck, gtid);
2087345153Sdim
2088345153Sdim  if (retval) {
2089345153Sdim    lck->lk.qlk.owner_id = gtid + 1;
2090345153Sdim  }
2091345153Sdim  return retval;
2092345153Sdim}
2093345153Sdim
2094345153Sdim// Block until we can acquire a speculative, adaptive lock. We check whether we
2095345153Sdim// should be trying to speculate. If we should be, we check the real lock to see
2096345153Sdim// if it is free, and, if not, pause without attempting to acquire it until it
2097345153Sdim// is. Then we try the speculative acquire. This means that although we suffer
2098345153Sdim// from lemmings a little (because we can't acquire the lock speculatively until
2099345153Sdim// the queue of waiting threads has cleared), we don't get into a state where we
2100345153Sdim// can never acquire the lock speculatively (because we force the queue to clear
2101345153Sdim// by preventing new arrivals from entering the queue). This does mean that when
2102345153Sdim// we're trying to break lemmings, the lock is no longer fair. However, OpenMP
2103345153Sdim// makes no guarantee that its locks are fair, so this isn't a real problem.
2105345153Sdimstatic void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2106345153Sdim                                        kmp_int32 gtid) {
2107345153Sdim  if (__kmp_should_speculate(lck, gtid)) {
2108345153Sdim    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2109345153Sdim      if (__kmp_test_adaptive_lock_only(lck, gtid))
2110345153Sdim        return;
2111345153Sdim      // We tried speculation and failed, so give up.
2112345153Sdim    } else {
2113345153Sdim      // We can't try speculation until the lock is free, so we pause here
2114345153Sdim      // (without suspending on the queuing lock) to allow it to drain, then
2115345153Sdim      // try again. All other threads will also see the same result from
2116345153Sdim      // __kmp_should_speculate, so they will do the same if they try to claim
2117345153Sdim      // the lock from now on.
2118345153Sdim      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2119345153Sdim        KMP_INC_STAT(lck, lemmingYields);
2120353358Sdim        KMP_YIELD(TRUE);
2121345153Sdim      }
2122345153Sdim
2123345153Sdim      if (__kmp_test_adaptive_lock_only(lck, gtid))
2124345153Sdim        return;
2125345153Sdim    }
2126345153Sdim  }
2127345153Sdim
2128345153Sdim  // Speculative acquisition failed, so acquire it non-speculatively.
2129345153Sdim  // Count the non-speculative acquire attempt
2130345153Sdim  lck->lk.adaptive.acquire_attempts++;
2131345153Sdim
2132345153Sdim  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2133345153Sdim  // We have acquired the base lock, so count that.
2134345153Sdim  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2135345153Sdim  ANNOTATE_QUEUING_ACQUIRED(lck);
2136345153Sdim}
2137345153Sdim
2138345153Sdimstatic void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2139345153Sdim                                                    kmp_int32 gtid) {
2140345153Sdim  char const *const func = "omp_set_lock";
2141345153Sdim  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2142345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2143345153Sdim  }
2144345153Sdim  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2145345153Sdim    KMP_FATAL(LockIsAlreadyOwned, func);
2146345153Sdim  }
2147345153Sdim
2148345153Sdim  __kmp_acquire_adaptive_lock(lck, gtid);
2149345153Sdim
2150345153Sdim  lck->lk.qlk.owner_id = gtid + 1;
2151345153Sdim}
2152345153Sdim
2153345153Sdimstatic int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2154345153Sdim                                       kmp_int32 gtid) {
2155345153Sdim  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2156345153Sdim          lck))) { // If the lock doesn't look claimed we must be speculating.
2157345153Sdim    // (Or the user's code is buggy and they're releasing without locking;
2158345153Sdim    // if we had XTEST we'd be able to check that case...)
2159345153Sdim    _xend(); // Exit speculation
2160345153Sdim    __kmp_update_badness_after_success(lck);
2161345153Sdim  } else { // Since the lock *is* visibly locked we're not speculating,
2162345153Sdim    // so should use the underlying lock's release scheme.
2163345153Sdim    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2164345153Sdim  }
2165345153Sdim  return KMP_LOCK_RELEASED;
2166345153Sdim}
2167345153Sdim
2168345153Sdimstatic int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2169345153Sdim                                                   kmp_int32 gtid) {
2170345153Sdim  char const *const func = "omp_unset_lock";
2171345153Sdim  KMP_MB(); /* in case another processor initialized lock */
2172345153Sdim  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2173345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2174345153Sdim  }
2175345153Sdim  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2176345153Sdim    KMP_FATAL(LockUnsettingFree, func);
2177345153Sdim  }
2178345153Sdim  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2179345153Sdim    KMP_FATAL(LockUnsettingSetByAnother, func);
2180345153Sdim  }
2181345153Sdim  lck->lk.qlk.owner_id = 0;
2182345153Sdim  __kmp_release_adaptive_lock(lck, gtid);
2183345153Sdim  return KMP_LOCK_RELEASED;
2184345153Sdim}
2185345153Sdim
2186345153Sdimstatic void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2187345153Sdim  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2188345153Sdim  lck->lk.adaptive.badness = 0;
2189345153Sdim  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2190345153Sdim  lck->lk.adaptive.max_soft_retries =
2191345153Sdim      __kmp_adaptive_backoff_params.max_soft_retries;
2192345153Sdim  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2193345153Sdim#if KMP_DEBUG_ADAPTIVE_LOCKS
2194345153Sdim  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2195345153Sdim#endif
2196345153Sdim  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2197345153Sdim}
2198345153Sdim
2199345153Sdimstatic void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2200345153Sdim#if KMP_DEBUG_ADAPTIVE_LOCKS
2201345153Sdim  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2202345153Sdim#endif
2203345153Sdim  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2204345153Sdim  // Nothing needed for the speculative part.
2205345153Sdim}
2206345153Sdim
2207345153Sdimstatic void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2208345153Sdim  char const *const func = "omp_destroy_lock";
2209345153Sdim  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2210345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2211345153Sdim  }
2212345153Sdim  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2213345153Sdim    KMP_FATAL(LockStillOwned, func);
2214345153Sdim  }
2215345153Sdim  __kmp_destroy_adaptive_lock(lck);
2216345153Sdim}
2217345153Sdim
2218345153Sdim#endif // KMP_USE_ADAPTIVE_LOCKS
2219345153Sdim
2220345153Sdim/* ------------------------------------------------------------------------ */
2221345153Sdim/* DRDPA ticket locks                                                */
2222345153Sdim/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
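
// Each acquiring thread atomically takes a ticket and then spins on its own
// slot, polls[ticket & mask], until the releasing thread publishes that ticket
// value there. Spreading waiters over separate slots reduces the number of
// threads hammering any one location. The polling area is enlarged when more
// threads are waiting than there are slots, contracted to a single slot under
// oversubscription, and old areas are freed lazily once no thread can still be
// polling them.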
2223345153Sdim
2224345153Sdimstatic kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2225345153Sdim  return lck->lk.owner_id - 1;
2226345153Sdim}
2227345153Sdim
2228345153Sdimstatic inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2229345153Sdim  return lck->lk.depth_locked != -1;
2230345153Sdim}
2231345153Sdim
2232345153Sdim__forceinline static int
2233345153Sdim__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2234345153Sdim  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2235345153Sdim  kmp_uint64 mask = lck->lk.mask; // atomic load
2236345153Sdim  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2237345153Sdim
2238345153Sdim#ifdef USE_LOCK_PROFILE
2239345153Sdim  if (polls[ticket & mask] != ticket)
2240345153Sdim    __kmp_printf("LOCK CONTENTION: %p\n", lck);
2241345153Sdim/* else __kmp_printf( "." );*/
2242345153Sdim#endif /* USE_LOCK_PROFILE */
2243345153Sdim
2244345153Sdim  // Now spin-wait, but reload the polls pointer and mask, in case the
2245345153Sdim  // polling area has been reconfigured.  Unless it is reconfigured, the
2246345153Sdim  // reloads stay in L1 cache and are cheap.
2247345153Sdim  //
2248353358Sdim  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2249353358Sdim  // The current implementation of KMP_WAIT doesn't allow for mask
2250345153Sdim  // and poll to be re-read every spin iteration.
2251345153Sdim  kmp_uint32 spins;
2252345153Sdim  KMP_FSYNC_PREPARE(lck);
2253345153Sdim  KMP_INIT_YIELD(spins);
2254345153Sdim  while (polls[ticket & mask] < ticket) { // atomic load
2255353358Sdim    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2256345153Sdim    // Re-read the mask and the poll pointer from the lock structure.
2257345153Sdim    //
2258345153Sdim    // Make certain that "mask" is read before "polls" !!!
2259345153Sdim    //
2260345153Sdim    // If another thread reconfigures the polling area and updates these
2261345153Sdim    // fields, and we read the new value of mask with the old polls pointer, we
2262345153Sdim    // could access memory beyond the end of the old polling area.
2263345153Sdim    mask = lck->lk.mask; // atomic load
2264345153Sdim    polls = lck->lk.polls; // atomic load
2265345153Sdim  }
2266345153Sdim
2267345153Sdim  // Critical section starts here
2268345153Sdim  KMP_FSYNC_ACQUIRED(lck);
2269345153Sdim  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2270345153Sdim                  ticket, lck));
2271345153Sdim  lck->lk.now_serving = ticket; // non-volatile store
2272345153Sdim
2273345153Sdim  // Deallocate a garbage polling area if we know that we are the last
2274345153Sdim  // thread that could possibly access it.
2275345153Sdim  //
2276345153Sdim  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2277345153Sdim  // ticket.
2278345153Sdim  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2279345153Sdim    __kmp_free(lck->lk.old_polls);
2280345153Sdim    lck->lk.old_polls = NULL;
2281345153Sdim    lck->lk.cleanup_ticket = 0;
2282345153Sdim  }
2283345153Sdim
2284345153Sdim  // Check to see if we should reconfigure the polling area.
2285345153Sdim  // If there is still a garbage polling area to be deallocated from a
2286345153Sdim  // previous reconfiguration, let a later thread reconfigure it.
2287345153Sdim  if (lck->lk.old_polls == NULL) {
2288345153Sdim    bool reconfigure = false;
2289345153Sdim    std::atomic<kmp_uint64> *old_polls = polls;
2290345153Sdim    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2291345153Sdim
2292345153Sdim    if (TCR_4(__kmp_nth) >
2293345153Sdim        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2294345153Sdim      // We are in oversubscription mode.  Contract the polling area
2295345153Sdim      // down to a single location, if that hasn't been done already.
2296345153Sdim      if (num_polls > 1) {
2297345153Sdim        reconfigure = true;
2298345153Sdim        num_polls = TCR_4(lck->lk.num_polls);
2299345153Sdim        mask = 0;
2300345153Sdim        num_polls = 1;
2301345153Sdim        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2302345153Sdim                                                          sizeof(*polls));
2303345153Sdim        polls[0] = ticket;
2304345153Sdim      }
2305345153Sdim    } else {
2306345153Sdim      // We are in under/fully subscribed mode.  Check the number of
2307345153Sdim      // threads waiting on the lock.  The size of the polling area
2308345153Sdim      // should be at least the number of threads waiting.
2309345153Sdim      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2310345153Sdim      if (num_waiting > num_polls) {
2311345153Sdim        kmp_uint32 old_num_polls = num_polls;
2312345153Sdim        reconfigure = true;
2313345153Sdim        do {
2314345153Sdim          mask = (mask << 1) | 1;
2315345153Sdim          num_polls *= 2;
2316345153Sdim        } while (num_polls <= num_waiting);
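        // Illustrative numbers: assuming mask currently matches num_polls - 1,
        // with num_polls == 2, mask == 1 and five waiters the loop runs twice,
        // leaving num_polls == 8 and mask == 7.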
2317345153Sdim
2318345153Sdim        // Allocate the new polling area, and copy the relevant portion
2319345153Sdim        // of the old polling area to the new area.  __kmp_allocate()
2320345153Sdim        // zeroes the memory it allocates, and most of the old area is
2321345153Sdim        // just zero padding, so we only copy the release counters.
2322345153Sdim        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2323345153Sdim                                                          sizeof(*polls));
2324345153Sdim        kmp_uint32 i;
2325345153Sdim        for (i = 0; i < old_num_polls; i++) {
2326345153Sdim          polls[i].store(old_polls[i]);
2327345153Sdim        }
2328345153Sdim      }
2329345153Sdim    }
2330345153Sdim
2331345153Sdim    if (reconfigure) {
2332345153Sdim      // Now write the updated fields back to the lock structure.
2333345153Sdim      //
2334345153Sdim      // Make certain that "polls" is written before "mask" !!!
2335345153Sdim      //
2336345153Sdim      // If another thread picks up the new value of mask and the old polls
2337345153Sdim      // pointer, it could access memory beyond the end of the old polling
2338345153Sdim      // area.
2339345153Sdim      //
2340345153Sdim      // On x86, we need memory fences.
2341345153Sdim      KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2342345153Sdim                      "lock %p to %d polls\n",
2343345153Sdim                      ticket, lck, num_polls));
2344345153Sdim
2345345153Sdim      lck->lk.old_polls = old_polls;
2346345153Sdim      lck->lk.polls = polls; // atomic store
2347345153Sdim
2348345153Sdim      KMP_MB();
2349345153Sdim
2350345153Sdim      lck->lk.num_polls = num_polls;
2351345153Sdim      lck->lk.mask = mask; // atomic store
2352345153Sdim
2353345153Sdim      KMP_MB();
2354345153Sdim
2355345153Sdim      // Only after the new polling area and mask have been flushed
2356345153Sdim      // to main memory can we update the cleanup ticket field.
2357345153Sdim      //
2358345153Sdim      // volatile load / non-volatile store
2359345153Sdim      lck->lk.cleanup_ticket = lck->lk.next_ticket;
2360345153Sdim    }
2361345153Sdim  }
2362345153Sdim  return KMP_LOCK_ACQUIRED_FIRST;
2363345153Sdim}
2364345153Sdim
2365345153Sdimint __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2366345153Sdim  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2367345153Sdim  ANNOTATE_DRDPA_ACQUIRED(lck);
2368345153Sdim  return retval;
2369345153Sdim}
2370345153Sdim
2371345153Sdimstatic int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2372345153Sdim                                                kmp_int32 gtid) {
2373345153Sdim  char const *const func = "omp_set_lock";
2374345153Sdim  if (lck->lk.initialized != lck) {
2375345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2376345153Sdim  }
2377345153Sdim  if (__kmp_is_drdpa_lock_nestable(lck)) {
2378345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
2379345153Sdim  }
2380345153Sdim  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2381345153Sdim    KMP_FATAL(LockIsAlreadyOwned, func);
2382345153Sdim  }
2383345153Sdim
2384345153Sdim  __kmp_acquire_drdpa_lock(lck, gtid);
2385345153Sdim
2386345153Sdim  lck->lk.owner_id = gtid + 1;
2387345153Sdim  return KMP_LOCK_ACQUIRED_FIRST;
2388345153Sdim}
2389345153Sdim
2390345153Sdimint __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2391345153Sdim  // First get a ticket, then read the polls pointer and the mask.
2392345153Sdim  // The polls pointer must be read before the mask!!! (See above)
2393345153Sdim  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2394345153Sdim  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2395345153Sdim  kmp_uint64 mask = lck->lk.mask; // atomic load
2396345153Sdim  if (polls[ticket & mask] == ticket) {
2397345153Sdim    kmp_uint64 next_ticket = ticket + 1;
2398345153Sdim    if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2399345153Sdim                                       next_ticket)) {
2400345153Sdim      KMP_FSYNC_ACQUIRED(lck);
2401345153Sdim      KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2402345153Sdim                      ticket, lck));
2403345153Sdim      lck->lk.now_serving = ticket; // non-volatile store
2404345153Sdim
2405345153Sdim      // Since no threads are waiting, there is no possibility that we would
2406345153Sdim      // want to reconfigure the polling area.  We might have the cleanup ticket
2407345153Sdim      // value (which says that it is now safe to deallocate old_polls), but
2408345153Sdim      // we'll let a later thread that calls __kmp_acquire_lock do that - this
2409345153Sdim      // routine isn't supposed to block, and we would risk blocking if we
2410345153Sdim      // called __kmp_free() to do the deallocation.
2411345153Sdim      return TRUE;
2412345153Sdim    }
2413345153Sdim  }
2414345153Sdim  return FALSE;
2415345153Sdim}
2416345153Sdim
2417345153Sdimstatic int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2418345153Sdim                                             kmp_int32 gtid) {
2419345153Sdim  char const *const func = "omp_test_lock";
2420345153Sdim  if (lck->lk.initialized != lck) {
2421345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2422345153Sdim  }
2423345153Sdim  if (__kmp_is_drdpa_lock_nestable(lck)) {
2424345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
2425345153Sdim  }
2426345153Sdim
2427345153Sdim  int retval = __kmp_test_drdpa_lock(lck, gtid);
2428345153Sdim
2429345153Sdim  if (retval) {
2430345153Sdim    lck->lk.owner_id = gtid + 1;
2431345153Sdim  }
2432345153Sdim  return retval;
2433345153Sdim}
2434345153Sdim
2435345153Sdimint __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2436345153Sdim  // Read the ticket value from the lock data struct, then the polls pointer and
2437345153Sdim  // the mask.  The polls pointer must be read before the mask!!! (See above)
2438345153Sdim  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2439345153Sdim  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2440345153Sdim  kmp_uint64 mask = lck->lk.mask; // atomic load
2441345153Sdim  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2442345153Sdim                  ticket - 1, lck));
2443345153Sdim  KMP_FSYNC_RELEASING(lck);
2444345153Sdim  ANNOTATE_DRDPA_RELEASED(lck);
2445345153Sdim  polls[ticket & mask] = ticket; // atomic store
2446345153Sdim  return KMP_LOCK_RELEASED;
2447345153Sdim}
2448345153Sdim
2449345153Sdimstatic int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2450345153Sdim                                                kmp_int32 gtid) {
2451345153Sdim  char const *const func = "omp_unset_lock";
2452345153Sdim  KMP_MB(); /* in case another processor initialized lock */
2453345153Sdim  if (lck->lk.initialized != lck) {
2454345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2455345153Sdim  }
2456345153Sdim  if (__kmp_is_drdpa_lock_nestable(lck)) {
2457345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
2458345153Sdim  }
2459345153Sdim  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2460345153Sdim    KMP_FATAL(LockUnsettingFree, func);
2461345153Sdim  }
2462345153Sdim  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2463345153Sdim      (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2464345153Sdim    KMP_FATAL(LockUnsettingSetByAnother, func);
2465345153Sdim  }
2466345153Sdim  lck->lk.owner_id = 0;
2467345153Sdim  return __kmp_release_drdpa_lock(lck, gtid);
2468345153Sdim}
2469345153Sdim
2470345153Sdimvoid __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2471345153Sdim  lck->lk.location = NULL;
2472345153Sdim  lck->lk.mask = 0;
2473345153Sdim  lck->lk.num_polls = 1;
2474345153Sdim  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2475345153Sdim      lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2476345153Sdim  lck->lk.cleanup_ticket = 0;
2477345153Sdim  lck->lk.old_polls = NULL;
2478345153Sdim  lck->lk.next_ticket = 0;
2479345153Sdim  lck->lk.now_serving = 0;
2480345153Sdim  lck->lk.owner_id = 0; // no thread owns the lock.
2481345153Sdim  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2482345153Sdim  lck->lk.initialized = lck;
2483345153Sdim
2484345153Sdim  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2485345153Sdim}
2486345153Sdim
2487345153Sdimvoid __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2488345153Sdim  lck->lk.initialized = NULL;
2489345153Sdim  lck->lk.location = NULL;
2490345153Sdim  if (lck->lk.polls.load() != NULL) {
2491345153Sdim    __kmp_free(lck->lk.polls.load());
2492345153Sdim    lck->lk.polls = NULL;
2493345153Sdim  }
2494345153Sdim  if (lck->lk.old_polls != NULL) {
2495345153Sdim    __kmp_free(lck->lk.old_polls);
2496345153Sdim    lck->lk.old_polls = NULL;
2497345153Sdim  }
2498345153Sdim  lck->lk.mask = 0;
2499345153Sdim  lck->lk.num_polls = 0;
2500345153Sdim  lck->lk.cleanup_ticket = 0;
2501345153Sdim  lck->lk.next_ticket = 0;
2502345153Sdim  lck->lk.now_serving = 0;
2503345153Sdim  lck->lk.owner_id = 0;
2504345153Sdim  lck->lk.depth_locked = -1;
2505345153Sdim}
2506345153Sdim
2507345153Sdimstatic void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2508345153Sdim  char const *const func = "omp_destroy_lock";
2509345153Sdim  if (lck->lk.initialized != lck) {
2510345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2511345153Sdim  }
2512345153Sdim  if (__kmp_is_drdpa_lock_nestable(lck)) {
2513345153Sdim    KMP_FATAL(LockNestableUsedAsSimple, func);
2514345153Sdim  }
2515345153Sdim  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2516345153Sdim    KMP_FATAL(LockStillOwned, func);
2517345153Sdim  }
2518345153Sdim  __kmp_destroy_drdpa_lock(lck);
2519345153Sdim}
2520345153Sdim
2521345153Sdim// nested drdpa ticket locks
2522345153Sdim
2523345153Sdimint __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2524345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
2525345153Sdim
2526345153Sdim  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2527345153Sdim    lck->lk.depth_locked += 1;
2528345153Sdim    return KMP_LOCK_ACQUIRED_NEXT;
2529345153Sdim  } else {
2530345153Sdim    __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2531345153Sdim    ANNOTATE_DRDPA_ACQUIRED(lck);
2532345153Sdim    KMP_MB();
2533345153Sdim    lck->lk.depth_locked = 1;
2534345153Sdim    KMP_MB();
2535345153Sdim    lck->lk.owner_id = gtid + 1;
2536345153Sdim    return KMP_LOCK_ACQUIRED_FIRST;
2537345153Sdim  }
2538345153Sdim}
2539345153Sdim
2540345153Sdimstatic void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2541345153Sdim                                                        kmp_int32 gtid) {
2542345153Sdim  char const *const func = "omp_set_nest_lock";
2543345153Sdim  if (lck->lk.initialized != lck) {
2544345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2545345153Sdim  }
2546345153Sdim  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2547345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
2548345153Sdim  }
2549345153Sdim  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2550345153Sdim}
2551345153Sdim
2552345153Sdimint __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2553345153Sdim  int retval;
2554345153Sdim
2555345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
2556345153Sdim
2557345153Sdim  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2558345153Sdim    retval = ++lck->lk.depth_locked;
2559345153Sdim  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2560345153Sdim    retval = 0;
2561345153Sdim  } else {
2562345153Sdim    KMP_MB();
2563345153Sdim    retval = lck->lk.depth_locked = 1;
2564345153Sdim    KMP_MB();
2565345153Sdim    lck->lk.owner_id = gtid + 1;
2566345153Sdim  }
2567345153Sdim  return retval;
2568345153Sdim}
2569345153Sdim
2570345153Sdimstatic int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2571345153Sdim                                                    kmp_int32 gtid) {
2572345153Sdim  char const *const func = "omp_test_nest_lock";
2573345153Sdim  if (lck->lk.initialized != lck) {
2574345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2575345153Sdim  }
2576345153Sdim  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2577345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
2578345153Sdim  }
2579345153Sdim  return __kmp_test_nested_drdpa_lock(lck, gtid);
2580345153Sdim}
2581345153Sdim
2582345153Sdimint __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2583345153Sdim  KMP_DEBUG_ASSERT(gtid >= 0);
2584345153Sdim
2585345153Sdim  KMP_MB();
2586345153Sdim  if (--(lck->lk.depth_locked) == 0) {
2587345153Sdim    KMP_MB();
2588345153Sdim    lck->lk.owner_id = 0;
2589345153Sdim    __kmp_release_drdpa_lock(lck, gtid);
2590345153Sdim    return KMP_LOCK_RELEASED;
2591345153Sdim  }
2592345153Sdim  return KMP_LOCK_STILL_HELD;
2593345153Sdim}
2594345153Sdim
2595345153Sdimstatic int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2596345153Sdim                                                       kmp_int32 gtid) {
2597345153Sdim  char const *const func = "omp_unset_nest_lock";
2598345153Sdim  KMP_MB(); /* in case another processor initialized lock */
2599345153Sdim  if (lck->lk.initialized != lck) {
2600345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2601345153Sdim  }
2602345153Sdim  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2603345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
2604345153Sdim  }
2605345153Sdim  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2606345153Sdim    KMP_FATAL(LockUnsettingFree, func);
2607345153Sdim  }
2608345153Sdim  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2609345153Sdim    KMP_FATAL(LockUnsettingSetByAnother, func);
2610345153Sdim  }
2611345153Sdim  return __kmp_release_nested_drdpa_lock(lck, gtid);
2612345153Sdim}
2613345153Sdim
2614345153Sdimvoid __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2615345153Sdim  __kmp_init_drdpa_lock(lck);
2616345153Sdim  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2617345153Sdim}
2618345153Sdim
2619345153Sdimvoid __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2620345153Sdim  __kmp_destroy_drdpa_lock(lck);
2621345153Sdim  lck->lk.depth_locked = 0;
2622345153Sdim}
2623345153Sdim
2624345153Sdimstatic void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2625345153Sdim  char const *const func = "omp_destroy_nest_lock";
2626345153Sdim  if (lck->lk.initialized != lck) {
2627345153Sdim    KMP_FATAL(LockIsUninitialized, func);
2628345153Sdim  }
2629345153Sdim  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2630345153Sdim    KMP_FATAL(LockSimpleUsedAsNestable, func);
2631345153Sdim  }
2632345153Sdim  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2633345153Sdim    KMP_FATAL(LockStillOwned, func);
2634345153Sdim  }
2635345153Sdim  __kmp_destroy_nested_drdpa_lock(lck);
2636345153Sdim}
2637345153Sdim
2638345153Sdim// Accessors for fields that do not exist in all lock kinds.
2639345153Sdim
2640345153Sdimstatic const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2641345153Sdim  return lck->lk.location;
2642345153Sdim}
2643345153Sdim
2644345153Sdimstatic void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2645345153Sdim                                          const ident_t *loc) {
2646345153Sdim  lck->lk.location = loc;
2647345153Sdim}
2648345153Sdim
2649345153Sdimstatic kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2650345153Sdim  return lck->lk.flags;
2651345153Sdim}
2652345153Sdim
2653345153Sdimstatic void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2654345153Sdim                                       kmp_lock_flags_t flags) {
2655345153Sdim  lck->lk.flags = flags;
2656345153Sdim}
2657345153Sdim
2658345153Sdim// Time stamp counter
2659345153Sdim#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2660345153Sdim#define __kmp_tsc() __kmp_hardware_timestamp()
2661345153Sdim// Runtime's default backoff parameters
2662345153Sdimkmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2663345153Sdim#else
2664345153Sdim// Use nanoseconds for other platforms
2665345153Sdimextern kmp_uint64 __kmp_now_nsec();
2666345153Sdimkmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2667345153Sdim#define __kmp_tsc() __kmp_now_nsec()
2668345153Sdim#endif
2669345153Sdim
2670345153Sdim// A useful predicate for dealing with timestamps that may wrap.
2671345153Sdim// Is a before b? Since the timestamps may wrap, this asks whether it is
2672345153Sdim// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2673345153Sdim// Times for which going clockwise is the shorter distance are in the future;
2674345153Sdim// the others are in the past. E.g. with a = MAX-1 and b = MAX+1 (= 0), the
2675345153Sdim// unsigned comparison a > b is true even though b is logically later; the
2676345153Sdim// signed difference (signed(a) = -2, signed(b) = 0) captures the actual order.
2677345153Sdimstatic inline bool before(kmp_uint64 a, kmp_uint64 b) {
2678345153Sdim  return ((kmp_int64)b - (kmp_int64)a) > 0;
2679345153Sdim}
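
// Illustrative sketch (not part of the runtime): how before() behaves across
// the wrap boundary, assuming 64-bit unsigned timestamps as above.
//
//   kmp_uint64 a = ~(kmp_uint64)0 - 1; // MAX - 1, i.e. signed -2
//   kmp_uint64 b = a + 2;              // wraps around to 0
//   before(a, b); // true:  (kmp_int64)0 - (kmp_int64)(-2) == 2 > 0
//   before(b, a); // false: (kmp_int64)(-2) - (kmp_int64)0 == -2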
2680345153Sdim
2681345153Sdim// Truncated binary exponential backoff function
2682345153Sdimvoid __kmp_spin_backoff(kmp_backoff_t *boff) {
2683345153Sdim  // We could flatten this loop, but making it a nested loop gives better results
2684345153Sdim  kmp_uint32 i;
2685345153Sdim  for (i = boff->step; i > 0; i--) {
2686345153Sdim    kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2687345153Sdim    do {
2688345153Sdim      KMP_CPU_PAUSE();
2689345153Sdim    } while (before(__kmp_tsc(), goal));
2690345153Sdim  }
2691345153Sdim  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2692345153Sdim}
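
// Illustrative sketch (not part of the runtime): the step update above yields
// a truncated binary exponential sequence. With the x86 default
// max_backoff = 4096 (a power of two), successive calls see
// step = 1, 3, 7, 15, ..., 4095, after which the mask (max_backoff - 1)
// keeps it pinned at 4095:
//
//   kmp_uint32 step = 1;
//   for (int i = 0; i < 14; ++i)
//     step = (step << 1 | 1) & (4096 - 1); // 3, 7, 15, ..., 4095, 4095, ...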
2693345153Sdim
2694345153Sdim#if KMP_USE_DYNAMIC_LOCK
2695345153Sdim
2696345153Sdim// Direct lock initializers. A direct lock initializer simply writes a tag to
2697345153Sdim// the low 8 bits of the lock word.
2698345153Sdimstatic void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2699345153Sdim                                   kmp_dyna_lockseq_t seq) {
2700345153Sdim  TCW_4(*lck, KMP_GET_D_TAG(seq));
2701345153Sdim  KA_TRACE(
2702345153Sdim      20,
2703345153Sdim      ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2704345153Sdim}
2705345153Sdim
2706345153Sdim#if KMP_USE_TSX
2707345153Sdim
2708345153Sdim// HLE lock functions - imported from the testbed runtime.
2709345153Sdim#define HLE_ACQUIRE ".byte 0xf2;"
2710345153Sdim#define HLE_RELEASE ".byte 0xf3;"
2711345153Sdim
2712345153Sdimstatic inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2713345153Sdim  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2714345153Sdim  return v;
2715345153Sdim}
2716345153Sdim
2717345153Sdimstatic void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2718345153Sdim
2719345153Sdimstatic void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2720345153Sdim  TCW_4(*lck, 0);
2721345153Sdim}
2722345153Sdim
2723345153Sdimstatic void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2724345153Sdim  // Use gtid for KMP_LOCK_BUSY if necessary
2725345153Sdim  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2726345153Sdim    int delay = 1;
2727345153Sdim    do {
2728345153Sdim      while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2729345153Sdim        for (int i = delay; i != 0; --i)
2730345153Sdim          KMP_CPU_PAUSE();
2731345153Sdim        delay = ((delay << 1) | 1) & 7;
2732345153Sdim      }
2733345153Sdim    } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2734345153Sdim  }
2735345153Sdim}
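
// Illustrative note (not part of the runtime): the delay above follows the
// same truncated exponential pattern as __kmp_spin_backoff, but capped by the
// mask 7, so the pause count grows 1, 3, 7 and then stays at 7 while the lock
// remains busy.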
2736345153Sdim
2737345153Sdimstatic void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2738345153Sdim                                               kmp_int32 gtid) {
2739345153Sdim  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2740345153Sdim}
2741345153Sdim
2742345153Sdimstatic int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2743345153Sdim  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2744345153Sdim                   : "=m"(*lck)
2745345153Sdim                   : "r"(KMP_LOCK_FREE(hle))
2746345153Sdim                   : "memory");
2747345153Sdim  return KMP_LOCK_RELEASED;
2748345153Sdim}
2749345153Sdim
2750345153Sdimstatic int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2751345153Sdim                                              kmp_int32 gtid) {
2752345153Sdim  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2753345153Sdim}
2754345153Sdim
2755345153Sdimstatic int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2756345153Sdim  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2757345153Sdim}
2758345153Sdim
2759345153Sdimstatic int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2760345153Sdim                                           kmp_int32 gtid) {
2761345153Sdim  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2762345153Sdim}
2763345153Sdim
2764345153Sdimstatic void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) {
2765345153Sdim  __kmp_init_queuing_lock(lck);
2766345153Sdim}
2767345153Sdim
2768345153Sdimstatic void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) {
2769345153Sdim  __kmp_destroy_queuing_lock(lck);
2770345153Sdim}
2771345153Sdim
2772345153Sdimstatic void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) {
2773345153Sdim  __kmp_destroy_queuing_lock_with_checks(lck);
2774345153Sdim}
2775345153Sdim
2776345153Sdimstatic void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2777345153Sdim  unsigned retries = 3, status;
2778345153Sdim  do {
2779345153Sdim    status = _xbegin();
2780345153Sdim    if (status == _XBEGIN_STARTED) {
2781345153Sdim      if (__kmp_is_unlocked_queuing_lock(lck))
2782345153Sdim        return;
2783345153Sdim      _xabort(0xff);
2784345153Sdim    }
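    // An explicit abort with code 0xff means the speculation found the lock
    // held; in that case wait below until it is free before retrying, since
    // restarting the transaction immediately would only abort again.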
2785345153Sdim    if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2786345153Sdim      // Wait until lock becomes free
2787353358Sdim      while (!__kmp_is_unlocked_queuing_lock(lck)) {
2788353358Sdim        KMP_YIELD(TRUE);
2789353358Sdim      }
2790345153Sdim    } else if (!(status & _XABORT_RETRY))
2791345153Sdim      break;
2792345153Sdim  } while (retries--);
2793345153Sdim
2794345153Sdim  // Fall-back non-speculative lock (xchg)
2795345153Sdim  __kmp_acquire_queuing_lock(lck, gtid);
2796345153Sdim}
2797345153Sdim
2798345153Sdimstatic void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2799345153Sdim                                               kmp_int32 gtid) {
2800345153Sdim  __kmp_acquire_rtm_lock(lck, gtid);
2801345153Sdim}
2802345153Sdim
2803345153Sdimstatic int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2804345153Sdim  if (__kmp_is_unlocked_queuing_lock(lck)) {
2805345153Sdim    // Releasing from speculation
2806345153Sdim    _xend();
2807345153Sdim  } else {
2808345153Sdim    // Releasing from a real lock
2809345153Sdim    __kmp_release_queuing_lock(lck, gtid);
2810345153Sdim  }
2811345153Sdim  return KMP_LOCK_RELEASED;
2812345153Sdim}
2813345153Sdim
2814345153Sdimstatic int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2815345153Sdim                                              kmp_int32 gtid) {
2816345153Sdim  return __kmp_release_rtm_lock(lck, gtid);
2817345153Sdim}
2818345153Sdim
2819345153Sdimstatic int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
2820345153Sdim  unsigned retries = 3, status;
2821345153Sdim  do {
2822345153Sdim    status = _xbegin();
2823345153Sdim    if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2824345153Sdim      return 1;
2825345153Sdim    }
2826345153Sdim    if (!(status & _XABORT_RETRY))
2827345153Sdim      break;
2828345153Sdim  } while (retries--);
2829345153Sdim
2830345153Sdim  return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0;
2831345153Sdim}
2832345153Sdim
2833345153Sdimstatic int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
2834345153Sdim                                           kmp_int32 gtid) {
2835345153Sdim  return __kmp_test_rtm_lock(lck, gtid);
2836345153Sdim}
2837345153Sdim
2838345153Sdim#endif // KMP_USE_TSX
2839345153Sdim
2840345153Sdim// Entry functions for indirect locks (first element of direct lock jump tables)
2841345153Sdimstatic void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2842345153Sdim                                     kmp_dyna_lockseq_t tag);
2843345153Sdimstatic void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2844345153Sdimstatic int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2845345153Sdimstatic int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2846345153Sdimstatic int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2847345153Sdimstatic int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2848345153Sdim                                               kmp_int32);
2849345153Sdimstatic int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2850345153Sdim                                                 kmp_int32);
2851345153Sdimstatic int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2852345153Sdim                                                kmp_int32);
2853345153Sdim
2854345153Sdim// Lock function definitions for the union parameter type
2855345153Sdim#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2856345153Sdim
2857345153Sdim#define expand1(lk, op)                                                        \
2858345153Sdim  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
2859345153Sdim    __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
2860345153Sdim  }
2861345153Sdim#define expand2(lk, op)                                                        \
2862345153Sdim  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
2863345153Sdim                                        kmp_int32 gtid) {                      \
2864345153Sdim    return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
2865345153Sdim  }
2866345153Sdim#define expand3(lk, op)                                                        \
2867345153Sdim  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
2868345153Sdim                                            kmp_lock_flags_t flags) {          \
2869345153Sdim    __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
2870345153Sdim  }
2871345153Sdim#define expand4(lk, op)                                                        \
2872345153Sdim  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
2873345153Sdim                                               const ident_t *loc) {           \
2874345153Sdim    __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
2875345153Sdim  }
2876345153Sdim
2877345153SdimKMP_FOREACH_LOCK_KIND(expand1, init)
2878345153SdimKMP_FOREACH_LOCK_KIND(expand1, init_nested)
2879345153SdimKMP_FOREACH_LOCK_KIND(expand1, destroy)
2880345153SdimKMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2881345153SdimKMP_FOREACH_LOCK_KIND(expand2, acquire)
2882345153SdimKMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2883345153SdimKMP_FOREACH_LOCK_KIND(expand2, release)
2884345153SdimKMP_FOREACH_LOCK_KIND(expand2, release_nested)
2885345153SdimKMP_FOREACH_LOCK_KIND(expand2, test)
2886345153SdimKMP_FOREACH_LOCK_KIND(expand2, test_nested)
2887345153SdimKMP_FOREACH_LOCK_KIND(expand3, )
2888345153SdimKMP_FOREACH_LOCK_KIND(expand4, )
2889345153Sdim
2890345153Sdim#undef expand1
2891345153Sdim#undef expand2
2892345153Sdim#undef expand3
2893345153Sdim#undef expand4
2894345153Sdim
2895345153Sdim// Jump tables for the direct lock functions
2896345153Sdim// Only fill in the odd entries, that avoids the need to shift out the low bit
2897345153Sdim
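// Illustrative sketch (not part of the runtime): a direct lock word stores an
// odd tag in its low bits (the KMP_GET_D_TAG value written by
// __kmp_init_direct_lock above), so the tag can index these tables directly;
// slot 0 holds the indirect-lock entry functions. A minimal dispatch, assuming
// the KMP_EXTRACT_D_TAG helper from kmp_lock.h:
//
//   int set_any_direct_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
//     return __kmp_direct_set[KMP_EXTRACT_D_TAG(lck)](lck, gtid);
//   }
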
2898345153Sdim// init functions
2899345153Sdim#define expand(l, op) 0, __kmp_init_direct_lock,
2900345153Sdimvoid (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2901345153Sdim    __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2902345153Sdim#undef expand
2903345153Sdim
2904345153Sdim// destroy functions
2905345153Sdim#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2906345153Sdimstatic void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2907345153Sdim    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2908345153Sdim#undef expand
2909345153Sdim#define expand(l, op)                                                          \
2910345153Sdim  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2911345153Sdimstatic void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2912345153Sdim    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2913345153Sdim#undef expand
2914345153Sdim
2915345153Sdim// set/acquire functions
2916345153Sdim#define expand(l, op)                                                          \
2917345153Sdim  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2918345153Sdimstatic int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2919345153Sdim    __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2920345153Sdim#undef expand
2921345153Sdim#define expand(l, op)                                                          \
2922345153Sdim  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2923345153Sdimstatic int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2924345153Sdim    __kmp_set_indirect_lock_with_checks, 0,
2925345153Sdim    KMP_FOREACH_D_LOCK(expand, acquire)};
2926345153Sdim#undef expand
2927345153Sdim
2928345153Sdim// unset/release and test functions
2929345153Sdim#define expand(l, op)                                                          \
2930345153Sdim  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2931345153Sdimstatic int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
2932345153Sdim    __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
2933345153Sdimstatic int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
2934345153Sdim    __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
2935345153Sdim#undef expand
2936345153Sdim#define expand(l, op)                                                          \
2937345153Sdim  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
2938345153Sdimstatic int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2939345153Sdim    __kmp_unset_indirect_lock_with_checks, 0,
2940345153Sdim    KMP_FOREACH_D_LOCK(expand, release)};
2941345153Sdimstatic int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
2942345153Sdim    __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
2943345153Sdim#undef expand
2944345153Sdim
2945345153Sdim// Exposes only one set of jump tables (*lock or *lock_with_checks).
2946360784Sdimvoid (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
2947360784Sdimint (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
2948360784Sdimint (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
2949360784Sdimint (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
2950345153Sdim
2951345153Sdim// Jump tables for the indirect lock functions
2952345153Sdim#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
2953345153Sdimvoid (*__kmp_indirect_init[])(kmp_user_lock_p) = {
2954345153Sdim    KMP_FOREACH_I_LOCK(expand, init)};
2955345153Sdim#undef expand
2956345153Sdim
2957345153Sdim#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
2958345153Sdimstatic void (*indirect_destroy[])(kmp_user_lock_p) = {
2959345153Sdim    KMP_FOREACH_I_LOCK(expand, destroy)};
2960345153Sdim#undef expand
2961345153Sdim#define expand(l, op)                                                          \
2962345153Sdim  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
2963345153Sdimstatic void (*indirect_destroy_check[])(kmp_user_lock_p) = {
2964345153Sdim    KMP_FOREACH_I_LOCK(expand, destroy)};
2965345153Sdim#undef expand
2966345153Sdim
2967345153Sdim// set/acquire functions
2968345153Sdim#define expand(l, op)                                                          \
2969345153Sdim  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
2970345153Sdimstatic int (*indirect_set[])(kmp_user_lock_p,
2971345153Sdim                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
2972345153Sdim#undef expand
2973345153Sdim#define expand(l, op)                                                          \
2974345153Sdim  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
2975345153Sdimstatic int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
2976345153Sdim    KMP_FOREACH_I_LOCK(expand, acquire)};
2977345153Sdim#undef expand
2978345153Sdim
2979345153Sdim// unset/release and test functions
2980345153Sdim#define expand(l, op)                                                          \
2981345153Sdim  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
2982345153Sdimstatic int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
2983345153Sdim    KMP_FOREACH_I_LOCK(expand, release)};
2984345153Sdimstatic int (*indirect_test[])(kmp_user_lock_p,
2985345153Sdim                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
2986345153Sdim#undef expand
2987345153Sdim#define expand(l, op)                                                          \
2988345153Sdim  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
2989345153Sdimstatic int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
2990345153Sdim    KMP_FOREACH_I_LOCK(expand, release)};
2991345153Sdimstatic int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
2992345153Sdim    KMP_FOREACH_I_LOCK(expand, test)};
2993345153Sdim#undef expand
2994345153Sdim
2995345153Sdim// Exposes only one set of jump tables (*lock or *lock_with_checks).
2996360784Sdimvoid (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
2997360784Sdimint (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
2998360784Sdimint (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
2999360784Sdimint (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3000345153Sdim
3001345153Sdim// Lock index table.
3002345153Sdimkmp_indirect_lock_table_t __kmp_i_lock_table;
3003345153Sdim
3004345153Sdim// Size of indirect locks.
3005345153Sdimstatic kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3006345153Sdim
3007345153Sdim// Jump tables for lock accessors/modifiers.
3008345153Sdimvoid (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3009345153Sdim                                                     const ident_t *) = {0};
3010345153Sdimvoid (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3011345153Sdim                                                  kmp_lock_flags_t) = {0};
3012345153Sdimconst ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3013345153Sdim    kmp_user_lock_p) = {0};
3014345153Sdimkmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3015345153Sdim    kmp_user_lock_p) = {0};
3016345153Sdim
3017345153Sdim// Use different lock pools for different lock types.
3018345153Sdimstatic kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3019345153Sdim
3020345153Sdim// User lock allocator for dynamically dispatched indirect locks. Every entry of
3021345153Sdim// the indirect lock table holds the address and type of the allocated indirect
3022345153Sdim// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3023345153Sdim// full. A destroyed indirect lock object is returned to the reusable pool of
3024345153Sdim// locks, unique to each lock type.
3025345153Sdimkmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3026345153Sdim                                                  kmp_int32 gtid,
3027345153Sdim                                                  kmp_indirect_locktag_t tag) {
3028345153Sdim  kmp_indirect_lock_t *lck;
3029345153Sdim  kmp_lock_index_t idx;
3030345153Sdim
3031345153Sdim  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3032345153Sdim
3033345153Sdim  if (__kmp_indirect_lock_pool[tag] != NULL) {
3034345153Sdim    // Reuse the allocated and destroyed lock object
3035345153Sdim    lck = __kmp_indirect_lock_pool[tag];
3036345153Sdim    if (OMP_LOCK_T_SIZE < sizeof(void *))
3037345153Sdim      idx = lck->lock->pool.index;
3038345153Sdim    __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3039345153Sdim    KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3040345153Sdim                  lck));
3041345153Sdim  } else {
3042345153Sdim    idx = __kmp_i_lock_table.next;
3043345153Sdim    // Check capacity and double the size if it is full
3044345153Sdim    if (idx == __kmp_i_lock_table.size) {
3045345153Sdim      // Double up the space for block pointers
3046345153Sdim      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3047345153Sdim      kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3048345153Sdim          2 * row * sizeof(kmp_indirect_lock_t *));
3049345153Sdim      KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3050345153Sdim                 row * sizeof(kmp_indirect_lock_t *));
3051345153Sdim      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3052345153Sdim      __kmp_i_lock_table.table = new_table;
3053345153Sdim      __kmp_free(old_table);
3054345153Sdim      // Allocate new objects in the new blocks
3055345153Sdim      for (int i = row; i < 2 * row; ++i)
3056345153Sdim        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3057345153Sdim            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3058345153Sdim      __kmp_i_lock_table.size = 2 * idx;
3059345153Sdim    }
3060345153Sdim    __kmp_i_lock_table.next++;
3061345153Sdim    lck = KMP_GET_I_LOCK(idx);
3062345153Sdim    // Allocate a new base lock object
3063345153Sdim    lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3064345153Sdim    KA_TRACE(20,
3065345153Sdim             ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3066345153Sdim  }
3067345153Sdim
3068345153Sdim  __kmp_release_lock(&__kmp_global_lock, gtid);
3069345153Sdim
3070345153Sdim  lck->type = tag;
3071345153Sdim
3072345153Sdim  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3073345153Sdim    *((kmp_lock_index_t *)user_lock) = idx
3074345153Sdim                                       << 1; // indirect lock word must be even
3075345153Sdim  } else {
3076345153Sdim    *((kmp_indirect_lock_t **)user_lock) = lck;
3077345153Sdim  }
3078345153Sdim
3079345153Sdim  return lck;
3080345153Sdim}
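
// Illustrative sketch (not part of the runtime): the growth step above doubles
// the row-pointer array and eagerly allocates the new rows, so the table
// capacity progresses KMP_I_LOCK_CHUNK, 2*KMP_I_LOCK_CHUNK, 4*KMP_I_LOCK_CHUNK,
// ... A lock index then maps to storage roughly as KMP_GET_I_LOCK does:
//
//   kmp_indirect_lock_t *lock_at(kmp_lock_index_t idx) {
//     return __kmp_i_lock_table.table[idx / KMP_I_LOCK_CHUNK] +
//            idx % KMP_I_LOCK_CHUNK;
//   }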
3081345153Sdim
3082345153Sdim// User lock lookup for dynamically dispatched locks.
3083345153Sdimstatic __forceinline kmp_indirect_lock_t *
3084345153Sdim__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3085345153Sdim  if (__kmp_env_consistency_check) {
3086345153Sdim    kmp_indirect_lock_t *lck = NULL;
3087345153Sdim    if (user_lock == NULL) {
3088345153Sdim      KMP_FATAL(LockIsUninitialized, func);
3089345153Sdim    }
3090345153Sdim    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3091345153Sdim      kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3092345153Sdim      if (idx >= __kmp_i_lock_table.size) {
3093345153Sdim        KMP_FATAL(LockIsUninitialized, func);
3094345153Sdim      }
3095345153Sdim      lck = KMP_GET_I_LOCK(idx);
3096345153Sdim    } else {
3097345153Sdim      lck = *((kmp_indirect_lock_t **)user_lock);
3098345153Sdim    }
3099345153Sdim    if (lck == NULL) {
3100345153Sdim      KMP_FATAL(LockIsUninitialized, func);
3101345153Sdim    }
3102345153Sdim    return lck;
3103345153Sdim  } else {
3104345153Sdim    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3105345153Sdim      return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3106345153Sdim    } else {
3107345153Sdim      return *((kmp_indirect_lock_t **)user_lock);
3108345153Sdim    }
3109345153Sdim  }
3110345153Sdim}
3111345153Sdim
3112345153Sdimstatic void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3113345153Sdim                                     kmp_dyna_lockseq_t seq) {
3114345153Sdim#if KMP_USE_ADAPTIVE_LOCKS
3115345153Sdim  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3116345153Sdim    KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3117345153Sdim    seq = lockseq_queuing;
3118345153Sdim  }
3119345153Sdim#endif
3120345153Sdim#if KMP_USE_TSX
3121345153Sdim  if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3122345153Sdim    seq = lockseq_queuing;
3123345153Sdim  }
3124345153Sdim#endif
3125345153Sdim  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3126345153Sdim  kmp_indirect_lock_t *l =
3127345153Sdim      __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3128345153Sdim  KMP_I_LOCK_FUNC(l, init)(l->lock);
3129345153Sdim  KA_TRACE(
3130345153Sdim      20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3131345153Sdim           seq));
3132345153Sdim}
3133345153Sdim
3134345153Sdimstatic void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3135345153Sdim  kmp_uint32 gtid = __kmp_entry_gtid();
3136345153Sdim  kmp_indirect_lock_t *l =
3137345153Sdim      __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3138345153Sdim  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3139345153Sdim  kmp_indirect_locktag_t tag = l->type;
3140345153Sdim
3141345153Sdim  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3142345153Sdim
3143345153Sdim  // Use the base lock's space to keep the pool chain.
3144345153Sdim  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3145345153Sdim  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3146345153Sdim    l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3147345153Sdim  }
3148345153Sdim  __kmp_indirect_lock_pool[tag] = l;
3149345153Sdim
3150345153Sdim  __kmp_release_lock(&__kmp_global_lock, gtid);
3151345153Sdim}
3152345153Sdim
3153345153Sdimstatic int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3154345153Sdim  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3155345153Sdim  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3156345153Sdim}
3157345153Sdim
3158345153Sdimstatic int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3159345153Sdim  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3160345153Sdim  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3161345153Sdim}
3162345153Sdim
3163345153Sdimstatic int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3164345153Sdim  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3165345153Sdim  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3166345153Sdim}
3167345153Sdim
3168345153Sdimstatic int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3169345153Sdim                                               kmp_int32 gtid) {
3170345153Sdim  kmp_indirect_lock_t *l =
3171345153Sdim      __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3172345153Sdim  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3173345153Sdim}
3174345153Sdim
3175345153Sdimstatic int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3176345153Sdim                                                 kmp_int32 gtid) {
3177345153Sdim  kmp_indirect_lock_t *l =
3178345153Sdim      __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3179345153Sdim  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3180345153Sdim}
3181345153Sdim
3182345153Sdimstatic int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3183345153Sdim                                                kmp_int32 gtid) {
3184345153Sdim  kmp_indirect_lock_t *l =
3185345153Sdim      __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3186345153Sdim  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3187345153Sdim}
3188345153Sdim
3189345153Sdimkmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3190345153Sdim
3191345153Sdim// This is used only in kmp_error.cpp when consistency checking is on.
3192345153Sdimkmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3193345153Sdim  switch (seq) {
3194345153Sdim  case lockseq_tas:
3195345153Sdim  case lockseq_nested_tas:
3196345153Sdim    return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3197345153Sdim#if KMP_USE_FUTEX
3198345153Sdim  case lockseq_futex:
3199345153Sdim  case lockseq_nested_futex:
3200345153Sdim    return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3201345153Sdim#endif
3202345153Sdim  case lockseq_ticket:
3203345153Sdim  case lockseq_nested_ticket:
3204345153Sdim    return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3205345153Sdim  case lockseq_queuing:
3206345153Sdim  case lockseq_nested_queuing:
3207345153Sdim#if KMP_USE_ADAPTIVE_LOCKS
3208345153Sdim  case lockseq_adaptive:
3209345153Sdim#endif
3210345153Sdim    return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3211345153Sdim  case lockseq_drdpa:
3212345153Sdim  case lockseq_nested_drdpa:
3213345153Sdim    return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3214345153Sdim  default:
3215345153Sdim    return 0;
3216345153Sdim  }
3217345153Sdim}
3218345153Sdim
3219345153Sdim// Initializes data for dynamic user locks.
3220345153Sdimvoid __kmp_init_dynamic_user_locks() {
3221345153Sdim  // Initialize jump table for the lock functions
3222345153Sdim  if (__kmp_env_consistency_check) {
3223345153Sdim    __kmp_direct_set = direct_set_check;
3224345153Sdim    __kmp_direct_unset = direct_unset_check;
3225345153Sdim    __kmp_direct_test = direct_test_check;
3226345153Sdim    __kmp_direct_destroy = direct_destroy_check;
3227345153Sdim    __kmp_indirect_set = indirect_set_check;
3228345153Sdim    __kmp_indirect_unset = indirect_unset_check;
3229345153Sdim    __kmp_indirect_test = indirect_test_check;
3230345153Sdim    __kmp_indirect_destroy = indirect_destroy_check;
3231345153Sdim  } else {
3232345153Sdim    __kmp_direct_set = direct_set;
3233345153Sdim    __kmp_direct_unset = direct_unset;
3234345153Sdim    __kmp_direct_test = direct_test;
3235345153Sdim    __kmp_direct_destroy = direct_destroy;
3236345153Sdim    __kmp_indirect_set = indirect_set;
3237345153Sdim    __kmp_indirect_unset = indirect_unset;
3238345153Sdim    __kmp_indirect_test = indirect_test;
3239345153Sdim    __kmp_indirect_destroy = indirect_destroy;
3240345153Sdim  }
3241345153Sdim  // If the user locks have already been initialized, then return. Allow the
3242345153Sdim  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3243345153Sdim  // new lock tables if they have already been allocated.
3244345153Sdim  if (__kmp_init_user_locks)
3245345153Sdim    return;
3246345153Sdim
3247345153Sdim  // Initialize lock index table
3248345153Sdim  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3249345153Sdim  __kmp_i_lock_table.table =
3250345153Sdim      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3251345153Sdim  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3252345153Sdim      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3253345153Sdim  __kmp_i_lock_table.next = 0;
3254345153Sdim
3255345153Sdim  // Indirect lock size
3256345153Sdim  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3257345153Sdim  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3258345153Sdim#if KMP_USE_ADAPTIVE_LOCKS
3259345153Sdim  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3260345153Sdim#endif
3261345153Sdim  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3262345153Sdim#if KMP_USE_TSX
3263345153Sdim  __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3264345153Sdim#endif
3265345153Sdim  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3266345153Sdim#if KMP_USE_FUTEX
3267345153Sdim  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3268345153Sdim#endif
3269345153Sdim  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3270345153Sdim  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3271345153Sdim  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3272345153Sdim
3273345153Sdim// Initialize lock accessor/modifier
3274345153Sdim#define fill_jumps(table, expand, sep)                                         \
3275345153Sdim  {                                                                            \
3276345153Sdim    table[locktag##sep##ticket] = expand(ticket);                              \
3277345153Sdim    table[locktag##sep##queuing] = expand(queuing);                            \
3278345153Sdim    table[locktag##sep##drdpa] = expand(drdpa);                                \
3279345153Sdim  }
3280345153Sdim
3281345153Sdim#if KMP_USE_ADAPTIVE_LOCKS
3282345153Sdim#define fill_table(table, expand)                                              \
3283345153Sdim  {                                                                            \
3284345153Sdim    fill_jumps(table, expand, _);                                              \
3285345153Sdim    table[locktag_adaptive] = expand(queuing);                                 \
3286345153Sdim    fill_jumps(table, expand, _nested_);                                       \
3287345153Sdim  }
3288345153Sdim#else
3289345153Sdim#define fill_table(table, expand)                                              \
3290345153Sdim  {                                                                            \
3291345153Sdim    fill_jumps(table, expand, _);                                              \
3292345153Sdim    fill_jumps(table, expand, _nested_);                                       \
3293345153Sdim  }
3294345153Sdim#endif // KMP_USE_ADAPTIVE_LOCKS
3295345153Sdim
3296345153Sdim#define expand(l)                                                              \
3297345153Sdim  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3298345153Sdim  fill_table(__kmp_indirect_set_location, expand);
3299345153Sdim#undef expand
3300345153Sdim#define expand(l)                                                              \
3301345153Sdim  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3302345153Sdim  fill_table(__kmp_indirect_set_flags, expand);
3303345153Sdim#undef expand
3304345153Sdim#define expand(l)                                                              \
3305345153Sdim  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3306345153Sdim  fill_table(__kmp_indirect_get_location, expand);
3307345153Sdim#undef expand
3308345153Sdim#define expand(l)                                                              \
3309345153Sdim  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3310345153Sdim  fill_table(__kmp_indirect_get_flags, expand);
3311345153Sdim#undef expand
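
// Illustrative sketch (not part of the runtime): each fill_table above reduces
// to one assignment per lock tag; for example, the get_location fill ends up
// roughly as
//
//   __kmp_indirect_get_location[locktag_ticket] =
//       (const ident_t *(*)(kmp_user_lock_p))__kmp_get_ticket_lock_location;
//
// and likewise for the queuing, drdpa, and nested variants.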
3312345153Sdim
3313345153Sdim  __kmp_init_user_locks = TRUE;
3314345153Sdim}
3315345153Sdim
3316345153Sdim// Clean up the lock table.
3317345153Sdimvoid __kmp_cleanup_indirect_user_locks() {
3318345153Sdim  kmp_lock_index_t i;
3319345153Sdim  int k;
3320345153Sdim
3321345153Sdim  // Clean up locks in the pools first (they were already destroyed before going
3322345153Sdim  // into the pools).
3323345153Sdim  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3324345153Sdim    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3325345153Sdim    while (l != NULL) {
3326345153Sdim      kmp_indirect_lock_t *ll = l;
3327345153Sdim      l = (kmp_indirect_lock_t *)l->lock->pool.next;
3328345153Sdim      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3329345153Sdim                    ll));
3330345153Sdim      __kmp_free(ll->lock);
3331345153Sdim      ll->lock = NULL;
3332345153Sdim    }
3333345153Sdim    __kmp_indirect_lock_pool[k] = NULL;
3334345153Sdim  }
3335345153Sdim  // Clean up the remaining undestroyed locks.
3336345153Sdim  for (i = 0; i < __kmp_i_lock_table.next; i++) {
3337345153Sdim    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3338345153Sdim    if (l->lock != NULL) {
3339345153Sdim      // Locks not destroyed explicitly need to be destroyed here.
3340345153Sdim      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3341345153Sdim      KA_TRACE(
3342345153Sdim          20,
3343345153Sdim          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3344345153Sdim           l));
3345345153Sdim      __kmp_free(l->lock);
3346345153Sdim    }
3347345153Sdim  }
3348345153Sdim  // Free the table
3349345153Sdim  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3350345153Sdim    __kmp_free(__kmp_i_lock_table.table[i]);
3351345153Sdim  __kmp_free(__kmp_i_lock_table.table);
3352345153Sdim
3353345153Sdim  __kmp_init_user_locks = FALSE;
3354345153Sdim}
3355345153Sdim
3356345153Sdimenum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3357345153Sdimint __kmp_num_locks_in_block = 1; // FIXME - tune this value
3358345153Sdim
3359345153Sdim#else // KMP_USE_DYNAMIC_LOCK
3360345153Sdim
3361345153Sdimstatic void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3362345153Sdim  __kmp_init_tas_lock(lck);
3363345153Sdim}
3364345153Sdim
3365345153Sdimstatic void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3366345153Sdim  __kmp_init_nested_tas_lock(lck);
3367345153Sdim}
3368345153Sdim
3369345153Sdim#if KMP_USE_FUTEX
3370345153Sdimstatic void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3371345153Sdim  __kmp_init_futex_lock(lck);
3372345153Sdim}
3373345153Sdim
3374345153Sdimstatic void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3375345153Sdim  __kmp_init_nested_futex_lock(lck);
3376345153Sdim}
3377345153Sdim#endif
3378345153Sdim
3379345153Sdimstatic int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3380345153Sdim  return lck == lck->lk.self;
3381345153Sdim}
3382345153Sdim
3383345153Sdimstatic void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3384345153Sdim  __kmp_init_ticket_lock(lck);
3385345153Sdim}
3386345153Sdim
3387345153Sdimstatic void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3388345153Sdim  __kmp_init_nested_ticket_lock(lck);
3389345153Sdim}
3390345153Sdim
3391345153Sdimstatic int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3392345153Sdim  return lck == lck->lk.initialized;
3393345153Sdim}
3394345153Sdim
3395345153Sdimstatic void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3396345153Sdim  __kmp_init_queuing_lock(lck);
3397345153Sdim}
3398345153Sdim
3399345153Sdimstatic void
3400345153Sdim__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3401345153Sdim  __kmp_init_nested_queuing_lock(lck);
3402345153Sdim}
3403345153Sdim
3404345153Sdim#if KMP_USE_ADAPTIVE_LOCKS
3405345153Sdimstatic void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3406345153Sdim  __kmp_init_adaptive_lock(lck);
3407345153Sdim}
3408345153Sdim#endif
3409345153Sdim
3410345153Sdimstatic int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3411345153Sdim  return lck == lck->lk.initialized;
3412345153Sdim}
3413345153Sdim
3414345153Sdimstatic void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3415345153Sdim  __kmp_init_drdpa_lock(lck);
3416345153Sdim}
3417345153Sdim
3418345153Sdimstatic void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3419345153Sdim  __kmp_init_nested_drdpa_lock(lck);
3420345153Sdim}
3421345153Sdim
3422345153Sdim/* user locks
3423345153Sdim * They are implemented as a table of function pointers which are set to the
3424345153Sdim * lock functions of the appropriate kind, once that has been determined. */
3425345153Sdim
3426345153Sdimenum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3427345153Sdim
3428345153Sdimsize_t __kmp_base_user_lock_size = 0;
3429345153Sdimsize_t __kmp_user_lock_size = 0;
3430345153Sdim
3431345153Sdimkmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3432345153Sdimint (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3433345153Sdim                                            kmp_int32 gtid) = NULL;
3434345153Sdim
3435345153Sdimint (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3436345153Sdim                                         kmp_int32 gtid) = NULL;
3437345153Sdimint (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3438345153Sdim                                            kmp_int32 gtid) = NULL;
3439345153Sdimvoid (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3440345153Sdimvoid (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3441345153Sdimvoid (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3442345153Sdimint (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3443345153Sdim                                                   kmp_int32 gtid) = NULL;
3444345153Sdim
3445345153Sdimint (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3446345153Sdim                                                kmp_int32 gtid) = NULL;
3447345153Sdimint (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3448345153Sdim                                                   kmp_int32 gtid) = NULL;
3449345153Sdimvoid (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3450345153Sdimvoid (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3451345153Sdim
3452345153Sdimint (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3453345153Sdimconst ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3454345153Sdimvoid (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3455345153Sdim                                      const ident_t *loc) = NULL;
3456345153Sdimkmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3457345153Sdimvoid (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3458345153Sdim                                   kmp_lock_flags_t flags) = NULL;
3459345153Sdim
void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

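// Element [0] of every lock table is reserved: when the table is grown, it
// stores the pointer to the previous (smaller) table, so the old tables form a
// chain that is freed at shutdown (see __kmp_lock_table_insert and
// __kmp_cleanup_user_locks). That is why "used" starts at 1.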
kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value
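// A value of 1 effectively disables block allocation: __kmp_user_lock_allocate
// then allocates each lock individually instead of carving locks out of a
// shared block (see the cutoff check there).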

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
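    // Doubling the capacity keeps the amortized cost of growing the table
    // constant per inserted lock.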
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables are organized into a list and can be
    // freed when the library is shutting down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}
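// Note on __kmp_lock_table_insert: after the table has grown from 1024 to
// 2048 entries, the current table's element [0] points to the old 1024-entry
// table, whose own element [0] is NULL (the initial table pointer).
// __kmp_cleanup_user_locks walks exactly this chain when freeing the tables.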

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
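    // Buffer layout: __kmp_num_locks_in_block lock slots of
    // __kmp_user_lock_size bytes each, followed by the kmp_block_of_locks
    // descriptor itself at offset space_for_locks.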
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
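  // Hand out the next unused slot of the current block; last_index counts the
  // slots already given out from it.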
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    // ANNOTATION: Found no good way to express the synchronization
    // between allocation and usage, so ignore the allocation.
    ANNOTATE_IGNORE_WRITES_BEGIN();
    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }
    ANNOTATE_IGNORE_WRITES_END();

    // Insert the lock in the table so that it can be freed in __kmp_cleanup,
    // and so the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up a lock from the pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
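  // If the user's lock object is too small to hold a pointer, publish the
  // lock-table index in it instead of the lock address; __kmp_lookup_user_lock
  // performs the reverse translation.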
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up

  return lck;
}

// Put the lock's memory into the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

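  // Push the lock onto the head of the global LIFO free list so it can be
  // handed back out by __kmp_user_lock_allocate.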
  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset the lock pool. Don't worry about the locks in the pool; we will free
  // them when iterating through the lock table (it includes all the locks,
  // dead or alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through the lock table and free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type is
  // currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized.  When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
  // for "pool" objects on the free list.  This must happen as the "location"
  // field of real user locks overlaps the "index" field of "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop.  However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete the lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free the buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK