/*
 * kmp_lock.cpp -- lock-related functions
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stddef.h>
#include <atomic>

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#include "tsan_annotations.h"

#if KMP_USE_FUTEX
#include <sys/syscall.h>
#include <unistd.h>
// We should really include <futex.h>, but that causes compatibility problems on
// different Linux* OS distributions that either require that you include (or
// break when you try to include) <pci/types.h>. Since all we need is the two
// macros below (which are part of the kernel ABI, so can't change) we just
// define the constants here and don't include <futex.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif
#endif

/* Implement spin locks for internal library use. */
/* The algorithm implemented is Lamport's bakery lock [1974]. */

void __kmp_validate_locks(void) {
  int i;
  kmp_uint32 x, y;

  /* Check to make sure unsigned arithmetic wraps properly */
  x = ~((kmp_uint32)0) - 2;
  y = x - 2;

  for (i = 0; i < 8; ++i, ++x, ++y) {
    kmp_uint32 z = (x - y);
    KMP_ASSERT(z == 2);
  }

  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
}

/* ------------------------------------------------------------------------ */
/* test and set locks */

// For the non-nested locks, we can only assume that the first 4 bytes were
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
//
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
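// Illustrative sketch (not compiled into the runtime): the test-and-set lock
// implemented below stores 0 in its poll word for "free" and gtid + 1 for
// "held by gtid", so an owner query can simply subtract 1 and report -1 for a
// free lock. The standalone fragment models just that convention; the names
// are hypothetical, and the real KMP_LOCK_FREE/BUSY/STRIP macros may fold a
// lock tag into the value on some configurations.
#if 0
#include <atomic>

struct toy_tas_lock {
  std::atomic<int> poll{0}; // 0 == free, gtid + 1 == held by gtid

  int owner() const { return poll.load(std::memory_order_relaxed) - 1; }

  bool try_lock(int gtid) {
    int expected = 0;
    return poll.compare_exchange_strong(expected, gtid + 1,
                                        std::memory_order_acquire);
  }

  void unlock() { poll.store(0, std::memory_order_release); }
};
#endif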
static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
}

static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
  if ((curr != 0) && (curr != gtid + 1))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);

  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return KMP_LOCK_ACQUIRED_FIRST;
  }

  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  kmp_backoff_t backoff = __kmp_spin_backoff_params;
  do {
    __kmp_spin_backoff(&backoff);
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
  KMP_FSYNC_ACQUIRED(lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
  ANNOTATE_TAS_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_tas_lock(lck, gtid);
}

int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
      __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                           kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_tas_lock(lck, gtid);
}
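// Illustrative sketch (not compiled): the acquire path above does a relaxed
// load and only attempts the CAS when the lock looks free, the classic
// "test and test-and-set" refinement that keeps contending threads reading a
// shared cache line instead of bouncing it with failed atomic writes. A
// minimal standalone version of that loop, with hypothetical names and a
// crude exponential pause standing in for __kmp_spin_backoff:
#if 0
#include <atomic>
#include <thread>

inline void toy_tas_acquire(std::atomic<int> &poll, int gtid) {
  const int free_val = 0;
  for (unsigned pause = 1;; pause = pause < 1024 ? pause * 2 : pause) {
    int expected = free_val;
    if (poll.load(std::memory_order_relaxed) == free_val &&
        poll.compare_exchange_weak(expected, gtid + 1,
                                   std::memory_order_acquire))
      return; // lock acquired
    for (unsigned i = 0; i < pause; ++i) // back off before re-probing
      std::this_thread::yield();
  }
}
#endif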
int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_TAS_RELEASED(lck);
  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
      (__kmp_get_tas_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_tas_lock(lck, gtid);
}

void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
  lck->lk.poll = KMP_LOCK_FREE(tas);
}

void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_tas_lock(lck);
}

// nested test and set locks

int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_tas_lock_timed_template(lck, gtid);
    ANNOTATE_TAS_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}
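// Illustrative sketch (not compiled): the nested ("reentrant") variants layer
// a depth counter on top of the plain lock. Re-acquisition by the current
// owner just bumps the depth; the underlying lock is taken only on the first
// acquire and released only when the depth returns to zero. Hypothetical
// standalone shape of that protocol, reusing toy_tas_lock from the sketch
// near the top of the file:
#if 0
struct toy_nested_lock {
  toy_tas_lock base; // from the earlier sketch
  int depth = 0;     // meaningful only while base is held

  void lock(int gtid) {
    if (base.owner() == gtid) {
      ++depth; // recursive acquire by the owner: no atomics needed
    } else {
      while (!base.try_lock(gtid)) { /* spin */ }
      depth = 1;
    }
  }

  bool unlock() { // returns true when the lock is really released
    if (--depth == 0) {
      base.unlock();
      return true;
    }
    return false;
  }
};
#endif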
static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_tas_lock(lck, gtid);
}

int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_tas_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_tas_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}

static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_tas_lock(lck, gtid);
}

int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_tas_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_tas_lock(lck, gtid);
}

void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
  __kmp_destroy_tas_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_tas_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_tas_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* futex locks */

// futex locks are really just test and set locks, with a different method
// of handling contention. They take the same amount of space as test and
// set locks, and are allocated the same way (i.e. use the area allocated by
// the compiler for non-nested locks / allocate nested locks on the heap).
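// Illustrative sketch (not compiled): the futex lock packs two facts into one
// 32-bit word. The owner's gtid + 1 lives in the upper bits ((gtid + 1) << 1)
// and bit 0 records "somebody futex-waited; a wake is required on release".
// Hypothetical helpers showing that layout (the real code goes through the
// KMP_LOCK_BUSY/KMP_LOCK_STRIP macros, which may also fold in a lock tag):
#if 0
#include <cstdint>

constexpr int32_t futex_encode(int32_t gtid) { return (gtid + 1) << 1; }
constexpr bool futex_has_waiters(int32_t poll) { return (poll & 1) != 0; }
constexpr int32_t futex_owner(int32_t poll) { return (poll >> 1) - 1; }

static_assert(futex_owner(futex_encode(5)) == 5, "owner round-trips");
static_assert(!futex_has_waiters(futex_encode(5)), "fresh acquire: no waiters");
static_assert(futex_has_waiters(futex_encode(5) | 1), "bit 0 set by waiters");
#endif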
static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
}

static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  kmp_int32 gtid_code = (gtid + 1) << 1;

  KMP_MB();

#ifdef USE_LOCK_PROFILE
  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
  if ((curr != 0) && (curr != gtid_code))
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  KMP_FSYNC_PREPARE(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  kmp_int32 poll_val;

  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
              &(lck->lk.poll), KMP_LOCK_FREE(futex),
              KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {

    kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
         lck, gtid, poll_val, cond));

    // NOTE: if you try to use the following condition for this branch
    //
    // if ( poll_val & 1 == 0 )
    //
    // Then the 12.0 compiler has a bug where the following block will
    // always be skipped, regardless of the value of the LSB of poll_val.
    if (!cond) {
      // Try to set the lsb in the poll to indicate to the owner
      // thread that they need to wake this thread up.
      if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
                                       poll_val | KMP_LOCK_BUSY(1, futex))) {
        KA_TRACE(
            1000,
            ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
             lck, lck->lk.poll, gtid));
        continue;
      }
      poll_val |= KMP_LOCK_BUSY(1, futex);

      KA_TRACE(1000,
               ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
                lck->lk.poll, gtid));
    }

    KA_TRACE(
        1000,
        ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
         lck, gtid, poll_val));

    kmp_int32 rc;
    if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
                      NULL, 0)) != 0) {
      KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
                      "failed (rc=%d errno=%d)\n",
                      lck, gtid, poll_val, rc, errno));
      continue;
    }

    KA_TRACE(1000,
             ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
              lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and was entered on
    // the OS futex queue. We must now perform a futex wake call when releasing
    // the lock, as we have no idea how many other threads are in the queue.
    gtid_code |= 1;
  }

  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
  ANNOTATE_FUTEX_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  return __kmp_acquire_futex_lock(lck, gtid);
}

int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
                                  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
    KMP_FSYNC_ACQUIRED(lck);
    return TRUE;
  }
  return FALSE;
}

static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                             kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  return __kmp_test_futex_lock(lck, gtid);
}

int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
                  lck, lck->lk.poll, gtid));

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_FUTEX_RELEASED(lck);

  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));

  KA_TRACE(1000,
           ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
            lck, gtid, poll_val));

  if (KMP_LOCK_STRIP(poll_val) & 1) {
    KA_TRACE(1000,
             ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
              lck, gtid));
    syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
            NULL, NULL, 0);
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                  lck->lk.poll, gtid));

  KMP_YIELD_OVERSUB();
  return KMP_LOCK_RELEASED;
}
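// Illustrative sketch (not compiled, Linux-only): the essence of the
// wait/wake protocol above, reduced to the classic three-state futex mutex
// (0 free, 1 held, 2 held with waiters). The runtime's version differs in
// keeping the owner's gtid in the word, but the syscall usage is the same:
// FUTEX_WAIT sleeps only if the word still holds the expected value, and the
// releaser issues FUTEX_WAKE only when a waiter was recorded.
#if 0
#include <atomic>
#include <sys/syscall.h>
#include <unistd.h>

static std::atomic<int> word{0}; // 0 free, 1 held, 2 held + waiters

void toy_futex_lock() {
  int c = 0;
  if (word.compare_exchange_strong(c, 1, std::memory_order_acquire))
    return; // uncontended fast path: no syscall at all
  do {
    // Advertise that a waiter exists (1 -> 2), then sleep on the word.
    if (c == 2 || word.compare_exchange_strong(c, 2))
      syscall(SYS_futex, &word, FUTEX_WAIT, 2, nullptr, nullptr, 0);
    c = 0;
  } while (!word.compare_exchange_strong(c, 2, std::memory_order_acquire));
}

void toy_futex_unlock() {
  if (word.exchange(0, std::memory_order_release) == 2) // waiters recorded?
    syscall(SYS_futex, &word, FUTEX_WAKE, 1, nullptr, nullptr, 0);
}
#endif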
static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
      (__kmp_get_futex_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_futex_lock(lck, gtid);
}

void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
}

void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }

static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
      __kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_futex_lock(lck);
}

// nested futex locks

int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    lck->lk.depth_locked += 1;
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_futex_lock_timed_template(lck, gtid);
    ANNOTATE_FUTEX_ACQUIRED(lck);
    lck->lk.depth_locked = 1;
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_futex_lock(lck, gtid);
}

int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_futex_lock_owner(lck) == gtid) {
    retval = ++lck->lk.depth_locked;
  } else if (!__kmp_test_futex_lock(lck, gtid)) {
    retval = 0;
  } else {
    KMP_MB();
    retval = lck->lk.depth_locked = 1;
  }
  return retval;
}
static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_futex_lock(lck, gtid);
}

int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  KMP_MB();
  if (--(lck->lk.depth_locked) == 0) {
    __kmp_release_futex_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
                                                       kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_futex_lock(lck, gtid);
}

void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
  __kmp_destroy_futex_lock(lck);
  lck->lk.depth_locked = 0;
}

static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";
  if (!__kmp_is_futex_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_futex_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_futex_lock(lck);
}

#endif // KMP_USE_FUTEX

/* ------------------------------------------------------------------------ */
/* ticket (bakery) locks */

static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.owner_id,
                                   std::memory_order_relaxed) -
         1;
}

static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
  return std::atomic_load_explicit(&lck->lk.depth_locked,
                                   std::memory_order_relaxed) != -1;
}

static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
                                   std::memory_order_acquire) == my_ticket;
}
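// Illustrative sketch (not compiled): a ticket lock is two counters. Each
// acquirer atomically takes the next ticket number, then spins until
// now_serving reaches it; release simply advances now_serving. This gives
// FIFO fairness, which a test-and-set lock cannot. Hypothetical standalone
// version of what the template below does:
#if 0
#include <atomic>

struct toy_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};

  void lock() {
    unsigned my = next_ticket.fetch_add(1, std::memory_order_relaxed);
    while (now_serving.load(std::memory_order_acquire) != my) {
      // spin; the runtime uses KMP_WAIT_PTR with __kmp_bakery_check here
    }
  }

  void unlock() { now_serving.fetch_add(1, std::memory_order_release); }
};
#endif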
__forceinline static int
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
                                         kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
      &lck->lk.next_ticket, 1U, std::memory_order_relaxed);

#ifdef USE_LOCK_PROFILE
  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) != my_ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_acquire) == my_ticket) {
    return KMP_LOCK_ACQUIRED_FIRST;
  }
  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
  ANNOTATE_TICKET_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_set_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_ticket_lock(lck, gtid);

  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                             std::memory_order_relaxed);
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                   std::memory_order_relaxed);

  if (std::atomic_load_explicit(&lck->lk.now_serving,
                                std::memory_order_relaxed) == my_ticket) {
    kmp_uint32 next_ticket = my_ticket + 1;
    if (std::atomic_compare_exchange_strong_explicit(
            &lck->lk.next_ticket, &my_ticket, next_ticket,
            std::memory_order_acquire, std::memory_order_acquire)) {
      return TRUE;
    }
  }
  return FALSE;
}
static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                              kmp_int32 gtid) {
  char const *const func = "omp_test_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_ticket_lock(lck, gtid);

  if (retval) {
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
  }
  return retval;
}

int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
                                                  std::memory_order_relaxed) -
                        std::atomic_load_explicit(&lck->lk.now_serving,
                                                  std::memory_order_relaxed);

  ANNOTATE_TICKET_RELEASED(lck);
  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
                                 std::memory_order_release);

  KMP_YIELD(distance >
            (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
  return KMP_LOCK_RELEASED;
}
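// Illustrative sketch (not compiled): before bumping now_serving, the release
// above samples next_ticket - now_serving, which approximates the number of
// threads still queued (both loads are relaxed, so this is only a heuristic).
// If more threads are waiting than there are processors, at least one waiter
// cannot be spinning on a CPU, so the releaser donates its time slice.
// Hypothetical shape of that check:
#if 0
#include <atomic>
#include <thread>

// nprocs plays the role of __kmp_avail_proc; the counters are the ticket
// lock's fields, sampled before now_serving is advanced.
inline void release_side_yield(std::atomic<unsigned> &next_ticket,
                               std::atomic<unsigned> &now_serving,
                               unsigned nprocs) {
  unsigned waiters = next_ticket.load(std::memory_order_relaxed) -
                     now_serving.load(std::memory_order_relaxed);
  if (waiters > nprocs)
    std::this_thread::yield(); // stand-in for KMP_YIELD(TRUE)
}
#endif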
static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                 kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
      (__kmp_get_ticket_lock_owner(lck) != gtid)) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  return __kmp_release_ticket_lock(lck, gtid);
}

void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
  lck->lk.location = NULL;
  lck->lk.self = lck;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(
      &lck->lk.owner_id, 0,
      std::memory_order_relaxed); // no thread owns the lock.
  std::atomic_store_explicit(
      &lck->lk.depth_locked, -1,
      std::memory_order_relaxed); // -1 => not a nested lock.
  std::atomic_store_explicit(&lck->lk.initialized, true,
                             std::memory_order_release);
}

void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
  std::atomic_store_explicit(&lck->lk.initialized, false,
                             std::memory_order_release);
  lck->lk.self = NULL;
  lck->lk.location = NULL;
  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
                             std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
                             std::memory_order_relaxed);
}

static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_ticket_lock(lck);
}

// nested ticket locks

int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                   std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_NEXT;
  } else {
    __kmp_acquire_ticket_lock_timed_template(lck, gtid);
    ANNOTATE_TICKET_ACQUIRED(lck);
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    return KMP_LOCK_ACQUIRED_FIRST;
  }
}

static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_set_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_acquire_nested_ticket_lock(lck, gtid);
}
int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  int retval;

  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
    retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
                                            std::memory_order_relaxed) +
             1;
  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
    retval = 0;
  } else {
    std::atomic_store_explicit(&lck->lk.depth_locked, 1,
                               std::memory_order_relaxed);
    std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
                               std::memory_order_relaxed);
    retval = 1;
  }
  return retval;
}

static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                     kmp_int32 gtid) {
  char const *const func = "omp_test_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  return __kmp_test_nested_ticket_lock(lck, gtid);
}

int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
                                      std::memory_order_relaxed) -
       1) == 0) {
    std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
    __kmp_release_ticket_lock(lck, gtid);
    return KMP_LOCK_RELEASED;
  }
  return KMP_LOCK_STILL_HELD;
}

static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
                                                        kmp_int32 gtid) {
  char const *const func = "omp_unset_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) == -1) {
    KMP_FATAL(LockUnsettingFree, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
    KMP_FATAL(LockUnsettingSetByAnother, func);
  }
  return __kmp_release_nested_ticket_lock(lck, gtid);
}

void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
  // >= 0 for nestable locks, -1 for simple locks
}

void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
  __kmp_destroy_ticket_lock(lck);
  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
                             std::memory_order_relaxed);
}

static void
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  char const *const func = "omp_destroy_nest_lock";

  if (!std::atomic_load_explicit(&lck->lk.initialized,
                                 std::memory_order_relaxed)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (lck->lk.self != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (!__kmp_is_ticket_lock_nestable(lck)) {
    KMP_FATAL(LockSimpleUsedAsNestable, func);
  }
  if (__kmp_get_ticket_lock_owner(lck) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_nested_ticket_lock(lck);
}

// access functions to fields which don't exist for all lock kinds.

static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
                                           const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
                                        kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

/* ------------------------------------------------------------------------ */
/* queuing locks */

/* First the states
   (head,tail) = 0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                 h, h  means lock held or about to transition,
                       1 element on queue
                 h, t  h <> t, means lock is held or about to
                       transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
                ^                                    |
                |                                    |
                +------------------------------------+
*/
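// Illustrative sketch (not compiled): a single-threaded model of the
// (head, tail) transition table above, useful for convincing yourself the
// state machine is closed under acquire and release. Thread ids are encoded
// as gtid + 1, exactly as in the real queue; all names are hypothetical and
// the real code performs these transitions with compare-and-store.
#if 0
#include <cassert>

struct toy_queue_state {
  int head = 0, tail = 0; // (0,0): free; (-1,0): held, no waiters

  void acquire(int tid) { // tid is gtid + 1, so tid > 0
    if (head == 0 && tail == 0) {
      head = -1;          // (0,0)  -> (-1,0): grabbed the free lock
    } else if (head == -1) {
      head = tail = tid;  // (-1,0) -> (h,h): first waiter enqueues
    } else {
      tail = tid;         // (h,h)/(h,t) -> (h,t'): append to the queue
    }
  }

  void release(int next_tid) { // next_tid: successor of the old queue head
    assert(head != 0);         // releasing a free lock is an error
    if (head == -1) {
      head = 0;                // (-1,0) -> (0,0): nobody waiting
    } else if (head == tail) {
      head = -1;               // (h,h)  -> (-1,0): last waiter now owns it
      tail = 0;
    } else {
      head = next_tid;         // (h,t)  -> (h',t): pop the queue head
    }
  }
};
#endif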
#ifdef DEBUG_QUEUING_LOCKS

/* Stuff for circular trace buffer */
#define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = {0};
static int tc = 0;
#define TRACE_LOCK(X, Y)                                                       \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
#define TRACE_LOCK_T(X, Y, Z)                                                  \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
#define TRACE_LOCK_HT(X, Y, Z, Q)                                              \
  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y,   \
               Z, Q);

static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
                                    kmp_queuing_lock_t *lck, kmp_int32 head_id,
                                    kmp_int32 tail_id) {
  kmp_int32 t, i;

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");

  i = tc % TRACE_BUF_ELE;
  __kmp_printf_no_lock("%s\n", traces[i]);
  i = (i + 1) % TRACE_BUF_ELE;
  while (i != (tc % TRACE_BUF_ELE)) {
    __kmp_printf_no_lock("%s", traces[i]);
    i = (i + 1) % TRACE_BUF_ELE;
  }
  __kmp_printf_no_lock("\n");

  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
                       "next_wait:%d, head_id:%d, tail_id:%d\n",
                       gtid + 1, this_thr->th.th_spin_here,
                       this_thr->th.th_next_waiting, head_id, tail_id);

  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);

  if (lck->lk.head_id >= 1) {
    t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
    while (t > 0) {
      __kmp_printf_no_lock("-> %d ", t);
      t = __kmp_threads[t - 1]->th.th_next_waiting;
    }
  }
  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
  __kmp_printf_no_lock("\n\n");
}

#endif /* DEBUG_QUEUING_LOCKS */

static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
  return TCR_4(lck->lk.owner_id) - 1;
}

static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

/* Acquire a lock using the queuing lock implementation */
template <bool takeTime>
/* [TLW] The unused template above is left behind because of what BEB believes
   is a potential compiler problem with __forceinline. */
__forceinline static int
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
                                          kmp_int32 gtid) {
  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
  volatile kmp_uint32 *spin_here_p;
  kmp_int32 need_mf = 1;

#if OMPT_SUPPORT
  ompt_state_t prev_state = ompt_state_undefined;
#endif

  KA_TRACE(1000,
           ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));

  KMP_FSYNC_PREPARE(lck);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  spin_here_p = &this_thr->th.th_spin_here;

#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "acq ent");
  if (*spin_here_p)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!*spin_here_p);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
     head_id_p that may follow, not just in execution order, but also in
     visibility order. This way, when a releasing thread observes the changes
     to the queue by this thread, it can rightly assume that spin_here_p has
     already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
     not premature. If the releasing thread sets spin_here_p to FALSE before
     this thread sets it to TRUE, this thread will hang. */
  *spin_here_p = TRUE; /* before enqueuing to prevent race */

  while (1) {
    kmp_int32 enqueued;
    kmp_int32 head;
    kmp_int32 tail;

    head = *head_id_p;

    switch (head) {

    case -1: {
#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      tail = 0; /* to make sure next link asynchronously read is not set
                accidentally; this assignment prevents us from entering the
                if ( t > 0 ) condition in the enqueued case below, which is not
                necessary for this state transition */

      need_mf = 0;
      /* try (-1,0)->(tid,tid) */
      enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
                                             KMP_PACK_64(-1, 0),
                                             KMP_PACK_64(gtid + 1, gtid + 1));
#ifdef DEBUG_QUEUING_LOCKS
      if (enqueued)
        TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
#endif
    } break;

    default: {
      tail = *tail_id_p;
      KMP_DEBUG_ASSERT(tail != gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif

      if (tail == 0) {
        enqueued = FALSE;
      } else {
        need_mf = 0;
        /* try (h,t) or (h,h)->(h,tid) */
        enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);

#ifdef DEBUG_QUEUING_LOCKS
        if (enqueued)
          TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
#endif
      }
    } break;

    case 0: /* empty queue */
    {
      kmp_int32 grabbed_lock;

#ifdef DEBUG_QUEUING_LOCKS
      tail = *tail_id_p;
      TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
#endif
      /* try (0,0)->(-1,0) */

      /* only legal transition out of head = 0 is head = -1 with no change to
       * tail */
      grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);

      if (grabbed_lock) {

        *spin_here_p = FALSE;

        KA_TRACE(
            1000,
            ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
             lck, gtid));
#ifdef DEBUG_QUEUING_LOCKS
        TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
#endif

#if OMPT_SUPPORT
        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
          /* change the state before clearing wait_id */
          this_thr->th.ompt_thread_info.state = prev_state;
          this_thr->th.ompt_thread_info.wait_id = 0;
        }
#endif

        KMP_FSYNC_ACQUIRED(lck);
        return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
      }
      enqueued = FALSE;
    } break;
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
      /* this thread will spin; set wait_id before entering wait state */
      prev_state = this_thr->th.ompt_thread_info.state;
      this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
    }
#endif

    if (enqueued) {
      if (tail > 0) {
        kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
        KMP_ASSERT(tail_thr != NULL);
        tail_thr->th.th_next_waiting = gtid + 1;
        /* corresponding wait for this write in release code */
      }
      KA_TRACE(1000,
               ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
                lck, gtid));

      KMP_MB();
      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq spin");

      if (this_thr->th.th_next_waiting != 0)
        __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
      KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
      KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
                      "waiting on queue\n",
                      lck, gtid));

#ifdef DEBUG_QUEUING_LOCKS
      TRACE_LOCK(gtid + 1, "acq exit 2");
#endif

#if OMPT_SUPPORT
      /* change the state before clearing wait_id */
      this_thr->th.ompt_thread_info.state = prev_state;
      this_thr->th.ompt_thread_info.wait_id = 0;
#endif

      /* got lock, we were dequeued by the thread that released lock */
      return KMP_LOCK_ACQUIRED_FIRST;
    }

    /* Yield if number of threads > number of logical processors */
    /* ToDo: Not sure why this should only be in oversubscription case,
       maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
    KMP_YIELD_OVERSUB();

#ifdef DEBUG_QUEUING_LOCKS
    TRACE_LOCK(gtid + 1, "acq retry");
#endif
  }
  KMP_ASSERT2(0, "should not get here");
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  KMP_DEBUG_ASSERT(gtid >= 0);

  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
  ANNOTATE_QUEUING_ACQUIRED(lck);
  return retval;
}

static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                                  kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }
  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }
  __kmp_acquire_queuing_lock(lck, gtid);

  lck->lk.owner_id = gtid + 1;
  return KMP_LOCK_ACQUIRED_FIRST;
}

int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  kmp_int32 head;
#ifdef KMP_DEBUG
  kmp_info_t *this_thr;
#endif

  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
#ifdef KMP_DEBUG
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
#endif

  head = *head_id_p;

  if (head == 0) { /* nobody on queue, nobody holding */
    /* try (0,0)->(-1,0) */
    if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
      KA_TRACE(1000,
               ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
      KMP_FSYNC_ACQUIRED(lck);
      ANNOTATE_QUEUING_ACQUIRED(lck);
      return TRUE;
    }
  }

  KA_TRACE(1000,
           ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
  return FALSE;
}

static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
                                               kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.initialized != lck) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_is_queuing_lock_nestable(lck)) {
    KMP_FATAL(LockNestableUsedAsSimple, func);
  }

  int retval = __kmp_test_queuing_lock(lck, gtid);

  if (retval) {
    lck->lk.owner_id = gtid + 1;
  }
  return retval;
}

int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  kmp_info_t *this_thr;
  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;

  KA_TRACE(1000,
           ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
  KMP_DEBUG_ASSERT(gtid >= 0);
  this_thr = __kmp_thread_from_gtid(gtid);
  KMP_DEBUG_ASSERT(this_thr != NULL);
#ifdef DEBUG_QUEUING_LOCKS
  TRACE_LOCK(gtid + 1, "rel ent");

  if (this_thr->th.th_spin_here)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
  if (this_thr->th.th_next_waiting != 0)
    __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
#endif
  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

  KMP_FSYNC_RELEASING(lck);
  ANNOTATE_QUEUING_RELEASED(lck);

  while (1) {
    kmp_int32 dequeued;
    kmp_int32 head;
    kmp_int32 tail;

head = *head_id_p; 1389345153Sdim 1390345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1391345153Sdim tail = *tail_id_p; 1392345153Sdim TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); 1393345153Sdim if (head == 0) 1394345153Sdim __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1395345153Sdim#endif 1396345153Sdim KMP_DEBUG_ASSERT(head != 1397345153Sdim 0); /* holding the lock, head must be -1 or queue head */ 1398345153Sdim 1399345153Sdim if (head == -1) { /* nobody on queue */ 1400345153Sdim /* try (-1,0)->(0,0) */ 1401345153Sdim if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { 1402345153Sdim KA_TRACE( 1403345153Sdim 1000, 1404345153Sdim ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", 1405345153Sdim lck, gtid)); 1406345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1407345153Sdim TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); 1408345153Sdim#endif 1409345153Sdim 1410345153Sdim#if OMPT_SUPPORT 1411345153Sdim/* nothing to do - no other thread is trying to shift blame */ 1412345153Sdim#endif 1413345153Sdim return KMP_LOCK_RELEASED; 1414345153Sdim } 1415345153Sdim dequeued = FALSE; 1416345153Sdim } else { 1417345153Sdim KMP_MB(); 1418345153Sdim tail = *tail_id_p; 1419345153Sdim if (head == tail) { /* only one thread on the queue */ 1420345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1421345153Sdim if (head <= 0) 1422345153Sdim __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1423345153Sdim#endif 1424345153Sdim KMP_DEBUG_ASSERT(head > 0); 1425345153Sdim 1426345153Sdim /* try (h,h)->(-1,0) */ 1427345153Sdim dequeued = KMP_COMPARE_AND_STORE_REL64( 1428345153Sdim RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), 1429345153Sdim KMP_PACK_64(-1, 0)); 1430345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1431345153Sdim TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); 1432345153Sdim#endif 1433345153Sdim 1434345153Sdim } else { 1435345153Sdim volatile kmp_int32 *waiting_id_p; 1436345153Sdim kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1437345153Sdim KMP_DEBUG_ASSERT(head_thr != NULL); 1438345153Sdim waiting_id_p = &head_thr->th.th_next_waiting; 1439345153Sdim 1440345153Sdim/* Does this require synchronous reads? */ 1441345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1442345153Sdim if (head <= 0 || tail <= 0) 1443345153Sdim __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1444345153Sdim#endif 1445345153Sdim KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1446345153Sdim 1447345153Sdim /* try (h,t)->(h',t) or (t,t) */ 1448345153Sdim KMP_MB(); 1449345153Sdim /* make sure enqueuing thread has time to update next waiting thread 1450345153Sdim * field */ 1451353358Sdim *head_id_p = 1452353358Sdim KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL); 1453345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1454345153Sdim TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); 1455345153Sdim#endif 1456345153Sdim dequeued = TRUE; 1457345153Sdim } 1458345153Sdim } 1459345153Sdim 1460345153Sdim if (dequeued) { 1461345153Sdim kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); 1462345153Sdim KMP_DEBUG_ASSERT(head_thr != NULL); 1463345153Sdim 1464345153Sdim/* Does this require synchronous reads? */ 1465345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1466345153Sdim if (head <= 0 || tail <= 0) 1467345153Sdim __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); 1468345153Sdim#endif 1469345153Sdim KMP_DEBUG_ASSERT(head > 0 && tail > 0); 1470345153Sdim 1471345153Sdim /* For clean code only. Thread not released until next statement prevents 1472345153Sdim race with acquire code. 
*/ 1473345153Sdim head_thr->th.th_next_waiting = 0; 1474345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1475345153Sdim TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); 1476345153Sdim#endif 1477345153Sdim 1478345153Sdim KMP_MB(); 1479345153Sdim /* reset spin value */ 1480345153Sdim head_thr->th.th_spin_here = FALSE; 1481345153Sdim 1482345153Sdim KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " 1483345153Sdim "dequeuing\n", 1484345153Sdim lck, gtid)); 1485345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1486345153Sdim TRACE_LOCK(gtid + 1, "rel exit 2"); 1487345153Sdim#endif 1488345153Sdim return KMP_LOCK_RELEASED; 1489345153Sdim } 1490345153Sdim/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring 1491345153Sdim threads */ 1492345153Sdim 1493345153Sdim#ifdef DEBUG_QUEUING_LOCKS 1494345153Sdim TRACE_LOCK(gtid + 1, "rel retry"); 1495345153Sdim#endif 1496345153Sdim 1497345153Sdim } /* while */ 1498345153Sdim KMP_ASSERT2(0, "should not get here"); 1499345153Sdim return KMP_LOCK_RELEASED; 1500345153Sdim} 1501345153Sdim 1502345153Sdimstatic int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1503345153Sdim kmp_int32 gtid) { 1504345153Sdim char const *const func = "omp_unset_lock"; 1505345153Sdim KMP_MB(); /* in case another processor initialized lock */ 1506345153Sdim if (lck->lk.initialized != lck) { 1507345153Sdim KMP_FATAL(LockIsUninitialized, func); 1508345153Sdim } 1509345153Sdim if (__kmp_is_queuing_lock_nestable(lck)) { 1510345153Sdim KMP_FATAL(LockNestableUsedAsSimple, func); 1511345153Sdim } 1512345153Sdim if (__kmp_get_queuing_lock_owner(lck) == -1) { 1513345153Sdim KMP_FATAL(LockUnsettingFree, func); 1514345153Sdim } 1515345153Sdim if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1516345153Sdim KMP_FATAL(LockUnsettingSetByAnother, func); 1517345153Sdim } 1518345153Sdim lck->lk.owner_id = 0; 1519345153Sdim return __kmp_release_queuing_lock(lck, gtid); 1520345153Sdim} 1521345153Sdim 1522345153Sdimvoid __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { 1523345153Sdim lck->lk.location = NULL; 1524345153Sdim lck->lk.head_id = 0; 1525345153Sdim lck->lk.tail_id = 0; 1526345153Sdim lck->lk.next_ticket = 0; 1527345153Sdim lck->lk.now_serving = 0; 1528345153Sdim lck->lk.owner_id = 0; // no thread owns the lock. 1529345153Sdim lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
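  /* Note (added for exposition): lk.initialized is deliberately the last
     field written below -- the *_with_checks routines treat
     (lk.initialized != lck) as "uninitialized" and raise LockIsUninitialized.
     There is no fence here, so this ordering is a debugging aid for misuse
     detection, not a cross-thread publication guarantee. */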
1530345153Sdim lck->lk.initialized = lck; 1531345153Sdim 1532345153Sdim KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); 1533345153Sdim} 1534345153Sdim 1535345153Sdimvoid __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { 1536345153Sdim lck->lk.initialized = NULL; 1537345153Sdim lck->lk.location = NULL; 1538345153Sdim lck->lk.head_id = 0; 1539345153Sdim lck->lk.tail_id = 0; 1540345153Sdim lck->lk.next_ticket = 0; 1541345153Sdim lck->lk.now_serving = 0; 1542345153Sdim lck->lk.owner_id = 0; 1543345153Sdim lck->lk.depth_locked = -1; 1544345153Sdim} 1545345153Sdim 1546345153Sdimstatic void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1547345153Sdim char const *const func = "omp_destroy_lock"; 1548345153Sdim if (lck->lk.initialized != lck) { 1549345153Sdim KMP_FATAL(LockIsUninitialized, func); 1550345153Sdim } 1551345153Sdim if (__kmp_is_queuing_lock_nestable(lck)) { 1552345153Sdim KMP_FATAL(LockNestableUsedAsSimple, func); 1553345153Sdim } 1554345153Sdim if (__kmp_get_queuing_lock_owner(lck) != -1) { 1555345153Sdim KMP_FATAL(LockStillOwned, func); 1556345153Sdim } 1557345153Sdim __kmp_destroy_queuing_lock(lck); 1558345153Sdim} 1559345153Sdim 1560345153Sdim// nested queuing locks 1561345153Sdim 1562345153Sdimint __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1563345153Sdim KMP_DEBUG_ASSERT(gtid >= 0); 1564345153Sdim 1565345153Sdim if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1566345153Sdim lck->lk.depth_locked += 1; 1567345153Sdim return KMP_LOCK_ACQUIRED_NEXT; 1568345153Sdim } else { 1569345153Sdim __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid); 1570345153Sdim ANNOTATE_QUEUING_ACQUIRED(lck); 1571345153Sdim KMP_MB(); 1572345153Sdim lck->lk.depth_locked = 1; 1573345153Sdim KMP_MB(); 1574345153Sdim lck->lk.owner_id = gtid + 1; 1575345153Sdim return KMP_LOCK_ACQUIRED_FIRST; 1576345153Sdim } 1577345153Sdim} 1578345153Sdim 1579345153Sdimstatic int 1580345153Sdim__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1581345153Sdim kmp_int32 gtid) { 1582345153Sdim char const *const func = "omp_set_nest_lock"; 1583345153Sdim if (lck->lk.initialized != lck) { 1584345153Sdim KMP_FATAL(LockIsUninitialized, func); 1585345153Sdim } 1586345153Sdim if (!__kmp_is_queuing_lock_nestable(lck)) { 1587345153Sdim KMP_FATAL(LockSimpleUsedAsNestable, func); 1588345153Sdim } 1589345153Sdim return __kmp_acquire_nested_queuing_lock(lck, gtid); 1590345153Sdim} 1591345153Sdim 1592345153Sdimint __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1593345153Sdim int retval; 1594345153Sdim 1595345153Sdim KMP_DEBUG_ASSERT(gtid >= 0); 1596345153Sdim 1597345153Sdim if (__kmp_get_queuing_lock_owner(lck) == gtid) { 1598345153Sdim retval = ++lck->lk.depth_locked; 1599345153Sdim } else if (!__kmp_test_queuing_lock(lck, gtid)) { 1600345153Sdim retval = 0; 1601345153Sdim } else { 1602345153Sdim KMP_MB(); 1603345153Sdim retval = lck->lk.depth_locked = 1; 1604345153Sdim KMP_MB(); 1605345153Sdim lck->lk.owner_id = gtid + 1; 1606345153Sdim } 1607345153Sdim return retval; 1608345153Sdim} 1609345153Sdim 1610345153Sdimstatic int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1611345153Sdim kmp_int32 gtid) { 1612345153Sdim char const *const func = "omp_test_nest_lock"; 1613345153Sdim if (lck->lk.initialized != lck) { 1614345153Sdim KMP_FATAL(LockIsUninitialized, func); 1615345153Sdim } 1616345153Sdim if (!__kmp_is_queuing_lock_nestable(lck)) { 1617345153Sdim 
KMP_FATAL(LockSimpleUsedAsNestable, func); 1618345153Sdim } 1619345153Sdim return __kmp_test_nested_queuing_lock(lck, gtid); 1620345153Sdim} 1621345153Sdim 1622345153Sdimint __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { 1623345153Sdim KMP_DEBUG_ASSERT(gtid >= 0); 1624345153Sdim 1625345153Sdim KMP_MB(); 1626345153Sdim if (--(lck->lk.depth_locked) == 0) { 1627345153Sdim KMP_MB(); 1628345153Sdim lck->lk.owner_id = 0; 1629345153Sdim __kmp_release_queuing_lock(lck, gtid); 1630345153Sdim return KMP_LOCK_RELEASED; 1631345153Sdim } 1632345153Sdim return KMP_LOCK_STILL_HELD; 1633345153Sdim} 1634345153Sdim 1635345153Sdimstatic int 1636345153Sdim__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, 1637345153Sdim kmp_int32 gtid) { 1638345153Sdim char const *const func = "omp_unset_nest_lock"; 1639345153Sdim KMP_MB(); /* in case another processor initialized lock */ 1640345153Sdim if (lck->lk.initialized != lck) { 1641345153Sdim KMP_FATAL(LockIsUninitialized, func); 1642345153Sdim } 1643345153Sdim if (!__kmp_is_queuing_lock_nestable(lck)) { 1644345153Sdim KMP_FATAL(LockSimpleUsedAsNestable, func); 1645345153Sdim } 1646345153Sdim if (__kmp_get_queuing_lock_owner(lck) == -1) { 1647345153Sdim KMP_FATAL(LockUnsettingFree, func); 1648345153Sdim } 1649345153Sdim if (__kmp_get_queuing_lock_owner(lck) != gtid) { 1650345153Sdim KMP_FATAL(LockUnsettingSetByAnother, func); 1651345153Sdim } 1652345153Sdim return __kmp_release_nested_queuing_lock(lck, gtid); 1653345153Sdim} 1654345153Sdim 1655345153Sdimvoid __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1656345153Sdim __kmp_init_queuing_lock(lck); 1657345153Sdim lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 1658345153Sdim} 1659345153Sdim 1660345153Sdimvoid __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { 1661345153Sdim __kmp_destroy_queuing_lock(lck); 1662345153Sdim lck->lk.depth_locked = 0; 1663345153Sdim} 1664345153Sdim 1665345153Sdimstatic void 1666345153Sdim__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { 1667345153Sdim char const *const func = "omp_destroy_nest_lock"; 1668345153Sdim if (lck->lk.initialized != lck) { 1669345153Sdim KMP_FATAL(LockIsUninitialized, func); 1670345153Sdim } 1671345153Sdim if (!__kmp_is_queuing_lock_nestable(lck)) { 1672345153Sdim KMP_FATAL(LockSimpleUsedAsNestable, func); 1673345153Sdim } 1674345153Sdim if (__kmp_get_queuing_lock_owner(lck) != -1) { 1675345153Sdim KMP_FATAL(LockStillOwned, func); 1676345153Sdim } 1677345153Sdim __kmp_destroy_nested_queuing_lock(lck); 1678345153Sdim} 1679345153Sdim 1680345153Sdim// access functions to fields which don't exist for all lock kinds. 
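/* Recap of the nested queuing-lock protocol above (illustrative only, via the
   public OpenMP API; whether a nestable queuing lock actually backs
   omp_nest_lock_t is configuration dependent):

     omp_nest_lock_t l;
     omp_init_nest_lock(&l);   // depth_locked = 0
     omp_set_nest_lock(&l);    // acquire; depth_locked = 1
     omp_set_nest_lock(&l);    // same owner, no re-acquire; depth_locked = 2
     omp_unset_nest_lock(&l);  // depth_locked = 1, lock still held
     omp_unset_nest_lock(&l);  // depth_locked = 0, lock released
     omp_destroy_nest_lock(&l);
*/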
1681345153Sdim 1682345153Sdimstatic const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { 1683345153Sdim return lck->lk.location; 1684345153Sdim} 1685345153Sdim 1686345153Sdimstatic void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, 1687345153Sdim const ident_t *loc) { 1688345153Sdim lck->lk.location = loc; 1689345153Sdim} 1690345153Sdim 1691345153Sdimstatic kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { 1692345153Sdim return lck->lk.flags; 1693345153Sdim} 1694345153Sdim 1695345153Sdimstatic void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, 1696345153Sdim kmp_lock_flags_t flags) { 1697345153Sdim lck->lk.flags = flags; 1698345153Sdim} 1699345153Sdim 1700345153Sdim#if KMP_USE_ADAPTIVE_LOCKS 1701345153Sdim 1702345153Sdim/* RTM Adaptive locks */ 1703345153Sdim 1704345153Sdim#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) || \ 1705345153Sdim (KMP_COMPILER_MSVC && _MSC_VER >= 1700) || \ 1706345153Sdim (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT) 1707345153Sdim 1708345153Sdim#include <immintrin.h> 1709345153Sdim#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1710345153Sdim 1711345153Sdim#else 1712345153Sdim 1713345153Sdim// Values from the status register after failed speculation. 1714345153Sdim#define _XBEGIN_STARTED (~0u) 1715345153Sdim#define _XABORT_EXPLICIT (1 << 0) 1716345153Sdim#define _XABORT_RETRY (1 << 1) 1717345153Sdim#define _XABORT_CONFLICT (1 << 2) 1718345153Sdim#define _XABORT_CAPACITY (1 << 3) 1719345153Sdim#define _XABORT_DEBUG (1 << 4) 1720345153Sdim#define _XABORT_NESTED (1 << 5) 1721345153Sdim#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) 1722345153Sdim 1723345153Sdim// Aborts for which it's worth trying again immediately 1724345153Sdim#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) 1725345153Sdim 1726345153Sdim#define STRINGIZE_INTERNAL(arg) #arg 1727345153Sdim#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) 1728345153Sdim 1729345153Sdim// Access to RTM instructions 1730345153Sdim/*A version of XBegin which returns -1 on speculation, and the value of EAX on 1731345153Sdim an abort. This is the same definition as the compiler intrinsic that will be 1732345153Sdim supported at some point. */ 1733345153Sdimstatic __inline int _xbegin() { 1734345153Sdim int res = -1; 1735345153Sdim 1736345153Sdim#if KMP_OS_WINDOWS 1737345153Sdim#if KMP_ARCH_X86_64 1738345153Sdim _asm { 1739345153Sdim _emit 0xC7 1740345153Sdim _emit 0xF8 1741345153Sdim _emit 2 1742345153Sdim _emit 0 1743345153Sdim _emit 0 1744345153Sdim _emit 0 1745345153Sdim jmp L2 1746345153Sdim mov res, eax 1747345153Sdim L2: 1748345153Sdim } 1749345153Sdim#else /* IA32 */ 1750345153Sdim _asm { 1751345153Sdim _emit 0xC7 1752345153Sdim _emit 0xF8 1753345153Sdim _emit 2 1754345153Sdim _emit 0 1755345153Sdim _emit 0 1756345153Sdim _emit 0 1757345153Sdim jmp L2 1758345153Sdim mov res, eax 1759345153Sdim L2: 1760345153Sdim } 1761345153Sdim#endif // KMP_ARCH_X86_64 1762345153Sdim#else 1763345153Sdim /* Note that %eax must be noted as killed (clobbered), because the XSR is 1764345153Sdim returned in %eax(%rax) on abort. Other register values are restored, so 1765345153Sdim don't need to be killed. 1766345153Sdim 1767345153Sdim We must also mark 'res' as an input and an output, since otherwise 1768345153Sdim 'res=-1' may be dropped as being dead, whereas we do need the assignment on 1769345153Sdim the successful (i.e., non-abort) path. 
 */
  __asm__ volatile("1: .byte  0xC7; .byte 0xF8;\n"
                   "   .long  1f-1b-6\n"
                   "    jmp   2f\n"
                   "1:  movl  %%eax,%0\n"
                   "2:"
                   : "+r"(res)::"memory", "%eax");
#endif // KMP_OS_WINDOWS
  return res;
}

/* Transaction end */
static __inline void _xend() {
#if KMP_OS_WINDOWS
  __asm {
    _emit 0x0f
    _emit 0x01
    _emit 0xd5
  }
#else
  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
#endif
}

/* This is a macro, the argument must be a single byte constant which can be
   evaluated by the inline assembler, since it is emitted as a byte into the
   assembly code. */
// clang-format off
#if KMP_OS_WINDOWS
#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
#else
#define _xabort(ARG) \
  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
#endif
// clang-format on
#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300

// Statistics are collected for testing purposes.
#if KMP_DEBUG_ADAPTIVE_LOCKS

// We accumulate speculative lock statistics when the lock is destroyed. We
// keep locks that haven't been destroyed in the liveLocks list so that we can
// grab their statistics too.
static kmp_adaptive_lock_statistics_t destroyedStats;

// To hold the list of live locks.
static kmp_adaptive_lock_info_t liveLocks;

// A lock so we can safely update the list of locks.
static kmp_bootstrap_lock_t chain_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);

// Initialize the list of stats.
1822345153Sdimvoid __kmp_init_speculative_stats() { 1823345153Sdim kmp_adaptive_lock_info_t *lck = &liveLocks; 1824345153Sdim 1825345153Sdim memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0, 1826345153Sdim sizeof(lck->stats)); 1827345153Sdim lck->stats.next = lck; 1828345153Sdim lck->stats.prev = lck; 1829345153Sdim 1830345153Sdim KMP_ASSERT(lck->stats.next->stats.prev == lck); 1831345153Sdim KMP_ASSERT(lck->stats.prev->stats.next == lck); 1832345153Sdim 1833345153Sdim __kmp_init_bootstrap_lock(&chain_lock); 1834345153Sdim} 1835345153Sdim 1836345153Sdim// Insert the lock into the circular list 1837345153Sdimstatic void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { 1838345153Sdim __kmp_acquire_bootstrap_lock(&chain_lock); 1839345153Sdim 1840345153Sdim lck->stats.next = liveLocks.stats.next; 1841345153Sdim lck->stats.prev = &liveLocks; 1842345153Sdim 1843345153Sdim liveLocks.stats.next = lck; 1844345153Sdim lck->stats.next->stats.prev = lck; 1845345153Sdim 1846345153Sdim KMP_ASSERT(lck->stats.next->stats.prev == lck); 1847345153Sdim KMP_ASSERT(lck->stats.prev->stats.next == lck); 1848345153Sdim 1849345153Sdim __kmp_release_bootstrap_lock(&chain_lock); 1850345153Sdim} 1851345153Sdim 1852345153Sdimstatic void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { 1853345153Sdim KMP_ASSERT(lck->stats.next->stats.prev == lck); 1854345153Sdim KMP_ASSERT(lck->stats.prev->stats.next == lck); 1855345153Sdim 1856345153Sdim kmp_adaptive_lock_info_t *n = lck->stats.next; 1857345153Sdim kmp_adaptive_lock_info_t *p = lck->stats.prev; 1858345153Sdim 1859345153Sdim n->stats.prev = p; 1860345153Sdim p->stats.next = n; 1861345153Sdim} 1862345153Sdim 1863345153Sdimstatic void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1864345153Sdim memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0, 1865345153Sdim sizeof(lck->stats)); 1866345153Sdim __kmp_remember_lock(lck); 1867345153Sdim} 1868345153Sdim 1869345153Sdimstatic void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, 1870345153Sdim kmp_adaptive_lock_info_t *lck) { 1871345153Sdim kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; 1872345153Sdim 1873345153Sdim t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; 1874345153Sdim t->successfulSpeculations += s->successfulSpeculations; 1875345153Sdim t->hardFailedSpeculations += s->hardFailedSpeculations; 1876345153Sdim t->softFailedSpeculations += s->softFailedSpeculations; 1877345153Sdim t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; 1878345153Sdim t->lemmingYields += s->lemmingYields; 1879345153Sdim} 1880345153Sdim 1881345153Sdimstatic void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { 1882345153Sdim __kmp_acquire_bootstrap_lock(&chain_lock); 1883345153Sdim 1884345153Sdim __kmp_add_stats(&destroyedStats, lck); 1885345153Sdim __kmp_forget_lock(lck); 1886345153Sdim 1887345153Sdim __kmp_release_bootstrap_lock(&chain_lock); 1888345153Sdim} 1889345153Sdim 1890345153Sdimstatic float percent(kmp_uint32 count, kmp_uint32 total) { 1891345153Sdim return (total == 0) ? 
0.0 : (100.0 * count) / total; 1892345153Sdim} 1893345153Sdim 1894345153Sdimstatic FILE *__kmp_open_stats_file() { 1895345153Sdim if (strcmp(__kmp_speculative_statsfile, "-") == 0) 1896345153Sdim return stdout; 1897345153Sdim 1898345153Sdim size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; 1899345153Sdim char buffer[buffLen]; 1900345153Sdim KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, 1901345153Sdim (kmp_int32)getpid()); 1902345153Sdim FILE *result = fopen(&buffer[0], "w"); 1903345153Sdim 1904345153Sdim // Maybe we should issue a warning here... 1905345153Sdim return result ? result : stdout; 1906345153Sdim} 1907345153Sdim 1908345153Sdimvoid __kmp_print_speculative_stats() { 1909345153Sdim kmp_adaptive_lock_statistics_t total = destroyedStats; 1910345153Sdim kmp_adaptive_lock_info_t *lck; 1911345153Sdim 1912345153Sdim for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { 1913345153Sdim __kmp_add_stats(&total, lck); 1914345153Sdim } 1915345153Sdim kmp_adaptive_lock_statistics_t *t = &total; 1916345153Sdim kmp_uint32 totalSections = 1917345153Sdim t->nonSpeculativeAcquires + t->successfulSpeculations; 1918345153Sdim kmp_uint32 totalSpeculations = t->successfulSpeculations + 1919345153Sdim t->hardFailedSpeculations + 1920345153Sdim t->softFailedSpeculations; 1921345153Sdim if (totalSections <= 0) 1922345153Sdim return; 1923345153Sdim 1924345153Sdim FILE *statsFile = __kmp_open_stats_file(); 1925345153Sdim 1926345153Sdim fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); 1927345153Sdim fprintf(statsFile, " Lock parameters: \n" 1928345153Sdim " max_soft_retries : %10d\n" 1929345153Sdim " max_badness : %10d\n", 1930345153Sdim __kmp_adaptive_backoff_params.max_soft_retries, 1931345153Sdim __kmp_adaptive_backoff_params.max_badness); 1932345153Sdim fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", 1933345153Sdim t->nonSpeculativeAcquireAttempts); 1934345153Sdim fprintf(statsFile, " Total critical sections : %10d\n", 1935345153Sdim totalSections); 1936345153Sdim fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", 1937345153Sdim t->successfulSpeculations, 1938345153Sdim percent(t->successfulSpeculations, totalSections)); 1939345153Sdim fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", 1940345153Sdim t->nonSpeculativeAcquires, 1941345153Sdim percent(t->nonSpeculativeAcquires, totalSections)); 1942345153Sdim fprintf(statsFile, " Lemming yields : %10d\n\n", 1943345153Sdim t->lemmingYields); 1944345153Sdim 1945345153Sdim fprintf(statsFile, " Speculative acquire attempts : %10d\n", 1946345153Sdim totalSpeculations); 1947345153Sdim fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", 1948345153Sdim t->successfulSpeculations, 1949345153Sdim percent(t->successfulSpeculations, totalSpeculations)); 1950345153Sdim fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", 1951345153Sdim t->softFailedSpeculations, 1952345153Sdim percent(t->softFailedSpeculations, totalSpeculations)); 1953345153Sdim fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", 1954345153Sdim t->hardFailedSpeculations, 1955345153Sdim percent(t->hardFailedSpeculations, totalSpeculations)); 1956345153Sdim 1957345153Sdim if (statsFile != stdout) 1958345153Sdim fclose(statsFile); 1959345153Sdim} 1960345153Sdim 1961345153Sdim#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) 1962345153Sdim#else 1963345153Sdim#define KMP_INC_STAT(lck, stat) 1964345153Sdim 1965345153Sdim#endif // KMP_DEBUG_ADAPTIVE_LOCKS 
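/* Sketch of the statistics lifecycle when KMP_DEBUG_ADAPTIVE_LOCKS is on
   (names are the functions and macro defined above; illustrative only):

     __kmp_init_speculative_stats();              // once: set up circular list
     __kmp_zero_speculative_stats(&lck->lk.adaptive); // per lock: zero, enlist
     KMP_INC_STAT(lck, softFailedSpeculations);   // from the lock fast paths
     __kmp_accumulate_speculative_stats(&lck->lk.adaptive); // on destroy:
                                                  // fold into destroyedStats
     __kmp_print_speculative_stats();             // at shutdown: totals/percents

   When the option is off, KMP_INC_STAT expands to nothing, so the lock code
   pays no bookkeeping cost. */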
1966345153Sdim 1967345153Sdimstatic inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { 1968345153Sdim // It is enough to check that the head_id is zero. 1969345153Sdim // We don't also need to check the tail. 1970345153Sdim bool res = lck->lk.head_id == 0; 1971345153Sdim 1972345153Sdim// We need a fence here, since we must ensure that no memory operations 1973345153Sdim// from later in this thread float above that read. 1974345153Sdim#if KMP_COMPILER_ICC 1975345153Sdim _mm_mfence(); 1976345153Sdim#else 1977345153Sdim __sync_synchronize(); 1978345153Sdim#endif 1979345153Sdim 1980345153Sdim return res; 1981345153Sdim} 1982345153Sdim 1983345153Sdim// Functions for manipulating the badness 1984345153Sdimstatic __inline void 1985345153Sdim__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { 1986345153Sdim // Reset the badness to zero so we eagerly try to speculate again 1987345153Sdim lck->lk.adaptive.badness = 0; 1988345153Sdim KMP_INC_STAT(lck, successfulSpeculations); 1989345153Sdim} 1990345153Sdim 1991345153Sdim// Create a bit mask with one more set bit. 1992345153Sdimstatic __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { 1993345153Sdim kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; 1994345153Sdim if (newBadness > lck->lk.adaptive.max_badness) { 1995345153Sdim return; 1996345153Sdim } else { 1997345153Sdim lck->lk.adaptive.badness = newBadness; 1998345153Sdim } 1999345153Sdim} 2000345153Sdim 2001345153Sdim// Check whether speculation should be attempted. 2002345153Sdimstatic __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, 2003345153Sdim kmp_int32 gtid) { 2004345153Sdim kmp_uint32 badness = lck->lk.adaptive.badness; 2005345153Sdim kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; 2006345153Sdim int res = (attempts & badness) == 0; 2007345153Sdim return res; 2008345153Sdim} 2009345153Sdim 2010345153Sdim// Attempt to acquire only the speculative lock. 2011345153Sdim// Does not back off to the non-speculative lock. 2012345153Sdimstatic int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, 2013345153Sdim kmp_int32 gtid) { 2014345153Sdim int retries = lck->lk.adaptive.max_soft_retries; 2015345153Sdim 2016345153Sdim // We don't explicitly count the start of speculation, rather we record the 2017345153Sdim // results (success, hard fail, soft fail). The sum of all of those is the 2018345153Sdim // total number of times we started speculation since all speculations must 2019345153Sdim // end one of those ways. 2020345153Sdim do { 2021345153Sdim kmp_uint32 status = _xbegin(); 2022345153Sdim // Switch this in to disable actual speculation but exercise at least some 2023345153Sdim // of the rest of the code. Useful for debugging... 2024345153Sdim // kmp_uint32 status = _XABORT_NESTED; 2025345153Sdim 2026345153Sdim if (status == _XBEGIN_STARTED) { 2027345153Sdim /* We have successfully started speculation. Check that no-one acquired 2028345153Sdim the lock for real between when we last looked and now. This also gets 2029345153Sdim the lock cache line into our read-set, which we need so that we'll 2030345153Sdim abort if anyone later claims it for real. */ 2031345153Sdim if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { 2032345153Sdim // Lock is now visibly acquired, so someone beat us to it. Abort the 2033345153Sdim // transaction so we'll restart from _xbegin with the failure status. 
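        /* Added note: the explicit abort below comes back from _xbegin()
           with _XABORT_EXPLICIT set (and abort code 0x01 recoverable via
           _XABORT_CODE). SOFT_ABORT_MASK includes _XABORT_EXPLICIT, so this
           counts as a soft failure and the retry loop may try again. */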
        _xabort(0x01);
        KMP_ASSERT2(0, "should not get here");
      }
      return 1; // Lock has been acquired (speculatively)
    } else {
      // We have aborted, update the statistics
      if (status & SOFT_ABORT_MASK) {
        KMP_INC_STAT(lck, softFailedSpeculations);
        // and loop round to retry.
      } else {
        KMP_INC_STAT(lck, hardFailedSpeculations);
        // Give up if we had a hard failure.
        break;
      }
    }
  } while (retries--); // Loop while we have retries, and didn't fail hard.

  // Either we had a hard failure or we didn't succeed softly after
  // the full set of attempts, so back off the badness.
  __kmp_step_badness(lck);
  return 0;
}

// Attempt to acquire the speculative lock, or back off to the non-speculative
// one if the speculative lock cannot be acquired.
// We can succeed speculatively, non-speculatively, or fail.
static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
  // First try to acquire the lock speculatively
  if (__kmp_should_speculate(lck, gtid) &&
      __kmp_test_adaptive_lock_only(lck, gtid))
    return 1;

  // Speculative acquisition failed, so try to acquire it non-speculatively.
  // Count the non-speculative acquire attempt
  lck->lk.adaptive.acquire_attempts++;

  // Use base, non-speculative lock.
  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
    KMP_INC_STAT(lck, nonSpeculativeAcquires);
    return 1; // Lock is acquired (non-speculatively)
  } else {
    return 0; // Failed to acquire the lock, it's already visibly locked.
  }
}

static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                kmp_int32 gtid) {
  char const *const func = "omp_test_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }

  int retval = __kmp_test_adaptive_lock(lck, gtid);

  if (retval) {
    lck->lk.qlk.owner_id = gtid + 1;
  }
  return retval;
}

// Block until we can acquire a speculative, adaptive lock. We check whether we
// should be trying to speculate. If we should be, we check the real lock to see
// if it is free, and, if not, pause without attempting to acquire it until it
// is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we can't acquire the lock speculatively
// until the queue of waiting threads has cleared), we don't get into a state
// where we can never acquire the lock speculatively (because we force the queue
// to clear by preventing new arrivals from entering the queue). This does mean
// that when we're trying to break lemmings, the lock is no longer fair.
// However, OpenMP makes no guarantee that its locks are fair, so this isn't a
// real problem.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock) to allow it to drain, then
      // try again. All other threads will also see the same result for
      // __kmp_should_speculate, so they will be doing the same if they try to
      // claim the lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        KMP_YIELD(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
  ANNOTATE_QUEUING_ACQUIRED(lck);
}

static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_adaptive_lock(lck, gtid);

  lck->lk.qlk.owner_id = gtid + 1;
}

static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
    // (Or the user's code is buggy and they're releasing without locking;
    // if we had XTEST we'd be able to check that case...)
    _xend(); // Exit speculation
    __kmp_update_badness_after_success(lck);
  } else { // Since the lock *is* visibly locked we're not speculating,
    // so we should use the underlying lock's release scheme.
2163345153Sdim __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); 2164345153Sdim } 2165345153Sdim return KMP_LOCK_RELEASED; 2166345153Sdim} 2167345153Sdim 2168345153Sdimstatic int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, 2169345153Sdim kmp_int32 gtid) { 2170345153Sdim char const *const func = "omp_unset_lock"; 2171345153Sdim KMP_MB(); /* in case another processor initialized lock */ 2172345153Sdim if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { 2173345153Sdim KMP_FATAL(LockIsUninitialized, func); 2174345153Sdim } 2175345153Sdim if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { 2176345153Sdim KMP_FATAL(LockUnsettingFree, func); 2177345153Sdim } 2178345153Sdim if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { 2179345153Sdim KMP_FATAL(LockUnsettingSetByAnother, func); 2180345153Sdim } 2181345153Sdim lck->lk.qlk.owner_id = 0; 2182345153Sdim __kmp_release_adaptive_lock(lck, gtid); 2183345153Sdim return KMP_LOCK_RELEASED; 2184345153Sdim} 2185345153Sdim 2186345153Sdimstatic void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { 2187345153Sdim __kmp_init_queuing_lock(GET_QLK_PTR(lck)); 2188345153Sdim lck->lk.adaptive.badness = 0; 2189345153Sdim lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; 2190345153Sdim lck->lk.adaptive.max_soft_retries = 2191345153Sdim __kmp_adaptive_backoff_params.max_soft_retries; 2192345153Sdim lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; 2193345153Sdim#if KMP_DEBUG_ADAPTIVE_LOCKS 2194345153Sdim __kmp_zero_speculative_stats(&lck->lk.adaptive); 2195345153Sdim#endif 2196345153Sdim KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); 2197345153Sdim} 2198345153Sdim 2199345153Sdimstatic void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { 2200345153Sdim#if KMP_DEBUG_ADAPTIVE_LOCKS 2201345153Sdim __kmp_accumulate_speculative_stats(&lck->lk.adaptive); 2202345153Sdim#endif 2203345153Sdim __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); 2204345153Sdim // Nothing needed for the speculative part. 
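  /* Added note: an RTM transaction lives entirely in processor state, so the
     speculative side owns no heap or OS resources; tearing down the
     underlying queuing lock (plus the stats unlinking above, when enabled)
     is sufficient. */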
}

static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  char const *const func = "omp_destroy_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
    KMP_FATAL(LockStillOwned, func);
  }
  __kmp_destroy_adaptive_lock(lck);
}

#endif // KMP_USE_ADAPTIVE_LOCKS

/* ------------------------------------------------------------------------ */
/* DRDPA ticket locks */
/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */

static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
  return lck->lk.owner_id - 1;
}

static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
  return lck->lk.depth_locked != -1;
}

__forceinline static int
__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
  kmp_uint64 mask = lck->lk.mask; // atomic load
  std::atomic<kmp_uint64> *polls = lck->lk.polls;

#ifdef USE_LOCK_PROFILE
  if (polls[ticket & mask] != ticket)
    __kmp_printf("LOCK CONTENTION: %p\n", lck);
/* else __kmp_printf( "." );*/
#endif /* USE_LOCK_PROFILE */

  // Now spin-wait, but reload the polls pointer and mask, in case the
  // polling area has been reconfigured. Unless it is reconfigured, the
  // reloads stay in L1 cache and are cheap.
  //
  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
  // The current implementation of KMP_WAIT doesn't allow for mask
  // and poll to be re-read every spin iteration.
  kmp_uint32 spins;
  KMP_FSYNC_PREPARE(lck);
  KMP_INIT_YIELD(spins);
  while (polls[ticket & mask] < ticket) { // atomic load
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
    // Re-read the mask and the poll pointer from the lock structure.
    //
    // Make certain that "mask" is read before "polls" !!!
    //
    // If another thread reconfigures the polling area and updates its values,
    // and we pick up the new value of mask with the old polls pointer, we
    // could access memory beyond the end of the old polling area.
    mask = lck->lk.mask; // atomic load
    polls = lck->lk.polls; // atomic load
  }

  // Critical section starts here
  KMP_FSYNC_ACQUIRED(lck);
  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
                  ticket, lck));
  lck->lk.now_serving = ticket; // non-volatile store

  // Deallocate a garbage polling area if we know that we are the last
  // thread that could possibly access it.
  //
  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
  // ticket.
  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
    __kmp_free(lck->lk.old_polls);
    lck->lk.old_polls = NULL;
    lck->lk.cleanup_ticket = 0;
  }

  // Check to see if we should reconfigure the polling area.
  // If there is still a garbage polling area to be deallocated from a
  // previous reconfiguration, let a later thread reconfigure it.
  if (lck->lk.old_polls == NULL) {
    bool reconfigure = false;
    std::atomic<kmp_uint64> *old_polls = polls;
    kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);

    if (TCR_4(__kmp_nth) >
        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
      // We are in oversubscription mode. Contract the polling area
      // down to a single location, if that hasn't been done already.
      if (num_polls > 1) {
        reconfigure = true;
        num_polls = TCR_4(lck->lk.num_polls);
        mask = 0;
        num_polls = 1;
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        polls[0] = ticket;
      }
    } else {
      // We are in under/fully subscribed mode. Check the number of
      // threads waiting on the lock. The size of the polling area
      // should be at least the number of threads waiting.
      kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
      if (num_waiting > num_polls) {
        kmp_uint32 old_num_polls = num_polls;
        reconfigure = true;
        do {
          mask = (mask << 1) | 1;
          num_polls *= 2;
        } while (num_polls <= num_waiting);

        // Allocate the new polling area, and copy the relevant portion
        // of the old polling area to the new area. __kmp_allocate()
        // zeroes the memory it allocates, and most of the old area is
        // just zero padding, so we only copy the release counters.
        polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
                                                          sizeof(*polls));
        kmp_uint32 i;
        for (i = 0; i < old_num_polls; i++) {
          polls[i].store(old_polls[i]);
        }
      }
    }

    if (reconfigure) {
      // Now write the updated fields back to the lock structure.
      //
      // Make certain that "polls" is written before "mask" !!!
      //
      // If another thread picks up the new value of mask and the old polls
      // pointer, it could access memory beyond the end of the old polling
      // area.
      //
      // On x86, we need memory fences.
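      /* Ordering discipline, spelled out (restates the comments above and
         mirrors the read side of the wait loop):

           writer (here):              reader (wait loop above):
             polls = new area            mask  = lck->lk.mask;   // first
             KMP_MB();                   polls = lck->lk.polls;  // second
             mask  = new mask
             KMP_MB();
             cleanup_ticket = ...

         A racing reader can thus observe (old mask, new polls) but never
         (new mask, old polls) -- the combination the comments call out, which
         could index past the end of the old area after it grows. The old
         area itself is only freed later, once cleanup_ticket guarantees no
         thread can still be polling it. */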
2341345153Sdim KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " 2342345153Sdim "lock %p to %d polls\n", 2343345153Sdim ticket, lck, num_polls)); 2344345153Sdim 2345345153Sdim lck->lk.old_polls = old_polls; 2346345153Sdim lck->lk.polls = polls; // atomic store 2347345153Sdim 2348345153Sdim KMP_MB(); 2349345153Sdim 2350345153Sdim lck->lk.num_polls = num_polls; 2351345153Sdim lck->lk.mask = mask; // atomic store 2352345153Sdim 2353345153Sdim KMP_MB(); 2354345153Sdim 2355345153Sdim // Only after the new polling area and mask have been flushed 2356345153Sdim // to main memory can we update the cleanup ticket field. 2357345153Sdim // 2358345153Sdim // volatile load / non-volatile store 2359345153Sdim lck->lk.cleanup_ticket = lck->lk.next_ticket; 2360345153Sdim } 2361345153Sdim } 2362345153Sdim return KMP_LOCK_ACQUIRED_FIRST; 2363345153Sdim} 2364345153Sdim 2365345153Sdimint __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2366345153Sdim int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2367345153Sdim ANNOTATE_DRDPA_ACQUIRED(lck); 2368345153Sdim return retval; 2369345153Sdim} 2370345153Sdim 2371345153Sdimstatic int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2372345153Sdim kmp_int32 gtid) { 2373345153Sdim char const *const func = "omp_set_lock"; 2374345153Sdim if (lck->lk.initialized != lck) { 2375345153Sdim KMP_FATAL(LockIsUninitialized, func); 2376345153Sdim } 2377345153Sdim if (__kmp_is_drdpa_lock_nestable(lck)) { 2378345153Sdim KMP_FATAL(LockNestableUsedAsSimple, func); 2379345153Sdim } 2380345153Sdim if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { 2381345153Sdim KMP_FATAL(LockIsAlreadyOwned, func); 2382345153Sdim } 2383345153Sdim 2384345153Sdim __kmp_acquire_drdpa_lock(lck, gtid); 2385345153Sdim 2386345153Sdim lck->lk.owner_id = gtid + 1; 2387345153Sdim return KMP_LOCK_ACQUIRED_FIRST; 2388345153Sdim} 2389345153Sdim 2390345153Sdimint __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2391345153Sdim // First get a ticket, then read the polls pointer and the mask. 2392345153Sdim // The polls pointer must be read before the mask!!! (See above) 2393345153Sdim kmp_uint64 ticket = lck->lk.next_ticket; // atomic load 2394345153Sdim std::atomic<kmp_uint64> *polls = lck->lk.polls; 2395345153Sdim kmp_uint64 mask = lck->lk.mask; // atomic load 2396345153Sdim if (polls[ticket & mask] == ticket) { 2397345153Sdim kmp_uint64 next_ticket = ticket + 1; 2398345153Sdim if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket, 2399345153Sdim next_ticket)) { 2400345153Sdim KMP_FSYNC_ACQUIRED(lck); 2401345153Sdim KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", 2402345153Sdim ticket, lck)); 2403345153Sdim lck->lk.now_serving = ticket; // non-volatile store 2404345153Sdim 2405345153Sdim // Since no threads are waiting, there is no possibility that we would 2406345153Sdim // want to reconfigure the polling area. We might have the cleanup ticket 2407345153Sdim // value (which says that it is now safe to deallocate old_polls), but 2408345153Sdim // we'll let a later thread which calls __kmp_acquire_lock do that - this 2409345153Sdim // routine isn't supposed to block, and we would risk blocks if we called 2410345153Sdim // __kmp_free() to do the deallocation. 
2411345153Sdim return TRUE; 2412345153Sdim } 2413345153Sdim } 2414345153Sdim return FALSE; 2415345153Sdim} 2416345153Sdim 2417345153Sdimstatic int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2418345153Sdim kmp_int32 gtid) { 2419345153Sdim char const *const func = "omp_test_lock"; 2420345153Sdim if (lck->lk.initialized != lck) { 2421345153Sdim KMP_FATAL(LockIsUninitialized, func); 2422345153Sdim } 2423345153Sdim if (__kmp_is_drdpa_lock_nestable(lck)) { 2424345153Sdim KMP_FATAL(LockNestableUsedAsSimple, func); 2425345153Sdim } 2426345153Sdim 2427345153Sdim int retval = __kmp_test_drdpa_lock(lck, gtid); 2428345153Sdim 2429345153Sdim if (retval) { 2430345153Sdim lck->lk.owner_id = gtid + 1; 2431345153Sdim } 2432345153Sdim return retval; 2433345153Sdim} 2434345153Sdim 2435345153Sdimint __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2436345153Sdim // Read the ticket value from the lock data struct, then the polls pointer and 2437345153Sdim // the mask. The polls pointer must be read before the mask!!! (See above) 2438345153Sdim kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load 2439345153Sdim std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load 2440345153Sdim kmp_uint64 mask = lck->lk.mask; // atomic load 2441345153Sdim KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", 2442345153Sdim ticket - 1, lck)); 2443345153Sdim KMP_FSYNC_RELEASING(lck); 2444345153Sdim ANNOTATE_DRDPA_RELEASED(lck); 2445345153Sdim polls[ticket & mask] = ticket; // atomic store 2446345153Sdim return KMP_LOCK_RELEASED; 2447345153Sdim} 2448345153Sdim 2449345153Sdimstatic int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2450345153Sdim kmp_int32 gtid) { 2451345153Sdim char const *const func = "omp_unset_lock"; 2452345153Sdim KMP_MB(); /* in case another processor initialized lock */ 2453345153Sdim if (lck->lk.initialized != lck) { 2454345153Sdim KMP_FATAL(LockIsUninitialized, func); 2455345153Sdim } 2456345153Sdim if (__kmp_is_drdpa_lock_nestable(lck)) { 2457345153Sdim KMP_FATAL(LockNestableUsedAsSimple, func); 2458345153Sdim } 2459345153Sdim if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2460345153Sdim KMP_FATAL(LockUnsettingFree, func); 2461345153Sdim } 2462345153Sdim if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && 2463345153Sdim (__kmp_get_drdpa_lock_owner(lck) != gtid)) { 2464345153Sdim KMP_FATAL(LockUnsettingSetByAnother, func); 2465345153Sdim } 2466345153Sdim lck->lk.owner_id = 0; 2467345153Sdim return __kmp_release_drdpa_lock(lck, gtid); 2468345153Sdim} 2469345153Sdim 2470345153Sdimvoid __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { 2471345153Sdim lck->lk.location = NULL; 2472345153Sdim lck->lk.mask = 0; 2473345153Sdim lck->lk.num_polls = 1; 2474345153Sdim lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate( 2475345153Sdim lck->lk.num_polls * sizeof(*(lck->lk.polls))); 2476345153Sdim lck->lk.cleanup_ticket = 0; 2477345153Sdim lck->lk.old_polls = NULL; 2478345153Sdim lck->lk.next_ticket = 0; 2479345153Sdim lck->lk.now_serving = 0; 2480345153Sdim lck->lk.owner_id = 0; // no thread owns the lock. 2481345153Sdim lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
2482345153Sdim lck->lk.initialized = lck; 2483345153Sdim 2484345153Sdim KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); 2485345153Sdim} 2486345153Sdim 2487345153Sdimvoid __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { 2488345153Sdim lck->lk.initialized = NULL; 2489345153Sdim lck->lk.location = NULL; 2490345153Sdim if (lck->lk.polls.load() != NULL) { 2491345153Sdim __kmp_free(lck->lk.polls.load()); 2492345153Sdim lck->lk.polls = NULL; 2493345153Sdim } 2494345153Sdim if (lck->lk.old_polls != NULL) { 2495345153Sdim __kmp_free(lck->lk.old_polls); 2496345153Sdim lck->lk.old_polls = NULL; 2497345153Sdim } 2498345153Sdim lck->lk.mask = 0; 2499345153Sdim lck->lk.num_polls = 0; 2500345153Sdim lck->lk.cleanup_ticket = 0; 2501345153Sdim lck->lk.next_ticket = 0; 2502345153Sdim lck->lk.now_serving = 0; 2503345153Sdim lck->lk.owner_id = 0; 2504345153Sdim lck->lk.depth_locked = -1; 2505345153Sdim} 2506345153Sdim 2507345153Sdimstatic void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2508345153Sdim char const *const func = "omp_destroy_lock"; 2509345153Sdim if (lck->lk.initialized != lck) { 2510345153Sdim KMP_FATAL(LockIsUninitialized, func); 2511345153Sdim } 2512345153Sdim if (__kmp_is_drdpa_lock_nestable(lck)) { 2513345153Sdim KMP_FATAL(LockNestableUsedAsSimple, func); 2514345153Sdim } 2515345153Sdim if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2516345153Sdim KMP_FATAL(LockStillOwned, func); 2517345153Sdim } 2518345153Sdim __kmp_destroy_drdpa_lock(lck); 2519345153Sdim} 2520345153Sdim 2521345153Sdim// nested drdpa ticket locks 2522345153Sdim 2523345153Sdimint __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2524345153Sdim KMP_DEBUG_ASSERT(gtid >= 0); 2525345153Sdim 2526345153Sdim if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2527345153Sdim lck->lk.depth_locked += 1; 2528345153Sdim return KMP_LOCK_ACQUIRED_NEXT; 2529345153Sdim } else { 2530345153Sdim __kmp_acquire_drdpa_lock_timed_template(lck, gtid); 2531345153Sdim ANNOTATE_DRDPA_ACQUIRED(lck); 2532345153Sdim KMP_MB(); 2533345153Sdim lck->lk.depth_locked = 1; 2534345153Sdim KMP_MB(); 2535345153Sdim lck->lk.owner_id = gtid + 1; 2536345153Sdim return KMP_LOCK_ACQUIRED_FIRST; 2537345153Sdim } 2538345153Sdim} 2539345153Sdim 2540345153Sdimstatic void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2541345153Sdim kmp_int32 gtid) { 2542345153Sdim char const *const func = "omp_set_nest_lock"; 2543345153Sdim if (lck->lk.initialized != lck) { 2544345153Sdim KMP_FATAL(LockIsUninitialized, func); 2545345153Sdim } 2546345153Sdim if (!__kmp_is_drdpa_lock_nestable(lck)) { 2547345153Sdim KMP_FATAL(LockSimpleUsedAsNestable, func); 2548345153Sdim } 2549345153Sdim __kmp_acquire_nested_drdpa_lock(lck, gtid); 2550345153Sdim} 2551345153Sdim 2552345153Sdimint __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2553345153Sdim int retval; 2554345153Sdim 2555345153Sdim KMP_DEBUG_ASSERT(gtid >= 0); 2556345153Sdim 2557345153Sdim if (__kmp_get_drdpa_lock_owner(lck) == gtid) { 2558345153Sdim retval = ++lck->lk.depth_locked; 2559345153Sdim } else if (!__kmp_test_drdpa_lock(lck, gtid)) { 2560345153Sdim retval = 0; 2561345153Sdim } else { 2562345153Sdim KMP_MB(); 2563345153Sdim retval = lck->lk.depth_locked = 1; 2564345153Sdim KMP_MB(); 2565345153Sdim lck->lk.owner_id = gtid + 1; 2566345153Sdim } 2567345153Sdim return retval; 2568345153Sdim} 2569345153Sdim 2570345153Sdimstatic int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2571345153Sdim kmp_int32 gtid) { 
2572345153Sdim char const *const func = "omp_test_nest_lock"; 2573345153Sdim if (lck->lk.initialized != lck) { 2574345153Sdim KMP_FATAL(LockIsUninitialized, func); 2575345153Sdim } 2576345153Sdim if (!__kmp_is_drdpa_lock_nestable(lck)) { 2577345153Sdim KMP_FATAL(LockSimpleUsedAsNestable, func); 2578345153Sdim } 2579345153Sdim return __kmp_test_nested_drdpa_lock(lck, gtid); 2580345153Sdim} 2581345153Sdim 2582345153Sdimint __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { 2583345153Sdim KMP_DEBUG_ASSERT(gtid >= 0); 2584345153Sdim 2585345153Sdim KMP_MB(); 2586345153Sdim if (--(lck->lk.depth_locked) == 0) { 2587345153Sdim KMP_MB(); 2588345153Sdim lck->lk.owner_id = 0; 2589345153Sdim __kmp_release_drdpa_lock(lck, gtid); 2590345153Sdim return KMP_LOCK_RELEASED; 2591345153Sdim } 2592345153Sdim return KMP_LOCK_STILL_HELD; 2593345153Sdim} 2594345153Sdim 2595345153Sdimstatic int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, 2596345153Sdim kmp_int32 gtid) { 2597345153Sdim char const *const func = "omp_unset_nest_lock"; 2598345153Sdim KMP_MB(); /* in case another processor initialized lock */ 2599345153Sdim if (lck->lk.initialized != lck) { 2600345153Sdim KMP_FATAL(LockIsUninitialized, func); 2601345153Sdim } 2602345153Sdim if (!__kmp_is_drdpa_lock_nestable(lck)) { 2603345153Sdim KMP_FATAL(LockSimpleUsedAsNestable, func); 2604345153Sdim } 2605345153Sdim if (__kmp_get_drdpa_lock_owner(lck) == -1) { 2606345153Sdim KMP_FATAL(LockUnsettingFree, func); 2607345153Sdim } 2608345153Sdim if (__kmp_get_drdpa_lock_owner(lck) != gtid) { 2609345153Sdim KMP_FATAL(LockUnsettingSetByAnother, func); 2610345153Sdim } 2611345153Sdim return __kmp_release_nested_drdpa_lock(lck, gtid); 2612345153Sdim} 2613345153Sdim 2614345153Sdimvoid __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2615345153Sdim __kmp_init_drdpa_lock(lck); 2616345153Sdim lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks 2617345153Sdim} 2618345153Sdim 2619345153Sdimvoid __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { 2620345153Sdim __kmp_destroy_drdpa_lock(lck); 2621345153Sdim lck->lk.depth_locked = 0; 2622345153Sdim} 2623345153Sdim 2624345153Sdimstatic void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { 2625345153Sdim char const *const func = "omp_destroy_nest_lock"; 2626345153Sdim if (lck->lk.initialized != lck) { 2627345153Sdim KMP_FATAL(LockIsUninitialized, func); 2628345153Sdim } 2629345153Sdim if (!__kmp_is_drdpa_lock_nestable(lck)) { 2630345153Sdim KMP_FATAL(LockSimpleUsedAsNestable, func); 2631345153Sdim } 2632345153Sdim if (__kmp_get_drdpa_lock_owner(lck) != -1) { 2633345153Sdim KMP_FATAL(LockStillOwned, func); 2634345153Sdim } 2635345153Sdim __kmp_destroy_nested_drdpa_lock(lck); 2636345153Sdim} 2637345153Sdim 2638345153Sdim// access functions to fields which don't exist for all lock kinds. 

static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
  return lck->lk.location;
}

static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
                                          const ident_t *loc) {
  lck->lk.location = loc;
}

static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
  return lck->lk.flags;
}

static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
                                       kmp_lock_flags_t flags) {
  lck->lk.flags = flags;
}

// Time stamp counter
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define __kmp_tsc() __kmp_hardware_timestamp()
// Runtime's default backoff parameters
kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
#else
// Use nanoseconds for other platforms
extern kmp_uint64 __kmp_now_nsec();
kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
#define __kmp_tsc() __kmp_now_nsec()
#endif

// A useful predicate for dealing with timestamps that may wrap.
// Is a before b? Since the timestamps may wrap, this is asking whether it's
// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
// Times where going clockwise is less distance than going anti-clockwise
// are in the future, others are in the past. E.g., with a = MAX-1 and
// b = MAX+1 (= 0), the unsigned comparison a > b (true) does not mean that b
// is behind a; the signed difference signed(b) - signed(a) = 0 - (-2) = 2 > 0
// captures the fact that b is two ticks after a.
static inline bool before(kmp_uint64 a, kmp_uint64 b) {
  return ((kmp_int64)b - (kmp_int64)a) > 0;
}

// Truncated binary exponential backoff function
void __kmp_spin_backoff(kmp_backoff_t *boff) {
  // We could flatten this loop, but making it a nested loop gives better
  // results.
  kmp_uint32 i;
  for (i = boff->step; i > 0; i--) {
    kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
    do {
      KMP_CPU_PAUSE();
    } while (before(__kmp_tsc(), goal));
  }
  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
}

#if KMP_USE_DYNAMIC_LOCK

// Direct lock initializer: it simply writes a tag to the low 8 bits of the
// lock word.
static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
                                   kmp_dyna_lockseq_t seq) {
  TCW_4(*lck, KMP_GET_D_TAG(seq));
  KA_TRACE(
      20,
      ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
}

#if KMP_USE_TSX

// HLE lock functions - imported from the testbed runtime.
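/* Added background: 0xF2 and 0xF3 below are the XACQUIRE and XRELEASE
   instruction prefixes. On HLE-capable hardware they let the prefixed
   xchg/mov elide the lock and run the critical section transactionally; on
   older CPUs the prefixes are architecturally ignored and the code decays to
   an ordinary test-and-set lock, which is why emitting them as raw .byte
   sequences is safe everywhere. */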

#if KMP_USE_DYNAMIC_LOCK

// Direct lock initializers. They simply write a tag to the low 8 bits of the
// lock word.
static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
                                   kmp_dyna_lockseq_t seq) {
  TCW_4(*lck, KMP_GET_D_TAG(seq));
  KA_TRACE(
      20,
      ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
}

#if KMP_USE_TSX

// HLE lock functions - imported from the testbed runtime.
#define HLE_ACQUIRE ".byte 0xf2;"
#define HLE_RELEASE ".byte 0xf3;"
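// Editor's note: 0xf2 and 0xf3 are the XACQUIRE and XRELEASE instruction
// prefixes. On hardware without TSX they decode as the legacy REPNE/REPE
// prefixes, which have no effect on the xchg/mov they precede, so the same
// binary runs correctly everywhere; on TSX hardware they ask the core to
// elide the lock and execute the critical section transactionally.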

static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
  return v;
}

static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }

static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
  TCW_4(*lck, 0);
}

static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  // Use gtid for KMP_LOCK_BUSY if necessary
  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
    int delay = 1;
    do {
      while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
        for (int i = delay; i != 0; --i)
          KMP_CPU_PAUSE();
        delay = ((delay << 1) | 1) & 7;
      }
    } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
  }
}

static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                               kmp_int32 gtid) {
  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
}

static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  __asm__ volatile(HLE_RELEASE "movl %1,%0"
                   : "=m"(*lck)
                   : "r"(KMP_LOCK_FREE(hle))
                   : "memory");
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                              kmp_int32 gtid) {
  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
}

static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
}

static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
                                           kmp_int32 gtid) {
  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
}

static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock(lck);
}

static void __kmp_destroy_rtm_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_destroy_queuing_lock_with_checks(lck);
}

static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  unsigned retries = 3, status;
  do {
    status = _xbegin();
    if (status == _XBEGIN_STARTED) {
      if (__kmp_is_unlocked_queuing_lock(lck))
        return;
      _xabort(0xff);
    }
    if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
      // Wait until lock becomes free
      while (!__kmp_is_unlocked_queuing_lock(lck)) {
        KMP_YIELD(TRUE);
      }
    } else if (!(status & _XABORT_RETRY))
      break;
  } while (retries--);

  // Fall-back non-speculative lock (xchg)
  __kmp_acquire_queuing_lock(lck, gtid);
}
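// Editor's note on the pattern above: reading the lock word inside the
// transaction puts it in the transaction's read-set, so a competing thread
// that acquires the lock for real aborts every speculating thread. The
// explicit _xabort(0xff) marks "lock was busy at _xbegin"; in that case we
// wait until the lock looks free before retrying speculation, rather than
// burning the remaining retries while the owner is still inside.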

static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
                                               kmp_int32 gtid) {
  __kmp_acquire_rtm_lock(lck, gtid);
}

static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(lck)) {
    // Releasing from speculation
    _xend();
  } else {
    // Releasing from a real lock
    __kmp_release_queuing_lock(lck, gtid);
  }
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
                                              kmp_int32 gtid) {
  return __kmp_release_rtm_lock(lck, gtid);
}

static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
  unsigned retries = 3, status;
  do {
    status = _xbegin();
    if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
      return 1;
    }
    if (!(status & _XABORT_RETRY))
      break;
  } while (retries--);

  return (__kmp_is_unlocked_queuing_lock(lck)) ? 1 : 0;
}

static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck,
                                           kmp_int32 gtid) {
  return __kmp_test_rtm_lock(lck, gtid);
}

#endif // KMP_USE_TSX

// Entry functions for indirect locks (first element of direct lock jump tables)
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
                                     kmp_dyna_lockseq_t tag);
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                               kmp_int32);
static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                 kmp_int32);
static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                kmp_int32);

// Lock function definitions for the union parameter type
#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)

#define expand1(lk, op)                                                        \
  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) {               \
    __kmp_##op##_##lk##_##lock(&lock->lk);                                     \
  }
#define expand2(lk, op)                                                        \
  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock,                  \
                                        kmp_int32 gtid) {                      \
    return __kmp_##op##_##lk##_##lock(&lock->lk, gtid);                        \
  }
#define expand3(lk, op)                                                        \
  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock,              \
                                            kmp_lock_flags_t flags) {          \
    __kmp_set_##lk##_lock_flags(&lock->lk, flags);                             \
  }
#define expand4(lk, op)                                                        \
  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock,           \
                                               const ident_t *loc) {           \
    __kmp_set_##lk##_lock_location(&lock->lk, loc);                            \
  }

KMP_FOREACH_LOCK_KIND(expand1, init)
KMP_FOREACH_LOCK_KIND(expand1, init_nested)
KMP_FOREACH_LOCK_KIND(expand1, destroy)
KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
KMP_FOREACH_LOCK_KIND(expand2, acquire)
KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
KMP_FOREACH_LOCK_KIND(expand2, release)
KMP_FOREACH_LOCK_KIND(expand2, release_nested)
KMP_FOREACH_LOCK_KIND(expand2, test)
KMP_FOREACH_LOCK_KIND(expand2, test_nested)
KMP_FOREACH_LOCK_KIND(expand3, )
KMP_FOREACH_LOCK_KIND(expand4, )

#undef expand1
#undef expand2
#undef expand3
#undef expand4

// Jump tables for the direct lock functions
// Only fill in the odd entries, that avoids the need to shift out the low bit
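// Editor's note: a worked example of the even/odd trick. Direct locks keep an
// odd tag in the lock word (KMP_GET_D_TAG in kmp_lock.h encodes a sequence as
// (seq << 1 | 1)), while indirect locks store an even word (index << 1, see
// __kmp_allocate_indirect_lock below). Tag extraction masks the word with
// -(word & 1) (see KMP_EXTRACT_D_TAG in kmp_lock.h), so every even word
// collapses to 0 and dispatches to the indirect entry at slot 0, and every
// odd tag indexes its own slot directly; the interleaved zero entries are the
// padding that makes this work without shifting the low bit out.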

// init functions
#define expand(l, op) 0, __kmp_init_direct_lock,
void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
    __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
#undef expand

// destroy functions
#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
#undef expand
#define expand(l, op)                                                          \
  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
    __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
#undef expand

// set/acquire functions
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
#undef expand
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_set_indirect_lock_with_checks, 0,
    KMP_FOREACH_D_LOCK(expand, acquire)};
#undef expand

// unset/release and test functions
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
#undef expand
#define expand(l, op)                                                          \
  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_unset_indirect_lock_with_checks, 0,
    KMP_FOREACH_D_LOCK(expand, release)};
static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
    __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;

// Jump tables for the indirect lock functions
#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, init)};
#undef expand

#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
static void (*indirect_destroy[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, destroy)};
#undef expand
#define expand(l, op)                                                          \
  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
    KMP_FOREACH_I_LOCK(expand, destroy)};
#undef expand

// set/acquire functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_set[])(kmp_user_lock_p,
                             kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, acquire)};
#undef expand

// unset/release and test functions
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test[])(kmp_user_lock_p,
                              kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
#undef expand
#define expand(l, op)                                                          \
  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, release)};
static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
    KMP_FOREACH_I_LOCK(expand, test)};
#undef expand

// Exposes only one set of jump tables (*lock or *lock_with_checks).
void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;

// Lock index table.
kmp_indirect_lock_table_t __kmp_i_lock_table;

// Size of indirect locks.
static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};

// Jump tables for lock accessor/modifier.
void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                     const ident_t *) = {0};
void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
                                                  kmp_lock_flags_t) = {0};
const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};
kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
    kmp_user_lock_p) = {0};

// Use different lock pools for different lock types.
static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};

// User lock allocator for dynamically dispatched indirect locks. Every entry
// of the indirect lock table holds the address and type of the allocated
// indirect lock (kmp_indirect_lock_t), and the size of the table doubles when
// it is full. A destroyed indirect lock object is returned to the reusable
// pool of locks, unique to each lock type.
kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
                                                  kmp_int32 gtid,
                                                  kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t *lck;
  kmp_lock_index_t idx;

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_indirect_lock_pool[tag] != NULL) {
    // Reuse the allocated and destroyed lock object
    lck = __kmp_indirect_lock_pool[tag];
    if (OMP_LOCK_T_SIZE < sizeof(void *))
      idx = lck->lock->pool.index;
    __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
    KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
                  lck));
  } else {
    idx = __kmp_i_lock_table.next;
    // Check capacity and double the size if it is full
    if (idx == __kmp_i_lock_table.size) {
      // Double up the space for block pointers
      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
      kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
          2 * row * sizeof(kmp_indirect_lock_t *));
      KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
                 row * sizeof(kmp_indirect_lock_t *));
      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
      __kmp_i_lock_table.table = new_table;
      __kmp_free(old_table);
      // Allocate new objects in the new blocks
      for (int i = row; i < 2 * row; ++i)
        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
      __kmp_i_lock_table.size = 2 * idx;
    }
    __kmp_i_lock_table.next++;
    lck = KMP_GET_I_LOCK(idx);
    // Allocate a new base lock object
    lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
    KA_TRACE(20,
             ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);

  lck->type = tag;

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = idx
                                       << 1; // indirect lock word must be even
  } else {
    *((kmp_indirect_lock_t **)user_lock) = lck;
  }

  return lck;
}
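// Editor's note: in the OMP_LOCK_T_SIZE < sizeof(void *) case the user's lock
// variable holds (idx << 1); keeping the word even distinguishes it from the
// odd direct-lock tags, and KMP_EXTRACT_I_INDEX (kmp_lock.h) shifts the bit
// back out. KMP_GET_I_LOCK resolves an index as
//   table[idx / KMP_I_LOCK_CHUNK] + idx % KMP_I_LOCK_CHUNK,
// so the doubling above only grows the array of row pointers; the lock
// objects themselves never move, and pointers handed out earlier stay valid.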

// User lock lookup for dynamically dispatched locks.
static __forceinline kmp_indirect_lock_t *
__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
  if (__kmp_env_consistency_check) {
    kmp_indirect_lock_t *lck = NULL;
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
      kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
      if (idx >= __kmp_i_lock_table.size) {
        KMP_FATAL(LockIsUninitialized, func);
      }
      lck = KMP_GET_I_LOCK(idx);
    } else {
      lck = *((kmp_indirect_lock_t **)user_lock);
    }
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
    return lck;
  } else {
    if (OMP_LOCK_T_SIZE < sizeof(void *)) {
      return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
    } else {
      return *((kmp_indirect_lock_t **)user_lock);
    }
  }
}

static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
                                     kmp_dyna_lockseq_t seq) {
#if KMP_USE_ADAPTIVE_LOCKS
  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
    KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
    seq = lockseq_queuing;
  }
#endif
#if KMP_USE_TSX
  if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
    seq = lockseq_queuing;
  }
#endif
  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
  kmp_indirect_lock_t *l =
      __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
  KMP_I_LOCK_FUNC(l, init)(l->lock);
  KA_TRACE(
      20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
           seq));
}

static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
  kmp_uint32 gtid = __kmp_entry_gtid();
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
  kmp_indirect_locktag_t tag = l->type;

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  // Use the base lock's space to keep the pool chain.
  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
  }
  __kmp_indirect_lock_pool[tag] = l;

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
}

static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
}

static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
}

static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                               kmp_int32 gtid) {
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
}

static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                 kmp_int32 gtid) {
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
}

static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
                                                kmp_int32 gtid) {
  kmp_indirect_lock_t *l =
      __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
}

kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;

// This is used only in kmp_error.cpp when consistency checking is on.
kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
  switch (seq) {
  case lockseq_tas:
  case lockseq_nested_tas:
    return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
#if KMP_USE_FUTEX
  case lockseq_futex:
  case lockseq_nested_futex:
    return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
#endif
  case lockseq_ticket:
  case lockseq_nested_ticket:
    return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
  case lockseq_queuing:
  case lockseq_nested_queuing:
#if KMP_USE_ADAPTIVE_LOCKS
  case lockseq_adaptive:
#endif
    return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
  case lockseq_drdpa:
  case lockseq_nested_drdpa:
    return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
  default:
    return 0;
  }
}

// Initializes data for dynamic user locks.
void __kmp_init_dynamic_user_locks() {
  // Initialize jump table for the lock functions
  if (__kmp_env_consistency_check) {
    __kmp_direct_set = direct_set_check;
    __kmp_direct_unset = direct_unset_check;
    __kmp_direct_test = direct_test_check;
    __kmp_direct_destroy = direct_destroy_check;
    __kmp_indirect_set = indirect_set_check;
    __kmp_indirect_unset = indirect_unset_check;
    __kmp_indirect_test = indirect_test_check;
    __kmp_indirect_destroy = indirect_destroy_check;
  } else {
    __kmp_direct_set = direct_set;
    __kmp_direct_unset = direct_unset;
    __kmp_direct_test = direct_test;
    __kmp_direct_destroy = direct_destroy;
    __kmp_indirect_set = indirect_set;
    __kmp_indirect_unset = indirect_unset;
    __kmp_indirect_test = indirect_test;
    __kmp_indirect_destroy = indirect_destroy;
  }
  // If the user locks have already been initialized, then return. Allow the
  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
  // new lock tables if they have already been allocated.
  if (__kmp_init_user_locks)
    return;

  // Initialize lock index table
  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
  __kmp_i_lock_table.table =
      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
  __kmp_i_lock_table.next = 0;

  // Indirect lock size
  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
#if KMP_USE_ADAPTIVE_LOCKS
  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
#if KMP_USE_TSX
  __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);

// Initialize lock accessor/modifier
#define fill_jumps(table, expand, sep)                                         \
  {                                                                            \
    table[locktag##sep##ticket] = expand(ticket);                              \
    table[locktag##sep##queuing] = expand(queuing);                            \
    table[locktag##sep##drdpa] = expand(drdpa);                                \
  }

#if KMP_USE_ADAPTIVE_LOCKS
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    table[locktag_adaptive] = expand(queuing);                                 \
    fill_jumps(table, expand, _nested_);                                       \
  }
#else
#define fill_table(table, expand)                                              \
  {                                                                            \
    fill_jumps(table, expand, _);                                              \
    fill_jumps(table, expand, _nested_);                                       \
  }
#endif // KMP_USE_ADAPTIVE_LOCKS
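// Editor's note: the token pasting above expands, e.g.,
//   fill_jumps(tbl, expand, _)        -> tbl[locktag_ticket] = expand(ticket); ...
//   fill_jumps(tbl, expand, _nested_) -> tbl[locktag_nested_ticket] = expand(ticket); ...
// so each fill_table invocation below populates the simple rows, the nested
// rows, and (when adaptive locks are enabled) aliases locktag_adaptive to the
// queuing accessors, all in a single statement.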

#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
  fill_table(__kmp_indirect_set_location, expand);
#undef expand
#define expand(l)                                                              \
  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
  fill_table(__kmp_indirect_set_flags, expand);
#undef expand
#define expand(l)                                                              \
  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
  fill_table(__kmp_indirect_get_location, expand);
#undef expand
#define expand(l)                                                              \
  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
  fill_table(__kmp_indirect_get_flags, expand);
#undef expand

  __kmp_init_user_locks = TRUE;
}

// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
  kmp_lock_index_t i;
  int k;

  // Clean up locks in the pools first (they were already destroyed before
  // going into the pools).
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  for (i = 0; i < __kmp_i_lock_table.next; i++) {
    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
    if (l->lock != NULL) {
      // Locks not destroyed explicitly need to be destroyed here.
      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
      KA_TRACE(
          20,
          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
           l));
      __kmp_free(l->lock);
    }
  }
  // Free the table
  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
    __kmp_free(__kmp_i_lock_table.table[i]);
  __kmp_free(__kmp_i_lock_table.table);

  __kmp_init_user_locks = FALSE;
}

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return lck == lck->lk.self;
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}

#if KMP_USE_ADAPTIVE_LOCKS
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif

static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}

static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first element
    // of the new table. All the tables will be organized into a list, and
    // could be freed when the library is shutting down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}
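// Editor's note: __kmp_user_lock_table starts as {used = 1, allocated = 0,
// table = NULL}, so the first insertion allocates 1024 slots and hands out
// index 1; growth then doubles (1024, 2048, 4096, ...). Slot [0] never holds
// a lock: each reallocation stores the previous table's address there, so the
// current table heads a chain of all of its predecessors, which is only
// walked and freed in __kmp_cleanup_user_locks below.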

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}
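// Editor's sketch of the block layout built above: a single allocation holds
// __kmp_num_locks_in_block lock-sized slots with the block header placed
// immediately after them,
//
//   buffer: [lock 0][lock 1]...[lock N-1][kmp_block_of_locks header]
//           ^ new_block->locks            ^ new_block
//
// which is why the cleanup loop at the end of this file frees only
// block_ptr->locks: that one free releases the header as well.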

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    // ANNOTATION: Found no good way to express the synchronisation
    // between allocation and usage, so ignore the allocation
    ANNOTATE_IGNORE_WRITES_BEGIN();
    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }
    ANNOTATE_IGNORE_WRITES_END();

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper

  return lck;
}

// Put lock's memory to pool for reusing.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about locks in the pool--we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen as the
  // "location" field of real user locks overlaps the "index" field of
  // "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident_t *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of locks is consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK