/*	$NetBSD: kern_mutex.c,v 1.112 2023/10/15 10:28:23 riastradh Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2019, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.112 2023/10/15 10:28:23 riastradh Exp $");

#include <sys/param.h>

#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/lockdebug.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pserialize.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/systm.h>
#include <sys/types.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx)					\
	LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_TESTLOCK(mtx)					\
	LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), -1)
#define	MUTEX_LOCKED(mtx)					\
	LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), NULL,	\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx)					\
	LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg)					\
	mutex_abort(__func__, __LINE__, mtx, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond)				\
do {								\
	if (__predict_false(!(cond)))				\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond)					\
do {								\
	if (__predict_false(!(cond)))				\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Some architectures can't use __cpu_simple_lock as-is, so allow a way
 * for them to use an alternate definition.
 */
#ifndef MUTEX_SPINBIT_LOCK_INIT
#define	MUTEX_SPINBIT_LOCK_INIT(mtx)	__cpu_simple_lock_init(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCKED_P
#define	MUTEX_SPINBIT_LOCKED_P(mtx)	__SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_TRY
#define	MUTEX_SPINBIT_LOCK_TRY(mtx)	__cpu_simple_lock_try(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_UNLOCK
#define	MUTEX_SPINBIT_LOCK_UNLOCK(mtx)	__cpu_simple_unlock(&(mtx)->mtx_lock)
#endif

#ifndef MUTEX_INITIALIZE_SPIN_IPL
#define	MUTEX_INITIALIZE_SPIN_IPL(mtx, ipl)			\
	((mtx)->mtx_ipl = makeiplcookie((ipl)))
#endif

/*
 * Spin mutex SPL save / restore.
 */

#define	MUTEX_SPIN_SPLRAISE(mtx)				\
do {								\
	const int s = splraiseipl(MUTEX_SPIN_IPL(mtx));		\
	struct cpu_info * const x__ci = curcpu();		\
	const int x__cnt = x__ci->ci_mtx_count--;		\
	__insn_barrier();					\
	if (x__cnt == 0)					\
		x__ci->ci_mtx_oldspl = s;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx)				\
do {								\
	struct cpu_info * const x__ci = curcpu();		\
	const int s = x__ci->ci_mtx_oldspl;			\
	__insn_barrier();					\
	if (++(x__ci->ci_mtx_count) == 0)			\
		splx(s);					\
} while (/* CONSTCOND */ 0)
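
/*
 * The ci_mtx_count handshake above makes nested spin mutex holds cheap:
 * only the outermost acquisition saves the previous SPL, and only the
 * outermost release restores it; in between the SPL is only ever raised.
 * An illustrative sequence (a sketch only; "lock_a" and "lock_b" are
 * hypothetical spin mutexes):
 *
 *	mutex_spin_enter(&lock_a);	ci_mtx_count  0 -> -1, oldspl saved
 *	mutex_spin_enter(&lock_b);	ci_mtx_count -1 -> -2, SPL raised only
 *	mutex_spin_exit(&lock_b);	ci_mtx_count -2 -> -1, SPL unchanged
 *	mutex_spin_exit(&lock_a);	ci_mtx_count -1 ->  0, splx(oldspl)
 */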

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define	MUTEX_MEMBAR_ENTER()
#else
#define	MUTEX_MEMBAR_ENTER()		membar_enter()
#endif

/*
 * For architectures that provide 'simple' mutexes: they provide a
 * CAS function that is either MP-safe, or does not need to be MP
 * safe.  Adaptive mutexes on these architectures do not require an
 * additional interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner)					\
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx)					\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)			\
do {								\
	if (!dodebug)						\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;		\
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)		\
do {								\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;			\
	if (!dodebug)						\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;		\
	MUTEX_INITIALIZE_SPIN_IPL((mtx), (ipl));		\
	MUTEX_SPINBIT_LOCK_INIT((mtx));				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx)					\
do {								\
	(mtx)->mtx_owner = MUTEX_THREAD;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_P(owner)					\
	(((owner) & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(owner)					\
	(((owner) & MUTEX_BIT_SPIN) == 0)

#ifndef MUTEX_CAS
#define	MUTEX_CAS(p, o, n)					\
	(atomic_cas_ulong((volatile unsigned long *)(p), (o), (n)) == (o))
#endif	/* MUTEX_CAS */

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_NODEBUG) == 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_NODEBUG) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	(n) |= (o) & MUTEX_BIT_NODEBUG
#else	/* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	/* nothing */
#endif	/* defined(LOCKDEBUG) */

static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t oldown = 0;
	uintptr_t newown = curthread;

	MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(newown, oldown);
	rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown);
	membar_acquire();
	return rv;
}

static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;

	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_MEMBAR_ENTER();
	return rv;
}

static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t newown;

	newown = 0;
	MUTEX_INHERITDEBUG(newown, mtx->mtx_owner);
	atomic_store_release(&mtx->mtx_owner, newown);
}
#endif	/* __HAVE_SIMPLE_MUTEXES */
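
/*
 * For reference, the machine-dependent mutex_enter()/mutex_exit() stubs
 * (where present) implement roughly the following fast path, deferring
 * every hard case to mutex_vector_enter()/mutex_vector_exit() below.
 * This is only an illustrative C sketch; the real stubs are machine
 * dependent (usually assembly) and differ in detail, e.g. in how they
 * treat the debug bits and memory ordering:
 *
 *	void
 *	mutex_enter(kmutex_t *mtx)
 *	{
 *
 *		if (!MUTEX_ACQUIRE(mtx, (uintptr_t)curlwp))
 *			mutex_vector_enter(mtx);
 *	}
 *
 *	void
 *	mutex_exit(kmutex_t *mtx)
 *	{
 *
 *		if (mtx->mtx_owner == (uintptr_t)curlwp)
 *			MUTEX_RELEASE(mtx);
 *		else
 *			mutex_vector_exit(mtx);
 *	}
 */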

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

static void	mutex_abort(const char *, size_t, volatile const kmutex_t *,
		    const char *);
static void	mutex_dump(const volatile void *, lockop_printer_t);
static lwp_t	*mutex_owner(wchan_t);

lockops_t mutex_spin_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = mutex_dump,
};

lockops_t mutex_adaptive_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = mutex_dump,
};

syncobj_t mutex_syncobj = {
	.sobj_name	= "mutex",
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_boostpri	= PRI_KERNEL,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= mutex_owner,
};

/*
 * mutex_dump:
 *
 *	Dump the contents of a mutex structure.
 */
static void
mutex_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile kmutex_t *mtx = cookie;
	uintptr_t owner = mtx->mtx_owner;

	pr("owner field : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(owner));
}

/*
 * mutex_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
mutex_abort(const char *func, size_t line, volatile const kmutex_t *mtx,
    const char *msg)
{

	LOCKDEBUG_ABORT(func, line, mtx, (MUTEX_SPIN_P(mtx->mtx_owner) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), msg);
}

/*
 * mutex_init:
 *
 *	Initialize a mutex for use.  Note that adaptive mutexes are in
 *	essence spin mutexes that can sleep to avoid deadlock and wasting
 *	CPU time.  We can't easily provide a type of mutex that always
 *	sleeps - see comments in mutex_vector_enter() about releasing
 *	mutexes unlocked.
 */
void
_mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl,
    uintptr_t return_address)
{
	lockops_t *lockops __unused;
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
	    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
	    ipl == IPL_SOFTSERIAL) {
		lockops = (type == MUTEX_NODEBUG ?
		    NULL : &mutex_adaptive_lockops);
		dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
	} else {
		lockops = (type == MUTEX_NODEBUG ?
		    NULL : &mutex_spin_lockops);
		dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
	}
}

void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{

	_mutex_init(mtx, type, ipl, (uintptr_t)__builtin_return_address(0));
}
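
/*
 * Typical usage from the rest of the kernel (an illustrative sketch only;
 * "sc_lock" and "sc_intr_lock" are hypothetical names).  An IPL of
 * IPL_NONE or a software interrupt level yields an adaptive mutex; any
 * higher IPL, such as IPL_VM below, yields a spin mutex:
 *
 *	kmutex_t sc_lock;
 *	kmutex_t sc_intr_lock;
 *
 *	mutex_init(&sc_lock, MUTEX_DEFAULT, IPL_NONE);		adaptive
 *	mutex_init(&sc_intr_lock, MUTEX_DEFAULT, IPL_VM);	spin
 *
 *	mutex_enter(&sc_lock);
 *	... critical section ...
 *	mutex_exit(&sc_lock);
 *
 *	mutex_destroy(&sc_lock);
 *	mutex_destroy(&sc_intr_lock);
 */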

/*
 * mutex_destroy:
 *
 *	Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{
	uintptr_t owner = mtx->mtx_owner;

	if (MUTEX_ADAPTIVE_P(owner)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(owner));
		MUTEX_ASSERT(mtx, !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !MUTEX_SPINBIT_LOCKED_P(mtx));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

#ifdef MULTIPROCESSOR
/*
 * mutex_oncpu:
 *
 *	Return true if an adaptive mutex owner is running on a CPU in the
 *	system.  If the target is waiting on the kernel big lock, then we
 *	must release it.  This is necessary to avoid deadlock.
 */
static bool
mutex_oncpu(uintptr_t owner)
{
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if (!MUTEX_OWNED(owner)) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)MUTEX_OWNER(owner);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (atomic_load_relaxed(&ci->ci_biglock_wanted) != l);
	}

	/* Not running.  It may be safe to block now. */
	return false;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 *	Support routine for mutex_enter() that must handle all cases.  In
 *	the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 *	fast-path stubs are available.  If a mutex_spin_enter() stub is
 *	not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	KPREEMPT_DISABLE(curlwp);
	owner = mtx->mtx_owner;
	if (MUTEX_SPIN_P(owner)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		KPREEMPT_ENABLE(curlwp);
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else	/* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin, testing the lock word and doing exponential backoff
		 * to reduce cache line ping-ponging between CPUs.
		 */
		do {
			while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
				SPINLOCK_SPIN_HOOK;
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(owner));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, !cpu_intr_p());
	MUTEX_WANTLOCK(mtx);

	if (__predict_true(panicstr == NULL)) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	for (;;) {
		if (!MUTEX_OWNED(owner)) {
			/*
			 * Mutex owner clear could mean two things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			owner = mtx->mtx_owner;
			continue;
		}
		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
			MUTEX_ABORT(mtx, "locking against myself");
		}
#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = mtx->mtx_owner;
			} while (mutex_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 *	CPU 1: MUTEX_SET_WAITERS()	CPU 2: mutex_exit()
		 *	--------------------------	--------------------------
		 *		..			load mtx->mtx_owner
		 *		..			see has-waiters bit clear
		 *	set has-waiters bit		..
		 *		..			store mtx->mtx_owner := 0
		 *	return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 * o Release is the only non-atomic/unlocked operation
		 *   that can be performed on the mutex.  (It must still
		 *   be atomic on the local CPU, e.g. in case interrupted
		 *   or preempted).
		 *
		 * o At any given time on each mutex, MUTEX_SET_WAITERS()
		 *   can only ever be in progress on one CPU in the
		 *   system - guaranteed by the turnstile chain lock.
		 *
		 * o No other operations other than MUTEX_SET_WAITERS()
		 *   and release can modify a mutex with a non-zero
		 *   owner field.
		 *
		 * o If the holding LWP switches away, it posts a store
		 *   fence before changing curlwp, ensuring that any
		 *   overwrite of the mutex waiters flag by mutex_exit()
		 *   completes before the modification of curlwp becomes
		 *   visible to this CPU.
		 *
		 * o cpu_switchto() posts a store fence after setting curlwp
		 *   and before resuming execution of an LWP.
		 *
		 * o _kernel_lock() posts a store fence before setting
		 *   curcpu()->ci_biglock_wanted, and after clearing it.
		 *   This ensures that any overwrite of the mutex waiters
		 *   flag by mutex_exit() completes before the modification
		 *   of ci_biglock_wanted becomes visible.
		 *
		 * After MUTEX_SET_WAITERS() succeeds, simultaneously
		 * confirming that the same LWP still holds the mutex
		 * since we took the turnstile lock and notifying it that
		 * we're waiting, we check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list;
		 * XXX this should be made exhaustive):
		 *
		 * 1. The on-CPU check returns true: the holding LWP is
		 *    running again.  The lock may be released soon and
		 *    we should spin.  Importantly, we can't trust the
		 *    value of the waiters flag.
		 *
		 * 2. The on-CPU check returns false: the holding LWP is
		 *    not running.  We now have the opportunity to check
		 *    if mutex_exit() has blatted the modifications made
		 *    by MUTEX_SET_WAITERS().
		 *
		 * 3. The on-CPU check returns false: the holding LWP may
		 *    or may not be running.  It has context switched at
		 *    some point during our check.  Again, we have the
		 *    chance to see if the waiters bit is still set or
		 *    has been overwritten.
		 *
		 * 4. The on-CPU check returns false: the holding LWP is
		 *    running on a CPU, but wants the big lock.  It's OK
		 *    to check the waiters field in this case.
		 *
		 * 5. The has-waiters check fails: the mutex has been
		 *    released, the waiters flag cleared and another LWP
		 *    now owns the mutex.
		 *
		 * 6. The has-waiters check fails: the mutex has been
		 *    released.
		 *
		 * If the waiters bit is not set it's unsafe to go to sleep,
		 * as we might never be awoken.
		 */
		if (mutex_oncpu(owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
		membar_consumer();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		owner = mtx->mtx_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 *	Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx->mtx_owner)) {
#ifdef FULL
		if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

#ifndef __HAVE_MUTEX_STUBS
	/*
	 * On some architectures without mutex stubs, we can enter here to
	 * release mutexes before interrupts and whatnot are up and running.
	 * We need this hack to keep them sweet.
	 */
	if (__predict_false(cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}
#endif

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);
#if !defined(LOCKDEBUG)
	__USE(curthread);
#endif

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 * This also blocks out soft interrupts that could set the
	 * waiters bit.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef __HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 *	Support routine for mutex_exit() that wakes up all waiters.
 *	We assume that the mutex has been released, but it need not
 *	be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 *	Return true if the current LWP (adaptive) or CPU (spin)
 *	holds the mutex.
 */
int
mutex_owned(const kmutex_t *mtx)
{

	if (mtx == NULL)
		return 0;
	if (MUTEX_ADAPTIVE_P(mtx->mtx_owner))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return MUTEX_SPINBIT_LOCKED_P(mtx);
#else
	return 1;
#endif
}

/*
 * mutex_owner:
 *
 *	Return the current owner of an adaptive mutex.  Used for
 *	priority inheritance.
 */
static lwp_t *
mutex_owner(wchan_t wchan)
{
	volatile const kmutex_t *mtx = wchan;

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx->mtx_owner));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}
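
/*
 * mutex_owned() above is typically used in diagnostic assertions rather
 * than to make locking decisions, for example (an illustrative sketch;
 * "sc" is a hypothetical softc):
 *
 *	KASSERT(mutex_owned(&sc->sc_lock));
 */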

/*
 * mutex_ownable:
 *
 *	When compiled with DEBUG and LOCKDEBUG defined, ensure that
 *	the mutex is available.  We cannot use !mutex_owned() since
 *	that won't work correctly for spin mutexes.
 */
int
mutex_ownable(const kmutex_t *mtx)
{

#ifdef LOCKDEBUG
	MUTEX_TESTLOCK(mtx);
#endif
	return 1;
}

/*
 * mutex_tryenter:
 *
 *	Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx->mtx_owner)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 *	Support routine for mutex_spin_enter().  Assumes that the caller
 *	has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin, testing the lock word and doing exponential backoff
	 * to reduce cache line ping-ponging between CPUs.
	 */
	do {
		while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */