kern_rwlock.c revision 193037
/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 193037 2009-05-29 14:03:34Z jhb $");

#include "opt_ddb.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

CTASSERT((RW_RECURSE & LO_CLASSFLAGS) == RW_RECURSE);

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef ADAPTIVE_RWLOCKS
static int rowner_retries = 10;
static int rowner_loops = 10000;
SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
#endif

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(struct lock_object *lock);
#endif
static void	assert_rw(struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Returns whether a write owner is recursed.  Write ownership is not
 * assured here and should be checked beforehand.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

void
assert_rw(struct lock_object *lock, int what)
{

	rw_assert((struct rwlock *)lock, what);
}

void
lock_rw(struct lock_object *lock, int how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_wlock(rw);
	else
		rw_rlock(rw);
}

int
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (0);
	} else {
		rw_wunlock(rw);
		return (1);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_rw(struct lock_object *lock, struct thread **owner)
{
	struct rwlock *rw = (struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
rw_init_flags(struct rwlock *rw, const char *name, int opts)
{
	int flags;

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE)) == 0);

	flags = LO_UPGRADABLE | LO_RECURSABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	flags |= opts & RW_RECURSE;

	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
	KASSERT(rw->rw_recurse == 0, ("rw lock still recursed"));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
}

int
rw_wowned(struct rwlock *rw)
{

	return (rw_wowner(rw) == curthread);
}
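
/*
 * Illustrative sketch (not part of the original file): a typical consumer
 * of this API.  The "foo" structure and function names below are
 * hypothetical; only rw_init(), rw_rlock()/rw_runlock(),
 * rw_wlock()/rw_wunlock() and rw_destroy() come from the rwlock API.
 */
#if 0
struct foo_softc {
	struct rwlock	sc_lock;
	int		sc_count;
};

static void
foo_attach(struct foo_softc *sc)
{

	rw_init(&sc->sc_lock, "foo softc");
}

static int
foo_read_count(struct foo_softc *sc)
{
	int count;

	rw_rlock(&sc->sc_lock);		/* many readers may hold this at once */
	count = sc->sc_count;
	rw_runlock(&sc->sc_lock);
	return (count);
}

static void
foo_bump_count(struct foo_softc *sc)
{

	rw_wlock(&sc->sc_lock);		/* exclusive; excludes readers and writers */
	sc->sc_count++;
	rw_wunlock(&sc->sc_lock);
}

static void
foo_detach(struct foo_softc *sc)
{

	rw_destroy(&sc->sc_lock);	/* lock must be unlocked here */
}
#endif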

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

int
_rw_try_wlock(struct rwlock *rw, const char *file, int line)
{
	int rval;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	if (rw_wlocked(rw) && (rw->lock_object.lo_flags & RW_RECURSE) != 0) {
		rw->rw_recurse++;
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
		    (uintptr_t)curthread);

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}
	return (rval);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);
	if (!rw_recursed(rw))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
	__rw_wunlock(rw, curthread, file, line);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read, to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing in all
 * other cases prioritizes writers over readers.
 */
#define	RW_CAN_READ(_rw)						\
    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
    RW_LOCK_READ)
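
/*
 * Illustrative expansion (not part of the original file): RW_CAN_READ()
 * written out as a function, for readers who find the macro hard to parse.
 * Behavior is intended to match the macro above.
 */
#if 0
static int
example_rw_can_read(uintptr_t v)
{

	/* A recursive read while the lock is read-locked is always allowed. */
	if (curthread->td_rw_rlocks != 0 && (v & RW_LOCK_READ))
		return (1);
	/* Otherwise only a read-locked word with no writer waiters/spinners. */
	return ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS |
	    RW_LOCK_WRITE_SPINNER)) == RW_LOCK_READ);
}
#endif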

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	uintptr_t v;
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	for (;;) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if the lock has been unlocked and write waiters
			 * were present.
			 */
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
			    v + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)v,
					    (void *)(v + RW_ONE_READER));
				break;
			}
			continue;
		}
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
				    owner && TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				v = rw->rw_lock;
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
					break;
				cpu_spinwait();
			}
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present;
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set, then try
		 * to set it.  If we fail to set it, drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
	curthread->td_rw_rlocks++;
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

int
_rw_try_rlock(struct rwlock *rw, const char *file, int line)
{
	uintptr_t x;

	for (;;) {
		x = rw->rw_lock;
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			curthread->td_locks++;
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}
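
/*
 * Illustrative sketch (not part of the original file): the usual pattern
 * for a try-lock, e.g. from a context that must not block.  The "bar"
 * names are hypothetical; EWOULDBLOCK comes from <sys/errno.h>.
 */
#if 0
static struct rwlock bar_lock;
static int bar_value;

static int
bar_poll(int *valuep)
{

	if (!rw_try_rlock(&bar_lock))
		return (EWOULDBLOCK);	/* contended; caller retries later */
	*valuep = bar_value;		/* read shared state */
	rw_runlock(&bar_lock);
	return (0);
}
#endif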

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x, v, queue;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);
	curthread->td_locks--;
	curthread->td_rw_rlocks--;
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WAITERS)) {
			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_cmpset_ptr(&rw->rw_lock, x, RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}
		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock, leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		x = RW_UNLOCKED;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			x |= (v & RW_LOCK_READ_WAITERS);
		} else
			queue = TS_SHARED_QUEUE;
		if (!atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
		    x)) {
			turnstile_chain_unlock(&rw->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		turnstile_chain_unlock(&rw->lock_object);
		break;
	}
	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
	uintptr_t v, x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (rw_wlocked(rw)) {
		KASSERT(rw->lock_object.lo_flags & RW_RECURSE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		v = rw->rw_lock;
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner)) {
				cpu_spinwait();
#ifdef KDTRACE_HOOKS
				spin_cnt++;
#endif
			}
			continue;
		}
		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
		    spintries < rowner_retries) {
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
					break;
				cpu_spinwait();
			}
#ifdef KDTRACE_HOOKS
			spin_cnt += rowner_loops - i;
#endif
			if (i != rowner_loops)
				continue;
		}
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (!(v & RW_LOCK_READ)) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif
		/*
		 * Check the waiters flags on this rwlock.  If the lock was
		 * released without leaving any pending waiters queue, simply
		 * try to acquire it.  If a pending waiters queue is present,
		 * claim lock ownership while preserving the queue's flags.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
	}
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the two waiter bits must be set, indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	if (rw_wlocked(rw) && rw_recursed(rw)) {
		rw->rw_recurse--;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);

	/*
	 * Use the same algorithm as sx locks for now.  Prefer waking up shared
	 * waiters over writers if we have any.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.
	 * For now this is hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting, we wake up the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
	} else
		queue = TS_SHARED_QUEUE;

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
{
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
	}
	return (success);
}
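
/*
 * Illustrative sketch (not part of the original file): the common
 * lookup-then-modify pattern built on rw_try_upgrade().  If the upgrade
 * fails, the reader must drop the lock, reacquire it as a writer, and
 * revalidate anything it learned under the read lock.  The "baz" names
 * are hypothetical.
 */
#if 0
static void
baz_update(struct rwlock *lock, int *statep, int newval)
{

	rw_rlock(lock);
	if (*statep == newval) {		/* nothing to do */
		rw_runlock(lock);
		return;
	}
	if (!rw_try_upgrade(lock)) {
		/* Lost the race; restart as a writer and recheck. */
		rw_runlock(lock);
		rw_wlock(lock);
		if (*statep == newval) {
			rw_wunlock(lock);
			return;
		}
	}
	*statep = newval;
	rw_wunlock(lock);
}
#endif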

/*
 * Downgrade a write lock into a single read lock.
 */
void
_rw_downgrade(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
}
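
/*
 * Illustrative sketch (not part of the original file): a typical use of
 * rw_downgrade(), where a writer finishes its modification but wants to
 * keep reading the structure without continuing to exclude other readers.
 * The "qux" names are hypothetical.
 */
#if 0
static void
qux_refresh(struct rwlock *lock, int *statep)
{

	rw_wlock(lock);
	*statep = 42;			/* exclusive-phase modification */
	rw_downgrade(lock);		/* now hold a single read lock */
	/* ... continue reading *statep; other readers may run too ... */
	rw_runlock(lock);
}
#endif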

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
	struct rwlock *rw;
	struct thread *td;

	rw = (struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}
#endif
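
/*
 * Illustrative sketch (not part of the original file): how a consumer
 * typically leans on rw_assert() to document its locking contract.  The
 * "quux" names are hypothetical.
 */
#if 0
static int
quux_lookup(struct rwlock *lock, int *table, int idx)
{

	rw_assert(lock, RA_LOCKED);	/* caller holds a read or write lock */
	return (table[idx]);
}
#endif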