kern_rwlock.c revision 167787
1154941Sjhb/*- 2154941Sjhb * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org> 3154941Sjhb * All rights reserved. 4154941Sjhb * 5154941Sjhb * Redistribution and use in source and binary forms, with or without 6154941Sjhb * modification, are permitted provided that the following conditions 7154941Sjhb * are met: 8154941Sjhb * 1. Redistributions of source code must retain the above copyright 9154941Sjhb * notice, this list of conditions and the following disclaimer. 10154941Sjhb * 2. Redistributions in binary form must reproduce the above copyright 11154941Sjhb * notice, this list of conditions and the following disclaimer in the 12154941Sjhb * documentation and/or other materials provided with the distribution. 13154941Sjhb * 3. Neither the name of the author nor the names of any co-contributors 14154941Sjhb * may be used to endorse or promote products derived from this software 15154941Sjhb * without specific prior written permission. 16154941Sjhb * 17154941Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18154941Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19154941Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20154941Sjhb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21154941Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22154941Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23154941Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24154941Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25154941Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26154941Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27154941Sjhb * SUCH DAMAGE. 28154941Sjhb */ 29154941Sjhb 30154941Sjhb/* 31154941Sjhb * Machine independent bits of reader/writer lock implementation. 32154941Sjhb */ 33154941Sjhb 34154941Sjhb#include <sys/cdefs.h> 35154941Sjhb__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 167787 2007-03-21 21:20:51Z jhb $"); 36154941Sjhb 37154941Sjhb#include "opt_ddb.h" 38154941Sjhb 39154941Sjhb#include <sys/param.h> 40154941Sjhb#include <sys/ktr.h> 41154941Sjhb#include <sys/lock.h> 42154941Sjhb#include <sys/mutex.h> 43154941Sjhb#include <sys/proc.h> 44154941Sjhb#include <sys/rwlock.h> 45154941Sjhb#include <sys/systm.h> 46154941Sjhb#include <sys/turnstile.h> 47164159Skmacy#include <sys/lock_profile.h> 48154941Sjhb#include <machine/cpu.h> 49154941Sjhb 50154941Sjhb#ifdef DDB 51154941Sjhb#include <ddb/ddb.h> 52154941Sjhb 53154941Sjhbstatic void db_show_rwlock(struct lock_object *lock); 54154941Sjhb#endif 55167368Sjhbstatic void lock_rw(struct lock_object *lock, int how); 56167368Sjhbstatic int unlock_rw(struct lock_object *lock); 57154941Sjhb 58154941Sjhbstruct lock_class lock_class_rw = { 59167365Sjhb .lc_name = "rw", 60167365Sjhb .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, 61154941Sjhb#ifdef DDB 62167365Sjhb .lc_ddb_show = db_show_rwlock, 63154941Sjhb#endif 64167368Sjhb .lc_lock = lock_rw, 65167368Sjhb .lc_unlock = unlock_rw, 66154941Sjhb}; 67154941Sjhb 68157826Sjhb/* 69157826Sjhb * Return a pointer to the owning thread if the lock is write-locked or 70157826Sjhb * NULL if the lock is unlocked or read-locked. 71157826Sjhb */ 72157826Sjhb#define rw_wowner(rw) \ 73154941Sjhb ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ 74154941Sjhb (struct thread *)RW_OWNER((rw)->rw_lock)) 75154941Sjhb 76157826Sjhb/* 77157826Sjhb * Return a pointer to the owning thread for this lock who should receive 78157826Sjhb * any priority lent by threads that block on this lock. Currently this 79157826Sjhb * is identical to rw_wowner(). 80157826Sjhb */ 81157826Sjhb#define rw_owner(rw) rw_wowner(rw) 82157826Sjhb 83154941Sjhb#ifndef INVARIANTS 84154941Sjhb#define _rw_assert(rw, what, file, line) 85154941Sjhb#endif 86154941Sjhb 87154941Sjhbvoid 88167368Sjhblock_rw(struct lock_object *lock, int how) 89167368Sjhb{ 90167368Sjhb struct rwlock *rw; 91167368Sjhb 92167368Sjhb rw = (struct rwlock *)lock; 93167368Sjhb if (how) 94167368Sjhb rw_wlock(rw); 95167368Sjhb else 96167368Sjhb rw_rlock(rw); 97167368Sjhb} 98167368Sjhb 99167368Sjhbint 100167368Sjhbunlock_rw(struct lock_object *lock) 101167368Sjhb{ 102167368Sjhb struct rwlock *rw; 103167368Sjhb 104167368Sjhb rw = (struct rwlock *)lock; 105167368Sjhb rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); 106167368Sjhb if (rw->rw_lock & RW_LOCK_READ) { 107167368Sjhb rw_runlock(rw); 108167368Sjhb return (0); 109167368Sjhb } else { 110167368Sjhb rw_wunlock(rw); 111167368Sjhb return (1); 112167368Sjhb } 113167368Sjhb} 114167368Sjhb 115167368Sjhbvoid 116154941Sjhbrw_init(struct rwlock *rw, const char *name) 117154941Sjhb{ 118154941Sjhb 119154941Sjhb rw->rw_lock = RW_UNLOCKED; 120154941Sjhb 121167787Sjhb lock_profile_object_init(&rw->lock_object, &lock_class_rw, name); 122167787Sjhb lock_init(&rw->lock_object, &lock_class_rw, name, NULL, LO_WITNESS | 123157882Sjhb LO_RECURSABLE | LO_UPGRADABLE); 124154941Sjhb} 125154941Sjhb 126154941Sjhbvoid 127154941Sjhbrw_destroy(struct rwlock *rw) 128154941Sjhb{ 129154941Sjhb 130154941Sjhb KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked")); 131167787Sjhb lock_profile_object_destroy(&rw->lock_object); 132167787Sjhb lock_destroy(&rw->lock_object); 133154941Sjhb} 134154941Sjhb 135154941Sjhbvoid 136154941Sjhbrw_sysinit(void *arg) 137154941Sjhb{ 138154941Sjhb struct rw_args *args = arg; 139154941Sjhb 140154941Sjhb rw_init(args->ra_rw, args->ra_desc); 141154941Sjhb} 142154941Sjhb 143167024Srwatsonint 144167024Srwatsonrw_wowned(struct rwlock *rw) 145167024Srwatson{ 146167024Srwatson 147167024Srwatson return (rw_wowner(rw) == curthread); 148167024Srwatson} 149167024Srwatson 150154941Sjhbvoid 151154941Sjhb_rw_wlock(struct rwlock *rw, const char *file, int line) 152154941Sjhb{ 153154941Sjhb 154154941Sjhb MPASS(curthread != NULL); 155157826Sjhb KASSERT(rw_wowner(rw) != curthread, 156154941Sjhb ("%s (%s): wlock already held @ %s:%d", __func__, 157167787Sjhb rw->lock_object.lo_name, file, line)); 158167787Sjhb WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, 159154941Sjhb line); 160154941Sjhb __rw_wlock(rw, curthread, file, line); 161167787Sjhb LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, 0, file, line); 162167787Sjhb WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 163160771Sjhb curthread->td_locks++; 164154941Sjhb} 165154941Sjhb 166154941Sjhbvoid 167154941Sjhb_rw_wunlock(struct rwlock *rw, const char *file, int line) 168154941Sjhb{ 169154941Sjhb 170154941Sjhb MPASS(curthread != NULL); 171154941Sjhb _rw_assert(rw, RA_WLOCKED, file, line); 172160771Sjhb curthread->td_locks--; 173167787Sjhb WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 174167787Sjhb LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, 0, file, line); 175167787Sjhb lock_profile_release_lock(&rw->lock_object); 176154941Sjhb __rw_wunlock(rw, curthread, file, line); 177154941Sjhb} 178154941Sjhb 179154941Sjhbvoid 180154941Sjhb_rw_rlock(struct rwlock *rw, const char *file, int line) 181154941Sjhb{ 182157851Swkoszek#ifdef SMP 183157846Sjhb volatile struct thread *owner; 184157851Swkoszek#endif 185167307Sjhb uint64_t waittime = 0; 186167054Skmacy int contested = 0; 187154941Sjhb uintptr_t x; 188154941Sjhb 189157826Sjhb KASSERT(rw_wowner(rw) != curthread, 190154941Sjhb ("%s (%s): wlock already held @ %s:%d", __func__, 191167787Sjhb rw->lock_object.lo_name, file, line)); 192167787Sjhb WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line); 193154941Sjhb 194154941Sjhb /* 195154941Sjhb * Note that we don't make any attempt to try to block read 196154941Sjhb * locks once a writer has blocked on the lock. The reason is 197154941Sjhb * that we currently allow for read locks to recurse and we 198154941Sjhb * don't keep track of all the holders of read locks. Thus, if 199154941Sjhb * we were to block readers once a writer blocked and a reader 200154941Sjhb * tried to recurse on their reader lock after a writer had 201154941Sjhb * blocked we would end up in a deadlock since the reader would 202154941Sjhb * be blocked on the writer, and the writer would be blocked 203154941Sjhb * waiting for the reader to release its original read lock. 204154941Sjhb */ 205154941Sjhb for (;;) { 206154941Sjhb /* 207154941Sjhb * Handle the easy case. If no other thread has a write 208154941Sjhb * lock, then try to bump up the count of read locks. Note 209154941Sjhb * that we have to preserve the current state of the 210154941Sjhb * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a 211154941Sjhb * read lock, then rw_lock must have changed, so restart 212154941Sjhb * the loop. Note that this handles the case of a 213154941Sjhb * completely unlocked rwlock since such a lock is encoded 214154941Sjhb * as a read lock with no waiters. 215154941Sjhb */ 216154941Sjhb x = rw->rw_lock; 217154941Sjhb if (x & RW_LOCK_READ) { 218154941Sjhb 219154941Sjhb /* 220154941Sjhb * The RW_LOCK_READ_WAITERS flag should only be set 221154941Sjhb * if another thread currently holds a write lock, 222154941Sjhb * and in that case RW_LOCK_READ should be clear. 223154941Sjhb */ 224154941Sjhb MPASS((x & RW_LOCK_READ_WAITERS) == 0); 225154941Sjhb if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, 226154941Sjhb x + RW_ONE_READER)) { 227167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 228154941Sjhb CTR4(KTR_LOCK, 229154941Sjhb "%s: %p succeed %p -> %p", __func__, 230154941Sjhb rw, (void *)x, 231154941Sjhb (void *)(x + RW_ONE_READER)); 232167307Sjhb if (RW_READERS(x) == 0) 233167307Sjhb lock_profile_obtain_lock_success( 234167787Sjhb &rw->lock_object, contested, waittime, 235167307Sjhb file, line); 236154941Sjhb break; 237154941Sjhb } 238157846Sjhb cpu_spinwait(); 239154941Sjhb continue; 240154941Sjhb } 241167787Sjhb lock_profile_obtain_lock_failed(&rw->lock_object, &contested, 242167307Sjhb &waittime); 243154941Sjhb 244154941Sjhb /* 245154941Sjhb * Okay, now it's the hard case. Some other thread already 246154941Sjhb * has a write lock, so acquire the turnstile lock so we can 247154941Sjhb * begin the process of blocking. 248154941Sjhb */ 249167787Sjhb turnstile_lock(&rw->lock_object); 250154941Sjhb 251154941Sjhb /* 252154941Sjhb * The lock might have been released while we spun, so 253154941Sjhb * recheck its state and restart the loop if there is no 254154941Sjhb * longer a write lock. 255154941Sjhb */ 256154941Sjhb x = rw->rw_lock; 257154941Sjhb if (x & RW_LOCK_READ) { 258167787Sjhb turnstile_release(&rw->lock_object); 259157846Sjhb cpu_spinwait(); 260154941Sjhb continue; 261154941Sjhb } 262154941Sjhb 263154941Sjhb /* 264154941Sjhb * Ok, it's still a write lock. If the RW_LOCK_READ_WAITERS 265154941Sjhb * flag is already set, then we can go ahead and block. If 266154941Sjhb * it is not set then try to set it. If we fail to set it 267154941Sjhb * drop the turnstile lock and restart the loop. 268154941Sjhb */ 269157826Sjhb if (!(x & RW_LOCK_READ_WAITERS)) { 270157826Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, x, 271157826Sjhb x | RW_LOCK_READ_WAITERS)) { 272167787Sjhb turnstile_release(&rw->lock_object); 273157826Sjhb cpu_spinwait(); 274157826Sjhb continue; 275157826Sjhb } 276167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 277157826Sjhb CTR2(KTR_LOCK, "%s: %p set read waiters flag", 278157826Sjhb __func__, rw); 279154941Sjhb } 280154941Sjhb 281157846Sjhb#ifdef SMP 282154941Sjhb /* 283157846Sjhb * If the owner is running on another CPU, spin until 284157846Sjhb * the owner stops running or the state of the lock 285157846Sjhb * changes. 286157846Sjhb */ 287157846Sjhb owner = (struct thread *)RW_OWNER(x); 288157846Sjhb if (TD_IS_RUNNING(owner)) { 289167787Sjhb turnstile_release(&rw->lock_object); 290167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 291157846Sjhb CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 292157846Sjhb __func__, rw, owner); 293157846Sjhb while ((struct thread*)RW_OWNER(rw->rw_lock)== owner && 294157846Sjhb TD_IS_RUNNING(owner)) 295157846Sjhb cpu_spinwait(); 296157846Sjhb continue; 297157846Sjhb } 298157846Sjhb#endif 299157846Sjhb 300157846Sjhb /* 301154941Sjhb * We were unable to acquire the lock and the read waiters 302154941Sjhb * flag is set, so we must block on the turnstile. 303154941Sjhb */ 304167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 305154941Sjhb CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 306154941Sjhb rw); 307167787Sjhb turnstile_wait(&rw->lock_object, rw_owner(rw), TS_SHARED_QUEUE); 308167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 309154941Sjhb CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 310154941Sjhb __func__, rw); 311154941Sjhb } 312154941Sjhb 313154941Sjhb /* 314154941Sjhb * TODO: acquire "owner of record" here. Here be turnstile dragons 315154941Sjhb * however. turnstiles don't like owners changing between calls to 316154941Sjhb * turnstile_wait() currently. 317154941Sjhb */ 318154941Sjhb 319167787Sjhb LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); 320167787Sjhb WITNESS_LOCK(&rw->lock_object, 0, file, line); 321160771Sjhb curthread->td_locks++; 322154941Sjhb} 323154941Sjhb 324154941Sjhbvoid 325154941Sjhb_rw_runlock(struct rwlock *rw, const char *file, int line) 326154941Sjhb{ 327154941Sjhb struct turnstile *ts; 328154941Sjhb uintptr_t x; 329154941Sjhb 330154941Sjhb _rw_assert(rw, RA_RLOCKED, file, line); 331160771Sjhb curthread->td_locks--; 332167787Sjhb WITNESS_UNLOCK(&rw->lock_object, 0, file, line); 333167787Sjhb LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); 334154941Sjhb 335154941Sjhb /* TODO: drop "owner of record" here. */ 336154941Sjhb 337154941Sjhb for (;;) { 338154941Sjhb /* 339154941Sjhb * See if there is more than one read lock held. If so, 340154941Sjhb * just drop one and return. 341154941Sjhb */ 342154941Sjhb x = rw->rw_lock; 343154941Sjhb if (RW_READERS(x) > 1) { 344154941Sjhb if (atomic_cmpset_ptr(&rw->rw_lock, x, 345154941Sjhb x - RW_ONE_READER)) { 346167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 347154941Sjhb CTR4(KTR_LOCK, 348154941Sjhb "%s: %p succeeded %p -> %p", 349154941Sjhb __func__, rw, (void *)x, 350154941Sjhb (void *)(x - RW_ONE_READER)); 351154941Sjhb break; 352154941Sjhb } 353154941Sjhb continue; 354167307Sjhb } 355154941Sjhb 356164159Skmacy 357154941Sjhb /* 358154941Sjhb * We should never have read waiters while at least one 359154941Sjhb * thread holds a read lock. (See note above) 360154941Sjhb */ 361154941Sjhb KASSERT(!(x & RW_LOCK_READ_WAITERS), 362154941Sjhb ("%s: waiting readers", __func__)); 363154941Sjhb 364154941Sjhb /* 365154941Sjhb * If there aren't any waiters for a write lock, then try 366154941Sjhb * to drop it quickly. 367154941Sjhb */ 368154941Sjhb if (!(x & RW_LOCK_WRITE_WAITERS)) { 369154941Sjhb 370154941Sjhb /* 371154941Sjhb * There shouldn't be any flags set and we should 372154941Sjhb * be the only read lock. If we fail to release 373154941Sjhb * the single read lock, then another thread might 374154941Sjhb * have just acquired a read lock, so go back up 375154941Sjhb * to the multiple read locks case. 376154941Sjhb */ 377154941Sjhb MPASS(x == RW_READERS_LOCK(1)); 378154941Sjhb if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1), 379154941Sjhb RW_UNLOCKED)) { 380167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 381154941Sjhb CTR2(KTR_LOCK, "%s: %p last succeeded", 382154941Sjhb __func__, rw); 383154941Sjhb break; 384154941Sjhb } 385154941Sjhb continue; 386154941Sjhb } 387154941Sjhb 388154941Sjhb /* 389154941Sjhb * There should just be one reader with one or more 390154941Sjhb * writers waiting. 391154941Sjhb */ 392154941Sjhb MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS)); 393154941Sjhb 394154941Sjhb /* 395154941Sjhb * Ok, we know we have a waiting writer and we think we 396154941Sjhb * are the last reader, so grab the turnstile lock. 397154941Sjhb */ 398167787Sjhb turnstile_lock(&rw->lock_object); 399154941Sjhb 400154941Sjhb /* 401154941Sjhb * Try to drop our lock leaving the lock in a unlocked 402154941Sjhb * state. 403154941Sjhb * 404154941Sjhb * If you wanted to do explicit lock handoff you'd have to 405154941Sjhb * do it here. You'd also want to use turnstile_signal() 406154941Sjhb * and you'd have to handle the race where a higher 407154941Sjhb * priority thread blocks on the write lock before the 408154941Sjhb * thread you wakeup actually runs and have the new thread 409154941Sjhb * "steal" the lock. For now it's a lot simpler to just 410154941Sjhb * wakeup all of the waiters. 411154941Sjhb * 412154941Sjhb * As above, if we fail, then another thread might have 413154941Sjhb * acquired a read lock, so drop the turnstile lock and 414154941Sjhb * restart. 415154941Sjhb */ 416154941Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, 417154941Sjhb RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) { 418167787Sjhb turnstile_release(&rw->lock_object); 419154941Sjhb continue; 420154941Sjhb } 421167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 422154941Sjhb CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", 423154941Sjhb __func__, rw); 424154941Sjhb 425154941Sjhb /* 426154941Sjhb * Ok. The lock is released and all that's left is to 427154941Sjhb * wake up the waiters. Note that the lock might not be 428154941Sjhb * free anymore, but in that case the writers will just 429154941Sjhb * block again if they run before the new lock holder(s) 430154941Sjhb * release the lock. 431154941Sjhb */ 432167787Sjhb ts = turnstile_lookup(&rw->lock_object); 433157846Sjhb MPASS(ts != NULL); 434154941Sjhb turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); 435154941Sjhb turnstile_unpend(ts, TS_SHARED_LOCK); 436154941Sjhb break; 437154941Sjhb } 438167787Sjhb lock_profile_release_lock(&rw->lock_object); 439154941Sjhb} 440154941Sjhb 441154941Sjhb/* 442154941Sjhb * This function is called when we are unable to obtain a write lock on the 443154941Sjhb * first try. This means that at least one other thread holds either a 444154941Sjhb * read or write lock. 445154941Sjhb */ 446154941Sjhbvoid 447154941Sjhb_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 448154941Sjhb{ 449157851Swkoszek#ifdef SMP 450157846Sjhb volatile struct thread *owner; 451157851Swkoszek#endif 452154941Sjhb uintptr_t v; 453154941Sjhb 454167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 455154941Sjhb CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, 456167787Sjhb rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); 457154941Sjhb 458154941Sjhb while (!_rw_write_lock(rw, tid)) { 459167787Sjhb turnstile_lock(&rw->lock_object); 460154941Sjhb v = rw->rw_lock; 461154941Sjhb 462154941Sjhb /* 463154941Sjhb * If the lock was released while spinning on the 464154941Sjhb * turnstile chain lock, try again. 465154941Sjhb */ 466154941Sjhb if (v == RW_UNLOCKED) { 467167787Sjhb turnstile_release(&rw->lock_object); 468154941Sjhb cpu_spinwait(); 469154941Sjhb continue; 470154941Sjhb } 471154941Sjhb 472154941Sjhb /* 473154941Sjhb * If the lock was released by a writer with both readers 474154941Sjhb * and writers waiting and a reader hasn't woken up and 475154941Sjhb * acquired the lock yet, rw_lock will be set to the 476154941Sjhb * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS. If we see 477154941Sjhb * that value, try to acquire it once. Note that we have 478154941Sjhb * to preserve the RW_LOCK_WRITE_WAITERS flag as there are 479154941Sjhb * other writers waiting still. If we fail, restart the 480154941Sjhb * loop. 481154941Sjhb */ 482154941Sjhb if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) { 483154941Sjhb if (atomic_cmpset_acq_ptr(&rw->rw_lock, 484154941Sjhb RW_UNLOCKED | RW_LOCK_WRITE_WAITERS, 485154941Sjhb tid | RW_LOCK_WRITE_WAITERS)) { 486167787Sjhb turnstile_claim(&rw->lock_object); 487154941Sjhb CTR2(KTR_LOCK, "%s: %p claimed by new writer", 488154941Sjhb __func__, rw); 489154941Sjhb break; 490154941Sjhb } 491167787Sjhb turnstile_release(&rw->lock_object); 492154941Sjhb cpu_spinwait(); 493154941Sjhb continue; 494154941Sjhb } 495154941Sjhb 496154941Sjhb /* 497154941Sjhb * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to 498154941Sjhb * set it. If we fail to set it, then loop back and try 499154941Sjhb * again. 500154941Sjhb */ 501157826Sjhb if (!(v & RW_LOCK_WRITE_WAITERS)) { 502157826Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, v, 503157826Sjhb v | RW_LOCK_WRITE_WAITERS)) { 504167787Sjhb turnstile_release(&rw->lock_object); 505157826Sjhb cpu_spinwait(); 506157826Sjhb continue; 507157826Sjhb } 508167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 509157826Sjhb CTR2(KTR_LOCK, "%s: %p set write waiters flag", 510157826Sjhb __func__, rw); 511154941Sjhb } 512154941Sjhb 513157846Sjhb#ifdef SMP 514157846Sjhb /* 515157846Sjhb * If the lock is write locked and the owner is 516157846Sjhb * running on another CPU, spin until the owner stops 517157846Sjhb * running or the state of the lock changes. 518157846Sjhb */ 519157846Sjhb owner = (struct thread *)RW_OWNER(v); 520157846Sjhb if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { 521167787Sjhb turnstile_release(&rw->lock_object); 522167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 523157846Sjhb CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 524157846Sjhb __func__, rw, owner); 525157846Sjhb while ((struct thread*)RW_OWNER(rw->rw_lock)== owner && 526157846Sjhb TD_IS_RUNNING(owner)) 527157846Sjhb cpu_spinwait(); 528157846Sjhb continue; 529157846Sjhb } 530157846Sjhb#endif 531154941Sjhb 532154941Sjhb /* 533154941Sjhb * We were unable to acquire the lock and the write waiters 534154941Sjhb * flag is set, so we must block on the turnstile. 535154941Sjhb */ 536167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 537154941Sjhb CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 538154941Sjhb rw); 539167787Sjhb turnstile_wait(&rw->lock_object, rw_owner(rw), 540154941Sjhb TS_EXCLUSIVE_QUEUE); 541167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 542154941Sjhb CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 543154941Sjhb __func__, rw); 544154941Sjhb } 545154941Sjhb} 546154941Sjhb 547154941Sjhb/* 548154941Sjhb * This function is called if the first try at releasing a write lock failed. 549154941Sjhb * This means that one of the 2 waiter bits must be set indicating that at 550154941Sjhb * least one thread is waiting on this lock. 551154941Sjhb */ 552154941Sjhbvoid 553154941Sjhb_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 554154941Sjhb{ 555154941Sjhb struct turnstile *ts; 556154941Sjhb uintptr_t v; 557154941Sjhb int queue; 558154941Sjhb 559154941Sjhb KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), 560154941Sjhb ("%s: neither of the waiter flags are set", __func__)); 561154941Sjhb 562167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 563154941Sjhb CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); 564154941Sjhb 565167787Sjhb turnstile_lock(&rw->lock_object); 566167787Sjhb ts = turnstile_lookup(&rw->lock_object); 567154941Sjhb 568157846Sjhb#ifdef SMP 569157846Sjhb /* 570157846Sjhb * There might not be a turnstile for this lock if all of 571157846Sjhb * the waiters are adaptively spinning. In that case, just 572157846Sjhb * reset the lock to the unlocked state and return. 573157846Sjhb */ 574157846Sjhb if (ts == NULL) { 575157846Sjhb atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED); 576167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 577157846Sjhb CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw); 578167787Sjhb turnstile_release(&rw->lock_object); 579157846Sjhb return; 580157846Sjhb } 581157846Sjhb#else 582154941Sjhb MPASS(ts != NULL); 583157846Sjhb#endif 584154941Sjhb 585154941Sjhb /* 586154941Sjhb * Use the same algo as sx locks for now. Prefer waking up shared 587154941Sjhb * waiters if we have any over writers. This is probably not ideal. 588154941Sjhb * 589154941Sjhb * 'v' is the value we are going to write back to rw_lock. If we 590154941Sjhb * have waiters on both queues, we need to preserve the state of 591154941Sjhb * the waiter flag for the queue we don't wake up. For now this is 592154941Sjhb * hardcoded for the algorithm mentioned above. 593154941Sjhb * 594154941Sjhb * In the case of both readers and writers waiting we wakeup the 595154941Sjhb * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a 596154941Sjhb * new writer comes in before a reader it will claim the lock up 597154941Sjhb * above. There is probably a potential priority inversion in 598154941Sjhb * there that could be worked around either by waking both queues 599154941Sjhb * of waiters or doing some complicated lock handoff gymnastics. 600157846Sjhb * 601157846Sjhb * Note that in the SMP case, if both flags are set, there might 602157846Sjhb * not be any actual writers on the turnstile as they might all 603157846Sjhb * be spinning. In that case, we don't want to preserve the 604157846Sjhb * RW_LOCK_WRITE_WAITERS flag as the turnstile is going to go 605157846Sjhb * away once we wakeup all the readers. 606154941Sjhb */ 607157846Sjhb v = RW_UNLOCKED; 608154941Sjhb if (rw->rw_lock & RW_LOCK_READ_WAITERS) { 609154941Sjhb queue = TS_SHARED_QUEUE; 610157846Sjhb#ifdef SMP 611157846Sjhb if (rw->rw_lock & RW_LOCK_WRITE_WAITERS && 612157846Sjhb !turnstile_empty(ts, TS_EXCLUSIVE_QUEUE)) 613157846Sjhb v |= RW_LOCK_WRITE_WAITERS; 614157846Sjhb#else 615157846Sjhb v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS); 616157846Sjhb#endif 617157846Sjhb } else 618154941Sjhb queue = TS_EXCLUSIVE_QUEUE; 619157846Sjhb 620157846Sjhb#ifdef SMP 621157846Sjhb /* 622157846Sjhb * We have to make sure that we actually have waiters to 623157846Sjhb * wakeup. If they are all spinning, then we just need to 624157846Sjhb * disown the turnstile and return. 625157846Sjhb */ 626157846Sjhb if (turnstile_empty(ts, queue)) { 627167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 628157846Sjhb CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw); 629157846Sjhb atomic_store_rel_ptr(&rw->rw_lock, v); 630157846Sjhb turnstile_disown(ts); 631157846Sjhb return; 632154941Sjhb } 633157846Sjhb#endif 634157846Sjhb 635157846Sjhb /* Wake up all waiters for the specific queue. */ 636167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 637154941Sjhb CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, 638154941Sjhb queue == TS_SHARED_QUEUE ? "read" : "write"); 639154941Sjhb turnstile_broadcast(ts, queue); 640154941Sjhb atomic_store_rel_ptr(&rw->rw_lock, v); 641154941Sjhb turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); 642154941Sjhb} 643154941Sjhb 644157882Sjhb/* 645157882Sjhb * Attempt to do a non-blocking upgrade from a read lock to a write 646157882Sjhb * lock. This will only succeed if this thread holds a single read 647157882Sjhb * lock. Returns true if the upgrade succeeded and false otherwise. 648157882Sjhb */ 649157882Sjhbint 650157882Sjhb_rw_try_upgrade(struct rwlock *rw, const char *file, int line) 651157882Sjhb{ 652157882Sjhb uintptr_t v, tid; 653157882Sjhb int success; 654157882Sjhb 655157882Sjhb _rw_assert(rw, RA_RLOCKED, file, line); 656157882Sjhb 657157882Sjhb /* 658157882Sjhb * Attempt to switch from one reader to a writer. If there 659157882Sjhb * are any write waiters, then we will have to lock the 660157882Sjhb * turnstile first to prevent races with another writer 661157882Sjhb * calling turnstile_wait() before we have claimed this 662157882Sjhb * turnstile. So, do the simple case of no waiters first. 663157882Sjhb */ 664157882Sjhb tid = (uintptr_t)curthread; 665157882Sjhb if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) { 666157882Sjhb success = atomic_cmpset_acq_ptr(&rw->rw_lock, 667157882Sjhb RW_READERS_LOCK(1), tid); 668157882Sjhb goto out; 669157882Sjhb } 670157882Sjhb 671157882Sjhb /* 672157882Sjhb * Ok, we think we have write waiters, so lock the 673157882Sjhb * turnstile. 674157882Sjhb */ 675167787Sjhb turnstile_lock(&rw->lock_object); 676157882Sjhb 677157882Sjhb /* 678157882Sjhb * Try to switch from one reader to a writer again. This time 679157882Sjhb * we honor the current state of the RW_LOCK_WRITE_WAITERS 680157882Sjhb * flag. If we obtain the lock with the flag set, then claim 681157882Sjhb * ownership of the turnstile. In the SMP case it is possible 682157882Sjhb * for there to not be an associated turnstile even though there 683157882Sjhb * are waiters if all of the waiters are spinning. 684157882Sjhb */ 685157882Sjhb v = rw->rw_lock & RW_LOCK_WRITE_WAITERS; 686157882Sjhb success = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, 687157882Sjhb tid | v); 688157882Sjhb#ifdef SMP 689167787Sjhb if (success && v && turnstile_lookup(&rw->lock_object) != NULL) 690157882Sjhb#else 691157882Sjhb if (success && v) 692157882Sjhb#endif 693167787Sjhb turnstile_claim(&rw->lock_object); 694157882Sjhb else 695167787Sjhb turnstile_release(&rw->lock_object); 696157882Sjhbout: 697167787Sjhb LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); 698157882Sjhb if (success) 699167787Sjhb WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, 700157882Sjhb file, line); 701157882Sjhb return (success); 702157882Sjhb} 703157882Sjhb 704157882Sjhb/* 705157882Sjhb * Downgrade a write lock into a single read lock. 706157882Sjhb */ 707157882Sjhbvoid 708157882Sjhb_rw_downgrade(struct rwlock *rw, const char *file, int line) 709157882Sjhb{ 710157882Sjhb struct turnstile *ts; 711157882Sjhb uintptr_t tid, v; 712157882Sjhb 713157882Sjhb _rw_assert(rw, RA_WLOCKED, file, line); 714157882Sjhb 715167787Sjhb WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); 716157882Sjhb 717157882Sjhb /* 718157882Sjhb * Convert from a writer to a single reader. First we handle 719157882Sjhb * the easy case with no waiters. If there are any waiters, we 720157882Sjhb * lock the turnstile, "disown" the lock, and awaken any read 721157882Sjhb * waiters. 722157882Sjhb */ 723157882Sjhb tid = (uintptr_t)curthread; 724157882Sjhb if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) 725157882Sjhb goto out; 726157882Sjhb 727157882Sjhb /* 728157882Sjhb * Ok, we think we have waiters, so lock the turnstile so we can 729157882Sjhb * read the waiter flags without any races. 730157882Sjhb */ 731167787Sjhb turnstile_lock(&rw->lock_object); 732157882Sjhb v = rw->rw_lock; 733157882Sjhb MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)); 734157882Sjhb 735157882Sjhb /* 736157882Sjhb * Downgrade from a write lock while preserving 737157882Sjhb * RW_LOCK_WRITE_WAITERS and give up ownership of the 738157882Sjhb * turnstile. If there are any read waiters, wake them up. 739157882Sjhb * 740157882Sjhb * For SMP, we have to allow for the fact that all of the 741157882Sjhb * read waiters might be spinning. In that case, act as if 742157882Sjhb * RW_LOCK_READ_WAITERS is not set. Also, only preserve 743157882Sjhb * the RW_LOCK_WRITE_WAITERS flag if at least one writer is 744157882Sjhb * blocked on the turnstile. 745157882Sjhb */ 746167787Sjhb ts = turnstile_lookup(&rw->lock_object); 747157882Sjhb#ifdef SMP 748157882Sjhb if (ts == NULL) 749157882Sjhb v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS); 750157882Sjhb else if (v & RW_LOCK_READ_WAITERS && 751157882Sjhb turnstile_empty(ts, TS_SHARED_QUEUE)) 752157882Sjhb v &= ~RW_LOCK_READ_WAITERS; 753157882Sjhb else if (v & RW_LOCK_WRITE_WAITERS && 754157882Sjhb turnstile_empty(ts, TS_EXCLUSIVE_QUEUE)) 755157882Sjhb v &= ~RW_LOCK_WRITE_WAITERS; 756157882Sjhb#else 757157882Sjhb MPASS(ts != NULL); 758157882Sjhb#endif 759157882Sjhb if (v & RW_LOCK_READ_WAITERS) 760157882Sjhb turnstile_broadcast(ts, TS_SHARED_QUEUE); 761157882Sjhb atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | 762157882Sjhb (v & RW_LOCK_WRITE_WAITERS)); 763157882Sjhb if (v & RW_LOCK_READ_WAITERS) 764157882Sjhb turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); 765157882Sjhb#ifdef SMP 766157882Sjhb else if (ts == NULL) 767167787Sjhb turnstile_release(&rw->lock_object); 768157882Sjhb#endif 769157882Sjhb else 770157882Sjhb turnstile_disown(ts); 771157882Sjhbout: 772167787Sjhb LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); 773157882Sjhb} 774157882Sjhb 775154941Sjhb#ifdef INVARIANT_SUPPORT 776155162Sscottl#ifndef INVARIANTS 777154941Sjhb#undef _rw_assert 778154941Sjhb#endif 779154941Sjhb 780154941Sjhb/* 781154941Sjhb * In the non-WITNESS case, rw_assert() can only detect that at least 782154941Sjhb * *some* thread owns an rlock, but it cannot guarantee that *this* 783154941Sjhb * thread owns an rlock. 784154941Sjhb */ 785154941Sjhbvoid 786154941Sjhb_rw_assert(struct rwlock *rw, int what, const char *file, int line) 787154941Sjhb{ 788154941Sjhb 789154941Sjhb if (panicstr != NULL) 790154941Sjhb return; 791154941Sjhb switch (what) { 792154941Sjhb case RA_LOCKED: 793167368Sjhb case RA_LOCKED | LA_NOTRECURSED: 794154941Sjhb case RA_RLOCKED: 795154941Sjhb#ifdef WITNESS 796167787Sjhb witness_assert(&rw->lock_object, what, file, line); 797154941Sjhb#else 798154941Sjhb /* 799154941Sjhb * If some other thread has a write lock or we have one 800154941Sjhb * and are asserting a read lock, fail. Also, if no one 801154941Sjhb * has a lock at all, fail. 802154941Sjhb */ 803155061Sscottl if (rw->rw_lock == RW_UNLOCKED || 804155061Sscottl (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED || 805157826Sjhb rw_wowner(rw) != curthread))) 806154941Sjhb panic("Lock %s not %slocked @ %s:%d\n", 807167787Sjhb rw->lock_object.lo_name, (what == RA_RLOCKED) ? 808154941Sjhb "read " : "", file, line); 809154941Sjhb#endif 810154941Sjhb break; 811154941Sjhb case RA_WLOCKED: 812157826Sjhb if (rw_wowner(rw) != curthread) 813154941Sjhb panic("Lock %s not exclusively locked @ %s:%d\n", 814167787Sjhb rw->lock_object.lo_name, file, line); 815154941Sjhb break; 816154941Sjhb case RA_UNLOCKED: 817154941Sjhb#ifdef WITNESS 818167787Sjhb witness_assert(&rw->lock_object, what, file, line); 819154941Sjhb#else 820154941Sjhb /* 821154941Sjhb * If we hold a write lock fail. We can't reliably check 822154941Sjhb * to see if we hold a read lock or not. 823154941Sjhb */ 824157826Sjhb if (rw_wowner(rw) == curthread) 825154941Sjhb panic("Lock %s exclusively locked @ %s:%d\n", 826167787Sjhb rw->lock_object.lo_name, file, line); 827154941Sjhb#endif 828154941Sjhb break; 829154941Sjhb default: 830154941Sjhb panic("Unknown rw lock assertion: %d @ %s:%d", what, file, 831154941Sjhb line); 832154941Sjhb } 833154941Sjhb} 834154941Sjhb#endif /* INVARIANT_SUPPORT */ 835154941Sjhb 836154941Sjhb#ifdef DDB 837154941Sjhbvoid 838154941Sjhbdb_show_rwlock(struct lock_object *lock) 839154941Sjhb{ 840154941Sjhb struct rwlock *rw; 841154941Sjhb struct thread *td; 842154941Sjhb 843154941Sjhb rw = (struct rwlock *)lock; 844154941Sjhb 845154941Sjhb db_printf(" state: "); 846154941Sjhb if (rw->rw_lock == RW_UNLOCKED) 847154941Sjhb db_printf("UNLOCKED\n"); 848154941Sjhb else if (rw->rw_lock & RW_LOCK_READ) 849167504Sjhb db_printf("RLOCK: %ju locks\n", 850167504Sjhb (uintmax_t)(RW_READERS(rw->rw_lock))); 851154941Sjhb else { 852157826Sjhb td = rw_wowner(rw); 853154941Sjhb db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, 854154941Sjhb td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm); 855154941Sjhb } 856154941Sjhb db_printf(" waiters: "); 857154941Sjhb switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { 858154941Sjhb case RW_LOCK_READ_WAITERS: 859154941Sjhb db_printf("readers\n"); 860154941Sjhb break; 861154941Sjhb case RW_LOCK_WRITE_WAITERS: 862154941Sjhb db_printf("writers\n"); 863154941Sjhb break; 864154941Sjhb case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: 865167492Sjhb db_printf("readers and writers\n"); 866154941Sjhb break; 867154941Sjhb default: 868154941Sjhb db_printf("none\n"); 869154941Sjhb break; 870154941Sjhb } 871154941Sjhb} 872154941Sjhb 873154941Sjhb#endif 874