/* kern_rwlock.c revision 167054 */
1154941Sjhb/*- 2154941Sjhb * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org> 3154941Sjhb * All rights reserved. 4154941Sjhb * 5154941Sjhb * Redistribution and use in source and binary forms, with or without 6154941Sjhb * modification, are permitted provided that the following conditions 7154941Sjhb * are met: 8154941Sjhb * 1. Redistributions of source code must retain the above copyright 9154941Sjhb * notice, this list of conditions and the following disclaimer. 10154941Sjhb * 2. Redistributions in binary form must reproduce the above copyright 11154941Sjhb * notice, this list of conditions and the following disclaimer in the 12154941Sjhb * documentation and/or other materials provided with the distribution. 13154941Sjhb * 3. Neither the name of the author nor the names of any co-contributors 14154941Sjhb * may be used to endorse or promote products derived from this software 15154941Sjhb * without specific prior written permission. 16154941Sjhb * 17154941Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18154941Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19154941Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20154941Sjhb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21154941Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22154941Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23154941Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24154941Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25154941Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26154941Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27154941Sjhb * SUCH DAMAGE. 28154941Sjhb */ 29154941Sjhb 30154941Sjhb/* 31154941Sjhb * Machine independent bits of reader/writer lock implementation. 
32154941Sjhb */ 33154941Sjhb 34154941Sjhb#include <sys/cdefs.h> 35154941Sjhb__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 167054 2007-02-27 06:42:05Z kmacy $"); 36154941Sjhb 37154941Sjhb#include "opt_ddb.h" 38154941Sjhb 39154941Sjhb#include <sys/param.h> 40154941Sjhb#include <sys/ktr.h> 41154941Sjhb#include <sys/lock.h> 42154941Sjhb#include <sys/mutex.h> 43154941Sjhb#include <sys/proc.h> 44154941Sjhb#include <sys/rwlock.h> 45154941Sjhb#include <sys/systm.h> 46154941Sjhb#include <sys/turnstile.h> 47164159Skmacy#include <sys/lock_profile.h> 48154941Sjhb#include <machine/cpu.h> 49154941Sjhb 50154941Sjhb#ifdef DDB 51154941Sjhb#include <ddb/ddb.h> 52154941Sjhb 53154941Sjhbstatic void db_show_rwlock(struct lock_object *lock); 54154941Sjhb#endif 55154941Sjhb 56154941Sjhbstruct lock_class lock_class_rw = { 57154941Sjhb "rw", 58157882Sjhb LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, 59154941Sjhb#ifdef DDB 60154941Sjhb db_show_rwlock 61154941Sjhb#endif 62154941Sjhb}; 63154941Sjhb 64157826Sjhb/* 65157826Sjhb * Return a pointer to the owning thread if the lock is write-locked or 66157826Sjhb * NULL if the lock is unlocked or read-locked. 67157826Sjhb */ 68157826Sjhb#define rw_wowner(rw) \ 69154941Sjhb ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ 70154941Sjhb (struct thread *)RW_OWNER((rw)->rw_lock)) 71154941Sjhb 72157826Sjhb/* 73157826Sjhb * Return a pointer to the owning thread for this lock who should receive 74157826Sjhb * any priority lent by threads that block on this lock. Currently this 75157826Sjhb * is identical to rw_wowner(). 
76157826Sjhb */ 77157826Sjhb#define rw_owner(rw) rw_wowner(rw) 78157826Sjhb 79154941Sjhb#ifndef INVARIANTS 80154941Sjhb#define _rw_assert(rw, what, file, line) 81154941Sjhb#endif 82154941Sjhb 83154941Sjhbvoid 84154941Sjhbrw_init(struct rwlock *rw, const char *name) 85154941Sjhb{ 86154941Sjhb 87154941Sjhb rw->rw_lock = RW_UNLOCKED; 88154941Sjhb 89164246Skmacy lock_profile_object_init(&rw->rw_object, &lock_class_rw, name); 90154941Sjhb lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS | 91157882Sjhb LO_RECURSABLE | LO_UPGRADABLE); 92154941Sjhb} 93154941Sjhb 94154941Sjhbvoid 95154941Sjhbrw_destroy(struct rwlock *rw) 96154941Sjhb{ 97154941Sjhb 98154941Sjhb KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked")); 99164159Skmacy lock_profile_object_destroy(&rw->rw_object); 100154941Sjhb lock_destroy(&rw->rw_object); 101154941Sjhb} 102154941Sjhb 103154941Sjhbvoid 104154941Sjhbrw_sysinit(void *arg) 105154941Sjhb{ 106154941Sjhb struct rw_args *args = arg; 107154941Sjhb 108154941Sjhb rw_init(args->ra_rw, args->ra_desc); 109154941Sjhb} 110154941Sjhb 111167024Srwatsonint 112167024Srwatsonrw_wowned(struct rwlock *rw) 113167024Srwatson{ 114167024Srwatson 115167024Srwatson return (rw_wowner(rw) == curthread); 116167024Srwatson} 117167024Srwatson 118154941Sjhbvoid 119154941Sjhb_rw_wlock(struct rwlock *rw, const char *file, int line) 120154941Sjhb{ 121154941Sjhb 122154941Sjhb MPASS(curthread != NULL); 123157826Sjhb KASSERT(rw_wowner(rw) != curthread, 124154941Sjhb ("%s (%s): wlock already held @ %s:%d", __func__, 125154941Sjhb rw->rw_object.lo_name, file, line)); 126154941Sjhb WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, 127154941Sjhb line); 128154941Sjhb __rw_wlock(rw, curthread, file, line); 129154941Sjhb LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line); 130154941Sjhb WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line); 131160771Sjhb curthread->td_locks++; 132154941Sjhb} 133154941Sjhb 134154941Sjhbvoid 
135154941Sjhb_rw_wunlock(struct rwlock *rw, const char *file, int line) 136154941Sjhb{ 137154941Sjhb 138154941Sjhb MPASS(curthread != NULL); 139154941Sjhb _rw_assert(rw, RA_WLOCKED, file, line); 140160771Sjhb curthread->td_locks--; 141154941Sjhb WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line); 142154941Sjhb LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line); 143164159Skmacy lock_profile_release_lock(&rw->rw_object); 144154941Sjhb __rw_wunlock(rw, curthread, file, line); 145154941Sjhb} 146154941Sjhb 147154941Sjhbvoid 148154941Sjhb_rw_rlock(struct rwlock *rw, const char *file, int line) 149154941Sjhb{ 150157851Swkoszek#ifdef SMP 151157846Sjhb volatile struct thread *owner; 152157851Swkoszek#endif 153167054Skmacy uint64_t waitstart = 0; 154167054Skmacy int contested = 0; 155154941Sjhb uintptr_t x; 156154941Sjhb 157157826Sjhb KASSERT(rw_wowner(rw) != curthread, 158154941Sjhb ("%s (%s): wlock already held @ %s:%d", __func__, 159154941Sjhb rw->rw_object.lo_name, file, line)); 160154941Sjhb WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line); 161154941Sjhb 162154941Sjhb /* 163154941Sjhb * Note that we don't make any attempt to try to block read 164154941Sjhb * locks once a writer has blocked on the lock. The reason is 165154941Sjhb * that we currently allow for read locks to recurse and we 166154941Sjhb * don't keep track of all the holders of read locks. Thus, if 167154941Sjhb * we were to block readers once a writer blocked and a reader 168154941Sjhb * tried to recurse on their reader lock after a writer had 169154941Sjhb * blocked we would end up in a deadlock since the reader would 170154941Sjhb * be blocked on the writer, and the writer would be blocked 171154941Sjhb * waiting for the reader to release its original read lock. 172154941Sjhb */ 173154941Sjhb for (;;) { 174154941Sjhb /* 175154941Sjhb * Handle the easy case. If no other thread has a write 176154941Sjhb * lock, then try to bump up the count of read locks. 
Note 177154941Sjhb * that we have to preserve the current state of the 178154941Sjhb * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a 179154941Sjhb * read lock, then rw_lock must have changed, so restart 180154941Sjhb * the loop. Note that this handles the case of a 181154941Sjhb * completely unlocked rwlock since such a lock is encoded 182154941Sjhb * as a read lock with no waiters. 183154941Sjhb */ 184154941Sjhb x = rw->rw_lock; 185154941Sjhb if (x & RW_LOCK_READ) { 186154941Sjhb 187154941Sjhb /* 188154941Sjhb * The RW_LOCK_READ_WAITERS flag should only be set 189154941Sjhb * if another thread currently holds a write lock, 190154941Sjhb * and in that case RW_LOCK_READ should be clear. 191154941Sjhb */ 192154941Sjhb MPASS((x & RW_LOCK_READ_WAITERS) == 0); 193154941Sjhb if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, 194154941Sjhb x + RW_ONE_READER)) { 195167012Skmacy lock_profile_obtain_lock_success(&rw->rw_object, contested, waitstart, file, line); 196154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 197154941Sjhb CTR4(KTR_LOCK, 198154941Sjhb "%s: %p succeed %p -> %p", __func__, 199154941Sjhb rw, (void *)x, 200154941Sjhb (void *)(x + RW_ONE_READER)); 201154941Sjhb break; 202154941Sjhb } 203167012Skmacy lock_profile_obtain_lock_failed(&rw->rw_object, &contested, &waitstart); 204157846Sjhb cpu_spinwait(); 205154941Sjhb continue; 206154941Sjhb } 207154941Sjhb 208154941Sjhb /* 209154941Sjhb * Okay, now it's the hard case. Some other thread already 210154941Sjhb * has a write lock, so acquire the turnstile lock so we can 211154941Sjhb * begin the process of blocking. 212154941Sjhb */ 213154941Sjhb turnstile_lock(&rw->rw_object); 214154941Sjhb 215154941Sjhb /* 216154941Sjhb * The lock might have been released while we spun, so 217154941Sjhb * recheck its state and restart the loop if there is no 218154941Sjhb * longer a write lock. 
219154941Sjhb */ 220154941Sjhb x = rw->rw_lock; 221154941Sjhb if (x & RW_LOCK_READ) { 222154941Sjhb turnstile_release(&rw->rw_object); 223157846Sjhb cpu_spinwait(); 224154941Sjhb continue; 225154941Sjhb } 226154941Sjhb 227154941Sjhb /* 228154941Sjhb * Ok, it's still a write lock. If the RW_LOCK_READ_WAITERS 229154941Sjhb * flag is already set, then we can go ahead and block. If 230154941Sjhb * it is not set then try to set it. If we fail to set it 231154941Sjhb * drop the turnstile lock and restart the loop. 232154941Sjhb */ 233157826Sjhb if (!(x & RW_LOCK_READ_WAITERS)) { 234157826Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, x, 235157826Sjhb x | RW_LOCK_READ_WAITERS)) { 236157826Sjhb turnstile_release(&rw->rw_object); 237157826Sjhb cpu_spinwait(); 238157826Sjhb continue; 239157826Sjhb } 240157826Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 241157826Sjhb CTR2(KTR_LOCK, "%s: %p set read waiters flag", 242157826Sjhb __func__, rw); 243154941Sjhb } 244154941Sjhb 245157846Sjhb#ifdef SMP 246154941Sjhb /* 247157846Sjhb * If the owner is running on another CPU, spin until 248157846Sjhb * the owner stops running or the state of the lock 249157846Sjhb * changes. 250157846Sjhb */ 251157846Sjhb owner = (struct thread *)RW_OWNER(x); 252157846Sjhb if (TD_IS_RUNNING(owner)) { 253167012Skmacy lock_profile_obtain_lock_failed(&rw->rw_object, &contested, &waitstart); 254157846Sjhb turnstile_release(&rw->rw_object); 255157846Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 256157846Sjhb CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 257157846Sjhb __func__, rw, owner); 258157846Sjhb while ((struct thread*)RW_OWNER(rw->rw_lock)== owner && 259157846Sjhb TD_IS_RUNNING(owner)) 260157846Sjhb cpu_spinwait(); 261157846Sjhb continue; 262157846Sjhb } 263157846Sjhb#endif 264157846Sjhb 265157846Sjhb /* 266154941Sjhb * We were unable to acquire the lock and the read waiters 267154941Sjhb * flag is set, so we must block on the turnstile. 
268154941Sjhb */ 269154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 270154941Sjhb CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 271154941Sjhb rw); 272154941Sjhb turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE); 273154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 274154941Sjhb CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 275154941Sjhb __func__, rw); 276154941Sjhb } 277154941Sjhb 278154941Sjhb /* 279154941Sjhb * TODO: acquire "owner of record" here. Here be turnstile dragons 280154941Sjhb * however. turnstiles don't like owners changing between calls to 281154941Sjhb * turnstile_wait() currently. 282154941Sjhb */ 283154941Sjhb 284154941Sjhb LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line); 285154941Sjhb WITNESS_LOCK(&rw->rw_object, 0, file, line); 286160771Sjhb curthread->td_locks++; 287154941Sjhb} 288154941Sjhb 289154941Sjhbvoid 290154941Sjhb_rw_runlock(struct rwlock *rw, const char *file, int line) 291154941Sjhb{ 292154941Sjhb struct turnstile *ts; 293154941Sjhb uintptr_t x; 294154941Sjhb 295154941Sjhb _rw_assert(rw, RA_RLOCKED, file, line); 296160771Sjhb curthread->td_locks--; 297154941Sjhb WITNESS_UNLOCK(&rw->rw_object, 0, file, line); 298154941Sjhb LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line); 299154941Sjhb 300154941Sjhb /* TODO: drop "owner of record" here. */ 301154941Sjhb 302154941Sjhb for (;;) { 303154941Sjhb /* 304154941Sjhb * See if there is more than one read lock held. If so, 305154941Sjhb * just drop one and return. 
306154941Sjhb */ 307154941Sjhb x = rw->rw_lock; 308154941Sjhb if (RW_READERS(x) > 1) { 309154941Sjhb if (atomic_cmpset_ptr(&rw->rw_lock, x, 310154941Sjhb x - RW_ONE_READER)) { 311154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 312154941Sjhb CTR4(KTR_LOCK, 313154941Sjhb "%s: %p succeeded %p -> %p", 314154941Sjhb __func__, rw, (void *)x, 315154941Sjhb (void *)(x - RW_ONE_READER)); 316154941Sjhb break; 317154941Sjhb } 318154941Sjhb continue; 319164159Skmacy } else 320164159Skmacy lock_profile_release_lock(&rw->rw_object); 321154941Sjhb 322164159Skmacy 323154941Sjhb /* 324154941Sjhb * We should never have read waiters while at least one 325154941Sjhb * thread holds a read lock. (See note above) 326154941Sjhb */ 327154941Sjhb KASSERT(!(x & RW_LOCK_READ_WAITERS), 328154941Sjhb ("%s: waiting readers", __func__)); 329154941Sjhb 330154941Sjhb /* 331154941Sjhb * If there aren't any waiters for a write lock, then try 332154941Sjhb * to drop it quickly. 333154941Sjhb */ 334154941Sjhb if (!(x & RW_LOCK_WRITE_WAITERS)) { 335154941Sjhb 336154941Sjhb /* 337154941Sjhb * There shouldn't be any flags set and we should 338154941Sjhb * be the only read lock. If we fail to release 339154941Sjhb * the single read lock, then another thread might 340154941Sjhb * have just acquired a read lock, so go back up 341154941Sjhb * to the multiple read locks case. 342154941Sjhb */ 343154941Sjhb MPASS(x == RW_READERS_LOCK(1)); 344154941Sjhb if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1), 345154941Sjhb RW_UNLOCKED)) { 346154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 347154941Sjhb CTR2(KTR_LOCK, "%s: %p last succeeded", 348154941Sjhb __func__, rw); 349154941Sjhb break; 350154941Sjhb } 351154941Sjhb continue; 352154941Sjhb } 353154941Sjhb 354154941Sjhb /* 355154941Sjhb * There should just be one reader with one or more 356154941Sjhb * writers waiting. 
357154941Sjhb */ 358154941Sjhb MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS)); 359154941Sjhb 360154941Sjhb /* 361154941Sjhb * Ok, we know we have a waiting writer and we think we 362154941Sjhb * are the last reader, so grab the turnstile lock. 363154941Sjhb */ 364154941Sjhb turnstile_lock(&rw->rw_object); 365154941Sjhb 366154941Sjhb /* 367154941Sjhb * Try to drop our lock leaving the lock in a unlocked 368154941Sjhb * state. 369154941Sjhb * 370154941Sjhb * If you wanted to do explicit lock handoff you'd have to 371154941Sjhb * do it here. You'd also want to use turnstile_signal() 372154941Sjhb * and you'd have to handle the race where a higher 373154941Sjhb * priority thread blocks on the write lock before the 374154941Sjhb * thread you wakeup actually runs and have the new thread 375154941Sjhb * "steal" the lock. For now it's a lot simpler to just 376154941Sjhb * wakeup all of the waiters. 377154941Sjhb * 378154941Sjhb * As above, if we fail, then another thread might have 379154941Sjhb * acquired a read lock, so drop the turnstile lock and 380154941Sjhb * restart. 381154941Sjhb */ 382154941Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, 383154941Sjhb RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) { 384154941Sjhb turnstile_release(&rw->rw_object); 385154941Sjhb continue; 386154941Sjhb } 387154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 388154941Sjhb CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", 389154941Sjhb __func__, rw); 390154941Sjhb 391154941Sjhb /* 392154941Sjhb * Ok. The lock is released and all that's left is to 393154941Sjhb * wake up the waiters. Note that the lock might not be 394154941Sjhb * free anymore, but in that case the writers will just 395154941Sjhb * block again if they run before the new lock holder(s) 396154941Sjhb * release the lock. 
397154941Sjhb */ 398154941Sjhb ts = turnstile_lookup(&rw->rw_object); 399157846Sjhb MPASS(ts != NULL); 400154941Sjhb turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); 401154941Sjhb turnstile_unpend(ts, TS_SHARED_LOCK); 402154941Sjhb break; 403154941Sjhb } 404154941Sjhb} 405154941Sjhb 406154941Sjhb/* 407154941Sjhb * This function is called when we are unable to obtain a write lock on the 408154941Sjhb * first try. This means that at least one other thread holds either a 409154941Sjhb * read or write lock. 410154941Sjhb */ 411154941Sjhbvoid 412154941Sjhb_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 413154941Sjhb{ 414157851Swkoszek#ifdef SMP 415157846Sjhb volatile struct thread *owner; 416157851Swkoszek#endif 417154941Sjhb uintptr_t v; 418154941Sjhb 419154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 420154941Sjhb CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, 421154941Sjhb rw->rw_object.lo_name, (void *)rw->rw_lock, file, line); 422154941Sjhb 423154941Sjhb while (!_rw_write_lock(rw, tid)) { 424154941Sjhb turnstile_lock(&rw->rw_object); 425154941Sjhb v = rw->rw_lock; 426154941Sjhb 427154941Sjhb /* 428154941Sjhb * If the lock was released while spinning on the 429154941Sjhb * turnstile chain lock, try again. 430154941Sjhb */ 431154941Sjhb if (v == RW_UNLOCKED) { 432154941Sjhb turnstile_release(&rw->rw_object); 433154941Sjhb cpu_spinwait(); 434154941Sjhb continue; 435154941Sjhb } 436154941Sjhb 437154941Sjhb /* 438154941Sjhb * If the lock was released by a writer with both readers 439154941Sjhb * and writers waiting and a reader hasn't woken up and 440154941Sjhb * acquired the lock yet, rw_lock will be set to the 441154941Sjhb * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS. If we see 442154941Sjhb * that value, try to acquire it once. Note that we have 443154941Sjhb * to preserve the RW_LOCK_WRITE_WAITERS flag as there are 444154941Sjhb * other writers waiting still. If we fail, restart the 445154941Sjhb * loop. 
446154941Sjhb */ 447154941Sjhb if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) { 448154941Sjhb if (atomic_cmpset_acq_ptr(&rw->rw_lock, 449154941Sjhb RW_UNLOCKED | RW_LOCK_WRITE_WAITERS, 450154941Sjhb tid | RW_LOCK_WRITE_WAITERS)) { 451154941Sjhb turnstile_claim(&rw->rw_object); 452154941Sjhb CTR2(KTR_LOCK, "%s: %p claimed by new writer", 453154941Sjhb __func__, rw); 454154941Sjhb break; 455154941Sjhb } 456154941Sjhb turnstile_release(&rw->rw_object); 457154941Sjhb cpu_spinwait(); 458154941Sjhb continue; 459154941Sjhb } 460154941Sjhb 461154941Sjhb /* 462154941Sjhb * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to 463154941Sjhb * set it. If we fail to set it, then loop back and try 464154941Sjhb * again. 465154941Sjhb */ 466157826Sjhb if (!(v & RW_LOCK_WRITE_WAITERS)) { 467157826Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, v, 468157826Sjhb v | RW_LOCK_WRITE_WAITERS)) { 469157826Sjhb turnstile_release(&rw->rw_object); 470157826Sjhb cpu_spinwait(); 471157826Sjhb continue; 472157826Sjhb } 473157826Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 474157826Sjhb CTR2(KTR_LOCK, "%s: %p set write waiters flag", 475157826Sjhb __func__, rw); 476154941Sjhb } 477154941Sjhb 478157846Sjhb#ifdef SMP 479157846Sjhb /* 480157846Sjhb * If the lock is write locked and the owner is 481157846Sjhb * running on another CPU, spin until the owner stops 482157846Sjhb * running or the state of the lock changes. 
483157846Sjhb */ 484157846Sjhb owner = (struct thread *)RW_OWNER(v); 485157846Sjhb if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { 486157846Sjhb turnstile_release(&rw->rw_object); 487157846Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 488157846Sjhb CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 489157846Sjhb __func__, rw, owner); 490157846Sjhb while ((struct thread*)RW_OWNER(rw->rw_lock)== owner && 491157846Sjhb TD_IS_RUNNING(owner)) 492157846Sjhb cpu_spinwait(); 493157846Sjhb continue; 494157846Sjhb } 495157846Sjhb#endif 496154941Sjhb 497154941Sjhb /* 498154941Sjhb * We were unable to acquire the lock and the write waiters 499154941Sjhb * flag is set, so we must block on the turnstile. 500154941Sjhb */ 501154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 502154941Sjhb CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 503154941Sjhb rw); 504154941Sjhb turnstile_wait(&rw->rw_object, rw_owner(rw), 505154941Sjhb TS_EXCLUSIVE_QUEUE); 506154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 507154941Sjhb CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 508154941Sjhb __func__, rw); 509154941Sjhb } 510154941Sjhb} 511154941Sjhb 512154941Sjhb/* 513154941Sjhb * This function is called if the first try at releasing a write lock failed. 514154941Sjhb * This means that one of the 2 waiter bits must be set indicating that at 515154941Sjhb * least one thread is waiting on this lock. 
516154941Sjhb */ 517154941Sjhbvoid 518154941Sjhb_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 519154941Sjhb{ 520154941Sjhb struct turnstile *ts; 521154941Sjhb uintptr_t v; 522154941Sjhb int queue; 523154941Sjhb 524154941Sjhb KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), 525154941Sjhb ("%s: neither of the waiter flags are set", __func__)); 526154941Sjhb 527154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 528154941Sjhb CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); 529154941Sjhb 530154941Sjhb turnstile_lock(&rw->rw_object); 531154941Sjhb ts = turnstile_lookup(&rw->rw_object); 532154941Sjhb 533157846Sjhb#ifdef SMP 534157846Sjhb /* 535157846Sjhb * There might not be a turnstile for this lock if all of 536157846Sjhb * the waiters are adaptively spinning. In that case, just 537157846Sjhb * reset the lock to the unlocked state and return. 538157846Sjhb */ 539157846Sjhb if (ts == NULL) { 540157846Sjhb atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED); 541157846Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 542157846Sjhb CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw); 543157846Sjhb turnstile_release(&rw->rw_object); 544157846Sjhb return; 545157846Sjhb } 546157846Sjhb#else 547154941Sjhb MPASS(ts != NULL); 548157846Sjhb#endif 549154941Sjhb 550154941Sjhb /* 551154941Sjhb * Use the same algo as sx locks for now. Prefer waking up shared 552154941Sjhb * waiters if we have any over writers. This is probably not ideal. 553154941Sjhb * 554154941Sjhb * 'v' is the value we are going to write back to rw_lock. If we 555154941Sjhb * have waiters on both queues, we need to preserve the state of 556154941Sjhb * the waiter flag for the queue we don't wake up. For now this is 557154941Sjhb * hardcoded for the algorithm mentioned above. 558154941Sjhb * 559154941Sjhb * In the case of both readers and writers waiting we wakeup the 560154941Sjhb * readers but leave the RW_LOCK_WRITE_WAITERS flag set. 
If a 561154941Sjhb * new writer comes in before a reader it will claim the lock up 562154941Sjhb * above. There is probably a potential priority inversion in 563154941Sjhb * there that could be worked around either by waking both queues 564154941Sjhb * of waiters or doing some complicated lock handoff gymnastics. 565157846Sjhb * 566157846Sjhb * Note that in the SMP case, if both flags are set, there might 567157846Sjhb * not be any actual writers on the turnstile as they might all 568157846Sjhb * be spinning. In that case, we don't want to preserve the 569157846Sjhb * RW_LOCK_WRITE_WAITERS flag as the turnstile is going to go 570157846Sjhb * away once we wakeup all the readers. 571154941Sjhb */ 572157846Sjhb v = RW_UNLOCKED; 573154941Sjhb if (rw->rw_lock & RW_LOCK_READ_WAITERS) { 574154941Sjhb queue = TS_SHARED_QUEUE; 575157846Sjhb#ifdef SMP 576157846Sjhb if (rw->rw_lock & RW_LOCK_WRITE_WAITERS && 577157846Sjhb !turnstile_empty(ts, TS_EXCLUSIVE_QUEUE)) 578157846Sjhb v |= RW_LOCK_WRITE_WAITERS; 579157846Sjhb#else 580157846Sjhb v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS); 581157846Sjhb#endif 582157846Sjhb } else 583154941Sjhb queue = TS_EXCLUSIVE_QUEUE; 584157846Sjhb 585157846Sjhb#ifdef SMP 586157846Sjhb /* 587157846Sjhb * We have to make sure that we actually have waiters to 588157846Sjhb * wakeup. If they are all spinning, then we just need to 589157846Sjhb * disown the turnstile and return. 590157846Sjhb */ 591157846Sjhb if (turnstile_empty(ts, queue)) { 592157846Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 593157846Sjhb CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw); 594157846Sjhb atomic_store_rel_ptr(&rw->rw_lock, v); 595157846Sjhb turnstile_disown(ts); 596157846Sjhb return; 597154941Sjhb } 598157846Sjhb#endif 599157846Sjhb 600157846Sjhb /* Wake up all waiters for the specific queue. */ 601154941Sjhb if (LOCK_LOG_TEST(&rw->rw_object, 0)) 602154941Sjhb CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, 603154941Sjhb queue == TS_SHARED_QUEUE ? 
"read" : "write"); 604154941Sjhb turnstile_broadcast(ts, queue); 605154941Sjhb atomic_store_rel_ptr(&rw->rw_lock, v); 606154941Sjhb turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); 607154941Sjhb} 608154941Sjhb 609157882Sjhb/* 610157882Sjhb * Attempt to do a non-blocking upgrade from a read lock to a write 611157882Sjhb * lock. This will only succeed if this thread holds a single read 612157882Sjhb * lock. Returns true if the upgrade succeeded and false otherwise. 613157882Sjhb */ 614157882Sjhbint 615157882Sjhb_rw_try_upgrade(struct rwlock *rw, const char *file, int line) 616157882Sjhb{ 617157882Sjhb uintptr_t v, tid; 618157882Sjhb int success; 619157882Sjhb 620157882Sjhb _rw_assert(rw, RA_RLOCKED, file, line); 621157882Sjhb 622157882Sjhb /* 623157882Sjhb * Attempt to switch from one reader to a writer. If there 624157882Sjhb * are any write waiters, then we will have to lock the 625157882Sjhb * turnstile first to prevent races with another writer 626157882Sjhb * calling turnstile_wait() before we have claimed this 627157882Sjhb * turnstile. So, do the simple case of no waiters first. 628157882Sjhb */ 629157882Sjhb tid = (uintptr_t)curthread; 630157882Sjhb if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) { 631157882Sjhb success = atomic_cmpset_acq_ptr(&rw->rw_lock, 632157882Sjhb RW_READERS_LOCK(1), tid); 633157882Sjhb goto out; 634157882Sjhb } 635157882Sjhb 636157882Sjhb /* 637157882Sjhb * Ok, we think we have write waiters, so lock the 638157882Sjhb * turnstile. 639157882Sjhb */ 640157882Sjhb turnstile_lock(&rw->rw_object); 641157882Sjhb 642157882Sjhb /* 643157882Sjhb * Try to switch from one reader to a writer again. This time 644157882Sjhb * we honor the current state of the RW_LOCK_WRITE_WAITERS 645157882Sjhb * flag. If we obtain the lock with the flag set, then claim 646157882Sjhb * ownership of the turnstile. 
In the SMP case it is possible 647157882Sjhb * for there to not be an associated turnstile even though there 648157882Sjhb * are waiters if all of the waiters are spinning. 649157882Sjhb */ 650157882Sjhb v = rw->rw_lock & RW_LOCK_WRITE_WAITERS; 651157882Sjhb success = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, 652157882Sjhb tid | v); 653157882Sjhb#ifdef SMP 654157882Sjhb if (success && v && turnstile_lookup(&rw->rw_object) != NULL) 655157882Sjhb#else 656157882Sjhb if (success && v) 657157882Sjhb#endif 658157882Sjhb turnstile_claim(&rw->rw_object); 659157882Sjhb else 660157882Sjhb turnstile_release(&rw->rw_object); 661157882Sjhbout: 662157882Sjhb LOCK_LOG_TRY("WUPGRADE", &rw->rw_object, 0, success, file, line); 663157882Sjhb if (success) 664157882Sjhb WITNESS_UPGRADE(&rw->rw_object, LOP_EXCLUSIVE | LOP_TRYLOCK, 665157882Sjhb file, line); 666157882Sjhb return (success); 667157882Sjhb} 668157882Sjhb 669157882Sjhb/* 670157882Sjhb * Downgrade a write lock into a single read lock. 671157882Sjhb */ 672157882Sjhbvoid 673157882Sjhb_rw_downgrade(struct rwlock *rw, const char *file, int line) 674157882Sjhb{ 675157882Sjhb struct turnstile *ts; 676157882Sjhb uintptr_t tid, v; 677157882Sjhb 678157882Sjhb _rw_assert(rw, RA_WLOCKED, file, line); 679157882Sjhb 680157882Sjhb WITNESS_DOWNGRADE(&rw->rw_object, 0, file, line); 681157882Sjhb 682157882Sjhb /* 683157882Sjhb * Convert from a writer to a single reader. First we handle 684157882Sjhb * the easy case with no waiters. If there are any waiters, we 685157882Sjhb * lock the turnstile, "disown" the lock, and awaken any read 686157882Sjhb * waiters. 687157882Sjhb */ 688157882Sjhb tid = (uintptr_t)curthread; 689157882Sjhb if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) 690157882Sjhb goto out; 691157882Sjhb 692157882Sjhb /* 693157882Sjhb * Ok, we think we have waiters, so lock the turnstile so we can 694157882Sjhb * read the waiter flags without any races. 
695157882Sjhb */ 696157882Sjhb turnstile_lock(&rw->rw_object); 697157882Sjhb v = rw->rw_lock; 698157882Sjhb MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)); 699157882Sjhb 700157882Sjhb /* 701157882Sjhb * Downgrade from a write lock while preserving 702157882Sjhb * RW_LOCK_WRITE_WAITERS and give up ownership of the 703157882Sjhb * turnstile. If there are any read waiters, wake them up. 704157882Sjhb * 705157882Sjhb * For SMP, we have to allow for the fact that all of the 706157882Sjhb * read waiters might be spinning. In that case, act as if 707157882Sjhb * RW_LOCK_READ_WAITERS is not set. Also, only preserve 708157882Sjhb * the RW_LOCK_WRITE_WAITERS flag if at least one writer is 709157882Sjhb * blocked on the turnstile. 710157882Sjhb */ 711157882Sjhb ts = turnstile_lookup(&rw->rw_object); 712157882Sjhb#ifdef SMP 713157882Sjhb if (ts == NULL) 714157882Sjhb v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS); 715157882Sjhb else if (v & RW_LOCK_READ_WAITERS && 716157882Sjhb turnstile_empty(ts, TS_SHARED_QUEUE)) 717157882Sjhb v &= ~RW_LOCK_READ_WAITERS; 718157882Sjhb else if (v & RW_LOCK_WRITE_WAITERS && 719157882Sjhb turnstile_empty(ts, TS_EXCLUSIVE_QUEUE)) 720157882Sjhb v &= ~RW_LOCK_WRITE_WAITERS; 721157882Sjhb#else 722157882Sjhb MPASS(ts != NULL); 723157882Sjhb#endif 724157882Sjhb if (v & RW_LOCK_READ_WAITERS) 725157882Sjhb turnstile_broadcast(ts, TS_SHARED_QUEUE); 726157882Sjhb atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | 727157882Sjhb (v & RW_LOCK_WRITE_WAITERS)); 728157882Sjhb if (v & RW_LOCK_READ_WAITERS) 729157882Sjhb turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); 730157882Sjhb#ifdef SMP 731157882Sjhb else if (ts == NULL) 732157882Sjhb turnstile_release(&rw->rw_object); 733157882Sjhb#endif 734157882Sjhb else 735157882Sjhb turnstile_disown(ts); 736157882Sjhbout: 737157882Sjhb LOCK_LOG_LOCK("WDOWNGRADE", &rw->rw_object, 0, 0, file, line); 738157882Sjhb} 739157882Sjhb 740154941Sjhb#ifdef INVARIANT_SUPPORT 741155162Sscottl#ifndef INVARIANTS 
742154941Sjhb#undef _rw_assert 743154941Sjhb#endif 744154941Sjhb 745154941Sjhb/* 746154941Sjhb * In the non-WITNESS case, rw_assert() can only detect that at least 747154941Sjhb * *some* thread owns an rlock, but it cannot guarantee that *this* 748154941Sjhb * thread owns an rlock. 749154941Sjhb */ 750154941Sjhbvoid 751154941Sjhb_rw_assert(struct rwlock *rw, int what, const char *file, int line) 752154941Sjhb{ 753154941Sjhb 754154941Sjhb if (panicstr != NULL) 755154941Sjhb return; 756154941Sjhb switch (what) { 757154941Sjhb case RA_LOCKED: 758154941Sjhb case RA_RLOCKED: 759154941Sjhb#ifdef WITNESS 760154941Sjhb witness_assert(&rw->rw_object, what, file, line); 761154941Sjhb#else 762154941Sjhb /* 763154941Sjhb * If some other thread has a write lock or we have one 764154941Sjhb * and are asserting a read lock, fail. Also, if no one 765154941Sjhb * has a lock at all, fail. 766154941Sjhb */ 767155061Sscottl if (rw->rw_lock == RW_UNLOCKED || 768155061Sscottl (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED || 769157826Sjhb rw_wowner(rw) != curthread))) 770154941Sjhb panic("Lock %s not %slocked @ %s:%d\n", 771155012Sscottl rw->rw_object.lo_name, (what == RA_RLOCKED) ? 772154941Sjhb "read " : "", file, line); 773154941Sjhb#endif 774154941Sjhb break; 775154941Sjhb case RA_WLOCKED: 776157826Sjhb if (rw_wowner(rw) != curthread) 777154941Sjhb panic("Lock %s not exclusively locked @ %s:%d\n", 778154941Sjhb rw->rw_object.lo_name, file, line); 779154941Sjhb break; 780154941Sjhb case RA_UNLOCKED: 781154941Sjhb#ifdef WITNESS 782154941Sjhb witness_assert(&rw->rw_object, what, file, line); 783154941Sjhb#else 784154941Sjhb /* 785154941Sjhb * If we hold a write lock fail. We can't reliably check 786154941Sjhb * to see if we hold a read lock or not. 
787154941Sjhb */ 788157826Sjhb if (rw_wowner(rw) == curthread) 789154941Sjhb panic("Lock %s exclusively locked @ %s:%d\n", 790154941Sjhb rw->rw_object.lo_name, file, line); 791154941Sjhb#endif 792154941Sjhb break; 793154941Sjhb default: 794154941Sjhb panic("Unknown rw lock assertion: %d @ %s:%d", what, file, 795154941Sjhb line); 796154941Sjhb } 797154941Sjhb} 798154941Sjhb#endif /* INVARIANT_SUPPORT */ 799154941Sjhb 800154941Sjhb#ifdef DDB 801154941Sjhbvoid 802154941Sjhbdb_show_rwlock(struct lock_object *lock) 803154941Sjhb{ 804154941Sjhb struct rwlock *rw; 805154941Sjhb struct thread *td; 806154941Sjhb 807154941Sjhb rw = (struct rwlock *)lock; 808154941Sjhb 809154941Sjhb db_printf(" state: "); 810154941Sjhb if (rw->rw_lock == RW_UNLOCKED) 811154941Sjhb db_printf("UNLOCKED\n"); 812154941Sjhb else if (rw->rw_lock & RW_LOCK_READ) 813154973Smlaier db_printf("RLOCK: %jd locks\n", 814154973Smlaier (intmax_t)(RW_READERS(rw->rw_lock))); 815154941Sjhb else { 816157826Sjhb td = rw_wowner(rw); 817154941Sjhb db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, 818154941Sjhb td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm); 819154941Sjhb } 820154941Sjhb db_printf(" waiters: "); 821154941Sjhb switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { 822154941Sjhb case RW_LOCK_READ_WAITERS: 823154941Sjhb db_printf("readers\n"); 824154941Sjhb break; 825154941Sjhb case RW_LOCK_WRITE_WAITERS: 826154941Sjhb db_printf("writers\n"); 827154941Sjhb break; 828154941Sjhb case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: 829154941Sjhb db_printf("readers and waiters\n"); 830154941Sjhb break; 831154941Sjhb default: 832154941Sjhb db_printf("none\n"); 833154941Sjhb break; 834154941Sjhb } 835154941Sjhb} 836154941Sjhb 837154941Sjhb#endif 838