/* kern_rwlock.c revision 169394 */
1154941Sjhb/*- 2154941Sjhb * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org> 3154941Sjhb * All rights reserved. 4154941Sjhb * 5154941Sjhb * Redistribution and use in source and binary forms, with or without 6154941Sjhb * modification, are permitted provided that the following conditions 7154941Sjhb * are met: 8154941Sjhb * 1. Redistributions of source code must retain the above copyright 9154941Sjhb * notice, this list of conditions and the following disclaimer. 10154941Sjhb * 2. Redistributions in binary form must reproduce the above copyright 11154941Sjhb * notice, this list of conditions and the following disclaimer in the 12154941Sjhb * documentation and/or other materials provided with the distribution. 13154941Sjhb * 3. Neither the name of the author nor the names of any co-contributors 14154941Sjhb * may be used to endorse or promote products derived from this software 15154941Sjhb * without specific prior written permission. 16154941Sjhb * 17154941Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18154941Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19154941Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20154941Sjhb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21154941Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22154941Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23154941Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24154941Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25154941Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26154941Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27154941Sjhb * SUCH DAMAGE. 28154941Sjhb */ 29154941Sjhb 30154941Sjhb/* 31154941Sjhb * Machine independent bits of reader/writer lock implementation. 
32154941Sjhb */ 33154941Sjhb 34154941Sjhb#include <sys/cdefs.h> 35154941Sjhb__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 169394 2007-05-08 21:51:37Z jhb $"); 36154941Sjhb 37154941Sjhb#include "opt_ddb.h" 38167801Sjhb#include "opt_no_adaptive_rwlocks.h" 39154941Sjhb 40154941Sjhb#include <sys/param.h> 41154941Sjhb#include <sys/ktr.h> 42154941Sjhb#include <sys/lock.h> 43154941Sjhb#include <sys/mutex.h> 44154941Sjhb#include <sys/proc.h> 45154941Sjhb#include <sys/rwlock.h> 46154941Sjhb#include <sys/systm.h> 47154941Sjhb#include <sys/turnstile.h> 48164159Skmacy#include <sys/lock_profile.h> 49154941Sjhb#include <machine/cpu.h> 50154941Sjhb 51167801Sjhb#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) 52167801Sjhb#define ADAPTIVE_RWLOCKS 53167801Sjhb#endif 54167801Sjhb 55154941Sjhb#ifdef DDB 56154941Sjhb#include <ddb/ddb.h> 57154941Sjhb 58154941Sjhbstatic void db_show_rwlock(struct lock_object *lock); 59154941Sjhb#endif 60167368Sjhbstatic void lock_rw(struct lock_object *lock, int how); 61167368Sjhbstatic int unlock_rw(struct lock_object *lock); 62154941Sjhb 63154941Sjhbstruct lock_class lock_class_rw = { 64167365Sjhb .lc_name = "rw", 65167365Sjhb .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, 66154941Sjhb#ifdef DDB 67167365Sjhb .lc_ddb_show = db_show_rwlock, 68154941Sjhb#endif 69167368Sjhb .lc_lock = lock_rw, 70167368Sjhb .lc_unlock = unlock_rw, 71154941Sjhb}; 72154941Sjhb 73157826Sjhb/* 74157826Sjhb * Return a pointer to the owning thread if the lock is write-locked or 75157826Sjhb * NULL if the lock is unlocked or read-locked. 76157826Sjhb */ 77157826Sjhb#define rw_wowner(rw) \ 78154941Sjhb ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ 79154941Sjhb (struct thread *)RW_OWNER((rw)->rw_lock)) 80154941Sjhb 81157826Sjhb/* 82157826Sjhb * Return a pointer to the owning thread for this lock who should receive 83157826Sjhb * any priority lent by threads that block on this lock. Currently this 84157826Sjhb * is identical to rw_wowner(). 
85157826Sjhb */ 86157826Sjhb#define rw_owner(rw) rw_wowner(rw) 87157826Sjhb 88154941Sjhb#ifndef INVARIANTS 89154941Sjhb#define _rw_assert(rw, what, file, line) 90154941Sjhb#endif 91154941Sjhb 92154941Sjhbvoid 93167368Sjhblock_rw(struct lock_object *lock, int how) 94167368Sjhb{ 95167368Sjhb struct rwlock *rw; 96167368Sjhb 97167368Sjhb rw = (struct rwlock *)lock; 98167368Sjhb if (how) 99167368Sjhb rw_wlock(rw); 100167368Sjhb else 101167368Sjhb rw_rlock(rw); 102167368Sjhb} 103167368Sjhb 104167368Sjhbint 105167368Sjhbunlock_rw(struct lock_object *lock) 106167368Sjhb{ 107167368Sjhb struct rwlock *rw; 108167368Sjhb 109167368Sjhb rw = (struct rwlock *)lock; 110167368Sjhb rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); 111167368Sjhb if (rw->rw_lock & RW_LOCK_READ) { 112167368Sjhb rw_runlock(rw); 113167368Sjhb return (0); 114167368Sjhb } else { 115167368Sjhb rw_wunlock(rw); 116167368Sjhb return (1); 117167368Sjhb } 118167368Sjhb} 119167368Sjhb 120167368Sjhbvoid 121154941Sjhbrw_init(struct rwlock *rw, const char *name) 122154941Sjhb{ 123154941Sjhb 124154941Sjhb rw->rw_lock = RW_UNLOCKED; 125154941Sjhb 126167787Sjhb lock_profile_object_init(&rw->lock_object, &lock_class_rw, name); 127167787Sjhb lock_init(&rw->lock_object, &lock_class_rw, name, NULL, LO_WITNESS | 128157882Sjhb LO_RECURSABLE | LO_UPGRADABLE); 129154941Sjhb} 130154941Sjhb 131154941Sjhbvoid 132154941Sjhbrw_destroy(struct rwlock *rw) 133154941Sjhb{ 134154941Sjhb 135154941Sjhb KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked")); 136169394Sjhb rw->rw_lock = RW_DESTROYED; 137167787Sjhb lock_profile_object_destroy(&rw->lock_object); 138167787Sjhb lock_destroy(&rw->lock_object); 139154941Sjhb} 140154941Sjhb 141154941Sjhbvoid 142154941Sjhbrw_sysinit(void *arg) 143154941Sjhb{ 144154941Sjhb struct rw_args *args = arg; 145154941Sjhb 146154941Sjhb rw_init(args->ra_rw, args->ra_desc); 147154941Sjhb} 148154941Sjhb 149167024Srwatsonint 150167024Srwatsonrw_wowned(struct rwlock *rw) 151167024Srwatson{ 152167024Srwatson 
153167024Srwatson return (rw_wowner(rw) == curthread); 154167024Srwatson} 155167024Srwatson 156154941Sjhbvoid 157154941Sjhb_rw_wlock(struct rwlock *rw, const char *file, int line) 158154941Sjhb{ 159154941Sjhb 160154941Sjhb MPASS(curthread != NULL); 161169394Sjhb KASSERT(rw->rw_lock != RW_DESTROYED, 162169394Sjhb ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); 163157826Sjhb KASSERT(rw_wowner(rw) != curthread, 164154941Sjhb ("%s (%s): wlock already held @ %s:%d", __func__, 165167787Sjhb rw->lock_object.lo_name, file, line)); 166167787Sjhb WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, 167154941Sjhb line); 168154941Sjhb __rw_wlock(rw, curthread, file, line); 169167787Sjhb LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, 0, file, line); 170167787Sjhb WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 171160771Sjhb curthread->td_locks++; 172154941Sjhb} 173154941Sjhb 174154941Sjhbvoid 175154941Sjhb_rw_wunlock(struct rwlock *rw, const char *file, int line) 176154941Sjhb{ 177154941Sjhb 178154941Sjhb MPASS(curthread != NULL); 179169394Sjhb KASSERT(rw->rw_lock != RW_DESTROYED, 180169394Sjhb ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); 181154941Sjhb _rw_assert(rw, RA_WLOCKED, file, line); 182160771Sjhb curthread->td_locks--; 183167787Sjhb WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); 184167787Sjhb LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, 0, file, line); 185167787Sjhb lock_profile_release_lock(&rw->lock_object); 186154941Sjhb __rw_wunlock(rw, curthread, file, line); 187154941Sjhb} 188154941Sjhb 189154941Sjhbvoid 190154941Sjhb_rw_rlock(struct rwlock *rw, const char *file, int line) 191154941Sjhb{ 192167801Sjhb#ifdef ADAPTIVE_RWLOCKS 193157846Sjhb volatile struct thread *owner; 194157851Swkoszek#endif 195167307Sjhb uint64_t waittime = 0; 196167054Skmacy int contested = 0; 197154941Sjhb uintptr_t x; 198154941Sjhb 199169394Sjhb KASSERT(rw->rw_lock != RW_DESTROYED, 200169394Sjhb ("rw_rlock() of destroyed rwlock 
@ %s:%d", file, line)); 201157826Sjhb KASSERT(rw_wowner(rw) != curthread, 202154941Sjhb ("%s (%s): wlock already held @ %s:%d", __func__, 203167787Sjhb rw->lock_object.lo_name, file, line)); 204167787Sjhb WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line); 205154941Sjhb 206154941Sjhb /* 207154941Sjhb * Note that we don't make any attempt to try to block read 208154941Sjhb * locks once a writer has blocked on the lock. The reason is 209154941Sjhb * that we currently allow for read locks to recurse and we 210154941Sjhb * don't keep track of all the holders of read locks. Thus, if 211154941Sjhb * we were to block readers once a writer blocked and a reader 212154941Sjhb * tried to recurse on their reader lock after a writer had 213154941Sjhb * blocked we would end up in a deadlock since the reader would 214154941Sjhb * be blocked on the writer, and the writer would be blocked 215154941Sjhb * waiting for the reader to release its original read lock. 216154941Sjhb */ 217154941Sjhb for (;;) { 218154941Sjhb /* 219154941Sjhb * Handle the easy case. If no other thread has a write 220154941Sjhb * lock, then try to bump up the count of read locks. Note 221154941Sjhb * that we have to preserve the current state of the 222154941Sjhb * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a 223154941Sjhb * read lock, then rw_lock must have changed, so restart 224154941Sjhb * the loop. Note that this handles the case of a 225154941Sjhb * completely unlocked rwlock since such a lock is encoded 226154941Sjhb * as a read lock with no waiters. 227154941Sjhb */ 228154941Sjhb x = rw->rw_lock; 229154941Sjhb if (x & RW_LOCK_READ) { 230154941Sjhb 231154941Sjhb /* 232154941Sjhb * The RW_LOCK_READ_WAITERS flag should only be set 233154941Sjhb * if another thread currently holds a write lock, 234154941Sjhb * and in that case RW_LOCK_READ should be clear. 
235154941Sjhb */ 236154941Sjhb MPASS((x & RW_LOCK_READ_WAITERS) == 0); 237154941Sjhb if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, 238154941Sjhb x + RW_ONE_READER)) { 239167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 240154941Sjhb CTR4(KTR_LOCK, 241154941Sjhb "%s: %p succeed %p -> %p", __func__, 242154941Sjhb rw, (void *)x, 243154941Sjhb (void *)(x + RW_ONE_READER)); 244167307Sjhb if (RW_READERS(x) == 0) 245167307Sjhb lock_profile_obtain_lock_success( 246167787Sjhb &rw->lock_object, contested, waittime, 247167307Sjhb file, line); 248154941Sjhb break; 249154941Sjhb } 250157846Sjhb cpu_spinwait(); 251154941Sjhb continue; 252154941Sjhb } 253167787Sjhb lock_profile_obtain_lock_failed(&rw->lock_object, &contested, 254167307Sjhb &waittime); 255154941Sjhb 256154941Sjhb /* 257154941Sjhb * Okay, now it's the hard case. Some other thread already 258154941Sjhb * has a write lock, so acquire the turnstile lock so we can 259154941Sjhb * begin the process of blocking. 260154941Sjhb */ 261167787Sjhb turnstile_lock(&rw->lock_object); 262154941Sjhb 263154941Sjhb /* 264154941Sjhb * The lock might have been released while we spun, so 265154941Sjhb * recheck its state and restart the loop if there is no 266154941Sjhb * longer a write lock. 267154941Sjhb */ 268154941Sjhb x = rw->rw_lock; 269154941Sjhb if (x & RW_LOCK_READ) { 270167787Sjhb turnstile_release(&rw->lock_object); 271157846Sjhb cpu_spinwait(); 272154941Sjhb continue; 273154941Sjhb } 274154941Sjhb 275154941Sjhb /* 276154941Sjhb * Ok, it's still a write lock. If the RW_LOCK_READ_WAITERS 277154941Sjhb * flag is already set, then we can go ahead and block. If 278154941Sjhb * it is not set then try to set it. If we fail to set it 279154941Sjhb * drop the turnstile lock and restart the loop. 
280154941Sjhb */ 281157826Sjhb if (!(x & RW_LOCK_READ_WAITERS)) { 282157826Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, x, 283157826Sjhb x | RW_LOCK_READ_WAITERS)) { 284167787Sjhb turnstile_release(&rw->lock_object); 285157826Sjhb cpu_spinwait(); 286157826Sjhb continue; 287157826Sjhb } 288167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 289157826Sjhb CTR2(KTR_LOCK, "%s: %p set read waiters flag", 290157826Sjhb __func__, rw); 291154941Sjhb } 292154941Sjhb 293167801Sjhb#ifdef ADAPTIVE_RWLOCKS 294154941Sjhb /* 295157846Sjhb * If the owner is running on another CPU, spin until 296157846Sjhb * the owner stops running or the state of the lock 297157846Sjhb * changes. 298157846Sjhb */ 299157846Sjhb owner = (struct thread *)RW_OWNER(x); 300157846Sjhb if (TD_IS_RUNNING(owner)) { 301167787Sjhb turnstile_release(&rw->lock_object); 302167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 303157846Sjhb CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 304157846Sjhb __func__, rw, owner); 305157846Sjhb while ((struct thread*)RW_OWNER(rw->rw_lock)== owner && 306157846Sjhb TD_IS_RUNNING(owner)) 307157846Sjhb cpu_spinwait(); 308157846Sjhb continue; 309157846Sjhb } 310157846Sjhb#endif 311157846Sjhb 312157846Sjhb /* 313154941Sjhb * We were unable to acquire the lock and the read waiters 314154941Sjhb * flag is set, so we must block on the turnstile. 315154941Sjhb */ 316167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 317154941Sjhb CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 318154941Sjhb rw); 319167787Sjhb turnstile_wait(&rw->lock_object, rw_owner(rw), TS_SHARED_QUEUE); 320167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 321154941Sjhb CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 322154941Sjhb __func__, rw); 323154941Sjhb } 324154941Sjhb 325154941Sjhb /* 326154941Sjhb * TODO: acquire "owner of record" here. Here be turnstile dragons 327154941Sjhb * however. turnstiles don't like owners changing between calls to 328154941Sjhb * turnstile_wait() currently. 
329154941Sjhb */ 330154941Sjhb 331167787Sjhb LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); 332167787Sjhb WITNESS_LOCK(&rw->lock_object, 0, file, line); 333160771Sjhb curthread->td_locks++; 334154941Sjhb} 335154941Sjhb 336154941Sjhbvoid 337154941Sjhb_rw_runlock(struct rwlock *rw, const char *file, int line) 338154941Sjhb{ 339154941Sjhb struct turnstile *ts; 340154941Sjhb uintptr_t x; 341154941Sjhb 342169394Sjhb KASSERT(rw->rw_lock != RW_DESTROYED, 343169394Sjhb ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); 344154941Sjhb _rw_assert(rw, RA_RLOCKED, file, line); 345160771Sjhb curthread->td_locks--; 346167787Sjhb WITNESS_UNLOCK(&rw->lock_object, 0, file, line); 347167787Sjhb LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); 348154941Sjhb 349154941Sjhb /* TODO: drop "owner of record" here. */ 350154941Sjhb 351154941Sjhb for (;;) { 352154941Sjhb /* 353154941Sjhb * See if there is more than one read lock held. If so, 354154941Sjhb * just drop one and return. 355154941Sjhb */ 356154941Sjhb x = rw->rw_lock; 357154941Sjhb if (RW_READERS(x) > 1) { 358154941Sjhb if (atomic_cmpset_ptr(&rw->rw_lock, x, 359154941Sjhb x - RW_ONE_READER)) { 360167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 361154941Sjhb CTR4(KTR_LOCK, 362154941Sjhb "%s: %p succeeded %p -> %p", 363154941Sjhb __func__, rw, (void *)x, 364154941Sjhb (void *)(x - RW_ONE_READER)); 365154941Sjhb break; 366154941Sjhb } 367154941Sjhb continue; 368167307Sjhb } 369154941Sjhb 370164159Skmacy 371154941Sjhb /* 372154941Sjhb * We should never have read waiters while at least one 373154941Sjhb * thread holds a read lock. (See note above) 374154941Sjhb */ 375154941Sjhb KASSERT(!(x & RW_LOCK_READ_WAITERS), 376154941Sjhb ("%s: waiting readers", __func__)); 377154941Sjhb 378154941Sjhb /* 379154941Sjhb * If there aren't any waiters for a write lock, then try 380154941Sjhb * to drop it quickly. 
381154941Sjhb */ 382154941Sjhb if (!(x & RW_LOCK_WRITE_WAITERS)) { 383154941Sjhb 384154941Sjhb /* 385154941Sjhb * There shouldn't be any flags set and we should 386154941Sjhb * be the only read lock. If we fail to release 387154941Sjhb * the single read lock, then another thread might 388154941Sjhb * have just acquired a read lock, so go back up 389154941Sjhb * to the multiple read locks case. 390154941Sjhb */ 391154941Sjhb MPASS(x == RW_READERS_LOCK(1)); 392154941Sjhb if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1), 393154941Sjhb RW_UNLOCKED)) { 394167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 395154941Sjhb CTR2(KTR_LOCK, "%s: %p last succeeded", 396154941Sjhb __func__, rw); 397154941Sjhb break; 398154941Sjhb } 399154941Sjhb continue; 400154941Sjhb } 401154941Sjhb 402154941Sjhb /* 403154941Sjhb * There should just be one reader with one or more 404154941Sjhb * writers waiting. 405154941Sjhb */ 406154941Sjhb MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS)); 407154941Sjhb 408154941Sjhb /* 409154941Sjhb * Ok, we know we have a waiting writer and we think we 410154941Sjhb * are the last reader, so grab the turnstile lock. 411154941Sjhb */ 412167787Sjhb turnstile_lock(&rw->lock_object); 413154941Sjhb 414154941Sjhb /* 415154941Sjhb * Try to drop our lock leaving the lock in a unlocked 416154941Sjhb * state. 417154941Sjhb * 418154941Sjhb * If you wanted to do explicit lock handoff you'd have to 419154941Sjhb * do it here. You'd also want to use turnstile_signal() 420154941Sjhb * and you'd have to handle the race where a higher 421154941Sjhb * priority thread blocks on the write lock before the 422154941Sjhb * thread you wakeup actually runs and have the new thread 423154941Sjhb * "steal" the lock. For now it's a lot simpler to just 424154941Sjhb * wakeup all of the waiters. 425154941Sjhb * 426154941Sjhb * As above, if we fail, then another thread might have 427154941Sjhb * acquired a read lock, so drop the turnstile lock and 428154941Sjhb * restart. 
429154941Sjhb */ 430154941Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, 431154941Sjhb RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) { 432167787Sjhb turnstile_release(&rw->lock_object); 433154941Sjhb continue; 434154941Sjhb } 435167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 436154941Sjhb CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", 437154941Sjhb __func__, rw); 438154941Sjhb 439154941Sjhb /* 440154941Sjhb * Ok. The lock is released and all that's left is to 441154941Sjhb * wake up the waiters. Note that the lock might not be 442154941Sjhb * free anymore, but in that case the writers will just 443154941Sjhb * block again if they run before the new lock holder(s) 444154941Sjhb * release the lock. 445154941Sjhb */ 446167787Sjhb ts = turnstile_lookup(&rw->lock_object); 447157846Sjhb MPASS(ts != NULL); 448154941Sjhb turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); 449154941Sjhb turnstile_unpend(ts, TS_SHARED_LOCK); 450154941Sjhb break; 451154941Sjhb } 452167787Sjhb lock_profile_release_lock(&rw->lock_object); 453154941Sjhb} 454154941Sjhb 455154941Sjhb/* 456154941Sjhb * This function is called when we are unable to obtain a write lock on the 457154941Sjhb * first try. This means that at least one other thread holds either a 458154941Sjhb * read or write lock. 
459154941Sjhb */ 460154941Sjhbvoid 461154941Sjhb_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 462154941Sjhb{ 463167801Sjhb#ifdef ADAPTIVE_RWLOCKS 464157846Sjhb volatile struct thread *owner; 465157851Swkoszek#endif 466154941Sjhb uintptr_t v; 467154941Sjhb 468167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 469154941Sjhb CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, 470167787Sjhb rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); 471154941Sjhb 472154941Sjhb while (!_rw_write_lock(rw, tid)) { 473167787Sjhb turnstile_lock(&rw->lock_object); 474154941Sjhb v = rw->rw_lock; 475154941Sjhb 476154941Sjhb /* 477154941Sjhb * If the lock was released while spinning on the 478154941Sjhb * turnstile chain lock, try again. 479154941Sjhb */ 480154941Sjhb if (v == RW_UNLOCKED) { 481167787Sjhb turnstile_release(&rw->lock_object); 482154941Sjhb cpu_spinwait(); 483154941Sjhb continue; 484154941Sjhb } 485154941Sjhb 486154941Sjhb /* 487154941Sjhb * If the lock was released by a writer with both readers 488154941Sjhb * and writers waiting and a reader hasn't woken up and 489154941Sjhb * acquired the lock yet, rw_lock will be set to the 490154941Sjhb * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS. If we see 491154941Sjhb * that value, try to acquire it once. Note that we have 492154941Sjhb * to preserve the RW_LOCK_WRITE_WAITERS flag as there are 493168073Sjhb * other writers waiting still. If we fail, restart the 494154941Sjhb * loop. 
495154941Sjhb */ 496154941Sjhb if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) { 497154941Sjhb if (atomic_cmpset_acq_ptr(&rw->rw_lock, 498154941Sjhb RW_UNLOCKED | RW_LOCK_WRITE_WAITERS, 499154941Sjhb tid | RW_LOCK_WRITE_WAITERS)) { 500167787Sjhb turnstile_claim(&rw->lock_object); 501154941Sjhb CTR2(KTR_LOCK, "%s: %p claimed by new writer", 502154941Sjhb __func__, rw); 503154941Sjhb break; 504154941Sjhb } 505167787Sjhb turnstile_release(&rw->lock_object); 506154941Sjhb cpu_spinwait(); 507154941Sjhb continue; 508154941Sjhb } 509154941Sjhb 510154941Sjhb /* 511154941Sjhb * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to 512154941Sjhb * set it. If we fail to set it, then loop back and try 513154941Sjhb * again. 514154941Sjhb */ 515157826Sjhb if (!(v & RW_LOCK_WRITE_WAITERS)) { 516157826Sjhb if (!atomic_cmpset_ptr(&rw->rw_lock, v, 517157826Sjhb v | RW_LOCK_WRITE_WAITERS)) { 518167787Sjhb turnstile_release(&rw->lock_object); 519157826Sjhb cpu_spinwait(); 520157826Sjhb continue; 521157826Sjhb } 522167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 523157826Sjhb CTR2(KTR_LOCK, "%s: %p set write waiters flag", 524157826Sjhb __func__, rw); 525154941Sjhb } 526154941Sjhb 527167801Sjhb#ifdef ADAPTIVE_RWLOCKS 528157846Sjhb /* 529157846Sjhb * If the lock is write locked and the owner is 530157846Sjhb * running on another CPU, spin until the owner stops 531157846Sjhb * running or the state of the lock changes. 
532157846Sjhb */ 533157846Sjhb owner = (struct thread *)RW_OWNER(v); 534157846Sjhb if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { 535167787Sjhb turnstile_release(&rw->lock_object); 536167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 537157846Sjhb CTR3(KTR_LOCK, "%s: spinning on %p held by %p", 538157846Sjhb __func__, rw, owner); 539157846Sjhb while ((struct thread*)RW_OWNER(rw->rw_lock)== owner && 540157846Sjhb TD_IS_RUNNING(owner)) 541157846Sjhb cpu_spinwait(); 542157846Sjhb continue; 543157846Sjhb } 544157846Sjhb#endif 545154941Sjhb 546154941Sjhb /* 547154941Sjhb * We were unable to acquire the lock and the write waiters 548154941Sjhb * flag is set, so we must block on the turnstile. 549154941Sjhb */ 550167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 551154941Sjhb CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 552154941Sjhb rw); 553167787Sjhb turnstile_wait(&rw->lock_object, rw_owner(rw), 554154941Sjhb TS_EXCLUSIVE_QUEUE); 555167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 556154941Sjhb CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 557154941Sjhb __func__, rw); 558154941Sjhb } 559154941Sjhb} 560154941Sjhb 561154941Sjhb/* 562154941Sjhb * This function is called if the first try at releasing a write lock failed. 563154941Sjhb * This means that one of the 2 waiter bits must be set indicating that at 564154941Sjhb * least one thread is waiting on this lock. 
565154941Sjhb */ 566154941Sjhbvoid 567154941Sjhb_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 568154941Sjhb{ 569154941Sjhb struct turnstile *ts; 570154941Sjhb uintptr_t v; 571154941Sjhb int queue; 572154941Sjhb 573154941Sjhb KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), 574154941Sjhb ("%s: neither of the waiter flags are set", __func__)); 575154941Sjhb 576167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 577154941Sjhb CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); 578154941Sjhb 579167787Sjhb turnstile_lock(&rw->lock_object); 580167787Sjhb ts = turnstile_lookup(&rw->lock_object); 581154941Sjhb 582167801Sjhb#ifdef ADAPTIVE_RWLOCKS 583157846Sjhb /* 584157846Sjhb * There might not be a turnstile for this lock if all of 585157846Sjhb * the waiters are adaptively spinning. In that case, just 586157846Sjhb * reset the lock to the unlocked state and return. 587157846Sjhb */ 588157846Sjhb if (ts == NULL) { 589157846Sjhb atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED); 590167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 591157846Sjhb CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw); 592167787Sjhb turnstile_release(&rw->lock_object); 593157846Sjhb return; 594157846Sjhb } 595157846Sjhb#else 596154941Sjhb MPASS(ts != NULL); 597157846Sjhb#endif 598154941Sjhb 599154941Sjhb /* 600154941Sjhb * Use the same algo as sx locks for now. Prefer waking up shared 601154941Sjhb * waiters if we have any over writers. This is probably not ideal. 602154941Sjhb * 603154941Sjhb * 'v' is the value we are going to write back to rw_lock. If we 604154941Sjhb * have waiters on both queues, we need to preserve the state of 605154941Sjhb * the waiter flag for the queue we don't wake up. For now this is 606154941Sjhb * hardcoded for the algorithm mentioned above. 607154941Sjhb * 608154941Sjhb * In the case of both readers and writers waiting we wakeup the 609154941Sjhb * readers but leave the RW_LOCK_WRITE_WAITERS flag set. 
If a 610154941Sjhb * new writer comes in before a reader it will claim the lock up 611154941Sjhb * above. There is probably a potential priority inversion in 612154941Sjhb * there that could be worked around either by waking both queues 613154941Sjhb * of waiters or doing some complicated lock handoff gymnastics. 614157846Sjhb * 615167801Sjhb * Note that in the ADAPTIVE_RWLOCKS case, if both flags are 616167801Sjhb * set, there might not be any actual writers on the turnstile 617167801Sjhb * as they might all be spinning. In that case, we don't want 618167801Sjhb * to preserve the RW_LOCK_WRITE_WAITERS flag as the turnstile 619167801Sjhb * is going to go away once we wakeup all the readers. 620154941Sjhb */ 621157846Sjhb v = RW_UNLOCKED; 622154941Sjhb if (rw->rw_lock & RW_LOCK_READ_WAITERS) { 623154941Sjhb queue = TS_SHARED_QUEUE; 624167801Sjhb#ifdef ADAPTIVE_RWLOCKS 625157846Sjhb if (rw->rw_lock & RW_LOCK_WRITE_WAITERS && 626157846Sjhb !turnstile_empty(ts, TS_EXCLUSIVE_QUEUE)) 627157846Sjhb v |= RW_LOCK_WRITE_WAITERS; 628157846Sjhb#else 629157846Sjhb v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS); 630157846Sjhb#endif 631157846Sjhb } else 632154941Sjhb queue = TS_EXCLUSIVE_QUEUE; 633157846Sjhb 634167801Sjhb#ifdef ADAPTIVE_RWLOCKS 635157846Sjhb /* 636157846Sjhb * We have to make sure that we actually have waiters to 637157846Sjhb * wakeup. If they are all spinning, then we just need to 638157846Sjhb * disown the turnstile and return. 639157846Sjhb */ 640157846Sjhb if (turnstile_empty(ts, queue)) { 641167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 642157846Sjhb CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw); 643157846Sjhb atomic_store_rel_ptr(&rw->rw_lock, v); 644157846Sjhb turnstile_disown(ts); 645157846Sjhb return; 646154941Sjhb } 647157846Sjhb#endif 648157846Sjhb 649157846Sjhb /* Wake up all waiters for the specific queue. 
*/ 650167787Sjhb if (LOCK_LOG_TEST(&rw->lock_object, 0)) 651154941Sjhb CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, 652154941Sjhb queue == TS_SHARED_QUEUE ? "read" : "write"); 653154941Sjhb turnstile_broadcast(ts, queue); 654154941Sjhb atomic_store_rel_ptr(&rw->rw_lock, v); 655154941Sjhb turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); 656154941Sjhb} 657154941Sjhb 658157882Sjhb/* 659157882Sjhb * Attempt to do a non-blocking upgrade from a read lock to a write 660157882Sjhb * lock. This will only succeed if this thread holds a single read 661157882Sjhb * lock. Returns true if the upgrade succeeded and false otherwise. 662157882Sjhb */ 663157882Sjhbint 664157882Sjhb_rw_try_upgrade(struct rwlock *rw, const char *file, int line) 665157882Sjhb{ 666157882Sjhb uintptr_t v, tid; 667157882Sjhb int success; 668157882Sjhb 669169394Sjhb KASSERT(rw->rw_lock != RW_DESTROYED, 670169394Sjhb ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); 671157882Sjhb _rw_assert(rw, RA_RLOCKED, file, line); 672157882Sjhb 673157882Sjhb /* 674157882Sjhb * Attempt to switch from one reader to a writer. If there 675157882Sjhb * are any write waiters, then we will have to lock the 676157882Sjhb * turnstile first to prevent races with another writer 677157882Sjhb * calling turnstile_wait() before we have claimed this 678157882Sjhb * turnstile. So, do the simple case of no waiters first. 679157882Sjhb */ 680157882Sjhb tid = (uintptr_t)curthread; 681157882Sjhb if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) { 682168073Sjhb success = atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1), 683168073Sjhb tid); 684157882Sjhb goto out; 685157882Sjhb } 686157882Sjhb 687157882Sjhb /* 688157882Sjhb * Ok, we think we have write waiters, so lock the 689157882Sjhb * turnstile. 690157882Sjhb */ 691167787Sjhb turnstile_lock(&rw->lock_object); 692157882Sjhb 693157882Sjhb /* 694157882Sjhb * Try to switch from one reader to a writer again. 
This time 695157882Sjhb * we honor the current state of the RW_LOCK_WRITE_WAITERS 696157882Sjhb * flag. If we obtain the lock with the flag set, then claim 697167801Sjhb * ownership of the turnstile. In the ADAPTIVE_RWLOCKS case 698167801Sjhb * it is possible for there to not be an associated turnstile 699167801Sjhb * even though there are waiters if all of the waiters are 700167801Sjhb * spinning. 701157882Sjhb */ 702157882Sjhb v = rw->rw_lock & RW_LOCK_WRITE_WAITERS; 703168073Sjhb success = atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, 704157882Sjhb tid | v); 705167801Sjhb#ifdef ADAPTIVE_RWLOCKS 706167787Sjhb if (success && v && turnstile_lookup(&rw->lock_object) != NULL) 707157882Sjhb#else 708157882Sjhb if (success && v) 709157882Sjhb#endif 710167787Sjhb turnstile_claim(&rw->lock_object); 711157882Sjhb else 712167787Sjhb turnstile_release(&rw->lock_object); 713157882Sjhbout: 714167787Sjhb LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); 715157882Sjhb if (success) 716167787Sjhb WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, 717157882Sjhb file, line); 718157882Sjhb return (success); 719157882Sjhb} 720157882Sjhb 721157882Sjhb/* 722157882Sjhb * Downgrade a write lock into a single read lock. 723157882Sjhb */ 724157882Sjhbvoid 725157882Sjhb_rw_downgrade(struct rwlock *rw, const char *file, int line) 726157882Sjhb{ 727157882Sjhb struct turnstile *ts; 728157882Sjhb uintptr_t tid, v; 729157882Sjhb 730169394Sjhb KASSERT(rw->rw_lock != RW_DESTROYED, 731169394Sjhb ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); 732157882Sjhb _rw_assert(rw, RA_WLOCKED, file, line); 733157882Sjhb 734167787Sjhb WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); 735157882Sjhb 736157882Sjhb /* 737157882Sjhb * Convert from a writer to a single reader. First we handle 738157882Sjhb * the easy case with no waiters. 
If there are any waiters, we 739157882Sjhb * lock the turnstile, "disown" the lock, and awaken any read 740157882Sjhb * waiters. 741157882Sjhb */ 742157882Sjhb tid = (uintptr_t)curthread; 743157882Sjhb if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) 744157882Sjhb goto out; 745157882Sjhb 746157882Sjhb /* 747157882Sjhb * Ok, we think we have waiters, so lock the turnstile so we can 748157882Sjhb * read the waiter flags without any races. 749157882Sjhb */ 750167787Sjhb turnstile_lock(&rw->lock_object); 751157882Sjhb v = rw->rw_lock; 752157882Sjhb MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)); 753157882Sjhb 754157882Sjhb /* 755157882Sjhb * Downgrade from a write lock while preserving 756157882Sjhb * RW_LOCK_WRITE_WAITERS and give up ownership of the 757157882Sjhb * turnstile. If there are any read waiters, wake them up. 758157882Sjhb * 759167801Sjhb * For ADAPTIVE_RWLOCKS, we have to allow for the fact that 760167801Sjhb * all of the read waiters might be spinning. In that case, 761167801Sjhb * act as if RW_LOCK_READ_WAITERS is not set. Also, only 762167801Sjhb * preserve the RW_LOCK_WRITE_WAITERS flag if at least one 763167801Sjhb * writer is blocked on the turnstile. 
764157882Sjhb */ 765167787Sjhb ts = turnstile_lookup(&rw->lock_object); 766167801Sjhb#ifdef ADAPTIVE_RWLOCKS 767157882Sjhb if (ts == NULL) 768157882Sjhb v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS); 769157882Sjhb else if (v & RW_LOCK_READ_WAITERS && 770157882Sjhb turnstile_empty(ts, TS_SHARED_QUEUE)) 771157882Sjhb v &= ~RW_LOCK_READ_WAITERS; 772157882Sjhb else if (v & RW_LOCK_WRITE_WAITERS && 773157882Sjhb turnstile_empty(ts, TS_EXCLUSIVE_QUEUE)) 774157882Sjhb v &= ~RW_LOCK_WRITE_WAITERS; 775157882Sjhb#else 776157882Sjhb MPASS(ts != NULL); 777157882Sjhb#endif 778157882Sjhb if (v & RW_LOCK_READ_WAITERS) 779157882Sjhb turnstile_broadcast(ts, TS_SHARED_QUEUE); 780157882Sjhb atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | 781157882Sjhb (v & RW_LOCK_WRITE_WAITERS)); 782157882Sjhb if (v & RW_LOCK_READ_WAITERS) 783157882Sjhb turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); 784167801Sjhb#ifdef ADAPTIVE_RWLOCKS 785157882Sjhb else if (ts == NULL) 786167787Sjhb turnstile_release(&rw->lock_object); 787157882Sjhb#endif 788157882Sjhb else 789157882Sjhb turnstile_disown(ts); 790157882Sjhbout: 791167787Sjhb LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); 792157882Sjhb} 793157882Sjhb 794154941Sjhb#ifdef INVARIANT_SUPPORT 795155162Sscottl#ifndef INVARIANTS 796154941Sjhb#undef _rw_assert 797154941Sjhb#endif 798154941Sjhb 799154941Sjhb/* 800154941Sjhb * In the non-WITNESS case, rw_assert() can only detect that at least 801154941Sjhb * *some* thread owns an rlock, but it cannot guarantee that *this* 802154941Sjhb * thread owns an rlock. 
803154941Sjhb */ 804154941Sjhbvoid 805154941Sjhb_rw_assert(struct rwlock *rw, int what, const char *file, int line) 806154941Sjhb{ 807154941Sjhb 808154941Sjhb if (panicstr != NULL) 809154941Sjhb return; 810154941Sjhb switch (what) { 811154941Sjhb case RA_LOCKED: 812167368Sjhb case RA_LOCKED | LA_NOTRECURSED: 813154941Sjhb case RA_RLOCKED: 814154941Sjhb#ifdef WITNESS 815167787Sjhb witness_assert(&rw->lock_object, what, file, line); 816154941Sjhb#else 817154941Sjhb /* 818154941Sjhb * If some other thread has a write lock or we have one 819154941Sjhb * and are asserting a read lock, fail. Also, if no one 820154941Sjhb * has a lock at all, fail. 821154941Sjhb */ 822155061Sscottl if (rw->rw_lock == RW_UNLOCKED || 823155061Sscottl (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED || 824157826Sjhb rw_wowner(rw) != curthread))) 825154941Sjhb panic("Lock %s not %slocked @ %s:%d\n", 826167787Sjhb rw->lock_object.lo_name, (what == RA_RLOCKED) ? 827154941Sjhb "read " : "", file, line); 828154941Sjhb#endif 829154941Sjhb break; 830154941Sjhb case RA_WLOCKED: 831157826Sjhb if (rw_wowner(rw) != curthread) 832154941Sjhb panic("Lock %s not exclusively locked @ %s:%d\n", 833167787Sjhb rw->lock_object.lo_name, file, line); 834154941Sjhb break; 835154941Sjhb case RA_UNLOCKED: 836154941Sjhb#ifdef WITNESS 837167787Sjhb witness_assert(&rw->lock_object, what, file, line); 838154941Sjhb#else 839154941Sjhb /* 840154941Sjhb * If we hold a write lock fail. We can't reliably check 841154941Sjhb * to see if we hold a read lock or not. 
842154941Sjhb */ 843157826Sjhb if (rw_wowner(rw) == curthread) 844154941Sjhb panic("Lock %s exclusively locked @ %s:%d\n", 845167787Sjhb rw->lock_object.lo_name, file, line); 846154941Sjhb#endif 847154941Sjhb break; 848154941Sjhb default: 849154941Sjhb panic("Unknown rw lock assertion: %d @ %s:%d", what, file, 850154941Sjhb line); 851154941Sjhb } 852154941Sjhb} 853154941Sjhb#endif /* INVARIANT_SUPPORT */ 854154941Sjhb 855154941Sjhb#ifdef DDB 856154941Sjhbvoid 857154941Sjhbdb_show_rwlock(struct lock_object *lock) 858154941Sjhb{ 859154941Sjhb struct rwlock *rw; 860154941Sjhb struct thread *td; 861154941Sjhb 862154941Sjhb rw = (struct rwlock *)lock; 863154941Sjhb 864154941Sjhb db_printf(" state: "); 865154941Sjhb if (rw->rw_lock == RW_UNLOCKED) 866154941Sjhb db_printf("UNLOCKED\n"); 867169394Sjhb else if (rw->rw_lock == RW_DESTROYED) { 868169394Sjhb db_printf("DESTROYED\n"); 869169394Sjhb return; 870169394Sjhb } else if (rw->rw_lock & RW_LOCK_READ) 871167504Sjhb db_printf("RLOCK: %ju locks\n", 872167504Sjhb (uintmax_t)(RW_READERS(rw->rw_lock))); 873154941Sjhb else { 874157826Sjhb td = rw_wowner(rw); 875154941Sjhb db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, 876154941Sjhb td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm); 877154941Sjhb } 878154941Sjhb db_printf(" waiters: "); 879154941Sjhb switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { 880154941Sjhb case RW_LOCK_READ_WAITERS: 881154941Sjhb db_printf("readers\n"); 882154941Sjhb break; 883154941Sjhb case RW_LOCK_WRITE_WAITERS: 884154941Sjhb db_printf("writers\n"); 885154941Sjhb break; 886154941Sjhb case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: 887167492Sjhb db_printf("readers and writers\n"); 888154941Sjhb break; 889154941Sjhb default: 890154941Sjhb db_printf("none\n"); 891154941Sjhb break; 892154941Sjhb } 893154941Sjhb} 894154941Sjhb 895154941Sjhb#endif 896