kern_rwlock.c revision 157826
1/*- 2 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the author nor the names of any co-contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30/* 31 * Machine independent bits of reader/writer lock implementation. 32 */ 33 34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 157826 2006-04-17 21:11:01Z jhb $"); 36 37#include "opt_ddb.h" 38 39#include <sys/param.h> 40#include <sys/ktr.h> 41#include <sys/lock.h> 42#include <sys/mutex.h> 43#include <sys/proc.h> 44#include <sys/rwlock.h> 45#include <sys/systm.h> 46#include <sys/turnstile.h> 47 48#include <machine/cpu.h> 49 50#ifdef DDB 51#include <ddb/ddb.h> 52 53static void db_show_rwlock(struct lock_object *lock); 54#endif 55 56struct lock_class lock_class_rw = { 57 "rw", 58 LC_SLEEPLOCK | LC_RECURSABLE /* | LC_UPGRADABLE */, 59#ifdef DDB 60 db_show_rwlock 61#endif 62}; 63 64/* 65 * Return a pointer to the owning thread if the lock is write-locked or 66 * NULL if the lock is unlocked or read-locked. 67 */ 68#define rw_wowner(rw) \ 69 ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ 70 (struct thread *)RW_OWNER((rw)->rw_lock)) 71 72/* 73 * Return a pointer to the owning thread for this lock who should receive 74 * any priority lent by threads that block on this lock. Currently this 75 * is identical to rw_wowner(). 76 */ 77#define rw_owner(rw) rw_wowner(rw) 78 79#ifndef INVARIANTS 80#define _rw_assert(rw, what, file, line) 81#endif 82 83void 84rw_init(struct rwlock *rw, const char *name) 85{ 86 87 rw->rw_lock = RW_UNLOCKED; 88 89 lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS | 90 LO_RECURSABLE /* | LO_UPGRADABLE */); 91} 92 93void 94rw_destroy(struct rwlock *rw) 95{ 96 97 KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked")); 98 lock_destroy(&rw->rw_object); 99} 100 101void 102rw_sysinit(void *arg) 103{ 104 struct rw_args *args = arg; 105 106 rw_init(args->ra_rw, args->ra_desc); 107} 108 109void 110_rw_wlock(struct rwlock *rw, const char *file, int line) 111{ 112 113 MPASS(curthread != NULL); 114 KASSERT(rw_wowner(rw) != curthread, 115 ("%s (%s): wlock already held @ %s:%d", __func__, 116 rw->rw_object.lo_name, file, line)); 117 WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, 118 line); 119 __rw_wlock(rw, curthread, file, line); 120 LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line); 121 WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line); 122} 123 124void 125_rw_wunlock(struct rwlock *rw, const char *file, int line) 126{ 127 128 MPASS(curthread != NULL); 129 _rw_assert(rw, RA_WLOCKED, file, line); 130 WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line); 131 LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line); 132 __rw_wunlock(rw, curthread, file, line); 133} 134 135void 136_rw_rlock(struct rwlock *rw, const char *file, int line) 137{ 138 uintptr_t x; 139 140 KASSERT(rw_wowner(rw) != curthread, 141 ("%s (%s): wlock already held @ %s:%d", __func__, 142 rw->rw_object.lo_name, file, line)); 143 WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line); 144 145 /* 146 * Note that we don't make any attempt to try to block read 147 * locks once a writer has blocked on the lock. The reason is 148 * that we currently allow for read locks to recurse and we 149 * don't keep track of all the holders of read locks. Thus, if 150 * we were to block readers once a writer blocked and a reader 151 * tried to recurse on their reader lock after a writer had 152 * blocked we would end up in a deadlock since the reader would 153 * be blocked on the writer, and the writer would be blocked 154 * waiting for the reader to release its original read lock. 155 */ 156 for (;;) { 157 /* 158 * Handle the easy case. If no other thread has a write 159 * lock, then try to bump up the count of read locks. Note 160 * that we have to preserve the current state of the 161 * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a 162 * read lock, then rw_lock must have changed, so restart 163 * the loop. Note that this handles the case of a 164 * completely unlocked rwlock since such a lock is encoded 165 * as a read lock with no waiters. 166 */ 167 x = rw->rw_lock; 168 if (x & RW_LOCK_READ) { 169 170 /* 171 * The RW_LOCK_READ_WAITERS flag should only be set 172 * if another thread currently holds a write lock, 173 * and in that case RW_LOCK_READ should be clear. 174 */ 175 MPASS((x & RW_LOCK_READ_WAITERS) == 0); 176 if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, 177 x + RW_ONE_READER)) { 178 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 179 CTR4(KTR_LOCK, 180 "%s: %p succeed %p -> %p", __func__, 181 rw, (void *)x, 182 (void *)(x + RW_ONE_READER)); 183 break; 184 } 185 continue; 186 } 187 188 /* 189 * Okay, now it's the hard case. Some other thread already 190 * has a write lock, so acquire the turnstile lock so we can 191 * begin the process of blocking. 192 */ 193 turnstile_lock(&rw->rw_object); 194 195 /* 196 * The lock might have been released while we spun, so 197 * recheck its state and restart the loop if there is no 198 * longer a write lock. 199 */ 200 x = rw->rw_lock; 201 if (x & RW_LOCK_READ) { 202 turnstile_release(&rw->rw_object); 203 continue; 204 } 205 206 /* 207 * Ok, it's still a write lock. If the RW_LOCK_READ_WAITERS 208 * flag is already set, then we can go ahead and block. If 209 * it is not set then try to set it. If we fail to set it 210 * drop the turnstile lock and restart the loop. 211 */ 212 if (!(x & RW_LOCK_READ_WAITERS)) { 213 if (!atomic_cmpset_ptr(&rw->rw_lock, x, 214 x | RW_LOCK_READ_WAITERS)) { 215 turnstile_release(&rw->rw_object); 216 cpu_spinwait(); 217 continue; 218 } 219 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 220 CTR2(KTR_LOCK, "%s: %p set read waiters flag", 221 __func__, rw); 222 } 223 224 /* 225 * We were unable to acquire the lock and the read waiters 226 * flag is set, so we must block on the turnstile. 227 */ 228 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 229 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 230 rw); 231 turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE); 232 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 233 CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 234 __func__, rw); 235 } 236 237 /* 238 * TODO: acquire "owner of record" here. Here be turnstile dragons 239 * however. turnstiles don't like owners changing between calls to 240 * turnstile_wait() currently. 241 */ 242 243 LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line); 244 WITNESS_LOCK(&rw->rw_object, 0, file, line); 245} 246 247void 248_rw_runlock(struct rwlock *rw, const char *file, int line) 249{ 250 struct turnstile *ts; 251 uintptr_t x; 252 253 _rw_assert(rw, RA_RLOCKED, file, line); 254 WITNESS_UNLOCK(&rw->rw_object, 0, file, line); 255 LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line); 256 257 /* TODO: drop "owner of record" here. */ 258 259 for (;;) { 260 /* 261 * See if there is more than one read lock held. If so, 262 * just drop one and return. 263 */ 264 x = rw->rw_lock; 265 if (RW_READERS(x) > 1) { 266 if (atomic_cmpset_ptr(&rw->rw_lock, x, 267 x - RW_ONE_READER)) { 268 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 269 CTR4(KTR_LOCK, 270 "%s: %p succeeded %p -> %p", 271 __func__, rw, (void *)x, 272 (void *)(x - RW_ONE_READER)); 273 break; 274 } 275 continue; 276 } 277 278 /* 279 * We should never have read waiters while at least one 280 * thread holds a read lock. (See note above) 281 */ 282 KASSERT(!(x & RW_LOCK_READ_WAITERS), 283 ("%s: waiting readers", __func__)); 284 285 /* 286 * If there aren't any waiters for a write lock, then try 287 * to drop it quickly. 288 */ 289 if (!(x & RW_LOCK_WRITE_WAITERS)) { 290 291 /* 292 * There shouldn't be any flags set and we should 293 * be the only read lock. If we fail to release 294 * the single read lock, then another thread might 295 * have just acquired a read lock, so go back up 296 * to the multiple read locks case. 297 */ 298 MPASS(x == RW_READERS_LOCK(1)); 299 if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1), 300 RW_UNLOCKED)) { 301 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 302 CTR2(KTR_LOCK, "%s: %p last succeeded", 303 __func__, rw); 304 break; 305 } 306 continue; 307 } 308 309 /* 310 * There should just be one reader with one or more 311 * writers waiting. 312 */ 313 MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS)); 314 315 /* 316 * Ok, we know we have a waiting writer and we think we 317 * are the last reader, so grab the turnstile lock. 318 */ 319 turnstile_lock(&rw->rw_object); 320 321 /* 322 * Try to drop our lock leaving the lock in a unlocked 323 * state. 324 * 325 * If you wanted to do explicit lock handoff you'd have to 326 * do it here. You'd also want to use turnstile_signal() 327 * and you'd have to handle the race where a higher 328 * priority thread blocks on the write lock before the 329 * thread you wakeup actually runs and have the new thread 330 * "steal" the lock. For now it's a lot simpler to just 331 * wakeup all of the waiters. 332 * 333 * As above, if we fail, then another thread might have 334 * acquired a read lock, so drop the turnstile lock and 335 * restart. 336 */ 337 if (!atomic_cmpset_ptr(&rw->rw_lock, 338 RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) { 339 turnstile_release(&rw->rw_object); 340 continue; 341 } 342 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 343 CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", 344 __func__, rw); 345 346 /* 347 * Ok. The lock is released and all that's left is to 348 * wake up the waiters. Note that the lock might not be 349 * free anymore, but in that case the writers will just 350 * block again if they run before the new lock holder(s) 351 * release the lock. 352 */ 353 ts = turnstile_lookup(&rw->rw_object); 354 turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); 355 turnstile_unpend(ts, TS_SHARED_LOCK); 356 break; 357 } 358} 359 360/* 361 * This function is called when we are unable to obtain a write lock on the 362 * first try. This means that at least one other thread holds either a 363 * read or write lock. 364 */ 365void 366_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 367{ 368 uintptr_t v; 369 370 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 371 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, 372 rw->rw_object.lo_name, (void *)rw->rw_lock, file, line); 373 374 while (!_rw_write_lock(rw, tid)) { 375 turnstile_lock(&rw->rw_object); 376 v = rw->rw_lock; 377 378 /* 379 * If the lock was released while spinning on the 380 * turnstile chain lock, try again. 381 */ 382 if (v == RW_UNLOCKED) { 383 turnstile_release(&rw->rw_object); 384 cpu_spinwait(); 385 continue; 386 } 387 388 /* 389 * If the lock was released by a writer with both readers 390 * and writers waiting and a reader hasn't woken up and 391 * acquired the lock yet, rw_lock will be set to the 392 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS. If we see 393 * that value, try to acquire it once. Note that we have 394 * to preserve the RW_LOCK_WRITE_WAITERS flag as there are 395 * other writers waiting still. If we fail, restart the 396 * loop. 397 */ 398 if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) { 399 if (atomic_cmpset_acq_ptr(&rw->rw_lock, 400 RW_UNLOCKED | RW_LOCK_WRITE_WAITERS, 401 tid | RW_LOCK_WRITE_WAITERS)) { 402 turnstile_claim(&rw->rw_object); 403 CTR2(KTR_LOCK, "%s: %p claimed by new writer", 404 __func__, rw); 405 break; 406 } 407 turnstile_release(&rw->rw_object); 408 cpu_spinwait(); 409 continue; 410 } 411 412 /* 413 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to 414 * set it. If we fail to set it, then loop back and try 415 * again. 416 */ 417 if (!(v & RW_LOCK_WRITE_WAITERS)) { 418 if (!atomic_cmpset_ptr(&rw->rw_lock, v, 419 v | RW_LOCK_WRITE_WAITERS)) { 420 turnstile_release(&rw->rw_object); 421 cpu_spinwait(); 422 continue; 423 } 424 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 425 CTR2(KTR_LOCK, "%s: %p set write waiters flag", 426 __func__, rw); 427 } 428 429 /* XXX: Adaptively spin if current wlock owner on another CPU? */ 430 431 /* 432 * We were unable to acquire the lock and the write waiters 433 * flag is set, so we must block on the turnstile. 434 */ 435 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 436 CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, 437 rw); 438 turnstile_wait(&rw->rw_object, rw_owner(rw), 439 TS_EXCLUSIVE_QUEUE); 440 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 441 CTR2(KTR_LOCK, "%s: %p resuming from turnstile", 442 __func__, rw); 443 } 444} 445 446/* 447 * This function is called if the first try at releasing a write lock failed. 448 * This means that one of the 2 waiter bits must be set indicating that at 449 * least one thread is waiting on this lock. 450 */ 451void 452_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line) 453{ 454 struct turnstile *ts; 455 uintptr_t v; 456 int queue; 457 458 KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), 459 ("%s: neither of the waiter flags are set", __func__)); 460 461 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 462 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); 463 464 turnstile_lock(&rw->rw_object); 465 ts = turnstile_lookup(&rw->rw_object); 466 467 /* XXX: Adaptive fixup would be required here. */ 468 MPASS(ts != NULL); 469 470 /* 471 * Use the same algo as sx locks for now. Prefer waking up shared 472 * waiters if we have any over writers. This is probably not ideal. 473 * 474 * 'v' is the value we are going to write back to rw_lock. If we 475 * have waiters on both queues, we need to preserve the state of 476 * the waiter flag for the queue we don't wake up. For now this is 477 * hardcoded for the algorithm mentioned above. 478 * 479 * In the case of both readers and writers waiting we wakeup the 480 * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a 481 * new writer comes in before a reader it will claim the lock up 482 * above. There is probably a potential priority inversion in 483 * there that could be worked around either by waking both queues 484 * of waiters or doing some complicated lock handoff gymnastics. 485 */ 486 if (rw->rw_lock & RW_LOCK_READ_WAITERS) { 487 queue = TS_SHARED_QUEUE; 488 v = RW_UNLOCKED | (rw->rw_lock & RW_LOCK_WRITE_WAITERS); 489 } else { 490 queue = TS_EXCLUSIVE_QUEUE; 491 v = RW_UNLOCKED; 492 } 493 if (LOCK_LOG_TEST(&rw->rw_object, 0)) 494 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, 495 queue == TS_SHARED_QUEUE ? "read" : "write"); 496 497 /* Wake up all waiters for the specific queue. */ 498 turnstile_broadcast(ts, queue); 499 atomic_store_rel_ptr(&rw->rw_lock, v); 500 turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); 501} 502 503#ifdef INVARIANT_SUPPORT 504#ifndef INVARIANTS 505#undef _rw_assert 506#endif 507 508/* 509 * In the non-WITNESS case, rw_assert() can only detect that at least 510 * *some* thread owns an rlock, but it cannot guarantee that *this* 511 * thread owns an rlock. 512 */ 513void 514_rw_assert(struct rwlock *rw, int what, const char *file, int line) 515{ 516 517 if (panicstr != NULL) 518 return; 519 switch (what) { 520 case RA_LOCKED: 521 case RA_RLOCKED: 522#ifdef WITNESS 523 witness_assert(&rw->rw_object, what, file, line); 524#else 525 /* 526 * If some other thread has a write lock or we have one 527 * and are asserting a read lock, fail. Also, if no one 528 * has a lock at all, fail. 529 */ 530 if (rw->rw_lock == RW_UNLOCKED || 531 (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED || 532 rw_wowner(rw) != curthread))) 533 panic("Lock %s not %slocked @ %s:%d\n", 534 rw->rw_object.lo_name, (what == RA_RLOCKED) ? 535 "read " : "", file, line); 536#endif 537 break; 538 case RA_WLOCKED: 539 if (rw_wowner(rw) != curthread) 540 panic("Lock %s not exclusively locked @ %s:%d\n", 541 rw->rw_object.lo_name, file, line); 542 break; 543 case RA_UNLOCKED: 544#ifdef WITNESS 545 witness_assert(&rw->rw_object, what, file, line); 546#else 547 /* 548 * If we hold a write lock fail. We can't reliably check 549 * to see if we hold a read lock or not. 550 */ 551 if (rw_wowner(rw) == curthread) 552 panic("Lock %s exclusively locked @ %s:%d\n", 553 rw->rw_object.lo_name, file, line); 554#endif 555 break; 556 default: 557 panic("Unknown rw lock assertion: %d @ %s:%d", what, file, 558 line); 559 } 560} 561#endif /* INVARIANT_SUPPORT */ 562 563#ifdef DDB 564void 565db_show_rwlock(struct lock_object *lock) 566{ 567 struct rwlock *rw; 568 struct thread *td; 569 570 rw = (struct rwlock *)lock; 571 572 db_printf(" state: "); 573 if (rw->rw_lock == RW_UNLOCKED) 574 db_printf("UNLOCKED\n"); 575 else if (rw->rw_lock & RW_LOCK_READ) 576 db_printf("RLOCK: %jd locks\n", 577 (intmax_t)(RW_READERS(rw->rw_lock))); 578 else { 579 td = rw_wowner(rw); 580 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, 581 td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm); 582 } 583 db_printf(" waiters: "); 584 switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { 585 case RW_LOCK_READ_WAITERS: 586 db_printf("readers\n"); 587 break; 588 case RW_LOCK_WRITE_WAITERS: 589 db_printf("writers\n"); 590 break; 591 case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: 592 db_printf("readers and waiters\n"); 593 break; 594 default: 595 db_printf("none\n"); 596 break; 597 } 598} 599 600#endif 601