kern_rwlock.c revision 167787
/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 167787 2007-03-21 21:20:51Z jhb $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/turnstile.h>
#include <sys/lock_profile.h>
#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(struct lock_object *lock);
#endif
static void	lock_rw(struct lock_object *lock, int how);
static int	unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
};
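
/*
 * Usage sketch (illustration only, not compiled; see the rwlock(9) manual
 * page for the authoritative interface description).  A subsystem protects
 * shared data with one of these locks roughly as follows, where 'foo_lock'
 * and 'foo lock' are purely hypothetical names:
 *
 *	struct rwlock foo_lock;
 *
 *	rw_init(&foo_lock, "foo lock");
 *
 *	rw_rlock(&foo_lock);		read-only access to the data
 *	rw_runlock(&foo_lock);
 *
 *	rw_wlock(&foo_lock);		exclusive access to the data
 *	rw_wunlock(&foo_lock);
 *
 *	rw_destroy(&foo_lock);
 */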

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

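/*
 * Lock class glue: acquire the lock on behalf of generic code that only
 * has a struct lock_object.  A non-zero 'how' requests a write lock;
 * zero requests a read lock.
 */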
void
lock_rw(struct lock_object *lock, int how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_wlock(rw);
	else
		rw_rlock(rw);
}

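/*
 * Lock class glue: release the lock and return a 'how' value that lock_rw()
 * can later use to reacquire it in the same mode (0 for read, 1 for write).
 */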
int
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (0);
	} else {
		rw_wunlock(rw);
		return (1);
	}
}

void
rw_init(struct rwlock *rw, const char *name)
{

	rw->rw_lock = RW_UNLOCKED;

	lock_profile_object_init(&rw->lock_object, &lock_class_rw, name);
	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, LO_WITNESS |
	    LO_RECURSABLE | LO_UPGRADABLE);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
	lock_profile_object_destroy(&rw->lock_object);
	lock_destroy(&rw->lock_object);
}

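/*
 * SYSINIT callback used by the RW_SYSINIT() macro to initialize a lock
 * during boot from an rw_args structure.
 */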
void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}

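/*
 * Return non-zero if the current thread holds the write lock.
 */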
int
rw_wowned(struct rwlock *rw)
{

	return (rw_wowner(rw) == curthread);
}

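/*
 * Acquire an exclusive (write) lock.  The common uncontested case is
 * handled inline by the __rw_wlock() macro; contention is pushed out to
 * _rw_wlock_hard() below.
 */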
void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

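/*
 * Release an exclusive (write) lock.  The uncontested case is handled by
 * the __rw_wunlock() macro; if waiters are present it ends up in
 * _rw_wunlock_hard() below.
 */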
void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	_rw_assert(rw, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, 0, file, line);
	lock_profile_release_lock(&rw->lock_object);
	__rw_wunlock(rw, curthread, file, line);
}

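/*
 * Acquire a shared (read) lock.  Multiple readers may hold the lock at
 * once; if a writer owns it, block on the turnstile (or, in the SMP case,
 * adaptively spin while the owner is running on another CPU).
 */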
void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
#ifdef SMP
	volatile struct thread *owner;
#endif
	uint64_t waittime = 0;
	int contested = 0;
	uintptr_t x;

	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line);

	/*
	 * Note that we don't make any attempt to try to block read
	 * locks once a writer has blocked on the lock.  The reason is
	 * that we currently allow for read locks to recurse and we
	 * don't keep track of all the holders of read locks.  Thus, if
	 * we were to block readers once a writer blocked and a reader
	 * tried to recurse on their reader lock after a writer had
	 * blocked we would end up in a deadlock since the reader would
	 * be blocked on the writer, and the writer would be blocked
	 * waiting for the reader to release its original read lock.
	 */
	for (;;) {
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {

			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if another thread currently holds a write lock,
			 * and in that case RW_LOCK_READ should be clear.
			 */
			MPASS((x & RW_LOCK_READ_WAITERS) == 0);
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
			    x + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)x,
					    (void *)(x + RW_ONE_READER));
				if (RW_READERS(x) == 0)
					lock_profile_obtain_lock_success(
					    &rw->lock_object, contested, waittime,
					    file, line);
				break;
			}
			cpu_spinwait();
			continue;
		}
		lock_profile_obtain_lock_failed(&rw->lock_object, &contested,
		    &waittime);

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock, so acquire the turnstile lock so we can
		 * begin the process of blocking.
		 */
		turnstile_lock(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if there is no
		 * longer a write lock.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {
			turnstile_release(&rw->lock_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
		 * flag is already set, then we can go ahead and block.  If
		 * it is not set then try to set it.  If we fail to set it
		 * drop the turnstile lock and restart the loop.
		 */
		if (!(x & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, x,
			    x | RW_LOCK_READ_WAITERS)) {
				turnstile_release(&rw->lock_object);
				cpu_spinwait();
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

#ifdef SMP
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		owner = (struct thread *)RW_OWNER(x);
		if (TD_IS_RUNNING(owner)) {
			turnstile_release(&rw->lock_object);
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner))
				cpu_spinwait();
			continue;
		}
#endif

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->lock_object, rw_owner(rw), TS_SHARED_QUEUE);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */

	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
}

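/*
 * Release a shared (read) lock.  If this drops the last read lock and
 * writers are waiting, release the lock and wake them up.
 */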
void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x;

	_rw_assert(rw, RA_RLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}

		/*
		 * We should never have read waiters while at least one
		 * thread holds a read lock.  (See note above)
		 */
		KASSERT(!(x & RW_LOCK_READ_WAITERS),
		    ("%s: waiting readers", __func__));

		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WRITE_WAITERS)) {

			/*
			 * There shouldn't be any flags set and we should
			 * be the only read lock.  If we fail to release
			 * the single read lock, then another thread might
			 * have just acquired a read lock, so go back up
			 * to the multiple read locks case.
			 */
			MPASS(x == RW_READERS_LOCK(1));
			if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}

		/*
		 * There should just be one reader with one or more
		 * writers waiting.
		 */
		MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));

		/*
		 * Ok, we know we have a waiting writer and we think we
		 * are the last reader, so grab the turnstile lock.
		 */
		turnstile_lock(&rw->lock_object);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wakeup actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wakeup all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		if (!atomic_cmpset_ptr(&rw->rw_lock,
		    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
			turnstile_release(&rw->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		break;
	}
	lock_profile_release_lock(&rw->lock_object);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
#ifdef SMP
	volatile struct thread *owner;
#endif
	uintptr_t v;

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
		turnstile_lock(&rw->lock_object);
		v = rw->rw_lock;

		/*
		 * If the lock was released while spinning on the
		 * turnstile chain lock, try again.
		 */
		if (v == RW_UNLOCKED) {
			turnstile_release(&rw->lock_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the lock was released by a writer with both readers
		 * and writers waiting and a reader hasn't woken up and
		 * acquired the lock yet, rw_lock will be set to the
		 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
		 * that value, try to acquire it once.  Note that we have
		 * to preserve the RW_LOCK_WRITE_WAITERS flag as there are
		 * other writers waiting still. If we fail, restart the
		 * loop.
		 */
		if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
			if (atomic_cmpset_acq_ptr(&rw->rw_lock,
			    RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
			    tid | RW_LOCK_WRITE_WAITERS)) {
				turnstile_claim(&rw->lock_object);
				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
				    __func__, rw);
				break;
			}
			turnstile_release(&rw->lock_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_release(&rw->lock_object);
				cpu_spinwait();
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}

#ifdef SMP
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			turnstile_release(&rw->lock_object);
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner))
				cpu_spinwait();
			continue;
		}
#endif

		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->lock_object, rw_owner(rw),
		    TS_EXCLUSIVE_QUEUE);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);

#ifdef SMP
	/*
	 * There might not be a turnstile for this lock if all of
	 * the waiters are adaptively spinning.  In that case, just
	 * reset the lock to the unlocked state and return.
	 */
	if (ts == NULL) {
		atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw);
		turnstile_release(&rw->lock_object);
		return;
	}
#else
	MPASS(ts != NULL);
#endif

	/*
	 * Use the same algorithm as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wakeup the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 *
	 * Note that in the SMP case, if both flags are set, there might
	 * not be any actual writers on the turnstile as they might all
	 * be spinning.  In that case, we don't want to preserve the
	 * RW_LOCK_WRITE_WAITERS flag as the turnstile is going to go
	 * away once we wakeup all the readers.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
		queue = TS_SHARED_QUEUE;
#ifdef SMP
		if (rw->rw_lock & RW_LOCK_WRITE_WAITERS &&
		    !turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
			v |= RW_LOCK_WRITE_WAITERS;
#else
		v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
#endif
	} else
		queue = TS_EXCLUSIVE_QUEUE;

#ifdef SMP
	/*
	 * We have to make sure that we actually have waiters to
	 * wakeup.  If they are all spinning, then we just need to
	 * disown the turnstile and return.
	 */
	if (turnstile_empty(ts, queue)) {
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw);
		atomic_store_rel_ptr(&rw->rw_lock, v);
		turnstile_disown(ts);
		return;
	}
#endif

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
{
	uintptr_t v, tid;
	int success;

	_rw_assert(rw, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) {
		success = atomic_cmpset_acq_ptr(&rw->rw_lock,
		    RW_READERS_LOCK(1), tid);
		goto out;
	}

	/*
	 * Ok, we think we have write waiters, so lock the
	 * turnstile.
	 */
	turnstile_lock(&rw->lock_object);

	/*
	 * Try to switch from one reader to a writer again.  This time
	 * we honor the current state of the RW_LOCK_WRITE_WAITERS
	 * flag.  If we obtain the lock with the flag set, then claim
	 * ownership of the turnstile.  In the SMP case it is possible
	 * for there to not be an associated turnstile even though there
	 * are waiters if all of the waiters are spinning.
	 */
	v = rw->rw_lock & RW_LOCK_WRITE_WAITERS;
	success = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
	    tid | v);
#ifdef SMP
	if (success && v && turnstile_lookup(&rw->lock_object) != NULL)
#else
	if (success && v)
#endif
		turnstile_claim(&rw->lock_object);
	else
		turnstile_release(&rw->lock_object);
out:
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success)
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	return (success);
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
_rw_downgrade(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t tid, v;

	_rw_assert(rw, RA_WLOCKED, file, line);

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile, "disown" the lock, and awaken any read
	 * waiters.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_lock(&rw->lock_object);
	v = rw->rw_lock;
	MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS));

	/*
	 * Downgrade from a write lock while preserving
	 * RW_LOCK_WRITE_WAITERS and give up ownership of the
	 * turnstile.  If there are any read waiters, wake them up.
	 *
	 * For SMP, we have to allow for the fact that all of the
	 * read waiters might be spinning.  In that case, act as if
	 * RW_LOCK_READ_WAITERS is not set.  Also, only preserve
	 * the RW_LOCK_WRITE_WAITERS flag if at least one writer is
	 * blocked on the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
#ifdef SMP
	if (ts == NULL)
		v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS);
	else if (v & RW_LOCK_READ_WAITERS &&
	    turnstile_empty(ts, TS_SHARED_QUEUE))
		v &= ~RW_LOCK_READ_WAITERS;
	else if (v & RW_LOCK_WRITE_WAITERS &&
	    turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
		v &= ~RW_LOCK_WRITE_WAITERS;
#else
	MPASS(ts != NULL);
#endif
	if (v & RW_LOCK_READ_WAITERS)
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) |
	    (v & RW_LOCK_WRITE_WAITERS));
	if (v & RW_LOCK_READ_WAITERS)
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
#ifdef SMP
	else if (ts == NULL)
		turnstile_release(&rw->lock_object);
#endif
	else
		turnstile_disown(ts);
out:
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | LA_NOTRECURSED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);
#endif
		break;
	case RA_WLOCKED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
	struct rwlock *rw;
	struct thread *td;

	rw = (struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif