/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 167365 2007-03-09 16:04:44Z jhb $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/turnstile.h>
#include <sys/lock_profile.h>
#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(struct lock_object *lock);
#endif

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
};

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Return a pointer to the thread that owns this lock and should receive
 * any priority lent by threads that block on it.  Currently this is
 * identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)
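
/*
 * Illustrative sketch of the lock word encoding these macros rely on
 * (the authoritative definitions live in sys/rwlock.h; the layout shown
 * here is a simplification, not a definitive reference):
 *
 *	unlocked:	RW_UNLOCKED, encoded as a read lock with no readers
 *	read-locked:	RW_LOCK_READ | (reader count) | waiter flags
 *	write-locked:	(uintptr_t)owner thread | waiter flags
 *
 * Since a struct thread is pointer-aligned, its low bits are free to
 * hold RW_LOCK_READ and the waiter flags; this is what lets rw_wowner()
 * recover the owner with a simple mask and lets readers be counted by
 * adding RW_ONE_READER to the word.
 */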

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

void
rw_init(struct rwlock *rw, const char *name)
{

	rw->rw_lock = RW_UNLOCKED;

	lock_profile_object_init(&rw->rw_object, &lock_class_rw, name);
	lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS |
	    LO_RECURSABLE | LO_UPGRADABLE);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
	lock_profile_object_destroy(&rw->rw_object);
	lock_destroy(&rw->rw_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}
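
/*
 * Example (an illustrative sketch only; the "foo" names are
 * hypothetical): a subsystem typically creates a lock with rw_init()
 * from its own initialization code, or has one set up automatically at
 * boot by pairing rw_sysinit() with a struct rw_args via the
 * RW_SYSINIT() macro from sys/rwlock.h:
 *
 *	static struct rwlock foo_lock;
 *	RW_SYSINIT(foo, &foo_lock, "foo lock");
 *
 * The matching rw_destroy() may only be called once the lock is
 * unlocked and no other thread can still reach it.
 */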

int
rw_wowned(struct rwlock *rw)
{

	return (rw_wowner(rw) == curthread);
}
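
/*
 * Example (hypothetical): rw_wowned() exists mainly to back assertions
 * in code paths that must be entered with the write lock held:
 *
 *	KASSERT(rw_wowned(&foo_lock), ("foo_update: foo_lock not held"));
 *
 * It can only report write ownership; since read holders are not
 * tracked, there is no equivalent check for a read lock held by
 * curthread.
 */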

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->rw_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	_rw_assert(rw, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line);
	lock_profile_release_lock(&rw->rw_object);
	__rw_wunlock(rw, curthread, file, line);
}

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
#ifdef SMP
	volatile struct thread *owner;
#endif
	uint64_t waittime = 0;
	int contested = 0;
	uintptr_t x;

	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->rw_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line);

	/*
	 * Note that we don't make any attempt to try to block read
	 * locks once a writer has blocked on the lock.  The reason is
	 * that we currently allow for read locks to recurse and we
	 * don't keep track of all the holders of read locks.  Thus, if
	 * we were to block readers once a writer blocked and a reader
	 * then tried to recurse on its read lock, we would deadlock:
	 * the reader would be blocked on the writer, and the writer
	 * would be blocked waiting for the reader to release its
	 * original read lock.
	 */
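	/*
	 * An illustrative interleaving of that deadlock (threads A and
	 * B are hypothetical):
	 *
	 *	A: rw_rlock(rw)		A holds one read lock
	 *	B: rw_wlock(rw)		B blocks waiting for A
	 *	A: rw_rlock(rw)		recursed read; if this had to
	 *				wait behind B, A and B would
	 *				each be waiting on the other
	 *				forever.
	 */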
	for (;;) {
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {

			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if another thread currently holds a write lock,
			 * and in that case RW_LOCK_READ should be clear.
			 */
			MPASS((x & RW_LOCK_READ_WAITERS) == 0);
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
			    x + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)x,
					    (void *)(x + RW_ONE_READER));
				if (RW_READERS(x) == 0)
					lock_profile_obtain_lock_success(
					    &rw->rw_object, contested, waittime,
					    file, line);
				break;
			}
			cpu_spinwait();
			continue;
		}
		lock_profile_obtain_lock_failed(&rw->rw_object, &contested,
		    &waittime);

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock, so acquire the turnstile lock so we can
		 * begin the process of blocking.
		 */
		turnstile_lock(&rw->rw_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if there is no
		 * longer a write lock.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
		 * flag is already set, then we can go ahead and block.  If
		 * it is not set then try to set it.  If we fail to set it
		 * drop the turnstile lock and restart the loop.
		 */
		if (!(x & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, x,
			    x | RW_LOCK_READ_WAITERS)) {
				turnstile_release(&rw->rw_object);
				cpu_spinwait();
				continue;
			}
			if (LOCK_LOG_TEST(&rw->rw_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

#ifdef SMP
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		owner = (struct thread *)RW_OWNER(x);
		if (TD_IS_RUNNING(owner)) {
			turnstile_release(&rw->rw_object);
			if (LOCK_LOG_TEST(&rw->rw_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner))
				cpu_spinwait();
			continue;
		}
#endif

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE);
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */

	LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->rw_object, 0, file, line);
	curthread->td_locks++;
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x;

	_rw_assert(rw, RA_RLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->rw_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}

		/*
		 * We should never have read waiters while at least one
		 * thread holds a read lock.  (See note above)
		 */
		KASSERT(!(x & RW_LOCK_READ_WAITERS),
		    ("%s: waiting readers", __func__));

		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WRITE_WAITERS)) {

			/*
			 * There shouldn't be any flags set and we should
			 * be the only read lock.  If we fail to release
			 * the single read lock, then another thread might
			 * have just acquired a read lock, so go back up
			 * to the multiple read locks case.
			 */
			MPASS(x == RW_READERS_LOCK(1));
			if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}

		/*
		 * There should just be one reader with one or more
		 * writers waiting.
		 */
		MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));

		/*
		 * Ok, we know we have a waiting writer and we think we
		 * are the last reader, so grab the turnstile lock.
		 */
		turnstile_lock(&rw->rw_object);
		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		if (!atomic_cmpset_ptr(&rw->rw_lock,
		    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
			turnstile_release(&rw->rw_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->rw_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		break;
	}
	lock_profile_release_lock(&rw->rw_object);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
#ifdef SMP
	volatile struct thread *owner;
#endif
	uintptr_t v;

	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->rw_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
		turnstile_lock(&rw->rw_object);
		v = rw->rw_lock;

		/*
		 * If the lock was released while spinning on the
		 * turnstile chain lock, try again.
		 */
		if (v == RW_UNLOCKED) {
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the lock was released by a writer with both readers
		 * and writers waiting and a reader hasn't woken up and
		 * acquired the lock yet, rw_lock will be set to the
		 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
		 * that value, try to acquire it once.  Note that we have
		 * to preserve the RW_LOCK_WRITE_WAITERS flag as there are
		 * other writers waiting still.  If we fail, restart the
		 * loop.
		 */
		if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
			if (atomic_cmpset_acq_ptr(&rw->rw_lock,
			    RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
			    tid | RW_LOCK_WRITE_WAITERS)) {
				turnstile_claim(&rw->rw_object);
				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
				    __func__, rw);
				break;
			}
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_release(&rw->rw_object);
				cpu_spinwait();
				continue;
			}
			if (LOCK_LOG_TEST(&rw->rw_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}

#ifdef SMP
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			turnstile_release(&rw->rw_object);
			if (LOCK_LOG_TEST(&rw->rw_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread *)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner))
				cpu_spinwait();
			continue;
		}
#endif

		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->rw_object, rw_owner(rw),
		    TS_EXCLUSIVE_QUEUE);
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_lock(&rw->rw_object);
	ts = turnstile_lookup(&rw->rw_object);

#ifdef SMP
	/*
	 * There might not be a turnstile for this lock if all of
	 * the waiters are adaptively spinning.  In that case, just
	 * reset the lock to the unlocked state and return.
	 */
	if (ts == NULL) {
		atomic_store_rel_ptr(&rw->rw_lock, RW_UNLOCKED);
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p no sleepers", __func__, rw);
		turnstile_release(&rw->rw_object);
		return;
	}
#else
	MPASS(ts != NULL);
#endif

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters over writers if we have both kinds.  This is probably not
	 * ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wake up the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 *
	 * Note that in the SMP case, if both flags are set, there might
	 * not be any actual writers on the turnstile as they might all
	 * be spinning.  In that case, we don't want to preserve the
	 * RW_LOCK_WRITE_WAITERS flag as the turnstile is going to go
	 * away once we wake up all the readers.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
		queue = TS_SHARED_QUEUE;
#ifdef SMP
		if (rw->rw_lock & RW_LOCK_WRITE_WAITERS &&
		    !turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
			v |= RW_LOCK_WRITE_WAITERS;
#else
		v |= (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
#endif
	} else
		queue = TS_EXCLUSIVE_QUEUE;

#ifdef SMP
	/*
	 * We have to make sure that we actually have waiters to
	 * wake up.  If they are all spinning, then we just need to
	 * disown the turnstile and return.
	 */
	if (turnstile_empty(ts, queue)) {
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p no sleepers 2", __func__, rw);
		atomic_store_rel_ptr(&rw->rw_lock, v);
		turnstile_disown(ts);
		return;
	}
#endif

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
{
	uintptr_t v, tid;
	int success;

	_rw_assert(rw, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	if (!(rw->rw_lock & RW_LOCK_WRITE_WAITERS)) {
		success = atomic_cmpset_acq_ptr(&rw->rw_lock,
		    RW_READERS_LOCK(1), tid);
		goto out;
	}

	/*
	 * Ok, we think we have write waiters, so lock the
	 * turnstile.
	 */
	turnstile_lock(&rw->rw_object);

	/*
	 * Try to switch from one reader to a writer again.  This time
	 * we honor the current state of the RW_LOCK_WRITE_WAITERS
	 * flag.  If we obtain the lock with the flag set, then claim
	 * ownership of the turnstile.  In the SMP case it is possible
	 * for there to not be an associated turnstile even though there
	 * are waiters if all of the waiters are spinning.
	 */
	v = rw->rw_lock & RW_LOCK_WRITE_WAITERS;
	success = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
	    tid | v);
#ifdef SMP
	if (success && v && turnstile_lookup(&rw->rw_object) != NULL)
#else
	if (success && v)
#endif
		turnstile_claim(&rw->rw_object);
	else
		turnstile_release(&rw->rw_object);
out:
	LOCK_LOG_TRY("WUPGRADE", &rw->rw_object, 0, success, file, line);
	if (success)
		WITNESS_UPGRADE(&rw->rw_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
	return (success);
}
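
/*
 * Example (an illustrative sketch; the "foo" names are hypothetical):
 * the usual pattern around rw_try_upgrade() is to fall back to dropping
 * the read lock and taking the write lock when the opportunistic
 * upgrade fails:
 *
 *	rw_rlock(&foo_lock);
 *	if (!rw_try_upgrade(&foo_lock)) {
 *		rw_runlock(&foo_lock);
 *		rw_wlock(&foo_lock);
 *		(revalidate anything read under the old read lock,
 *		 since the lock was dropped in between)
 *	}
 *	foo_modify(foo);
 *	rw_wunlock(&foo_lock);
 */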

/*
 * Downgrade a write lock into a single read lock.
 */
void
_rw_downgrade(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t tid, v;

	_rw_assert(rw, RA_WLOCKED, file, line);

	WITNESS_DOWNGRADE(&rw->rw_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile, "disown" the lock, and awaken any read
	 * waiters.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_lock(&rw->rw_object);
	v = rw->rw_lock;
	MPASS(v & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS));

	/*
	 * Downgrade from a write lock while preserving
	 * RW_LOCK_WRITE_WAITERS and give up ownership of the
	 * turnstile.  If there are any read waiters, wake them up.
	 *
	 * For SMP, we have to allow for the fact that all of the
	 * read waiters might be spinning.  In that case, act as if
	 * RW_LOCK_READ_WAITERS is not set.  Also, only preserve
	 * the RW_LOCK_WRITE_WAITERS flag if at least one writer is
	 * blocked on the turnstile.
	 */
	ts = turnstile_lookup(&rw->rw_object);
#ifdef SMP
	if (ts == NULL)
		v &= ~(RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS);
	else if (v & RW_LOCK_READ_WAITERS &&
	    turnstile_empty(ts, TS_SHARED_QUEUE))
		v &= ~RW_LOCK_READ_WAITERS;
	else if (v & RW_LOCK_WRITE_WAITERS &&
	    turnstile_empty(ts, TS_EXCLUSIVE_QUEUE))
		v &= ~RW_LOCK_WRITE_WAITERS;
#else
	MPASS(ts != NULL);
#endif
	if (v & RW_LOCK_READ_WAITERS)
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) |
	    (v & RW_LOCK_WRITE_WAITERS));
	if (v & RW_LOCK_READ_WAITERS)
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
#ifdef SMP
	else if (ts == NULL)
		turnstile_release(&rw->rw_object);
#endif
	else
		turnstile_disown(ts);
out:
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->rw_object, 0, 0, file, line);
}
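
/*
 * Example (an illustrative sketch; names are hypothetical): a writer
 * that wants to keep reading its update while letting other readers
 * back in can downgrade instead of unlocking:
 *
 *	rw_wlock(&foo_lock);
 *	foo_insert(foo);
 *	rw_downgrade(&foo_lock);
 *	foo_read_stuff(foo);	(other readers may run concurrently)
 *	rw_runlock(&foo_lock);
 */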

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->rw_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->rw_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);
#endif
		break;
	case RA_WLOCKED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->rw_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->rw_object, what, file, line);
#else
		/*
		 * If we hold a write lock fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->rw_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */
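
/*
 * Example (hypothetical sketch): consumers state their locking
 * protocol with the rw_assert() macro from sys/rwlock.h, typically at
 * the top of functions with locking requirements:
 *
 *	static void
 *	foo_modify(struct foo *foo)
 *	{
 *
 *		rw_assert(&foo_lock, RA_WLOCKED);
 *		...
 *	}
 *
 * As noted above, without WITNESS the RA_LOCKED and RA_RLOCKED cases
 * can only prove that *some* thread holds a read lock, not that the
 * asserting thread does.
 */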

#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
	struct rwlock *rw;
	struct thread *td;

	rw = (struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %jd locks\n",
		    (intmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}
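
/*
 * For example, for a write-locked lock with waiting readers the
 * function above emits output of the form (the pointer and thread
 * values here are hypothetical):
 *
 *	 state: WLOCK: 0xc4f36d80 (tid 100052, pid 42, "foo")
 *	 waiters: readers
 *
 * It is reached from the kernel debugger through this class's
 * lc_ddb_show hook, e.g. via DDB's "show lock" command.
 */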

#endif
841