kern_rwlock.c revision 193037
/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */
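
/*
 * Illustrative sketch of typical consumer usage of the rwlock(9) API
 * implemented in this file ("data_lock" is a hypothetical example):
 *
 *	struct rwlock data_lock;
 *
 *	rw_init(&data_lock, "example data lock");
 *	rw_rlock(&data_lock);		(shared, i.e. read, acquire)
 *	... read the shared data ...
 *	rw_runlock(&data_lock);
 *	rw_wlock(&data_lock);		(exclusive, i.e. write, acquire)
 *	... modify the shared data ...
 *	rw_wunlock(&data_lock);
 *	rw_destroy(&data_lock);
 *
 * The rw_*() names are the public macros from <sys/rwlock.h> wrapping the
 * _rw_*() functions defined below.
 */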

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 193037 2009-05-29 14:03:34Z jhb $");

#include "opt_ddb.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

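/*
 * The RW_RECURSE option bit is stored in the lock_object class-flag bits
 * (lo_flags), so it must fit within LO_CLASSFLAGS; the assertion below
 * guarantees that it does.
 */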
CTASSERT((RW_RECURSE & LO_CLASSFLAGS) == RW_RECURSE);

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef ADAPTIVE_RWLOCKS
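/*
 * Tunables for the adaptive spinning heuristics: rowner_retries bounds how
 * many times a contending thread retries spinning while the lock is held
 * by readers, and rowner_loops bounds the cpu_spinwait() iterations of
 * each such attempt.
 */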
static int rowner_retries = 10;
static int rowner_loops = 10000;
SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
#endif

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(struct lock_object *lock);
#endif
static void	assert_rw(struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

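/*
 * A brief summary of the rw_lock word encoding used by the macros below
 * (see <sys/rwlock.h> for the authoritative definitions): when the lock is
 * write-locked, rw_lock holds the owning thread pointer together with the
 * waiter flag bits; when it is read-locked or unlocked, RW_LOCK_READ is
 * set and the word carries the reader count and waiter flags.  An unlocked
 * lock is encoded as a read lock with zero readers and no waiters.
 */
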
/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Return true if the write owner is recursed.  Write ownership is not
 * assured here and should be checked by the caller beforehand.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

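/*
 * With INVARIANTS disabled, the _rw_assert() checks compile away to nothing.
 */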
#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

void
assert_rw(struct lock_object *lock, int what)
{

	rw_assert((struct rwlock *)lock, what);
}

void
lock_rw(struct lock_object *lock, int how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_wlock(rw);
	else
		rw_rlock(rw);
}

int
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (0);
	} else {
		rw_wunlock(rw);
		return (1);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_rw(struct lock_object *lock, struct thread **owner)
{
	struct rwlock *rw = (struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
rw_init_flags(struct rwlock *rw, const char *name, int opts)
{
	int flags;

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE)) == 0);

	flags = LO_UPGRADABLE | LO_RECURSABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	flags |= opts & RW_RECURSE;

	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
	KASSERT(rw->rw_recurse == 0, ("rw lock still recursed"));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

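/*
 * SYSINIT(9) helpers used to initialize rwlocks during boot, typically via
 * the RW_SYSINIT() family of macros.
 */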
void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
}

int
rw_wowned(struct rwlock *rw)
{

	return (rw_wowner(rw) == curthread);
}

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

int
_rw_try_wlock(struct rwlock *rw, const char *file, int line)
{
	int rval;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	if (rw_wlocked(rw) && (rw->lock_object.lo_flags & RW_RECURSE) != 0) {
		rw->rw_recurse++;
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
		    (uintptr_t)curthread);

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}
	return (rval);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);
	if (!rw_recursed(rw))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
	__rw_wunlock(rw, curthread, file, line);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read, to
 * prevent deadlock from reader recursion.  Also succeeds if the lock is
 * unlocked or read-locked and has no write waiters or spinners.  Failing
 * otherwise prioritizes writers over readers.
 */
#define	RW_CAN_READ(_rw)						\
    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
    RW_LOCK_READ)
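
/*
 * For example, RW_CAN_READ() is true for an unlocked lock (encoded as a
 * read lock with no waiters) and for a read-locked lock with no write
 * waiters or spinners; it is also true for any read-locked lock when
 * curthread already holds read locks (td_rw_rlocks != 0), which is what
 * keeps read recursion from deadlocking behind a waiting writer.
 */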

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	uintptr_t v;
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	for (;;) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if the lock has been unlocked and write waiters
			 * were present.
			 */
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
			    v + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)v,
					    (void *)(v + RW_ONE_READER));
				break;
			}
			continue;
		}
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
				    owner && TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				v = rw->rw_lock;
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
					break;
				cpu_spinwait();
			}
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present;
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set, then try
		 * to set it.  If we fail to set it, drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
	curthread->td_rw_rlocks++;
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

int
_rw_try_rlock(struct rwlock *rw, const char *file, int line)
{
	uintptr_t x;

	for (;;) {
		x = rw->rw_lock;
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			curthread->td_locks++;
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x, v, queue;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);
	curthread->td_locks--;
	curthread->td_rw_rlocks--;
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WAITERS)) {
			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_cmpset_ptr(&rw->rw_lock, x, RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}
		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock, leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs, letting the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		x = RW_UNLOCKED;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			x |= (v & RW_LOCK_READ_WAITERS);
		} else
			queue = TS_SHARED_QUEUE;
		if (!atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
		    x)) {
			turnstile_chain_unlock(&rw->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		turnstile_chain_unlock(&rw->lock_object);
		break;
	}
	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
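/*
 * Note that _rw_wlock() above takes the fast path through the __rw_wlock()
 * macro, which attempts a single atomic compare-and-set from RW_UNLOCKED to
 * the owning thread pointer and falls back to this function when that
 * fails; the retry loop below re-attempts the same _rw_write_lock() step.
 */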
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
	uintptr_t v, x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (rw_wlocked(rw)) {
		KASSERT(rw->lock_object.lo_flags & RW_RECURSE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		v = rw->rw_lock;
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner)) {
				cpu_spinwait();
#ifdef KDTRACE_HOOKS
				spin_cnt++;
#endif
			}
			continue;
		}
		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
		    spintries < rowner_retries) {
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
					break;
				cpu_spinwait();
			}
#ifdef KDTRACE_HOOKS
			spin_cnt += rowner_loops - i;
#endif
			if (i != rowner_loops)
				continue;
		}
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (!(v & RW_LOCK_READ)) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif
		/*
		 * Check the waiter flags on this rwlock.  If the lock was
		 * released without leaving any waiters queued, simply try
		 * to acquire it.  If a queue of pending waiters is present,
		 * claim lock ownership and preserve the pending queue.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
	}
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the two waiter bits must be set, indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	if (rw_wlocked(rw) && rw_recursed(rw)) {
		rw->rw_recurse--;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wakeup the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
	} else
		queue = TS_SHARED_QUEUE;

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
{
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
	}
	return (success);
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
_rw_downgrade(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
	struct rwlock *rw;
	struct thread *td;

	rw = (struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif
