kern_rwlock.c revision 192853
/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 192853 2009-05-26 20:28:22Z sson $");

#include "opt_ddb.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

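/*
 * The RW_RECURSE option is stored in the lock_object's flags, so it must
 * fit within the bits reserved for lock class flags.
 */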
CTASSERT((RW_RECURSE & LO_CLASSFLAGS) == RW_RECURSE);

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef ADAPTIVE_RWLOCKS
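/*
 * rowner_retries bounds how many rounds of adaptive spinning are attempted
 * while the lock is read-held; rowner_loops is the number of iterations in
 * each round.  Both may be tuned through the debug.rwlock sysctl tree below.
 */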
static int rowner_retries = 10;
static int rowner_loops = 10000;
SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
#endif

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(struct lock_object *lock);
#endif
static void	assert_rw(struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Returns whether a write owner is recursed.  Write ownership is not
 * assured here and should be checked by the caller beforehand.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif

void
assert_rw(struct lock_object *lock, int what)
{

	rw_assert((struct rwlock *)lock, what);
}

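/*
 * Lock class lock/unlock methods: unlock_rw() releases whichever lock is
 * held and returns 1 for a write lock or 0 for a read lock; lock_rw()
 * reacquires the lock, taking that value back as 'how'.
 */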
void
lock_rw(struct lock_object *lock, int how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_wlock(rw);
	else
		rw_rlock(rw);
}

int
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (0);
	} else {
		rw_wunlock(rw);
		return (1);
	}
}

#ifdef KDTRACE_HOOKS
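/*
 * lc_owner method: report the owning writer (if any) via *owner and
 * return nonzero if the lock is currently held at all.
 */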
int
owner_rw(struct lock_object *lock, struct thread **owner)
{
	struct rwlock *rw = (struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
rw_init_flags(struct rwlock *rw, const char *name, int opts)
{
	int flags;

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE)) == 0);

	flags = LO_UPGRADABLE | LO_RECURSABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	flags |= opts & RW_RECURSE;

	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
	KASSERT(rw->rw_recurse == 0, ("rw lock still recursed"));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
}

int
rw_wowned(struct rwlock *rw)
{

	return (rw_wowner(rw) == curthread);
}

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

int
_rw_try_wlock(struct rwlock *rw, const char *file, int line)
{
	int rval;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	if (rw_wlocked(rw) && (rw->lock_object.lo_flags & RW_RECURSE) != 0) {
		rw->rw_recurse++;
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
		    (uintptr_t)curthread);

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}
	return (rval);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);
	if (!rw_recursed(rw))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
	__rw_wunlock(rw, curthread, file, line);
}
/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing otherwise
 * prioritizes writers before readers.
 */
#define	RW_CAN_READ(_rw)						\
    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
    RW_LOCK_READ)

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	uintptr_t v;
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	for (;;) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if the lock has been unlocked and write waiters
			 * were present.
			 */
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
			    v + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)v,
					    (void *)(v + RW_ONE_READER));
				break;
			}
			cpu_spinwait();
			continue;
		}
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
				    owner && TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				continue;
			}
		} else if (spintries < rowner_retries) {
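			/*
			 * The lock is read-held, but write waiters or a
			 * spinning writer currently keep new readers out;
			 * spin for a while in the hope that the lock
			 * becomes readable again before blocking.
			 */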
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				v = rw->rw_lock;
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
					break;
				cpu_spinwait();
			}
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present,
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			turnstile_cancel(ts);
			cpu_spinwait();
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the current owner of the lock is executing on another
		 * CPU quit the hard path and try to spin.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				cpu_spinwait();
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				cpu_spinwait();
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
	curthread->td_rw_rlocks++;
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

int
_rw_try_rlock(struct rwlock *rw, const char *file, int line)
{
	uintptr_t x;

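	/*
	 * Note that, unlike rw_rlock(), the try operation only checks that
	 * the lock is not write-locked; it can succeed even if writers are
	 * waiting.
	 */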
	for (;;) {
		x = rw->rw_lock;
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			curthread->td_locks++;
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x, v, queue;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);
	curthread->td_locks--;
	curthread->td_rw_rlocks--;
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WAITERS)) {
			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_cmpset_ptr(&rw->rw_lock, x, RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}
		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock, leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
585154941Sjhb		 * "steal" the lock.  For now it's a lot simpler to just
586154941Sjhb		 * wakeup all of the waiters.
587154941Sjhb		 *
588154941Sjhb		 * As above, if we fail, then another thread might have
589154941Sjhb		 * acquired a read lock, so drop the turnstile lock and
590154941Sjhb		 * restart.
591154941Sjhb		 */
592176017Sjeff		x = RW_UNLOCKED;
593176017Sjeff		if (v & RW_LOCK_WRITE_WAITERS) {
594176017Sjeff			queue = TS_EXCLUSIVE_QUEUE;
595176017Sjeff			x |= (v & RW_LOCK_READ_WAITERS);
596176017Sjeff		} else
597176017Sjeff			queue = TS_SHARED_QUEUE;
598176017Sjeff		if (!atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
599176017Sjeff		    x)) {
600170295Sjeff			turnstile_chain_unlock(&rw->lock_object);
601154941Sjhb			continue;
602154941Sjhb		}
603167787Sjhb		if (LOCK_LOG_TEST(&rw->lock_object, 0))
604154941Sjhb			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
605154941Sjhb			    __func__, rw);
606154941Sjhb
607154941Sjhb		/*
608154941Sjhb		 * Ok.  The lock is released and all that's left is to
609154941Sjhb		 * wake up the waiters.  Note that the lock might not be
610154941Sjhb		 * free anymore, but in that case the writers will just
611154941Sjhb		 * block again if they run before the new lock holder(s)
612154941Sjhb		 * release the lock.
613154941Sjhb		 */
614167787Sjhb		ts = turnstile_lookup(&rw->lock_object);
615157846Sjhb		MPASS(ts != NULL);
616176017Sjeff		turnstile_broadcast(ts, queue);
617154941Sjhb		turnstile_unpend(ts, TS_SHARED_LOCK);
618170295Sjeff		turnstile_chain_unlock(&rw->lock_object);
619154941Sjhb		break;
620154941Sjhb	}
621192853Ssson	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
622154941Sjhb}
623154941Sjhb
624154941Sjhb/*
625154941Sjhb * This function is called when we are unable to obtain a write lock on the
626154941Sjhb * first try.  This means that at least one other thread holds either a
627154941Sjhb * read or write lock.
628154941Sjhb */
629154941Sjhbvoid
630154941Sjhb_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
631154941Sjhb{
632170295Sjeff	struct turnstile *ts;
633167801Sjhb#ifdef ADAPTIVE_RWLOCKS
634157846Sjhb	volatile struct thread *owner;
635176017Sjeff	int spintries = 0;
636176017Sjeff	int i;
637157851Swkoszek#endif
638189846Sjeff	uintptr_t v, x;
639189846Sjeff#ifdef LOCK_PROFILING
640171516Sattilio	uint64_t waittime = 0;
641171516Sattilio	int contested = 0;
642189846Sjeff#endif
643192853Ssson#ifdef KDTRACE_HOOKS
644192853Ssson	uint64_t spin_cnt = 0;
645192853Ssson	uint64_t sleep_cnt = 0;
646192853Ssson	int64_t sleep_time = 0;
647192853Ssson#endif
648154941Sjhb
649171052Sattilio	if (rw_wlocked(rw)) {
650171052Sattilio		KASSERT(rw->lock_object.lo_flags & RW_RECURSE,
651171052Sattilio		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
652171052Sattilio		    __func__, rw->lock_object.lo_name, file, line));
653171052Sattilio		rw->rw_recurse++;
654171052Sattilio		if (LOCK_LOG_TEST(&rw->lock_object, 0))
655171052Sattilio			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
656171052Sattilio		return;
657171052Sattilio	}
658171052Sattilio
659167787Sjhb	if (LOCK_LOG_TEST(&rw->lock_object, 0))
660154941Sjhb		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
661167787Sjhb		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
662154941Sjhb
663154941Sjhb	while (!_rw_write_lock(rw, tid)) {
664192853Ssson#ifdef KDTRACE_HOOKS
665192853Ssson		spin_cnt++;
666192853Ssson#endif
667174629Sjeff		lock_profile_obtain_lock_failed(&rw->lock_object,
668174629Sjeff		    &contested, &waittime);
669173960Sattilio#ifdef ADAPTIVE_RWLOCKS
670173960Sattilio		/*
671173960Sattilio		 * If the lock is write locked and the owner is
672173960Sattilio		 * running on another CPU, spin until the owner stops
673173960Sattilio		 * running or the state of the lock changes.
674173960Sattilio		 */
675173960Sattilio		v = rw->rw_lock;
676173960Sattilio		owner = (struct thread *)RW_OWNER(v);
677173960Sattilio		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
678173960Sattilio			if (LOCK_LOG_TEST(&rw->lock_object, 0))
679173960Sattilio				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
680173960Sattilio				    __func__, rw, owner);
681173960Sattilio			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
682192853Ssson			    TD_IS_RUNNING(owner)) {
683173960Sattilio				cpu_spinwait();
684192853Ssson#ifdef KDTRACE_HOOKS
685192853Ssson				spin_cnt++;
686192853Ssson#endif
687192853Ssson			}
688173960Sattilio			continue;
689173960Sattilio		}
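		/*
		 * If the lock is read-held, advertise this writer by
		 * setting RW_LOCK_WRITE_SPINNER and spin for a while in
		 * the hope that the readers drain.
		 */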
690177912Sjeff		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
691177912Sjeff		    spintries < rowner_retries) {
692176017Sjeff			if (!(v & RW_LOCK_WRITE_SPINNER)) {
693176017Sjeff				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
694176017Sjeff				    v | RW_LOCK_WRITE_SPINNER)) {
695176017Sjeff					cpu_spinwait();
696176017Sjeff					continue;
697176017Sjeff				}
698176017Sjeff			}
699176017Sjeff			spintries++;
700177912Sjeff			for (i = 0; i < rowner_loops; i++) {
701176017Sjeff				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
702176017Sjeff					break;
703176017Sjeff				cpu_spinwait();
704176017Sjeff			}
705192853Ssson#ifdef KDTRACE_HOOKS
706192853Ssson			spin_cnt += rowner_loops - i;
707192853Ssson#endif
708177912Sjeff			if (i != rowner_loops)
709176017Sjeff				continue;
710176017Sjeff		}
711173960Sattilio#endif
712170295Sjeff		ts = turnstile_trywait(&rw->lock_object);
713154941Sjhb		v = rw->rw_lock;
714154941Sjhb
715173960Sattilio#ifdef ADAPTIVE_RWLOCKS
716154941Sjhb		/*
717173960Sattilio		 * If the current owner of the lock is executing on another
718173960Sattilio		 * CPU quit the hard path and try to spin.
719173960Sattilio		 */
720173960Sattilio		if (!(v & RW_LOCK_READ)) {
721173960Sattilio			owner = (struct thread *)RW_OWNER(v);
722173960Sattilio			if (TD_IS_RUNNING(owner)) {
723173960Sattilio				turnstile_cancel(ts);
724173960Sattilio				cpu_spinwait();
725173960Sattilio				continue;
726173960Sattilio			}
727173960Sattilio		}
728173960Sattilio#endif
729173960Sattilio		/*
		 * Check the waiters flags on this rwlock.  If the lock was
		 * released without leaving any pending waiters queue, simply
		 * try to acquire it.  If a pending waiters queue is present,
		 * claim lock ownership and preserve the pending queue.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			cpu_spinwait();
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				cpu_spinwait();
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
	}
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	if (rw_wlocked(rw) && rw_recursed(rw)) {
		rw->rw_recurse--;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wakeup the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
	} else
		queue = TS_SHARED_QUEUE;

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
{
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
	}
	return (success);
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
_rw_downgrade(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
	struct rwlock *rw;
	struct thread *td;

	rw = (struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif