/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_rwlock.c 367457 2020-11-07 18:10:59Z dim $");

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock* has a member named rw_lock.
 */
#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};
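
/*
 * Illustrative consumer usage of the rw(9) API implemented in this file
 * (a sketch, not code from this file): a subsystem embeds a struct rwlock,
 * initializes it once, and then takes it shared for readers and exclusive
 * for writers, e.g.:
 *
 *	struct rwlock foo_lock;
 *
 *	rw_init(&foo_lock, "foo");
 *	rw_rlock(&foo_lock);   ... read-only access ...  rw_runlock(&foo_lock);
 *	rw_wlock(&foo_lock);   ... modifications ...     rw_wunlock(&foo_lock);
 */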

#ifdef ADAPTIVE_RWLOCKS
static int __read_frequently rowner_retries;
static int __read_frequently rowner_loops;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
    "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");

static struct lock_delay_config __read_frequently rw_delay;

SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
    0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
    0, "");

static void
rw_lock_delay_init(void *arg __unused)
{

	lock_delay_default_init(&rw_delay);
	rowner_retries = 10;
	rowner_loops = max(10000, rw_delay.max);
}
LOCK_DELAY_SYSINIT(rw_lock_delay_init);
#endif
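
/*
 * The adaptive-spin behaviour above is runtime tunable through the sysctls
 * declared with the variables.  For example (an illustrative command, not
 * part of this file), the number of spin retries made before blocking can
 * be raised with:
 *
 *	sysctl debug.rwlock.retry=20
 */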

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */

#define	lv_rw_wowner(v)							\
	((v) & RW_LOCK_READ ? NULL :					\
	 (struct thread *)RW_OWNER((v)))

#define	rw_wowner(rw)	lv_rw_wowner(RW_READ_VALUE(rw))

/*
 * Return whether the write owner is recursed.  Write ownership is not
 * assured here and should be previously checked.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock who should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	__rw_assert(c, what, file, line)
#endif

void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

void
lock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_rlock(rw);
	else
		rw_wlock(rw);
}

uintptr_t
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (1);
	} else {
		rw_wunlock(rw);
		return (0);
	}
}
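
/*
 * Note on the two lock-class methods above: code that must drop a held
 * rwlock around a sleep (for example the condvar/sleep machinery) releases
 * it through unlock_rw() and later restores it with lock_rw(), passing back
 * the returned "how" cookie.  As the code above shows, a cookie of 1 means
 * the lock was read-held and 0 means it was write-held.
 */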

#ifdef KDTRACE_HOOKS
int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
	struct rwlock *rw;
	int flags;

	rw = rwlock2rw(c);

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE | RW_NEW)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;
	if (opts & RW_NEW)
		flags |= LO_NEW;

	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
}

void
_rw_destroy(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}
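
/*
 * rw_sysinit() lets a lock be set up automatically at boot.  A typical
 * (illustrative) use is through the RW_SYSINIT() macro from <sys/rwlock.h>,
 * which builds the struct rw_args and registers this function as a SYSINIT:
 *
 *	static struct rwlock foo_lock;
 *	RW_SYSINIT(foo_lock_init, &foo_lock, "foo lock");
 */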
void
rw_sysinit(void *arg)
{
	struct rw_args *args;

	args = arg;
	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
	    args->ra_flags);
}

int
_rw_wowned(const volatile uintptr_t *c)
{

	return (rw_wowner(rwlock2rw(c)) == curthread);
}

void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t tid, v;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	tid = (uintptr_t)curthread;
	v = RW_UNLOCKED;
	if (!_rw_write_lock_fetch(rw, &v, tid))
		_rw_wlock_hard(rw, v, file, line);
	else
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
		    0, 0, file, line, LOCKSTAT_WRITER);

	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_INC(curthread);
}
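
/*
 * Non-blocking attempt at the write lock.  Returns nonzero on success and
 * zero if the lock is busy; if the lock was initialized with RW_RECURSE and
 * curthread already owns it, the attempt succeeds by recursing.
 */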
int
__rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t tid, v;
	int rval;
	bool recursed;

	td = curthread;
	tid = (uintptr_t)td;
	if (SCHEDULER_STOPPED_TD(td))
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	rval = 1;
	recursed = false;
	v = RW_UNLOCKED;
	for (;;) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
			break;
		if (v == RW_UNLOCKED)
			continue;
		if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
			rw->rw_recurse++;
			atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
			/* Note the recursion so lockstat skips the acquire. */
			recursed = true;
			break;
		}
		rval = 0;
		break;
	}

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		if (!recursed)
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_WRITER);
		TD_LOCKS_INC(curthread);
	}
	return (rval);
}

int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
}

void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);

#ifdef LOCK_PROFILING
	_rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
#else
	__rw_wunlock(rw, curthread, file, line);
#endif

	TD_LOCKS_DEC(curthread);
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing otherwise
 * prioritizes writers before readers.
 */
static bool __always_inline
__rw_can_read(struct thread *td, uintptr_t v, bool fp)
{

	if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
	    == RW_LOCK_READ)
		return (true);
	if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
		return (true);
	return (false);
}

static bool __always_inline
__rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
    LOCK_FILE_LINE_ARG_DEF)
{

	/*
	 * Handle the easy case.  If no other thread has a write
	 * lock, then try to bump up the count of read locks.  Note
	 * that we have to preserve the current state of the
	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
	 * read lock, then rw_lock must have changed, so restart
	 * the loop.  Note that this handles the case of a
	 * completely unlocked rwlock since such a lock is encoded
	 * as a read lock with no waiters.
	 */
	while (__rw_can_read(td, *vp, fp)) {
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
			*vp + RW_ONE_READER)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR4(KTR_LOCK,
				    "%s: %p succeed %p -> %p", __func__,
				    rw, (void *)*vp,
				    (void *)(*vp + RW_ONE_READER));
			td->td_rw_rlocks++;
			return (true);
		}
	}
	return (false);
}
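
/*
 * Slow path for read acquisition, entered when __rw_rlock_try() fails in
 * __rw_rlock_int() or when lock profiling is enabled.  It loops retrying
 * the fast path, optionally spinning adaptively while the write owner is
 * running on another CPU, and finally blocks on the lock's turnstile.
 */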
static void __noinline
__rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state;
	int doing_lockprof = 0;
#endif

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			goto out_lockstat;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
		state = v;
	}
#endif
#ifdef LOCK_PROFILING
	doing_lockprof = 1;
	state = v;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object,
	    &contested, &waittime);

	for (;;) {
		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
			break;
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname(curthread), "spinning",
				    "lockname:\"%s\"", rw->lock_object.lo_name);
				do {
					lock_delay(&lda);
					v = RW_READ_VALUE(rw);
					owner = lv_rw_wowner(v);
				} while (owner != NULL && TD_IS_RUNNING(owner));
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname(curthread), "running");
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			for (i = 0; i < rowner_loops; i += n) {
				n = RW_READERS(v);
				lock_delay_spin(n);
				v = RW_READ_VALUE(rw);
				if ((v & RW_LOCK_READ) == 0 || __rw_can_read(td, v, false))
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += rowner_loops - i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i < rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present,
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = RW_READ_VALUE(rw);
retry_ts:
		if (__rw_can_read(td, v, false)) {
			turnstile_cancel(ts);
			continue;
		}

		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!__rw_can_read(td, v, false));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
			    v | RW_LOCK_READ_WAITERS))
				goto retry_ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
		v = RW_READ_VALUE(rw);
	}
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_READER);
}

void
__rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	td = curthread;

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
	    !TD_IS_IDLETHREAD(td),
	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    td, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != td,
	    ("rw_rlock: wlock already held for %s @ %s:%d",
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	v = RW_READ_VALUE(rw);
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
	    !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
		__rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_obtain_lock_success(&rw->lock_object, 0, 0,
		    file, line);

	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	TD_LOCKS_INC(curthread);
}

void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_rlock_int(rw LOCK_FILE_LINE_ARG);
}

int
__rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));

	x = rw->rw_lock;
	for (;;) {
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
			    rw, 0, 0, file, line, LOCKSTAT_READER);
			TD_LOCKS_INC(curthread);
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
}
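
/*
 * Fast-path helper for read unlock: drop one reference, or release the lock
 * entirely if this is the last reader and no waiters are present.  Returns
 * false when we appear to be the last reader but waiters exist, in which
 * case the caller falls back to __rw_runlock_hard() to hand off the lock
 * via the turnstile.
 */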
static bool __always_inline
__rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
{

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		if (RW_READERS(*vp) > 1) {
			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
			    *vp - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)*vp,
					    (void *)(*vp - RW_ONE_READER));
				td->td_rw_rlocks--;
				return (true);
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(*vp & RW_LOCK_WAITERS)) {
			MPASS((*vp & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				td->td_rw_rlocks--;
				return (true);
			}
			continue;
		}
		break;
	}
	return (false);
}

static void __noinline
__rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
    LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t setv, queue;

	if (SCHEDULER_STOPPED())
		return;

	if (__rw_runlock_try(rw, td, &v))
		goto out_lockstat;

	/*
	 * Ok, we know we have waiters and we think we are the
	 * last reader, so grab the turnstile lock.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (__rw_runlock_try(rw, td, &v))
			break;

		v &= (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		setv = RW_UNLOCKED;
		queue = TS_SHARED_QUEUE;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			setv |= (v & RW_LOCK_READ_WAITERS);
		}
		v |= RW_READERS_LOCK(1);
		if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
			continue;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		td->td_rw_rlocks--;
		break;
	}
	turnstile_chain_unlock(&rw->lock_object);
out_lockstat:
	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
}

void
_rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t v;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	td = curthread;
	v = RW_READ_VALUE(rw);

	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
	    !__rw_runlock_try(rw, td, &v)))
		__rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
	else
		lock_profile_release_lock(&rw->lock_object);

	TD_LOCKS_DEC(curthread);
}

void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	_rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t tid;
	struct rwlock *rw;
	struct turnstile *ts;
	struct thread *owner;
#ifdef ADAPTIVE_RWLOCKS
	int spintries = 0;
	int i, n;
	enum { READERS, WRITER } sleep_reason = READERS;
#endif
	uintptr_t x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state;
	int doing_lockprof = 0;
#endif

	tid = (uintptr_t)curthread;
	rw = rwlock2rw(c);

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
		while (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				goto out_lockstat;
		}
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&rw->lock_object);
		state = v;
	}
#endif
#ifdef LOCK_PROFILING
	doing_lockprof = 1;
	state = v;
#endif

	if (SCHEDULER_STOPPED())
		return;

#if defined(ADAPTIVE_RWLOCKS)
	lock_delay_arg_init(&lda, &rw_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif
	if (__predict_false(v == RW_UNLOCKED))
		v = RW_READ_VALUE(rw);

	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&rw->lock_object,
	    &contested, &waittime);

	for (;;) {
		if (v == RW_UNLOCKED) {
			if (_rw_write_lock_fetch(rw, &v, tid))
				break;
			continue;
		}
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		if (!(v & RW_LOCK_READ)) {
			sleep_reason = WRITER;
			owner = lv_rw_wowner(v);
			if (!TD_IS_RUNNING(owner))
				goto ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			do {
				lock_delay(&lda);
				v = RW_READ_VALUE(rw);
				owner = lv_rw_wowner(v);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		} else if (RW_READERS(v) > 0) {
			sleep_reason = READERS;
			if (spintries == rowner_retries)
				goto ts;
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			for (i = 0; i < rowner_loops; i += n) {
				n = RW_READERS(v);
				lock_delay_spin(n);
				v = RW_READ_VALUE(rw);
				if ((v & RW_LOCK_WRITE_SPINNER) == 0)
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i < rowner_loops)
				continue;
		}
ts:
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		owner = lv_rw_wowner(v);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (owner != NULL) {
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		} else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
			turnstile_cancel(ts);
			continue;
		}
#endif
		/*
		 * Check the waiters flags on this rwlock.  If the lock was
		 * released without leaving a pending waiters queue, simply
		 * try to acquire it.  If a pending waiters queue is
		 * present, claim the lock ownership and maintain the
		 * pending queue.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			goto retry_ts;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
			    v | RW_LOCK_WRITE_WAITERS))
				goto retry_ts;
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		MPASS(owner == rw_owner(rw));
		turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
		v = RW_READ_VALUE(rw);
	}
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return;
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
out_lockstat:
#endif
	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
	    waittime, file, line, LOCKSTAT_WRITER);
}

/*
 * This function is called if lockstat is active or the first try at releasing
 * a write lock failed.  The latter means that the lock is recursed or one of
 * the 2 waiter bits must be set indicating that at least one thread is waiting
 * on this lock.
 */
void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t tid, setv;
	int queue;

	tid = (uintptr_t)curthread;
	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);
	if (__predict_false(v == tid))
		v = RW_READ_VALUE(rw);

	if (v & RW_LOCK_WRITER_RECURSED) {
		if (--(rw->rw_recurse) == 0)
			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
	if (v == tid && _rw_write_unlock(rw, tid))
		return;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wakeup the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	setv = RW_UNLOCKED;
	v = RW_READ_VALUE(rw);
	queue = TS_SHARED_QUEUE;
	if (v & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		setv |= (v & RW_LOCK_READ_WAITERS);
	}
	atomic_store_rel_ptr(&rw->rw_lock, setv);

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");

	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	turnstile_broadcast(ts, queue);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
__rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t v, setv, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	v = RW_READ_VALUE(rw);
	for (;;) {
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = RW_READ_VALUE(rw);
retry_ts:
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		setv = tid | (v & RW_LOCK_WAITERS);
		success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
		if (success) {
			if (v & RW_LOCK_WAITERS)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		goto retry_ts;
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(rw__upgrade, rw);
	}
	return (success);
}

int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
}

/*
 * Downgrade a write lock into a single read lock.
 */
void
__rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
{
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(rw__downgrade, rw);
}

void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);
	__rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
}
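
/*
 * Illustrative use of the assertion interface below (not code from this
 * file): a consumer can document and check its locking protocol with, e.g.,
 *
 *	rw_assert(&foo_lock, RA_WLOCKED);
 *
 * which panics (or defers to WITNESS) if curthread does not hold the
 * hypothetical foo_lock exclusively.
 */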
#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef __rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */
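
/*
 * The DDB hook below is reached through the lock class (lc_ddb_show), so a
 * held rwlock can typically be inspected from the kernel debugger with a
 * command along the lines of "show lock <address>".
 */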
#ifdef DDB
void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif