/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.2/sys/kern/kern_rwlock.c 285759 2015-07-21 17:16:37Z markj $");

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock has a member named rw_lock.
 */
#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
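
/*
 * For example, given the address of a lock's rw_lock member,
 * __containerof() subtracts the member's offset within struct rwlock to
 * recover the pointer to the enclosing lock.
 */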

#ifdef ADAPTIVE_RWLOCKS
static int rowner_retries = 10;
static int rowner_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
    "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
#endif

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))
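
/*
 * Illustrative sketch of the lock word encoding relied on above (see
 * sys/rwlock.h for the authoritative definitions): when RW_LOCK_READ is
 * set, the word holds a reader count plus waiter flags, so an idle lock
 * is just a read lock with zero readers; when RW_LOCK_READ is clear,
 * the word holds the owning thread pointer ORed with waiter flags:
 *
 *	RW_UNLOCKED				unlocked (zero readers)
 *	RW_READERS_LOCK(2)			read-locked by two threads
 *	(uintptr_t)td | RW_LOCK_WRITE_WAITERS	write-locked by td, with
 *						writers queued
 */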

/*
 * Return non-zero if the write owner is recursed.  Write ownership is
 * not assured here and must have been checked previously.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the thread that owns this lock and should receive
 * any priority lent by threads that block on it.  Currently this is
 * identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	__rw_assert(c, what, file, line)
#endif

void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

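/*
 * Generic lock-class hooks used by code that must drop and reacquire a
 * lock around a sleep: "how" is an opaque cookie, and a non-zero value
 * here denotes a read lock, matching what unlock_rw() returns below.
 */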
void
lock_rw(struct lock_object *lock, uintptr_t how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_rlock(rw);
	else
		rw_wlock(rw);
}

uintptr_t
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (1);
	} else {
		rw_wunlock(rw);
		return (0);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
	struct rwlock *rw;
	int flags;

	rw = rwlock2rw(c);

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;

	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
}
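
/*
 * Typical initialization (illustrative; "data_lock" is a made-up name):
 *
 *	struct rwlock data_lock;
 *
 *	rw_init_flags(&data_lock, "data lock", RW_RECURSE);
 *
 * The plain rw_init(rw, name) form passes no option flags.
 */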

void
_rw_destroy(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init((struct rwlock *)args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
	    args->ra_flags);
}
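
/*
 * These handlers are normally reached via the RW_SYSINIT() and
 * RW_SYSINIT_FLAGS() macros in sys/rwlock.h, which arrange for a
 * statically declared lock to be initialized during boot.
 */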

int
_rw_wowned(const volatile uintptr_t *c)
{

	return (rw_wowner(rwlock2rw(c)) == curthread);
}

void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}

int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	int rval;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	if (rw_wlocked(rw) &&
	    (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		rw->rw_recurse++;
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
		    (uintptr_t)curthread);

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		if (!rw_recursed(rw))
			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE,
			    rw, 0, 0, file, line);
		curthread->td_locks++;
	}
	return (rval);
}

void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);
	if (!rw_recursed(rw))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
	__rw_wunlock(rw, curthread, file, line);
	curthread->td_locks--;
}

/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read, to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing in all
 * other cases prioritizes writers over readers.
 */
#define	RW_CAN_READ(_rw)						\
    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
    RW_LOCK_READ)
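
/*
 * Worked example (illustrative): with _rw == RW_READERS_LOCK(3) any new
 * reader may join, since the lock is read-locked with no writer waiters
 * or spinners.  With _rw == RW_READERS_LOCK(3) | RW_LOCK_WRITE_WAITERS,
 * only a thread whose td_rw_rlocks is non-zero (i.e., one that already
 * holds read locks and could otherwise deadlock) may join; all other
 * readers defer to the queued writer.
 */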

void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	uintptr_t v;
#ifdef KDTRACE_HOOKS
	uintptr_t state;
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != curthread,
	    ("rw_rlock: wlock already held for %s @ %s:%d",
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

#ifdef KDTRACE_HOOKS
	all_time -= lockstat_nsecs(&rw->lock_object);
	state = rw->rw_lock;
#endif
	for (;;) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if the lock has been unlocked and write waiters
			 * were present.
			 */
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
			    v + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)v,
					    (void *)(v + RW_ONE_READER));
				break;
			}
			continue;
		}
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				KTR_STATE1(KTR_SCHED, "thread",
				    sched_tdname(curthread), "spinning",
				    "lockname:\"%s\"", rw->lock_object.lo_name);
				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
				    owner && TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				KTR_STATE0(KTR_SCHED, "thread",
				    sched_tdname(curthread), "running");
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			for (i = 0; i < rowner_loops; i++) {
				v = rw->rw_lock;
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
					break;
				cpu_spinwait();
			}
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present,
		 * acquire the turnstile lock so we can begin the process
		 * of blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(LS_RW_RLOCK_BLOCK, rw, sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time,
		    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
#endif
	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
	curthread->td_rw_rlocks++;
}

int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));

	for (;;) {
		x = rw->rw_lock;
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE,
			    rw, 0, 0, file, line);
			curthread->td_locks++;
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}

void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t x, v, queue;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WAITERS)) {
			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}
		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and the new thread
		 * "steals" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		x = RW_UNLOCKED;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			x |= (v & RW_LOCK_READ_WAITERS);
		} else
			queue = TS_SHARED_QUEUE;
		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
		    x)) {
			turnstile_chain_unlock(&rw->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		turnstile_chain_unlock(&rw->lock_object);
		break;
	}
	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
	curthread->td_locks--;
	curthread->td_rw_rlocks--;
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
    int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
	uintptr_t v, x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#ifdef KDTRACE_HOOKS
	uintptr_t state;
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	if (rw_wlocked(rw)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

#ifdef KDTRACE_HOOKS
	all_time -= lockstat_nsecs(&rw->lock_object);
	state = rw->rw_lock;
#endif
	while (!_rw_write_lock(rw, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		v = rw->rw_lock;
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner)) {
				cpu_spinwait();
#ifdef KDTRACE_HOOKS
				spin_cnt++;
#endif
			}
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		}
		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
		    spintries < rowner_retries) {
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    rw->lock_object.lo_name);
			for (i = 0; i < rowner_loops; i++) {
				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
					break;
				cpu_spinwait();
			}
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
#ifdef KDTRACE_HOOKS
			spin_cnt += rowner_loops - i;
#endif
			if (i != rowner_loops)
				continue;
		}
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (!(v & RW_LOCK_READ)) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif
		/*
		 * Check the waiters flags for this rwlock.  If the lock
		 * was released without any pending waiters queue, simply
		 * try to acquire it.  If a pending waiters queue is
		 * present, claim lock ownership and maintain the pending
		 * queue.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&rw->lock_object);
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&rw->lock_object);
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
	}
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&rw->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));

	/* Record only the loops spinning and not sleeping. */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
		    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
#endif
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
    int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	if (rw_wlocked(rw) && rw_recursed(rw)) {
		rw->rw_recurse--;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags is set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wake up the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
	} else
		queue = TS_SHARED_QUEUE;

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
	}
	return (success);
}
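
/*
 * A common caller pattern when an upgrade may fail (illustrative):
 *
 *	rw_rlock(&rw);
 *	if (!rw_try_upgrade(&rw)) {
 *		rw_runlock(&rw);
 *		rw_wlock(&rw);
 *	}
 *
 * On the rw_wlock() path the lock was briefly dropped, so any state
 * observed under the read lock must be re-validated.
 */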

/*
 * Downgrade a write lock into a single read lock.
 */
void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving the waiters flags
	 * and giving up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
}
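
/*
 * Illustrative use: a writer that has finished modifying shared state
 * can downgrade to keep reading without re-contending for the lock,
 * then release with rw_runlock():
 *
 *	rw_wlock(&rw);
 *	(modify shared state)
 *	rw_downgrade(&rw);
 *	(read shared state)
 *	rw_runlock(&rw);
 */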

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef __rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct rwlock *rw;

	if (panicstr != NULL)
		return;

	rw = rwlock2rw(c);

	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
	case RA_RLOCKED | RA_RECURSED:
	case RA_RLOCKED | RA_NOTRECURSED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what & RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */
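
/*
 * Example assertions (illustrative):
 *
 *	rw_assert(&rw, RA_WLOCKED);	curthread holds the write lock
 *	rw_assert(&rw, RA_RLOCKED);	a read lock is held (see the caveat
 *					above about attributing it to
 *					curthread without WITNESS)
 *	rw_assert(&rw, RA_UNLOCKED);	curthread holds no write lock
 */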

#ifdef DDB
void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif