/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 244582 2012-12-22 09:37:34Z attilio $");

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_rwlocks.h"

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
#define	ADAPTIVE_RWLOCKS
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/*
 * Return the rwlock address when the lock cookie address is provided.
 * This functionality assumes that struct rwlock has a member named rw_lock.
 */
#define	rwlock2rw(c)	(__containerof(c, struct rwlock, rw_lock))
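
/*
 * A minimal sketch of what __containerof() does here (illustrative
 * only, not compiled): given the address of the rw_lock member, it
 * subtracts the member offset to recover the enclosing structure.
 * "example_rw" is a hypothetical lock used only for this example:
 *
 *	volatile uintptr_t *c = &example_rw.rw_lock;
 *	struct rwlock *rw = (struct rwlock *)
 *	    ((char *)c - offsetof(struct rwlock, rw_lock));
 */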

#ifdef ADAPTIVE_RWLOCKS
static int rowner_retries = 10;
static int rowner_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
    "rwlock debugging");
SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
#endif

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(const struct lock_object *lock);
#endif
static void	assert_rw(const struct lock_object *lock, int what);
static void	lock_rw(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_rw(const struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_rw(struct lock_object *lock);

struct lock_class lock_class_rw = {
	.lc_name = "rw",
	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_rw,
#ifdef DDB
	.lc_ddb_show = db_show_rwlock,
#endif
	.lc_lock = lock_rw,
	.lc_unlock = unlock_rw,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_rw,
#endif
};

/*
 * Return a pointer to the owning thread if the lock is write-locked or
 * NULL if the lock is unlocked or read-locked.
 */
#define	rw_wowner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

/*
 * Return whether the write owner is recursed.  Write ownership is not
 * assured here and should be checked beforehand.
 */
#define	rw_recursed(rw)		((rw)->rw_recurse != 0)

/*
 * Return true if curthread holds the lock.
 */
#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)

/*
 * Return a pointer to the owning thread for this lock that should receive
 * any priority lent by threads that block on this lock.  Currently this
 * is identical to rw_wowner().
 */
#define	rw_owner(rw)		rw_wowner(rw)

#ifndef INVARIANTS
#define	__rw_assert(c, what, file, line)
#endif

void
assert_rw(const struct lock_object *lock, int what)
{

	rw_assert((const struct rwlock *)lock, what);
}

void
lock_rw(struct lock_object *lock, int how)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	if (how)
		rw_wlock(rw);
	else
		rw_rlock(rw);
}

int
unlock_rw(struct lock_object *lock)
{
	struct rwlock *rw;

	rw = (struct rwlock *)lock;
	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
	if (rw->rw_lock & RW_LOCK_READ) {
		rw_runlock(rw);
		return (0);
	} else {
		rw_wunlock(rw);
		return (1);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_rw(const struct lock_object *lock, struct thread **owner)
{
	const struct rwlock *rw = (const struct rwlock *)lock;
	uintptr_t x = rw->rw_lock;

	*owner = rw_wowner(rw);
	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
_rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
{
	struct rwlock *rw;
	int flags;

	rw = rwlock2rw(c);

	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
	    RW_RECURSE)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
	    &rw->rw_lock));

	flags = LO_UPGRADABLE;
	if (opts & RW_DUPOK)
		flags |= LO_DUPOK;
	if (opts & RW_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & RW_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & RW_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & RW_QUIET)
		flags |= LO_QUIET;

	rw->rw_lock = RW_UNLOCKED;
	rw->rw_recurse = 0;
	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
}
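
/*
 * A minimal usage sketch (illustrative only; "example_lock" and the
 * data it protects are hypothetical):
 *
 *	static struct rwlock example_lock;
 *
 *	rw_init(&example_lock, "example");
 *	rw_rlock(&example_lock);
 *	... read the protected data ...
 *	rw_runlock(&example_lock);
 *	rw_wlock(&example_lock);
 *	... modify the protected data ...
 *	rw_wunlock(&example_lock);
 *	rw_destroy(&example_lock);
 */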

void
_rw_destroy(volatile uintptr_t *c)
{
	struct rwlock *rw;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
	rw->rw_lock = RW_DESTROYED;
	lock_destroy(&rw->lock_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init((struct rwlock *)args->ra_rw, args->ra_desc);
}

void
rw_sysinit_flags(void *arg)
{
	struct rw_args_flags *args = arg;

	rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
	    args->ra_flags);
}
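
/*
 * These handlers back the RW_SYSINIT()/RW_SYSINIT_FLAGS() macros from
 * <sys/rwlock.h>, which arrange for a lock to be initialized at boot.
 * A sketch ("foo_lock" is hypothetical):
 *
 *	static struct rwlock foo_lock;
 *	RW_SYSINIT(foo_lock_init, &foo_lock, "foo lock");
 */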

int
_rw_wowned(const volatile uintptr_t *c)
{

	return (rw_wowner(rwlock2rw(c)) == curthread);
}

void
_rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	curthread->td_locks++;
}
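
/*
 * The __rw_wlock() macro used above lives in <sys/rwlock.h>; it tries
 * the uncontested fast path first, via the same _rw_write_lock()
 * compare-and-set that __rw_wlock_hard() below spins on, roughly (a
 * paraphrase, not the macro's exact text):
 *
 *	if (!atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
 *	    (uintptr_t)curthread))
 *		_rw_wlock_hard(&rw->rw_lock, (uintptr_t)curthread,
 *		    file, line);
 */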

int
__rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	int rval;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));

	if (rw_wlocked(rw) &&
	    (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		rw->rw_recurse++;
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
		    (uintptr_t)curthread);

	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}
	return (rval);
}

void
_rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
	    line);
	if (!rw_recursed(rw))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
	__rw_wunlock(rw, curthread, file, line);
}
/*
 * Determines whether a new reader can acquire a lock.  Succeeds if the
 * reader already owns a read lock and the lock is locked for read to
 * prevent deadlock from reader recursion.  Also succeeds if the lock
 * is unlocked and has no writer waiters or spinners.  Failing otherwise
 * prioritizes writers before readers.
 */
#define	RW_CAN_READ(_rw)						\
    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
    RW_LOCK_READ)
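
/*
 * A worked example of the test above (values are illustrative): with
 * the lock word equal to RW_READERS_LOCK(2) (read-locked, two readers,
 * no waiter bits), masking with (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS |
 * RW_LOCK_WRITE_SPINNER) leaves exactly RW_LOCK_READ, so a new reader
 * may proceed.  Once RW_LOCK_WRITE_WAITERS is set, the masked value no
 * longer equals RW_LOCK_READ and new readers block, unless the thread
 * already holds read locks (td_rw_rlocks != 0), which the first clause
 * admits to avoid deadlock on reader recursion.
 */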

void
__rw_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	uintptr_t v;
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));
	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
	KASSERT(rw_wowner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->lock_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);

	for (;;) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if the lock has been unlocked and write waiters
			 * were present.
			 */
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
			    v + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeed %p -> %p", __func__,
					    rw, (void *)v,
					    (void *)(v + RW_ONE_READER));
				break;
			}
			continue;
		}
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
					    __func__, rw, owner);
				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
				    owner && TD_IS_RUNNING(owner)) {
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				continue;
			}
		} else if (spintries < rowner_retries) {
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				v = rw->rw_lock;
				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
					break;
				cpu_spinwait();
			}
			if (i != rowner_loops)
				continue;
		}
#endif

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock or there are write waiters present, so
		 * acquire the turnstile lock to begin the process of
		 * blocking.
		 */
		ts = turnstile_trywait(&rw->lock_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if needed.
		 */
		v = rw->rw_lock;
		if (RW_CAN_READ(v)) {
			turnstile_cancel(ts);
			continue;
		}

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if ((v & RW_LOCK_READ) == 0) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif

		/*
		 * The lock is held in write mode or it already has waiters.
		 */
		MPASS(!RW_CAN_READ(v));

		/*
		 * If the RW_LOCK_READ_WAITERS flag is already set, then
		 * we can go ahead and block.  If it is not set then try
		 * to set it.  If we fail to set it drop the turnstile
		 * lock and restart the loop.
		 */
		if (!(v & RW_LOCK_READ_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_READ_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
				    __func__, rw);
		}

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->lock_object, 0, file, line);
	curthread->td_locks++;
	curthread->td_rw_rlocks++;
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

int
__rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
	    curthread, rw->lock_object.lo_name, file, line));

	for (;;) {
		x = rw->rw_lock;
		KASSERT(rw->rw_lock != RW_DESTROYED,
		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
		if (!(x & RW_LOCK_READ))
			break;
		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
			    line);
			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
			curthread->td_locks++;
			curthread->td_rw_rlocks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
	return (0);
}
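
/*
 * A typical try-lock pattern in caller context (a sketch; "sc" and its
 * fields are hypothetical):
 *
 *	if (rw_try_rlock(&sc->sc_lock)) {
 *		... read the shared state ...
 *		rw_runlock(&sc->sc_lock);
 *	} else {
 *		... take a fallback path, e.g. defer the work ...
 *	}
 */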

void
_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t x, v, queue;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_RLOCKED, file, line);
	curthread->td_locks--;
	curthread->td_rw_rlocks--;
	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}
		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WAITERS)) {
			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
			    RW_READERS_LOCK(1));
			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}
		/*
		 * Ok, we know we have waiters and we think we are the
		 * last reader, so grab the turnstile lock.
		 */
		turnstile_chain_lock(&rw->lock_object);
		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		MPASS(v & RW_LOCK_WAITERS);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wakeup actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wakeup all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		x = RW_UNLOCKED;
		if (v & RW_LOCK_WRITE_WAITERS) {
			queue = TS_EXCLUSIVE_QUEUE;
			x |= (v & RW_LOCK_READ_WAITERS);
		} else
			queue = TS_SHARED_QUEUE;
		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
		    x)) {
			turnstile_chain_unlock(&rw->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->lock_object);
		MPASS(ts != NULL);
		turnstile_broadcast(ts, queue);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		turnstile_chain_unlock(&rw->lock_object);
		break;
	}
	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
__rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
    int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
#ifdef ADAPTIVE_RWLOCKS
	volatile struct thread *owner;
	int spintries = 0;
	int i;
#endif
	uintptr_t v, x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
#ifdef KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	if (rw_wlocked(rw)) {
		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
		    __func__, rw->lock_object.lo_name, file, line));
		rw->rw_recurse++;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
		return;
	}

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&rw->lock_object,
		    &contested, &waittime);
#ifdef ADAPTIVE_RWLOCKS
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		v = rw->rw_lock;
		owner = (struct thread *)RW_OWNER(v);
		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, rw, owner);
			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
			    TD_IS_RUNNING(owner)) {
				cpu_spinwait();
#ifdef KDTRACE_HOOKS
				spin_cnt++;
#endif
			}
			continue;
		}
		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
		    spintries < rowner_retries) {
			if (!(v & RW_LOCK_WRITE_SPINNER)) {
				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
				    v | RW_LOCK_WRITE_SPINNER)) {
					continue;
				}
			}
			spintries++;
			for (i = 0; i < rowner_loops; i++) {
				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
					break;
				cpu_spinwait();
			}
#ifdef KDTRACE_HOOKS
			spin_cnt += rowner_loops - i;
#endif
			if (i != rowner_loops)
				continue;
		}
#endif
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;

#ifdef ADAPTIVE_RWLOCKS
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the turnstile
		 * chain lock.  If so, drop the turnstile lock and try
		 * again.
		 */
		if (!(v & RW_LOCK_READ)) {
			owner = (struct thread *)RW_OWNER(v);
			if (TD_IS_RUNNING(owner)) {
				turnstile_cancel(ts);
				continue;
			}
		}
#endif
		/*
		 * Check the waiters flags on this rwlock.  If the lock
		 * was released without maintaining any pending waiters
		 * queue, simply try to acquire it.  If a pending waiters
		 * queue is present, claim the lock ownership and maintain
		 * the pending queue.
		 */
		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
		if ((v & ~x) == RW_UNLOCKED) {
			x &= ~RW_LOCK_WRITE_SPINNER;
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
				if (x)
					turnstile_claim(ts);
				else
					turnstile_cancel(ts);
				break;
			}
			turnstile_cancel(ts);
			continue;
		}
		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS)) {
			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
			    v | RW_LOCK_WRITE_WAITERS)) {
				turnstile_cancel(ts);
				continue;
			}
			if (LOCK_LOG_TEST(&rw->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
				    __func__, rw);
		}
		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
#ifdef ADAPTIVE_RWLOCKS
		spintries = 0;
#endif
	}
	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
	    waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);

	/*
	 * Record only the loops spinning and not sleeping.
	 */
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
#endif
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the 2 waiter bits must be set indicating that at
 * least one thread is waiting on this lock.
 */
void
__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
    int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	if (rw_wlocked(rw) && rw_recursed(rw)) {
		rw->rw_recurse--;
		if (LOCK_LOG_TEST(&rw->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
		return;
	}

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_chain_lock(&rw->lock_object);
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);

	/*
	 * Use the same algo as sx locks for now.  Prefer waking up shared
	 * waiters if we have any over writers.  This is probably not ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wakeup the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
	v = RW_UNLOCKED;
	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
		queue = TS_EXCLUSIVE_QUEUE;
		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
	} else
		queue = TS_SHARED_QUEUE;

	/* Wake up all waiters for the specific queue. */
	if (LOCK_LOG_TEST(&rw->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	turnstile_chain_unlock(&rw->lock_object);
}

/*
 * Attempt to do a non-blocking upgrade from a read lock to a write
 * lock.  This will only succeed if this thread holds a single read
 * lock.  Returns true if the upgrade succeeded and false otherwise.
 */
int
__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	uintptr_t v, x, tid;
	struct turnstile *ts;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_RLOCKED, file, line);

	/*
	 * Attempt to switch from one reader to a writer.  If there
	 * are any write waiters, then we will have to lock the
	 * turnstile first to prevent races with another writer
	 * calling turnstile_wait() before we have claimed this
	 * turnstile.  So, do the simple case of no waiters first.
	 */
	tid = (uintptr_t)curthread;
	success = 0;
	for (;;) {
		v = rw->rw_lock;
		if (RW_READERS(v) > 1)
			break;
		if (!(v & RW_LOCK_WAITERS)) {
			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
			if (!success)
				continue;
			break;
		}

		/*
		 * Ok, we think we have waiters, so lock the turnstile.
		 */
		ts = turnstile_trywait(&rw->lock_object);
		v = rw->rw_lock;
		if (RW_READERS(v) > 1) {
			turnstile_cancel(ts);
			break;
		}
		/*
		 * Try to switch from one reader to a writer again.  This time
		 * we honor the current state of the waiters flags.
		 * If we obtain the lock with the flags set, then claim
		 * ownership of the turnstile.
		 */
		x = rw->rw_lock & RW_LOCK_WAITERS;
		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
		if (success) {
			if (x)
				turnstile_claim(ts);
			else
				turnstile_cancel(ts);
			break;
		}
		turnstile_cancel(ts);
	}
	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
	if (success) {
		curthread->td_rw_rlocks--;
		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
	}
	return (success);
}
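
/*
 * A common upgrade pattern (a sketch; "sc" is hypothetical).  On
 * failure the caller still holds its read lock and typically restarts
 * with a write lock:
 *
 *	rw_rlock(&sc->sc_lock);
 *	if (update_needed && !rw_try_upgrade(&sc->sc_lock)) {
 *		rw_runlock(&sc->sc_lock);
 *		rw_wlock(&sc->sc_lock);
 *		... re-check the state; it may have changed ...
 *	}
 */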

/*
 * Downgrade a write lock into a single read lock.
 */
void
__rw_downgrade(volatile uintptr_t *c, const char *file, int line)
{
	struct rwlock *rw;
	struct turnstile *ts;
	uintptr_t tid, v;
	int rwait, wwait;

	if (SCHEDULER_STOPPED())
		return;

	rw = rwlock2rw(c);

	KASSERT(rw->rw_lock != RW_DESTROYED,
	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
	__rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (rw_recursed(rw))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);

	/*
	 * Convert from a writer to a single reader.  First we handle
	 * the easy case with no waiters.  If there are any waiters, we
	 * lock the turnstile and "disown" the lock.
	 */
	tid = (uintptr_t)curthread;
	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
		goto out;

	/*
	 * Ok, we think we have waiters, so lock the turnstile so we can
	 * read the waiter flags without any races.
	 */
	turnstile_chain_lock(&rw->lock_object);
	v = rw->rw_lock & RW_LOCK_WAITERS;
	rwait = v & RW_LOCK_READ_WAITERS;
	wwait = v & RW_LOCK_WRITE_WAITERS;
	MPASS(rwait | wwait);

	/*
	 * Downgrade from a write lock while preserving the waiters flag
	 * and give up ownership of the turnstile.
	 */
	ts = turnstile_lookup(&rw->lock_object);
	MPASS(ts != NULL);
	if (!wwait)
		v &= ~RW_LOCK_READ_WAITERS;
	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
	/*
	 * Wake other readers if there are no writers pending.  Otherwise they
	 * won't be able to acquire the lock anyway.
	 */
	if (rwait && !wwait) {
		turnstile_broadcast(ts, TS_SHARED_QUEUE);
		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
	} else
		turnstile_disown(ts);
	turnstile_chain_unlock(&rw->lock_object);
out:
	curthread->td_rw_rlocks++;
	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
}
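
/*
 * A downgrade lets a writer publish an update and continue reading it
 * without a window in which another writer could intervene (a sketch;
 * "sc" is hypothetical):
 *
 *	rw_wlock(&sc->sc_lock);
 *	... modify the shared state ...
 *	rw_downgrade(&sc->sc_lock);
 *	... keep reading the state just written ...
 *	rw_runlock(&sc->sc_lock);
 */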

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef __rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
void
__rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
	const struct rwlock *rw;

	if (panicstr != NULL)
		return;

	rw = rwlock2rw(c);

	switch (what) {
	case RA_LOCKED:
	case RA_LOCKED | RA_RECURSED:
	case RA_LOCKED | RA_NOTRECURSED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_wowner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);

		if (!(rw->rw_lock & RW_LOCK_READ)) {
			if (rw_recursed(rw)) {
				if (what & RA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    rw->lock_object.lo_name, file,
					    line);
			} else if (what & RA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		}
#endif
		break;
	case RA_WLOCKED:
	case RA_WLOCKED | RA_RECURSED:
	case RA_WLOCKED | RA_NOTRECURSED:
		if (rw_wowner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		if (rw_recursed(rw)) {
			if (what & RA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    rw->lock_object.lo_name, file, line);
		} else if (what & RA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->lock_object, what, file, line);
#else
		/*
		 * If we hold a write lock fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_wowner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */
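
/*
 * Callers typically document their locking contract with rw_assert(),
 * for instance (a sketch; "sc" is hypothetical):
 *
 *	rw_assert(&sc->sc_lock, RA_WLOCKED);
 *
 * which panics under INVARIANTS if the current thread does not hold
 * the write lock.
 */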

#ifdef DDB
void
db_show_rwlock(const struct lock_object *lock)
{
	const struct rwlock *rw;
	struct thread *td;

	rw = (const struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock == RW_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %ju locks\n",
		    (uintmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_wowner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (rw_recursed(rw))
			db_printf(" recursed: %u\n", rw->rw_recurse);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif