kern_rwlock.c revision 227588
1/*-
2 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the author nor the names of any co-contributors
14 *    may be used to endorse or promote products derived from this software
15 *    without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30/*
31 * Machine-independent bits of reader/writer lock implementation.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 227588 2011-11-16 21:51:17Z pjd $");
36
37#include "opt_ddb.h"
38#include "opt_kdtrace.h"
39#include "opt_no_adaptive_rwlocks.h"
40
41#include <sys/param.h>
42#include <sys/ktr.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/rwlock.h>
48#include <sys/sysctl.h>
49#include <sys/systm.h>
50#include <sys/turnstile.h>
51
52#include <machine/cpu.h>
53
54#if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
55#define	ADAPTIVE_RWLOCKS
56#endif
57
58#ifdef ADAPTIVE_RWLOCKS
59static int rowner_retries = 10;
60static int rowner_loops = 10000;
61static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
62    "rwlock debugging");
63SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
64SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
65#endif
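/*
 * The spin tunables above are exported under debug.rwlock and may be
 * adjusted at run time on kernels built with ADAPTIVE_RWLOCKS, for
 * example (the values shown are arbitrary, not recommendations):
 *
 *	# sysctl debug.rwlock.retry=20
 *	# sysctl debug.rwlock.loops=15000
 *
 * Larger values trade additional spinning for fewer turnstile sleeps
 * when a contested rwlock is held by a thread that is still running.
 */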
66
67#ifdef DDB
68#include <ddb/ddb.h>
69
70static void	db_show_rwlock(const struct lock_object *lock);
71#endif
72static void	assert_rw(const struct lock_object *lock, int what);
73static void	lock_rw(struct lock_object *lock, int how);
74#ifdef KDTRACE_HOOKS
75static int	owner_rw(const struct lock_object *lock, struct thread **owner);
76#endif
77static int	unlock_rw(struct lock_object *lock);
78
79struct lock_class lock_class_rw = {
80	.lc_name = "rw",
81	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
82	.lc_assert = assert_rw,
83#ifdef DDB
84	.lc_ddb_show = db_show_rwlock,
85#endif
86	.lc_lock = lock_rw,
87	.lc_unlock = unlock_rw,
88#ifdef KDTRACE_HOOKS
89	.lc_owner = owner_rw,
90#endif
91};
92
93/*
94 * Return a pointer to the owning thread if the lock is write-locked or
95 * NULL if the lock is unlocked or read-locked.
96 */
97#define	rw_wowner(rw)							\
98	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
99	    (struct thread *)RW_OWNER((rw)->rw_lock))
100
101/*
102 * Returns whether the write owner is recursed.  Write ownership is not
103 * assured here and should be checked beforehand.
104 */
105#define	rw_recursed(rw)		((rw)->rw_recurse != 0)
106
107/*
108 * Return true if curthread holds the lock.
109 */
110#define	rw_wlocked(rw)		(rw_wowner((rw)) == curthread)
111
112/*
113 * Return a pointer to the owning thread for this lock that should receive
114 * any priority lent by threads that block on this lock.  Currently this
115 * is identical to rw_wowner().
116 */
117#define	rw_owner(rw)		rw_wowner(rw)
118
119#ifndef INVARIANTS
120#define	_rw_assert(rw, what, file, line)
121#endif
122
123void
124assert_rw(const struct lock_object *lock, int what)
125{
126
127	rw_assert((const struct rwlock *)lock, what);
128}
129
130void
131lock_rw(struct lock_object *lock, int how)
132{
133	struct rwlock *rw;
134
135	rw = (struct rwlock *)lock;
136	if (how)
137		rw_wlock(rw);
138	else
139		rw_rlock(rw);
140}
141
142int
143unlock_rw(struct lock_object *lock)
144{
145	struct rwlock *rw;
146
147	rw = (struct rwlock *)lock;
148	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
149	if (rw->rw_lock & RW_LOCK_READ) {
150		rw_runlock(rw);
151		return (0);
152	} else {
153		rw_wunlock(rw);
154		return (1);
155	}
156}
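/*
 * The lock_rw()/unlock_rw() methods above allow lock-type-agnostic code
 * to drop and later reacquire an rwlock around a sleep.  A rough sketch
 * of such a caller (hypothetical; in practice this is done by the
 * sleepqueue and condition variable code):
 *
 *	struct lock_class *class = LOCK_CLASS(lock);
 *	int how;
 *
 *	how = class->lc_unlock(lock);	-- 0: was read-locked, 1: write
 *	-- ... sleep ...
 *	class->lc_lock(lock, how);	-- reacquire in the original mode
 */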
157
158#ifdef KDTRACE_HOOKS
159int
160owner_rw(const struct lock_object *lock, struct thread **owner)
161{
162	const struct rwlock *rw = (const struct rwlock *)lock;
163	uintptr_t x = rw->rw_lock;
164
165	*owner = rw_wowner(rw);
166	return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
167	    (*owner != NULL));
168}
169#endif
170
171void
172rw_init_flags(struct rwlock *rw, const char *name, int opts)
173{
174	int flags;
175
176	MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
177	    RW_RECURSE)) == 0);
178	ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
179	    ("%s: rw_lock not aligned for %s: %p", __func__, name,
180	    &rw->rw_lock));
181
182	flags = LO_UPGRADABLE;
183	if (opts & RW_DUPOK)
184		flags |= LO_DUPOK;
185	if (opts & RW_NOPROFILE)
186		flags |= LO_NOPROFILE;
187	if (!(opts & RW_NOWITNESS))
188		flags |= LO_WITNESS;
189	if (opts & RW_RECURSE)
190		flags |= LO_RECURSABLE;
191	if (opts & RW_QUIET)
192		flags |= LO_QUIET;
193
194	rw->rw_lock = RW_UNLOCKED;
195	rw->rw_recurse = 0;
196	lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
197}
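/*
 * A minimal usage sketch of the rwlock(9) interface implemented in this
 * file.  The names foo_lock, foo_list, foo_find() and foo_insert() are
 * hypothetical and only illustrate the calling pattern:
 *
 *	struct rwlock foo_lock;
 *
 *	rw_init(&foo_lock, "foo lock");
 *
 *	rw_rlock(&foo_lock);			-- shared (read) lock
 *	p = foo_find(foo_list, key);
 *	rw_runlock(&foo_lock);
 *
 *	rw_wlock(&foo_lock);			-- exclusive (write) lock
 *	foo_insert(foo_list, p);
 *	rw_wunlock(&foo_lock);
 *
 *	rw_destroy(&foo_lock);
 */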
198
199void
200rw_destroy(struct rwlock *rw)
201{
202
203	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
204	KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
205	rw->rw_lock = RW_DESTROYED;
206	lock_destroy(&rw->lock_object);
207}
208
209void
210rw_sysinit(void *arg)
211{
212	struct rw_args *args = arg;
213
214	rw_init(args->ra_rw, args->ra_desc);
215}
216
217void
218rw_sysinit_flags(void *arg)
219{
220	struct rw_args_flags *args = arg;
221
222	rw_init_flags(args->ra_rw, args->ra_desc, args->ra_flags);
223}
224
225int
226rw_wowned(const struct rwlock *rw)
227{
228
229	return (rw_wowner(rw) == curthread);
230}
231
232void
233_rw_wlock(struct rwlock *rw, const char *file, int line)
234{
235
236	MPASS(curthread != NULL);
237	KASSERT(rw->rw_lock != RW_DESTROYED,
238	    ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
239	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
240	    line, NULL);
241	__rw_wlock(rw, curthread, file, line);
242	LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
243	WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
244	curthread->td_locks++;
245}
246
247int
248_rw_try_wlock(struct rwlock *rw, const char *file, int line)
249{
250	int rval;
251
252	KASSERT(rw->rw_lock != RW_DESTROYED,
253	    ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
254
255	if (rw_wlocked(rw) &&
256	    (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) {
257		rw->rw_recurse++;
258		rval = 1;
259	} else
260		rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED,
261		    (uintptr_t)curthread);
262
263	LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
264	if (rval) {
265		WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
266		    file, line);
267		curthread->td_locks++;
268	}
269	return (rval);
270}
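/*
 * Typical use of the try-lock interface (a sketch; foo_lock and
 * foo_flush() are hypothetical names, not taken from this file):
 *
 *	if (rw_try_wlock(&foo_lock)) {
 *		foo_flush();
 *		rw_wunlock(&foo_lock);
 *	} else {
 *		-- could not lock without blocking: defer the work or
 *		-- fall back to a blocking rw_wlock()
 *	}
 */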
271
272void
273_rw_wunlock(struct rwlock *rw, const char *file, int line)
274{
275
276	MPASS(curthread != NULL);
277	KASSERT(rw->rw_lock != RW_DESTROYED,
278	    ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
279	_rw_assert(rw, RA_WLOCKED, file, line);
280	curthread->td_locks--;
281	WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
282	LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
283	    line);
284	if (!rw_recursed(rw))
285		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_WUNLOCK_RELEASE, rw);
286	__rw_wunlock(rw, curthread, file, line);
287}
288/*
289 * Determines whether a new reader can acquire a lock.  Succeeds if the
290 * reader already owns a read lock and the lock is locked for read, which
291 * prevents deadlock from reader recursion.  Also succeeds if the lock
292 * is unlocked and has no write waiters or spinners.  Failing in all other
293 * cases gives writers priority over readers.
294 */
295#define	RW_CAN_READ(_rw)						\
296    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
297    (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
298    RW_LOCK_READ)
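/*
 * An illustrative restatement of RW_CAN_READ() for a snapshot v of
 * rw_lock (a summary of the macro above, not additional logic):
 *
 *	v == RW_UNLOCKED (encoded as a read lock with zero readers)
 *		-> a new reader may acquire the lock
 *	v read-locked with no write waiters or write spinner
 *		-> a new reader may acquire the lock
 *	v read-locked with write waiters or a write spinner present
 *		-> only a thread that already holds read locks
 *		   (curthread->td_rw_rlocks != 0) may acquire it, which
 *		   avoids deadlock on recursive read acquires
 *	v write-locked (owner thread pointer stored in rw_lock)
 *		-> no new reader may acquire the lock
 */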
299
300void
301_rw_rlock(struct rwlock *rw, const char *file, int line)
302{
303	struct turnstile *ts;
304#ifdef ADAPTIVE_RWLOCKS
305	volatile struct thread *owner;
306	int spintries = 0;
307	int i;
308#endif
309#ifdef LOCK_PROFILING
310	uint64_t waittime = 0;
311	int contested = 0;
312#endif
313	uintptr_t v;
314#ifdef KDTRACE_HOOKS
315	uint64_t spin_cnt = 0;
316	uint64_t sleep_cnt = 0;
317	int64_t sleep_time = 0;
318#endif
319
320	KASSERT(rw->rw_lock != RW_DESTROYED,
321	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
322	KASSERT(rw_wowner(rw) != curthread,
323	    ("%s (%s): wlock already held @ %s:%d", __func__,
324	    rw->lock_object.lo_name, file, line));
325	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
326
327	for (;;) {
328#ifdef KDTRACE_HOOKS
329		spin_cnt++;
330#endif
331		/*
332		 * Handle the easy case.  If no other thread has a write
333		 * lock, then try to bump up the count of read locks.  Note
334		 * that we have to preserve the current state of the
335		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
336		 * read lock, then rw_lock must have changed, so restart
337		 * the loop.  Note that this handles the case of a
338		 * completely unlocked rwlock since such a lock is encoded
339		 * as a read lock with no waiters.
340		 */
341		v = rw->rw_lock;
342		if (RW_CAN_READ(v)) {
343			/*
344			 * The RW_LOCK_READ_WAITERS flag should only be set
345			 * if the lock has been unlocked and write waiters
346			 * were present.
347			 */
348			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
349			    v + RW_ONE_READER)) {
350				if (LOCK_LOG_TEST(&rw->lock_object, 0))
351					CTR4(KTR_LOCK,
352					    "%s: %p succeed %p -> %p", __func__,
353					    rw, (void *)v,
354					    (void *)(v + RW_ONE_READER));
355				break;
356			}
357			continue;
358		}
359		lock_profile_obtain_lock_failed(&rw->lock_object,
360		    &contested, &waittime);
361
362#ifdef ADAPTIVE_RWLOCKS
363		/*
364		 * If the owner is running on another CPU, spin until
365		 * the owner stops running or the state of the lock
366		 * changes.
367		 */
368		if ((v & RW_LOCK_READ) == 0) {
369			owner = (struct thread *)RW_OWNER(v);
370			if (TD_IS_RUNNING(owner)) {
371				if (LOCK_LOG_TEST(&rw->lock_object, 0))
372					CTR3(KTR_LOCK,
373					    "%s: spinning on %p held by %p",
374					    __func__, rw, owner);
375				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
376				    owner && TD_IS_RUNNING(owner)) {
377					cpu_spinwait();
378#ifdef KDTRACE_HOOKS
379					spin_cnt++;
380#endif
381				}
382				continue;
383			}
384		} else if (spintries < rowner_retries) {
385			spintries++;
386			for (i = 0; i < rowner_loops; i++) {
387				v = rw->rw_lock;
388				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
389					break;
390				cpu_spinwait();
391			}
392			if (i != rowner_loops)
393				continue;
394		}
395#endif
396
397		/*
398		 * Okay, now it's the hard case.  Some other thread already
399		 * has a write lock or there are write waiters present, so
400		 * acquire the turnstile lock and begin the process of
401		 * blocking.
402		 */
403		ts = turnstile_trywait(&rw->lock_object);
404
405		/*
406		 * The lock might have been released while we spun, so
407		 * recheck its state and restart the loop if needed.
408		 */
409		v = rw->rw_lock;
410		if (RW_CAN_READ(v)) {
411			turnstile_cancel(ts);
412			continue;
413		}
414
415#ifdef ADAPTIVE_RWLOCKS
416		/*
417		 * The current lock owner might have started executing
418		 * on another CPU (or the lock could have changed
419		 * owners) while we were waiting on the turnstile
420		 * chain lock.  If so, drop the turnstile lock and try
421		 * again.
422		 */
423		if ((v & RW_LOCK_READ) == 0) {
424			owner = (struct thread *)RW_OWNER(v);
425			if (TD_IS_RUNNING(owner)) {
426				turnstile_cancel(ts);
427				continue;
428			}
429		}
430#endif
431
432		/*
433		 * The lock is held in write mode or it already has waiters.
434		 */
435		MPASS(!RW_CAN_READ(v));
436
437		/*
438		 * If the RW_LOCK_READ_WAITERS flag is already set, then
439		 * we can go ahead and block.  If it is not set then try
440		 * to set it.  If we fail to set it, drop the turnstile
441		 * lock and restart the loop.
442		 */
443		if (!(v & RW_LOCK_READ_WAITERS)) {
444			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
445			    v | RW_LOCK_READ_WAITERS)) {
446				turnstile_cancel(ts);
447				continue;
448			}
449			if (LOCK_LOG_TEST(&rw->lock_object, 0))
450				CTR2(KTR_LOCK, "%s: %p set read waiters flag",
451				    __func__, rw);
452		}
453
454		/*
455		 * We were unable to acquire the lock and the read waiters
456		 * flag is set, so we must block on the turnstile.
457		 */
458		if (LOCK_LOG_TEST(&rw->lock_object, 0))
459			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
460			    rw);
461#ifdef KDTRACE_HOOKS
462		sleep_time -= lockstat_nsecs();
463#endif
464		turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
465#ifdef KDTRACE_HOOKS
466		sleep_time += lockstat_nsecs();
467		sleep_cnt++;
468#endif
469		if (LOCK_LOG_TEST(&rw->lock_object, 0))
470			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
471			    __func__, rw);
472	}
473
474	/*
475	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
476	 * however.  turnstiles don't like owners changing between calls to
477	 * turnstile_wait() currently.
478	 */
479	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, rw, contested,
480	    waittime, file, line);
481	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
482	WITNESS_LOCK(&rw->lock_object, 0, file, line);
483	curthread->td_locks++;
484	curthread->td_rw_rlocks++;
485#ifdef KDTRACE_HOOKS
486	if (sleep_time)
487		LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time);
488
489	/*
490	 * Record only the loops spinning and not sleeping.
491	 */
492	if (spin_cnt > sleep_cnt)
493		LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
494#endif
495}
496
497int
498_rw_try_rlock(struct rwlock *rw, const char *file, int line)
499{
500	uintptr_t x;
501
502	for (;;) {
503		x = rw->rw_lock;
504		KASSERT(rw->rw_lock != RW_DESTROYED,
505		    ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
506		if (!(x & RW_LOCK_READ))
507			break;
508		if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) {
509			LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
510			    line);
511			WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
512			curthread->td_locks++;
513			curthread->td_rw_rlocks++;
514			return (1);
515		}
516	}
517
518	LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
519	return (0);
520}
521
522void
523_rw_runlock(struct rwlock *rw, const char *file, int line)
524{
525	struct turnstile *ts;
526	uintptr_t x, v, queue;
527
528	KASSERT(rw->rw_lock != RW_DESTROYED,
529	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
530	_rw_assert(rw, RA_RLOCKED, file, line);
531	curthread->td_locks--;
532	curthread->td_rw_rlocks--;
533	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
534	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
535
536	/* TODO: drop "owner of record" here. */
537
538	for (;;) {
539		/*
540		 * See if there is more than one read lock held.  If so,
541		 * just drop one and return.
542		 */
543		x = rw->rw_lock;
544		if (RW_READERS(x) > 1) {
545			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
546			    x - RW_ONE_READER)) {
547				if (LOCK_LOG_TEST(&rw->lock_object, 0))
548					CTR4(KTR_LOCK,
549					    "%s: %p succeeded %p -> %p",
550					    __func__, rw, (void *)x,
551					    (void *)(x - RW_ONE_READER));
552				break;
553			}
554			continue;
555		}
556		/*
557		 * If there aren't any waiters for a write lock, then try
558		 * to drop it quickly.
559		 */
560		if (!(x & RW_LOCK_WAITERS)) {
561			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
562			    RW_READERS_LOCK(1));
563			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
564			    RW_UNLOCKED)) {
565				if (LOCK_LOG_TEST(&rw->lock_object, 0))
566					CTR2(KTR_LOCK, "%s: %p last succeeded",
567					    __func__, rw);
568				break;
569			}
570			continue;
571		}
572		/*
573		 * Ok, we know we have waiters and we think we are the
574		 * last reader, so grab the turnstile lock.
575		 */
576		turnstile_chain_lock(&rw->lock_object);
577		v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
578		MPASS(v & RW_LOCK_WAITERS);
579
580		/*
581		 * Try to drop our lock, leaving the lock in an unlocked
582		 * state.
583		 *
584		 * If you wanted to do explicit lock handoff you'd have to
585		 * do it here.  You'd also want to use turnstile_signal()
586		 * and you'd have to handle the race where a higher
587		 * priority thread blocks on the write lock before the
588		 * thread you wake up actually runs, letting the new thread
589		 * "steal" the lock.  For now it's a lot simpler to just
590		 * wake up all of the waiters.
591		 *
592		 * As above, if we fail, then another thread might have
593		 * acquired a read lock, so drop the turnstile lock and
594		 * restart.
595		 */
596		x = RW_UNLOCKED;
597		if (v & RW_LOCK_WRITE_WAITERS) {
598			queue = TS_EXCLUSIVE_QUEUE;
599			x |= (v & RW_LOCK_READ_WAITERS);
600		} else
601			queue = TS_SHARED_QUEUE;
602		if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v,
603		    x)) {
604			turnstile_chain_unlock(&rw->lock_object);
605			continue;
606		}
607		if (LOCK_LOG_TEST(&rw->lock_object, 0))
608			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
609			    __func__, rw);
610
611		/*
612		 * Ok.  The lock is released and all that's left is to
613		 * wake up the waiters.  Note that the lock might not be
614		 * free anymore, but in that case the writers will just
615		 * block again if they run before the new lock holder(s)
616		 * release the lock.
617		 */
618		ts = turnstile_lookup(&rw->lock_object);
619		MPASS(ts != NULL);
620		turnstile_broadcast(ts, queue);
621		turnstile_unpend(ts, TS_SHARED_LOCK);
622		turnstile_chain_unlock(&rw->lock_object);
623		break;
624	}
625	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_RW_RUNLOCK_RELEASE, rw);
626}
627
628/*
629 * This function is called when we are unable to obtain a write lock on the
630 * first try.  This means that at least one other thread holds either a
631 * read or write lock.
632 */
633void
634_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
635{
636	struct turnstile *ts;
637#ifdef ADAPTIVE_RWLOCKS
638	volatile struct thread *owner;
639	int spintries = 0;
640	int i;
641#endif
642	uintptr_t v, x;
643#ifdef LOCK_PROFILING
644	uint64_t waittime = 0;
645	int contested = 0;
646#endif
647#ifdef KDTRACE_HOOKS
648	uint64_t spin_cnt = 0;
649	uint64_t sleep_cnt = 0;
650	int64_t sleep_time = 0;
651#endif
652
653	if (rw_wlocked(rw)) {
654		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
655		    ("%s: recursing but non-recursive rw %s @ %s:%d\n",
656		    __func__, rw->lock_object.lo_name, file, line));
657		rw->rw_recurse++;
658		if (LOCK_LOG_TEST(&rw->lock_object, 0))
659			CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
660		return;
661	}
662
663	if (LOCK_LOG_TEST(&rw->lock_object, 0))
664		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
665		    rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
666
667	while (!_rw_write_lock(rw, tid)) {
668#ifdef KDTRACE_HOOKS
669		spin_cnt++;
670#endif
671		lock_profile_obtain_lock_failed(&rw->lock_object,
672		    &contested, &waittime);
673#ifdef ADAPTIVE_RWLOCKS
674		/*
675		 * If the lock is write locked and the owner is
676		 * running on another CPU, spin until the owner stops
677		 * running or the state of the lock changes.
678		 */
679		v = rw->rw_lock;
680		owner = (struct thread *)RW_OWNER(v);
681		if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) {
682			if (LOCK_LOG_TEST(&rw->lock_object, 0))
683				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
684				    __func__, rw, owner);
685			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
686			    TD_IS_RUNNING(owner)) {
687				cpu_spinwait();
688#ifdef KDTRACE_HOOKS
689				spin_cnt++;
690#endif
691			}
692			continue;
693		}
694		if ((v & RW_LOCK_READ) && RW_READERS(v) &&
695		    spintries < rowner_retries) {
696			if (!(v & RW_LOCK_WRITE_SPINNER)) {
697				if (!atomic_cmpset_ptr(&rw->rw_lock, v,
698				    v | RW_LOCK_WRITE_SPINNER)) {
699					continue;
700				}
701			}
702			spintries++;
703			for (i = 0; i < rowner_loops; i++) {
704				if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0)
705					break;
706				cpu_spinwait();
707			}
708#ifdef KDTRACE_HOOKS
709			spin_cnt += rowner_loops - i;
710#endif
711			if (i != rowner_loops)
712				continue;
713		}
714#endif
715		ts = turnstile_trywait(&rw->lock_object);
716		v = rw->rw_lock;
717
718#ifdef ADAPTIVE_RWLOCKS
719		/*
720		 * The current lock owner might have started executing
721		 * on another CPU (or the lock could have changed
722		 * owners) while we were waiting on the turnstile
723		 * chain lock.  If so, drop the turnstile lock and try
724		 * again.
725		 */
726		if (!(v & RW_LOCK_READ)) {
727			owner = (struct thread *)RW_OWNER(v);
728			if (TD_IS_RUNNING(owner)) {
729				turnstile_cancel(ts);
730				continue;
731			}
732		}
733#endif
734		/*
735		 * Check the waiters flags on this rwlock.  If the lock
736		 * was released without leaving any waiters pending, simply
737		 * try to acquire it.  If a pending waiters queue is
738		 * present, claim the lock ownership and preserve the
739		 * pending queue.
740		 */
741		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
742		if ((v & ~x) == RW_UNLOCKED) {
743			x &= ~RW_LOCK_WRITE_SPINNER;
744			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) {
745				if (x)
746					turnstile_claim(ts);
747				else
748					turnstile_cancel(ts);
749				break;
750			}
751			turnstile_cancel(ts);
752			continue;
753		}
754		/*
755		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
756		 * set it.  If we fail to set it, then loop back and try
757		 * again.
758		 */
759		if (!(v & RW_LOCK_WRITE_WAITERS)) {
760			if (!atomic_cmpset_ptr(&rw->rw_lock, v,
761			    v | RW_LOCK_WRITE_WAITERS)) {
762				turnstile_cancel(ts);
763				continue;
764			}
765			if (LOCK_LOG_TEST(&rw->lock_object, 0))
766				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
767				    __func__, rw);
768		}
769		/*
770		 * We were unable to acquire the lock and the write waiters
771		 * flag is set, so we must block on the turnstile.
772		 */
773		if (LOCK_LOG_TEST(&rw->lock_object, 0))
774			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
775			    rw);
776#ifdef KDTRACE_HOOKS
777		sleep_time -= lockstat_nsecs();
778#endif
779		turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
780#ifdef KDTRACE_HOOKS
781		sleep_time += lockstat_nsecs();
782		sleep_cnt++;
783#endif
784		if (LOCK_LOG_TEST(&rw->lock_object, 0))
785			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
786			    __func__, rw);
787#ifdef ADAPTIVE_RWLOCKS
788		spintries = 0;
789#endif
790	}
791	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested,
792	    waittime, file, line);
793#ifdef KDTRACE_HOOKS
794	if (sleep_time)
795		LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time);
796
797	/*
798	 * Record only the loops spinning and not sleeping.
799	 */
800	if (spin_cnt > sleep_cnt)
801		LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt));
802#endif
803}
804
805/*
806 * This function is called if the first try at releasing a write lock failed.
807 * This means that one of the two waiter bits must be set, indicating that at
808 * least one thread is waiting on this lock.
809 */
810void
811_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
812{
813	struct turnstile *ts;
814	uintptr_t v;
815	int queue;
816
817	if (rw_wlocked(rw) && rw_recursed(rw)) {
818		rw->rw_recurse--;
819		if (LOCK_LOG_TEST(&rw->lock_object, 0))
820			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
821		return;
822	}
823
824	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
825	    ("%s: neither of the waiter flags are set", __func__));
826
827	if (LOCK_LOG_TEST(&rw->lock_object, 0))
828		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
829
830	turnstile_chain_lock(&rw->lock_object);
831	ts = turnstile_lookup(&rw->lock_object);
832	MPASS(ts != NULL);
833
834	/*
835 * Use the same algorithm as sx locks for now.  Prefer waking up shared
836	 * waiters if we have any over writers.  This is probably not ideal.
837	 *
838	 * 'v' is the value we are going to write back to rw_lock.  If we
839	 * have waiters on both queues, we need to preserve the state of
840	 * the waiter flag for the queue we don't wake up.  For now this is
841	 * hardcoded for the algorithm mentioned above.
842	 *
843 * In the case of both readers and writers waiting, we wake up the
844 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
845 * new writer comes in before a reader, it will claim the lock up
846	 * above.  There is probably a potential priority inversion in
847	 * there that could be worked around either by waking both queues
848	 * of waiters or doing some complicated lock handoff gymnastics.
849	 */
850	v = RW_UNLOCKED;
851	if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) {
852		queue = TS_EXCLUSIVE_QUEUE;
853		v |= (rw->rw_lock & RW_LOCK_READ_WAITERS);
854	} else
855		queue = TS_SHARED_QUEUE;
856
857	/* Wake up all waiters for the specific queue. */
858	if (LOCK_LOG_TEST(&rw->lock_object, 0))
859		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
860		    queue == TS_SHARED_QUEUE ? "read" : "write");
861	turnstile_broadcast(ts, queue);
862	atomic_store_rel_ptr(&rw->rw_lock, v);
863	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
864	turnstile_chain_unlock(&rw->lock_object);
865}
866
867/*
868 * Attempt to do a non-blocking upgrade from a read lock to a write
869 * lock.  This will only succeed if this thread holds a single read
870 * lock.  Returns true if the upgrade succeeded and false otherwise.
871 */
872int
873_rw_try_upgrade(struct rwlock *rw, const char *file, int line)
874{
875	uintptr_t v, x, tid;
876	struct turnstile *ts;
877	int success;
878
879	KASSERT(rw->rw_lock != RW_DESTROYED,
880	    ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
881	_rw_assert(rw, RA_RLOCKED, file, line);
882
883	/*
884	 * Attempt to switch from one reader to a writer.  If there
885	 * are any write waiters, then we will have to lock the
886	 * turnstile first to prevent races with another writer
887	 * calling turnstile_wait() before we have claimed this
888	 * turnstile.  So, do the simple case of no waiters first.
889	 */
890	tid = (uintptr_t)curthread;
891	success = 0;
892	for (;;) {
893		v = rw->rw_lock;
894		if (RW_READERS(v) > 1)
895			break;
896		if (!(v & RW_LOCK_WAITERS)) {
897			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
898			if (!success)
899				continue;
900			break;
901		}
902
903		/*
904		 * Ok, we think we have waiters, so lock the turnstile.
905		 */
906		ts = turnstile_trywait(&rw->lock_object);
907		v = rw->rw_lock;
908		if (RW_READERS(v) > 1) {
909			turnstile_cancel(ts);
910			break;
911		}
912		/*
913		 * Try to switch from one reader to a writer again.  This time
914		 * we honor the current state of the waiters flags.
915		 * If we obtain the lock with the flags set, then claim
916		 * ownership of the turnstile.
917		 */
918		x = rw->rw_lock & RW_LOCK_WAITERS;
919		success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x);
920		if (success) {
921			if (x)
922				turnstile_claim(ts);
923			else
924				turnstile_cancel(ts);
925			break;
926		}
927		turnstile_cancel(ts);
928	}
929	LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
930	if (success) {
931		curthread->td_rw_rlocks--;
932		WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
933		    file, line);
934		LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, rw);
935	}
936	return (success);
937}
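/*
 * The usual calling pattern for rw_try_upgrade() (a sketch; foo_lock is
 * a hypothetical lock, not one used in this file):
 *
 *	rw_rlock(&foo_lock);
 *	-- ... decide that the protected data must be modified ...
 *	if (!rw_try_upgrade(&foo_lock)) {
 *		-- Upgrade failed (other readers or waiters present), so
 *		-- drop the read lock and take the write lock.  The state
 *		-- must be re-validated because the lock was dropped.
 *		rw_runlock(&foo_lock);
 *		rw_wlock(&foo_lock);
 *	}
 *	-- ... modify the data with the write lock held ...
 *	rw_wunlock(&foo_lock);
 */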
938
939/*
940 * Downgrade a write lock into a single read lock.
941 */
942void
943_rw_downgrade(struct rwlock *rw, const char *file, int line)
944{
945	struct turnstile *ts;
946	uintptr_t tid, v;
947	int rwait, wwait;
948
949	KASSERT(rw->rw_lock != RW_DESTROYED,
950	    ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
951	_rw_assert(rw, RA_WLOCKED | RA_NOTRECURSED, file, line);
952#ifndef INVARIANTS
953	if (rw_recursed(rw))
954		panic("downgrade of a recursed lock");
955#endif
956
957	WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
958
959	/*
960	 * Convert from a writer to a single reader.  First we handle
961	 * the easy case with no waiters.  If there are any waiters, we
962	 * lock the turnstile and "disown" the lock.
963	 */
964	tid = (uintptr_t)curthread;
965	if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
966		goto out;
967
968	/*
969	 * Ok, we think we have waiters, so lock the turnstile so we can
970	 * read the waiter flags without any races.
971	 */
972	turnstile_chain_lock(&rw->lock_object);
973	v = rw->rw_lock & RW_LOCK_WAITERS;
974	rwait = v & RW_LOCK_READ_WAITERS;
975	wwait = v & RW_LOCK_WRITE_WAITERS;
976	MPASS(rwait | wwait);
977
978	/*
979	 * Downgrade from a write lock while preserving waiters flag
980	 * and give up ownership of the turnstile.
981	 */
982	ts = turnstile_lookup(&rw->lock_object);
983	MPASS(ts != NULL);
984	if (!wwait)
985		v &= ~RW_LOCK_READ_WAITERS;
986	atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
987	/*
988	 * Wake other readers if there are no writers pending.  Otherwise they
989	 * won't be able to acquire the lock anyway.
990	 */
991	if (rwait && !wwait) {
992		turnstile_broadcast(ts, TS_SHARED_QUEUE);
993		turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
994	} else
995		turnstile_disown(ts);
996	turnstile_chain_unlock(&rw->lock_object);
997out:
998	curthread->td_rw_rlocks++;
999	LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
1000	LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, rw);
1001}
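/*
 * Typical use of rw_downgrade(): perform an update under the write lock
 * and then keep reading the updated data without opening a window in
 * which another writer could change it (a sketch; foo_lock is
 * hypothetical):
 *
 *	rw_wlock(&foo_lock);
 *	-- ... update the protected data ...
 *	rw_downgrade(&foo_lock);	-- now holds a single read lock
 *	-- ... read the data; other readers may now enter ...
 *	rw_runlock(&foo_lock);
 */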
1002
1003#ifdef INVARIANT_SUPPORT
1004#ifndef INVARIANTS
1005#undef _rw_assert
1006#endif
1007
1008/*
1009 * In the non-WITNESS case, rw_assert() can only detect that at least
1010 * *some* thread owns an rlock, but it cannot guarantee that *this*
1011 * thread owns an rlock.
1012 */
1013void
1014_rw_assert(const struct rwlock *rw, int what, const char *file, int line)
1015{
1016
1017	if (panicstr != NULL)
1018		return;
1019	switch (what) {
1020	case RA_LOCKED:
1021	case RA_LOCKED | RA_RECURSED:
1022	case RA_LOCKED | RA_NOTRECURSED:
1023	case RA_RLOCKED:
1024#ifdef WITNESS
1025		witness_assert(&rw->lock_object, what, file, line);
1026#else
1027		/*
1028		 * If some other thread has a write lock or we have one
1029		 * and are asserting a read lock, fail.  Also, if no one
1030		 * has a lock at all, fail.
1031		 */
1032		if (rw->rw_lock == RW_UNLOCKED ||
1033		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
1034		    rw_wowner(rw) != curthread)))
1035			panic("Lock %s not %slocked @ %s:%d\n",
1036			    rw->lock_object.lo_name, (what == RA_RLOCKED) ?
1037			    "read " : "", file, line);
1038
1039		if (!(rw->rw_lock & RW_LOCK_READ)) {
1040			if (rw_recursed(rw)) {
1041				if (what & RA_NOTRECURSED)
1042					panic("Lock %s recursed @ %s:%d\n",
1043					    rw->lock_object.lo_name, file,
1044					    line);
1045			} else if (what & RA_RECURSED)
1046				panic("Lock %s not recursed @ %s:%d\n",
1047				    rw->lock_object.lo_name, file, line);
1048		}
1049#endif
1050		break;
1051	case RA_WLOCKED:
1052	case RA_WLOCKED | RA_RECURSED:
1053	case RA_WLOCKED | RA_NOTRECURSED:
1054		if (rw_wowner(rw) != curthread)
1055			panic("Lock %s not exclusively locked @ %s:%d\n",
1056			    rw->lock_object.lo_name, file, line);
1057		if (rw_recursed(rw)) {
1058			if (what & RA_NOTRECURSED)
1059				panic("Lock %s recursed @ %s:%d\n",
1060				    rw->lock_object.lo_name, file, line);
1061		} else if (what & RA_RECURSED)
1062			panic("Lock %s not recursed @ %s:%d\n",
1063			    rw->lock_object.lo_name, file, line);
1064		break;
1065	case RA_UNLOCKED:
1066#ifdef WITNESS
1067		witness_assert(&rw->lock_object, what, file, line);
1068#else
1069		/*
1070		 * If we hold a write lock, fail.  We can't reliably check
1071		 * to see if we hold a read lock or not.
1072		 */
1073		if (rw_wowner(rw) == curthread)
1074			panic("Lock %s exclusively locked @ %s:%d\n",
1075			    rw->lock_object.lo_name, file, line);
1076#endif
1077		break;
1078	default:
1079		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
1080		    line);
1081	}
1082}
1083#endif /* INVARIANT_SUPPORT */
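/*
 * Consumers typically use rw_assert() to document and enforce locking
 * requirements at function entry (a sketch; foo_lock, struct foo and
 * foo_modify() are hypothetical):
 *
 *	static void
 *	foo_modify(struct foo *fp)
 *	{
 *
 *		rw_assert(&foo_lock, RA_WLOCKED);
 *		-- ... fp may be modified; the write lock is held ...
 *	}
 */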
1084
1085#ifdef DDB
1086void
1087db_show_rwlock(const struct lock_object *lock)
1088{
1089	const struct rwlock *rw;
1090	struct thread *td;
1091
1092	rw = (const struct rwlock *)lock;
1093
1094	db_printf(" state: ");
1095	if (rw->rw_lock == RW_UNLOCKED)
1096		db_printf("UNLOCKED\n");
1097	else if (rw->rw_lock == RW_DESTROYED) {
1098		db_printf("DESTROYED\n");
1099		return;
1100	} else if (rw->rw_lock & RW_LOCK_READ)
1101		db_printf("RLOCK: %ju locks\n",
1102		    (uintmax_t)(RW_READERS(rw->rw_lock)));
1103	else {
1104		td = rw_wowner(rw);
1105		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1106		    td->td_tid, td->td_proc->p_pid, td->td_name);
1107		if (rw_recursed(rw))
1108			db_printf(" recursed: %u\n", rw->rw_recurse);
1109	}
1110	db_printf(" waiters: ");
1111	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
1112	case RW_LOCK_READ_WAITERS:
1113		db_printf("readers\n");
1114		break;
1115	case RW_LOCK_WRITE_WAITERS:
1116		db_printf("writers\n");
1117		break;
1118	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
1119		db_printf("readers and writers\n");
1120		break;
1121	default:
1122		db_printf("none\n");
1123		break;
1124	}
1125}
1126
1127#endif
1128