kern_rwlock.c revision 154973
/*-
 * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Machine independent bits of reader/writer lock implementation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_rwlock.c 154973 2006-01-29 02:35:22Z mlaier $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/turnstile.h>

#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>

static void	db_show_rwlock(struct lock_object *lock);
#endif

struct lock_class lock_class_rw = {
	"rw",
	LC_SLEEPLOCK | LC_RECURSABLE /* | LC_UPGRADABLE */,
#ifdef DDB
	db_show_rwlock
#endif
};

#define	rw_owner(rw)							\
	((rw)->rw_lock & RW_LOCK_READ ? NULL :				\
	    (struct thread *)RW_OWNER((rw)->rw_lock))

#ifndef INVARIANTS
#define	_rw_assert(rw, what, file, line)
#endif
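
/*
 * The rw_lock word of a struct rwlock encodes the entire lock state (see
 * sys/rwlock.h for the authoritative definitions).  In outline: when
 * RW_LOCK_READ is set the lock is read-locked and the upper bits hold the
 * reader count (RW_READERS(), bumped by RW_ONE_READER per reader); an
 * unlocked lock is just a read lock with zero readers and no waiter flags
 * (RW_UNLOCKED).  When RW_LOCK_READ is clear the lock is write-locked and
 * the upper bits hold the owning thread pointer (RW_OWNER()).  The
 * RW_LOCK_READ_WAITERS and RW_LOCK_WRITE_WAITERS flags record blocked
 * readers and writers, respectively.
 */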

void
rw_init(struct rwlock *rw, const char *name)
{

	rw->rw_lock = RW_UNLOCKED;

	lock_init(&rw->rw_object, &lock_class_rw, name, NULL, LO_WITNESS |
	    LO_RECURSABLE /* | LO_UPGRADABLE */);
}

void
rw_destroy(struct rwlock *rw)
{

	KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock not unlocked"));
	lock_destroy(&rw->rw_object);
}

void
rw_sysinit(void *arg)
{
	struct rw_args *args = arg;

	rw_init(args->ra_rw, args->ra_desc);
}
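
/*
 * Illustrative sketch (the "example" names below are hypothetical): a
 * statically allocated lock can be initialized at boot by pointing a
 * SYSINIT at rw_sysinit with a struct rw_args describing the lock.
 */
#if 0
static struct rwlock example_rw;
static struct rw_args example_rw_args = {
	.ra_rw = &example_rw,
	.ra_desc = "example rwlock"
};
SYSINIT(example_rw_init, SI_SUB_LOCK, SI_ORDER_MIDDLE, rw_sysinit,
    &example_rw_args);
#endif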

void
_rw_wlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	KASSERT(rw_owner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->rw_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line);
	__rw_wlock(rw, curthread, file, line);
	LOCK_LOG_LOCK("WLOCK", &rw->rw_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
}

void
_rw_wunlock(struct rwlock *rw, const char *file, int line)
{

	MPASS(curthread != NULL);
	_rw_assert(rw, RA_WLOCKED, file, line);
	WITNESS_UNLOCK(&rw->rw_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("WUNLOCK", &rw->rw_object, 0, 0, file, line);
	__rw_wunlock(rw, curthread, file, line);
}
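
/*
 * Illustrative sketch of the intended usage pattern, reusing the
 * hypothetical names from the sketch above: many readers may hold the
 * lock concurrently, while a writer excludes both readers and other
 * writers.
 */
#if 0
static int example_count;

static int
example_lookup(void)
{
	int v;

	rw_rlock(&example_rw);		/* shared: lookups run in parallel */
	v = example_count;
	rw_runlock(&example_rw);
	return (v);
}

static void
example_update(void)
{

	rw_wlock(&example_rw);		/* exclusive: safe to modify */
	example_count++;
	rw_wunlock(&example_rw);
}
#endif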

void
_rw_rlock(struct rwlock *rw, const char *file, int line)
{
	uintptr_t x;

	KASSERT(rw_owner(rw) != curthread,
	    ("%s (%s): wlock already held @ %s:%d", __func__,
	    rw->rw_object.lo_name, file, line));
	WITNESS_CHECKORDER(&rw->rw_object, LOP_NEWORDER, file, line);

	/*
	 * Note that we don't make any attempt to try to block read
	 * locks once a writer has blocked on the lock.  The reason is
	 * that we currently allow for read locks to recurse and we
	 * don't keep track of all the holders of read locks.  Thus, if
	 * we were to block readers once a writer blocked and a reader
	 * tried to recurse on their reader lock after a writer had
	 * blocked we would end up in a deadlock since the reader would
	 * be blocked on the writer, and the writer would be blocked
	 * waiting for the reader to release its original read lock.
	 */
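
	/*
	 * For example, with threads A and B contending on lock R, blocking
	 * new readers behind a waiting writer would permit:
	 *
	 *	A: rw_rlock(R)		- A acquires a read lock
	 *	B: rw_wlock(R)		- B blocks waiting for A
	 *	A: rw_rlock(R)		- recursing; A would block behind B
	 *				  while B waits on A: deadlock.
	 */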
	for (;;) {
		/*
		 * Handle the easy case.  If no other thread has a write
		 * lock, then try to bump up the count of read locks.  Note
		 * that we have to preserve the current state of the
		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
		 * read lock, then rw_lock must have changed, so restart
		 * the loop.  Note that this handles the case of a
		 * completely unlocked rwlock since such a lock is encoded
		 * as a read lock with no waiters.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {

			/*
			 * The RW_LOCK_READ_WAITERS flag should only be set
			 * if another thread currently holds a write lock,
			 * and in that case RW_LOCK_READ should be clear.
			 */
			MPASS((x & RW_LOCK_READ_WAITERS) == 0);
			if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
			    x + RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x + RW_ONE_READER));
				break;
			}
			continue;
		}

		/*
		 * Okay, now it's the hard case.  Some other thread already
		 * has a write lock, so acquire the turnstile lock so we can
		 * begin the process of blocking.
		 */
		turnstile_lock(&rw->rw_object);

		/*
		 * The lock might have been released while we spun, so
		 * recheck its state and restart the loop if there is no
		 * longer a write lock.
		 */
		x = rw->rw_lock;
		if (x & RW_LOCK_READ) {
			turnstile_release(&rw->rw_object);
			continue;
		}

		/*
		 * Ok, it's still a write lock.  If the RW_LOCK_READ_WAITERS
		 * flag is already set, then we can go ahead and block.  If
		 * it is not set then try to set it.  If we fail to set it,
		 * drop the turnstile lock and restart the loop.
		 */
		if (!(x & RW_LOCK_READ_WAITERS) &&
		    !atomic_cmpset_ptr(&rw->rw_lock, x,
		    x | RW_LOCK_READ_WAITERS)) {
			turnstile_release(&rw->rw_object);
			continue;
		}
		if (!(x & RW_LOCK_READ_WAITERS) &&
		    LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p set read waiters flag",
			    __func__, rw);

		/*
		 * We were unable to acquire the lock and the read waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->rw_object, rw_owner(rw), TS_SHARED_QUEUE);
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}

	/*
	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
	 * however.  turnstiles don't like owners changing between calls to
	 * turnstile_wait() currently.
	 */

	LOCK_LOG_LOCK("RLOCK", &rw->rw_object, 0, 0, file, line);
	WITNESS_LOCK(&rw->rw_object, 0, file, line);
}

void
_rw_runlock(struct rwlock *rw, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t x;

	_rw_assert(rw, RA_RLOCKED, file, line);
	WITNESS_UNLOCK(&rw->rw_object, 0, file, line);
	LOCK_LOG_LOCK("RUNLOCK", &rw->rw_object, 0, 0, file, line);

	/* TODO: drop "owner of record" here. */

	for (;;) {
		/*
		 * See if there is more than one read lock held.  If so,
		 * just drop one and return.
		 */
		x = rw->rw_lock;
		if (RW_READERS(x) > 1) {
			if (atomic_cmpset_ptr(&rw->rw_lock, x,
			    x - RW_ONE_READER)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, rw, (void *)x,
					    (void *)(x - RW_ONE_READER));
				break;
			}
			continue;
		}

		/*
		 * We should never have read waiters while at least one
		 * thread holds a read lock.  (See note above.)
		 */
		KASSERT(!(x & RW_LOCK_READ_WAITERS),
		    ("%s: waiting readers", __func__));

		/*
		 * If there aren't any waiters for a write lock, then try
		 * to drop it quickly.
		 */
		if (!(x & RW_LOCK_WRITE_WAITERS)) {

			/*
			 * There shouldn't be any flags set and we should
			 * be the only read lock.  If we fail to release
			 * the single read lock, then another thread might
			 * have just acquired a read lock, so go back up
			 * to the multiple read locks case.
			 */
			MPASS(x == RW_READERS_LOCK(1));
			if (atomic_cmpset_ptr(&rw->rw_lock, RW_READERS_LOCK(1),
			    RW_UNLOCKED)) {
				if (LOCK_LOG_TEST(&rw->rw_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, rw);
				break;
			}
			continue;
		}

		/*
		 * There should just be one reader with one or more
		 * writers waiting.
		 */
		MPASS(x == (RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS));

		/*
		 * Ok, we know we have a waiting writer and we think we
		 * are the last reader, so grab the turnstile lock.
		 */
		turnstile_lock(&rw->rw_object);

		/*
		 * Try to drop our lock leaving the lock in an unlocked
		 * state.
		 *
		 * If you wanted to do explicit lock handoff you'd have to
		 * do it here.  You'd also want to use turnstile_signal()
		 * and you'd have to handle the race where a higher
		 * priority thread blocks on the write lock before the
		 * thread you wake up actually runs and have the new thread
		 * "steal" the lock.  For now it's a lot simpler to just
		 * wake up all of the waiters.
		 *
		 * As above, if we fail, then another thread might have
		 * acquired a read lock, so drop the turnstile lock and
		 * restart.
		 */
		if (!atomic_cmpset_ptr(&rw->rw_lock,
		    RW_READERS_LOCK(1) | RW_LOCK_WRITE_WAITERS, RW_UNLOCKED)) {
			turnstile_release(&rw->rw_object);
			continue;
		}
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
			    __func__, rw);

		/*
		 * Ok.  The lock is released and all that's left is to
		 * wake up the waiters.  Note that the lock might not be
		 * free anymore, but in that case the writers will just
		 * block again if they run before the new lock holder(s)
		 * release the lock.
		 */
		ts = turnstile_lookup(&rw->rw_object);
		turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
		turnstile_unpend(ts, TS_SHARED_LOCK);
		break;
	}
}

/*
 * This function is called when we are unable to obtain a write lock on the
 * first try.  This means that at least one other thread holds either a
 * read or write lock.
 */
void
_rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	uintptr_t v;

	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    rw->rw_object.lo_name, (void *)rw->rw_lock, file, line);

	while (!_rw_write_lock(rw, tid)) {
		turnstile_lock(&rw->rw_object);
		v = rw->rw_lock;

		/*
		 * If the lock was released while spinning on the
		 * turnstile chain lock, try again.
		 */
		if (v == RW_UNLOCKED) {
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the lock was released by a writer with both readers
		 * and writers waiting and a reader hasn't woken up and
		 * acquired the lock yet, rw_lock will be set to the
		 * value RW_UNLOCKED | RW_LOCK_WRITE_WAITERS.  If we see
		 * that value, try to acquire it once.  Note that we have
		 * to preserve the RW_LOCK_WRITE_WAITERS flag as there are
		 * still other writers waiting.  If we fail, restart the
		 * loop.
		 */
		if (v == (RW_UNLOCKED | RW_LOCK_WRITE_WAITERS)) {
			if (atomic_cmpset_acq_ptr(&rw->rw_lock,
			    RW_UNLOCKED | RW_LOCK_WRITE_WAITERS,
			    tid | RW_LOCK_WRITE_WAITERS)) {
				turnstile_claim(&rw->rw_object);
				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
				    __func__, rw);
				break;
			}
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}

		/*
		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
		 * set it.  If we fail to set it, then loop back and try
		 * again.
		 */
		if (!(v & RW_LOCK_WRITE_WAITERS) &&
		    !atomic_cmpset_ptr(&rw->rw_lock, v,
		    v | RW_LOCK_WRITE_WAITERS)) {
			turnstile_release(&rw->rw_object);
			cpu_spinwait();
			continue;
		}
		if (!(v & RW_LOCK_WRITE_WAITERS) &&
		    LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p set write waiters flag",
			    __func__, rw);

		/* XXX: Adaptively spin if current wlock owner on another CPU? */

		/*
		 * We were unable to acquire the lock and the write waiters
		 * flag is set, so we must block on the turnstile.
		 */
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
			    rw);
		turnstile_wait(&rw->rw_object, rw_owner(rw),
		    TS_EXCLUSIVE_QUEUE);
		if (LOCK_LOG_TEST(&rw->rw_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
			    __func__, rw);
	}
}

/*
 * This function is called if the first try at releasing a write lock failed.
 * This means that one of the two waiter bits must be set, indicating that at
 * least one thread is waiting on this lock.
 */
void
_rw_wunlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
{
	struct turnstile *ts;
	uintptr_t v;
	int queue;

	KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
	    ("%s: neither of the waiter flags are set", __func__));

	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);

	turnstile_lock(&rw->rw_object);
	ts = turnstile_lookup(&rw->rw_object);

	/* XXX: Adaptive fixup would be required here. */
	MPASS(ts != NULL);

	/*
	 * Use the same algorithm as sx locks for now.  Prefer waking up
	 * shared waiters if we have any over writers.  This is probably not
	 * ideal.
	 *
	 * 'v' is the value we are going to write back to rw_lock.  If we
	 * have waiters on both queues, we need to preserve the state of
	 * the waiter flag for the queue we don't wake up.  For now this is
	 * hardcoded for the algorithm mentioned above.
	 *
	 * In the case of both readers and writers waiting we wake up the
	 * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
	 * new writer comes in before a reader it will claim the lock up
	 * above.  There is probably a potential priority inversion in
	 * there that could be worked around either by waking both queues
	 * of waiters or doing some complicated lock handoff gymnastics.
	 */
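
	/*
	 * Concretely: with both waiter bits set we pick TS_SHARED_QUEUE and
	 * write back RW_UNLOCKED | RW_LOCK_WRITE_WAITERS, i.e. an unlocked
	 * word that still advertises the blocked writers; with only write
	 * waiters we pick TS_EXCLUSIVE_QUEUE and write back plain
	 * RW_UNLOCKED.
	 */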
	if (rw->rw_lock & RW_LOCK_READ_WAITERS) {
		queue = TS_SHARED_QUEUE;
		v = RW_UNLOCKED | (rw->rw_lock & RW_LOCK_WRITE_WAITERS);
	} else {
		queue = TS_EXCLUSIVE_QUEUE;
		v = RW_UNLOCKED;
	}
	if (LOCK_LOG_TEST(&rw->rw_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
		    queue == TS_SHARED_QUEUE ? "read" : "write");

	/* Wake up all waiters for the specific queue. */
	turnstile_broadcast(ts, queue);
	atomic_store_rel_ptr(&rw->rw_lock, v);
	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef _rw_assert
#endif

/*
 * In the non-WITNESS case, rw_assert() can only detect that at least
 * *some* thread owns an rlock, but it cannot guarantee that *this*
 * thread owns an rlock.
 */
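
/*
 * For example, without WITNESS an rw_assert(rw, RA_RLOCKED) issued by a
 * thread that holds no locks will still pass whenever some other thread
 * happens to hold a read lock on 'rw'.
 */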
void
_rw_assert(struct rwlock *rw, int what, const char *file, int line)
{

	if (panicstr != NULL)
		return;
	switch (what) {
	case RA_LOCKED:
	case RA_RLOCKED:
#ifdef WITNESS
		witness_assert(&rw->rw_object, what, file, line);
#else
		/*
		 * If some other thread has a write lock or we have one
		 * and are asserting a read lock, fail.  Also, if no one
		 * has a lock at all, fail.
		 */
		if (rw->rw_lock == RW_UNLOCKED ||
		    (!(rw->rw_lock & RW_LOCK_READ) && (what == RA_RLOCKED ||
		    rw_owner(rw) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    rw->rw_object.lo_name, (what == RA_RLOCKED) ?
			    "read " : "", file, line);
#endif
		break;
	case RA_WLOCKED:
		if (rw_owner(rw) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    rw->rw_object.lo_name, file, line);
		break;
	case RA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&rw->rw_object, what, file, line);
#else
		/*
		 * If we hold a write lock, fail.  We can't reliably check
		 * to see if we hold a read lock or not.
		 */
		if (rw_owner(rw) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    rw->rw_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif /* INVARIANT_SUPPORT */

#ifdef DDB
void
db_show_rwlock(struct lock_object *lock)
{
	struct rwlock *rw;
	struct thread *td;

	rw = (struct rwlock *)lock;

	db_printf(" state: ");
	if (rw->rw_lock == RW_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (rw->rw_lock & RW_LOCK_READ)
		db_printf("RLOCK: %jd locks\n",
		    (intmax_t)(RW_READERS(rw->rw_lock)));
	else {
		td = rw_owner(rw);
		db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_proc->p_comm);
	}
	db_printf(" waiters: ");
	switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
	case RW_LOCK_READ_WAITERS:
		db_printf("readers\n");
		break;
	case RW_LOCK_WRITE_WAITERS:
		db_printf("writers\n");
		break;
	case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
		db_printf("readers and writers\n");
		break;
	default:
		db_printf("none\n");
		break;
	}
}

#endif