1/*-
2 * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice(s), this list of conditions and the following disclaimer as
10 *    the first lines of this file unmodified other than the possible
11 *    addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice(s), this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 * DAMAGE.
27 */
28
29#include "opt_adaptive_lockmgrs.h"
30#include "opt_ddb.h"
31#include "opt_hwpmc_hooks.h"
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/kern/kern_lock.c 277528 2015-01-22 11:12:42Z hselasky $");
35
36#include <sys/param.h>
37#include <sys/kdb.h>
38#include <sys/ktr.h>
39#include <sys/lock.h>
40#include <sys/lock_profile.h>
41#include <sys/lockmgr.h>
42#include <sys/mutex.h>
43#include <sys/proc.h>
44#include <sys/sleepqueue.h>
45#ifdef DEBUG_LOCKS
46#include <sys/stack.h>
47#endif
48#include <sys/sysctl.h>
49#include <sys/systm.h>
50
51#include <machine/cpu.h>
52
53#ifdef DDB
54#include <ddb/ddb.h>
55#endif
56
57#ifdef HWPMC_HOOKS
58#include <sys/pmckern.h>
59PMC_SOFT_DECLARE( , , lock, failed);
60#endif
61
62CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
63    (LK_ADAPTIVE | LK_NOSHARE));
64CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
65    ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
66
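/* Indexes of the sleepqueues used for the exclusive and shared waiters. */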
67#define	SQ_EXCLUSIVE_QUEUE	0
68#define	SQ_SHARED_QUEUE		1
69
70#ifndef INVARIANTS
71#define	_lockmgr_assert(lk, what, file, line)
72#define	TD_LOCKS_INC(td)
73#define	TD_LOCKS_DEC(td)
74#else
75#define	TD_LOCKS_INC(td)	((td)->td_locks++)
76#define	TD_LOCKS_DEC(td)	((td)->td_locks--)
77#endif
78#define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
79#define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
80
81#ifndef DEBUG_LOCKS
82#define	STACK_PRINT(lk)
83#define	STACK_SAVE(lk)
84#define	STACK_ZERO(lk)
85#else
86#define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
87#define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
88#define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
89#endif
90
91#define	LOCK_LOG2(lk, string, arg1, arg2)				\
92	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
93		CTR2(KTR_LOCK, (string), (arg1), (arg2))
94#define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
95	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
96		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
97
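/*
 * Lockmgr locks may sleep, so Giant cannot be held across the sleep.
 * GIANT_SAVE() records how many times Giant is recursively held and then
 * releases it completely; GIANT_RESTORE() reacquires it the same number
 * of times once the lockmgr operation is done.
 */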
98#define	GIANT_DECLARE							\
99	int _i = 0;							\
100	WITNESS_SAVE_DECL(Giant)
101#define	GIANT_RESTORE() do {						\
102	if (_i > 0) {							\
103		while (_i--)						\
104			mtx_lock(&Giant);				\
105		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
106	}								\
107} while (0)
108#define	GIANT_SAVE() do {						\
109	if (mtx_owned(&Giant)) {					\
110		WITNESS_SAVE(&Giant.lock_object, Giant);		\
111		while (mtx_owned(&Giant)) {				\
112			_i++;						\
113			mtx_unlock(&Giant);				\
114		}							\
115	}								\
116} while (0)
117
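/*
 * A shared lock request can be granted only if the lock is not held in
 * exclusive mode and, in addition, one of the following holds: no
 * exclusive waiters or spinners are queued, the requesting thread already
 * holds shared lockmgr locks and LK_NODDLKTREAT was not passed, or the
 * thread has TDP_DEADLKTREAT set (deadlock avoidance).
 */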
118#define	LK_CAN_SHARE(x, flags)						\
119	(((x) & LK_SHARE) &&						\
120	(((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 ||	\
121	(curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||	\
122	(curthread->td_pflags & TDP_DEADLKTREAT)))
123#define	LK_TRYOP(x)							\
124	((x) & LK_NOWAIT)
125
126#define	LK_CAN_WITNESS(x)						\
127	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
128#define	LK_TRYWIT(x)							\
129	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
130
131#define	LK_CAN_ADAPT(lk, f)						\
132	(((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&		\
133	((f) & LK_SLEEPFAIL) == 0)
134
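/*
 * The lock word lk_lock encodes either the count of shared holders (when
 * LK_SHARE is set) or the pointer of the owning thread, together with the
 * waiters and spinners flag bits.  The two macros below mask off the flag
 * bits so that the owner can be compared against LK_KERNPROC and
 * curthread respectively.
 */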
135#define	lockmgr_disowned(lk)						\
136	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
137
138#define	lockmgr_xlocked(lk)						\
139	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
140
141static void	assert_lockmgr(const struct lock_object *lock, int how);
142#ifdef DDB
143static void	db_show_lockmgr(const struct lock_object *lock);
144#endif
145static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
146#ifdef KDTRACE_HOOKS
147static int	owner_lockmgr(const struct lock_object *lock,
148		    struct thread **owner);
149#endif
150static uintptr_t unlock_lockmgr(struct lock_object *lock);
151
152struct lock_class lock_class_lockmgr = {
153	.lc_name = "lockmgr",
154	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
155	.lc_assert = assert_lockmgr,
156#ifdef DDB
157	.lc_ddb_show = db_show_lockmgr,
158#endif
159	.lc_lock = lock_lockmgr,
160	.lc_unlock = unlock_lockmgr,
161#ifdef KDTRACE_HOOKS
162	.lc_owner = owner_lockmgr,
163#endif
164};
165
166#ifdef ADAPTIVE_LOCKMGRS
167static u_int alk_retries = 10;
168static u_int alk_loops = 10000;
169static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
170    "lockmgr debugging");
171SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
172SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
173#endif
174
175static __inline struct thread *
176lockmgr_xholder(const struct lock *lk)
177{
178	uintptr_t x;
179
180	x = lk->lk_lock;
181	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
182}
183
184/*
185 * This function assumes the sleepqueue chain lock is held on entry and
186 * returns with it released.  It also assumes the generic interlock is
187 * sane and has been previously checked.  If LK_INTERLOCK is specified,
188 * the interlock is not reacquired after the sleep.
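 * If LK_SLEEPFAIL was requested and the sleep itself did not fail, ENOLCK
 * is returned so that the caller gives up instead of retrying.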
189 */
190static __inline int
191sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
192    const char *wmesg, int pri, int timo, int queue)
193{
194	GIANT_DECLARE;
195	struct lock_class *class;
196	int catch, error;
197
198	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
199	catch = pri & PCATCH;
200	pri &= PRIMASK;
201	error = 0;
202
203	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
204	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
205
206	if (flags & LK_INTERLOCK)
207		class->lc_unlock(ilk);
208	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
209		lk->lk_exslpfail++;
210	GIANT_SAVE();
211	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
212	    SLEEPQ_INTERRUPTIBLE : 0), queue);
213	if ((flags & LK_TIMELOCK) && timo)
214		sleepq_set_timeout(&lk->lock_object, timo);
215
216	/*
217	 * Decide how to actually sleep, based on the requested flags.
218	 */
219	if ((flags & LK_TIMELOCK) && timo && catch)
220		error = sleepq_timedwait_sig(&lk->lock_object, pri);
221	else if ((flags & LK_TIMELOCK) && timo)
222		error = sleepq_timedwait(&lk->lock_object, pri);
223	else if (catch)
224		error = sleepq_wait_sig(&lk->lock_object, pri);
225	else
226		sleepq_wait(&lk->lock_object, pri);
227	GIANT_RESTORE();
228	if ((flags & LK_SLEEPFAIL) && error == 0)
229		error = ENOLCK;
230
231	return (error);
232}
233
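/*
 * Release one shared hold of the lock, waking up waiters if the last
 * shared reference is being dropped.  Returns non-zero if the swapper
 * (proc0) must be woken up by the caller.
 */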
234static __inline int
235wakeupshlk(struct lock *lk, const char *file, int line)
236{
237	uintptr_t v, x;
238	u_int realexslp;
239	int queue, wakeup_swapper;
240
241	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
242	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
243
244	wakeup_swapper = 0;
245	for (;;) {
246		x = lk->lk_lock;
247
248		/*
249		 * If there is more than one shared lock held, just drop one
250		 * and return.
251		 */
252		if (LK_SHARERS(x) > 1) {
253			if (atomic_cmpset_rel_ptr(&lk->lk_lock, x,
254			    x - LK_ONE_SHARER))
255				break;
256			continue;
257		}
258
259		/*
260		 * If there are no waiters on the exclusive queue, drop the
261		 * lock quickly.
262		 */
263		if ((x & LK_ALL_WAITERS) == 0) {
264			MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
265			    LK_SHARERS_LOCK(1));
266			if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, LK_UNLOCKED))
267				break;
268			continue;
269		}
270
271		/*
272		 * We should have a sharer with waiters, so enter the hard
273		 * path in order to handle wakeups correctly.
274		 */
275		sleepq_lock(&lk->lock_object);
276		x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
277		v = LK_UNLOCKED;
278
279		/*
280		 * If the lock has exclusive waiters, give them preference in
281		 * order to avoid a deadlock with the shared runners-up.
282		 * If interruptible sleeps left the exclusive queue empty,
283		 * avoid starving the threads sleeping on the shared queue
284		 * by giving them precedence and clearing the exclusive
285		 * waiters bit anyway.
286		 * Note that the lk_exslpfail count may overstate the real
287		 * number of waiters with the LK_SLEEPFAIL flag set, because
288		 * such waiters may also be using interruptible sleeps, so
289		 * lk_exslpfail should be treated as an upper bound only,
290		 * including the edge cases.
291		 */
292		realexslp = sleepq_sleepcnt(&lk->lock_object,
293		    SQ_EXCLUSIVE_QUEUE);
294		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
295			if (lk->lk_exslpfail < realexslp) {
296				lk->lk_exslpfail = 0;
297				queue = SQ_EXCLUSIVE_QUEUE;
298				v |= (x & LK_SHARED_WAITERS);
299			} else {
300				lk->lk_exslpfail = 0;
301				LOCK_LOG2(lk,
302				    "%s: %p has only LK_SLEEPFAIL sleepers",
303				    __func__, lk);
304				LOCK_LOG2(lk,
305			    "%s: %p waking up threads on the exclusive queue",
306				    __func__, lk);
307				wakeup_swapper =
308				    sleepq_broadcast(&lk->lock_object,
309				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
310				queue = SQ_SHARED_QUEUE;
311			}
312
313		} else {
314
315			/*
316			 * Exclusive waiters sleeping with LK_SLEEPFAIL set
317			 * and using interruptible sleeps/timeouts may have
318			 * left spurious lk_exslpfail counts behind, so clean
319			 * them up anyway.
320			 */
321			lk->lk_exslpfail = 0;
322			queue = SQ_SHARED_QUEUE;
323		}
324
325		if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
326		    v)) {
327			sleepq_release(&lk->lock_object);
328			continue;
329		}
330		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
331		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
332		    "exclusive");
333		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
334		    0, queue);
335		sleepq_release(&lk->lock_object);
336		break;
337	}
338
339	lock_profile_release_lock(&lk->lock_object);
340	TD_LOCKS_DEC(curthread);
341	TD_SLOCKS_DEC(curthread);
342	return (wakeup_swapper);
343}
344
345static void
346assert_lockmgr(const struct lock_object *lock, int what)
347{
348
349	panic("lockmgr locks do not support assertions");
350}
351
352static void
353lock_lockmgr(struct lock_object *lock, uintptr_t how)
354{
355
356	panic("lockmgr locks do not support sleep interlocking");
357}
358
359static uintptr_t
360unlock_lockmgr(struct lock_object *lock)
361{
362
363	panic("lockmgr locks do not support sleep interlocking");
364}
365
366#ifdef KDTRACE_HOOKS
367static int
368owner_lockmgr(const struct lock_object *lock, struct thread **owner)
369{
370
371	panic("lockmgr locks do not support owner inquiring");
372}
373#endif
374
375void
376lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
377{
378	int iflags;
379
380	MPASS((flags & ~LK_INIT_MASK) == 0);
381	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
382	    ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
383	    &lk->lk_lock));
384
385	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
386	if (flags & LK_CANRECURSE)
387		iflags |= LO_RECURSABLE;
388	if ((flags & LK_NODUP) == 0)
389		iflags |= LO_DUPOK;
390	if (flags & LK_NOPROFILE)
391		iflags |= LO_NOPROFILE;
392	if ((flags & LK_NOWITNESS) == 0)
393		iflags |= LO_WITNESS;
394	if (flags & LK_QUIET)
395		iflags |= LO_QUIET;
396	if (flags & LK_IS_VNODE)
397		iflags |= LO_IS_VNODE;
398	iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
399
400	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
401	lk->lk_lock = LK_UNLOCKED;
402	lk->lk_recurse = 0;
403	lk->lk_exslpfail = 0;
404	lk->lk_timo = timo;
405	lk->lk_pri = pri;
406	STACK_ZERO(lk);
407}
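
/*
 * Example (illustrative only; the lock name, wait message and PVFS
 * priority below are arbitrary): a consumer typically initializes the
 * lock once and then uses the lockmgr(9) interface to acquire and
 * release it:
 *
 *	struct lock foo_lock;
 *
 *	lockinit(&foo_lock, PVFS, "foolck", 0, LK_CANRECURSE);
 *	lockmgr(&foo_lock, LK_EXCLUSIVE, NULL);
 *	...
 *	lockmgr(&foo_lock, LK_RELEASE, NULL);
 *	lockdestroy(&foo_lock);
 */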
408
409/*
410 * XXX: Gross hacks to manipulate external lock flags after
411 * initialization.  Used for certain vnode and buf locks.
412 */
413void
414lockallowshare(struct lock *lk)
415{
416
417	lockmgr_assert(lk, KA_XLOCKED);
418	lk->lock_object.lo_flags &= ~LK_NOSHARE;
419}
420
421void
422lockdisableshare(struct lock *lk)
423{
424
425	lockmgr_assert(lk, KA_XLOCKED);
426	lk->lock_object.lo_flags |= LK_NOSHARE;
427}
428
429void
430lockallowrecurse(struct lock *lk)
431{
432
433	lockmgr_assert(lk, KA_XLOCKED);
434	lk->lock_object.lo_flags |= LO_RECURSABLE;
435}
436
437void
438lockdisablerecurse(struct lock *lk)
439{
440
441	lockmgr_assert(lk, KA_XLOCKED);
442	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
443}
444
445void
446lockdestroy(struct lock *lk)
447{
448
449	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
450	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
451	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
452	lock_destroy(&lk->lock_object);
453}
454
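/*
 * Generic lockmgr entry point.  The operation encoded in 'flags'
 * (LK_SHARED, LK_EXCLUSIVE, LK_UPGRADE, LK_TRYUPGRADE, LK_DOWNGRADE,
 * LK_RELEASE or LK_DRAIN) is performed on 'lk'.  If LK_INTERLOCK is set,
 * 'ilk' is dropped before any sleep and is never reacquired, so it is
 * always unlocked by the time this function returns.
 */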
455int
456__lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
457    const char *wmesg, int pri, int timo, const char *file, int line)
458{
459	GIANT_DECLARE;
460	struct lock_class *class;
461	const char *iwmesg;
462	uintptr_t tid, v, x;
463	u_int op, realexslp;
464	int error, ipri, itimo, queue, wakeup_swapper;
465#ifdef LOCK_PROFILING
466	uint64_t waittime = 0;
467	int contested = 0;
468#endif
469#ifdef ADAPTIVE_LOCKMGRS
470	volatile struct thread *owner;
471	u_int i, spintries = 0;
472#endif
473
474	error = 0;
475	tid = (uintptr_t)curthread;
476	op = (flags & LK_TYPE_MASK);
477	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
478	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
479	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
480
481	MPASS((flags & ~LK_TOTAL_MASK) == 0);
482	KASSERT((op & (op - 1)) == 0,
483	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
484	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
485	    (op != LK_DOWNGRADE && op != LK_RELEASE),
486	    ("%s: Invalid flags in regard of the operation desired @ %s:%d",
487	    __func__, file, line));
488	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
489	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
490	    __func__, file, line));
491	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
492	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
493	    lk->lock_object.lo_name, file, line));
494
495	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
496	if (panicstr != NULL) {
497		if (flags & LK_INTERLOCK)
498			class->lc_unlock(ilk);
499		return (0);
500	}
501
502	if (lk->lock_object.lo_flags & LK_NOSHARE) {
503		switch (op) {
504		case LK_SHARED:
505			op = LK_EXCLUSIVE;
506			break;
507		case LK_UPGRADE:
508		case LK_TRYUPGRADE:
509		case LK_DOWNGRADE:
510			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
511			    file, line);
512			if (flags & LK_INTERLOCK)
513				class->lc_unlock(ilk);
514			return (0);
515		}
516	}
517
518	wakeup_swapper = 0;
519	switch (op) {
520	case LK_SHARED:
521		if (LK_CAN_WITNESS(flags))
522			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
523			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
524		for (;;) {
525			x = lk->lk_lock;
526
527			/*
528			 * If no other thread has an exclusive lock, or
529			 * no exclusive waiter is present, bump the count of
530			 * sharers.  Since we have to preserve the state of
531			 * waiters, if we fail to acquire the shared lock
532			 * loop back and retry.
533			 */
534			if (LK_CAN_SHARE(x, flags)) {
535				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
536				    x + LK_ONE_SHARER))
537					break;
538				continue;
539			}
540#ifdef HWPMC_HOOKS
541			PMC_SOFT_CALL( , , lock, failed);
542#endif
543			lock_profile_obtain_lock_failed(&lk->lock_object,
544			    &contested, &waittime);
545
546			/*
547			 * If the lock is already held by curthread in
548			 * exclusive mode, return EDEADLK to avoid a deadlock.
549			 */
550			if (LK_HOLDER(x) == tid) {
551				LOCK_LOG2(lk,
552				    "%s: %p already held in exclusive mode",
553				    __func__, lk);
554				error = EDEADLK;
555				break;
556			}
557
558			/*
559			 * If the operation is not allowed to sleep, just give up
560			 * and return.
561			 */
562			if (LK_TRYOP(flags)) {
563				LOCK_LOG2(lk, "%s: %p fails the try operation",
564				    __func__, lk);
565				error = EBUSY;
566				break;
567			}
568
569#ifdef ADAPTIVE_LOCKMGRS
570			/*
571			 * If the owner is running on another CPU, spin until
572			 * the owner stops running or the state of the lock
573			 * changes.  We need to handle two possible states here
574			 * because for a failed acquisition the lock can be
575			 * either held in exclusive mode or shared mode
576			 * (for the writer starvation avoidance technique).
577			 */
578			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
579			    LK_HOLDER(x) != LK_KERNPROC) {
580				owner = (struct thread *)LK_HOLDER(x);
581				if (LOCK_LOG_TEST(&lk->lock_object, 0))
582					CTR3(KTR_LOCK,
583					    "%s: spinning on %p held by %p",
584					    __func__, lk, owner);
585				KTR_STATE1(KTR_SCHED, "thread",
586				    sched_tdname(curthread), "spinning",
587				    "lockname:\"%s\"", lk->lock_object.lo_name);
588
589				/*
590				 * If we are also holding an interlock, drop it
591				 * in order to avoid a deadlock if the lockmgr
592				 * owner is adaptively spinning on the
593				 * interlock itself.
594				 */
595				if (flags & LK_INTERLOCK) {
596					class->lc_unlock(ilk);
597					flags &= ~LK_INTERLOCK;
598				}
599				GIANT_SAVE();
600				while (LK_HOLDER(lk->lk_lock) ==
601				    (uintptr_t)owner && TD_IS_RUNNING(owner))
602					cpu_spinwait();
603				KTR_STATE0(KTR_SCHED, "thread",
604				    sched_tdname(curthread), "running");
605				GIANT_RESTORE();
606				continue;
607			} else if (LK_CAN_ADAPT(lk, flags) &&
608			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
609			    spintries < alk_retries) {
610				KTR_STATE1(KTR_SCHED, "thread",
611				    sched_tdname(curthread), "spinning",
612				    "lockname:\"%s\"", lk->lock_object.lo_name);
613				if (flags & LK_INTERLOCK) {
614					class->lc_unlock(ilk);
615					flags &= ~LK_INTERLOCK;
616				}
617				GIANT_SAVE();
618				spintries++;
619				for (i = 0; i < alk_loops; i++) {
620					if (LOCK_LOG_TEST(&lk->lock_object, 0))
621						CTR4(KTR_LOCK,
622				    "%s: shared spinning on %p with %u and %u",
623						    __func__, lk, spintries, i);
624					x = lk->lk_lock;
625					if ((x & LK_SHARE) == 0 ||
626					    LK_CAN_SHARE(x, flags) != 0)
627						break;
628					cpu_spinwait();
629				}
630				KTR_STATE0(KTR_SCHED, "thread",
631				    sched_tdname(curthread), "running");
632				GIANT_RESTORE();
633				if (i != alk_loops)
634					continue;
635			}
636#endif
637
638			/*
639			 * Acquire the sleepqueue chain lock because we
640			 * probably will need to manipulate the waiters flags.
641			 */
642			sleepq_lock(&lk->lock_object);
643			x = lk->lk_lock;
644
645			/*
646			 * if the lock can be acquired in shared mode, try
647			 * If the lock can be acquired in shared mode, try
648			 */
649			if (LK_CAN_SHARE(x, flags)) {
650				sleepq_release(&lk->lock_object);
651				continue;
652			}
653
654#ifdef ADAPTIVE_LOCKMGRS
655			/*
656			 * The current lock owner might have started executing
657			 * on another CPU (or the lock could have changed
658			 * owner) while we were waiting on the sleepqueue
659			 * chain lock.  If so, drop the sleepqueue lock and try
660			 * again.
661			 */
662			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
663			    LK_HOLDER(x) != LK_KERNPROC) {
664				owner = (struct thread *)LK_HOLDER(x);
665				if (TD_IS_RUNNING(owner)) {
666					sleepq_release(&lk->lock_object);
667					continue;
668				}
669			}
670#endif
671
672			/*
673			 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
674			 * loop back and retry.
675			 */
676			if ((x & LK_SHARED_WAITERS) == 0) {
677				if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
678				    x | LK_SHARED_WAITERS)) {
679					sleepq_release(&lk->lock_object);
680					continue;
681				}
682				LOCK_LOG2(lk, "%s: %p set shared waiters flag",
683				    __func__, lk);
684			}
685
686			/*
687			 * Since we have been unable to acquire the
688			 * shared lock and the shared waiters flag is set,
689			 * we will sleep.
690			 */
691			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
692			    SQ_SHARED_QUEUE);
693			flags &= ~LK_INTERLOCK;
694			if (error) {
695				LOCK_LOG3(lk,
696				    "%s: interrupted sleep for %p with %d",
697				    __func__, lk, error);
698				break;
699			}
700			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
701			    __func__, lk);
702		}
703		if (error == 0) {
704			lock_profile_obtain_lock_success(&lk->lock_object,
705			    contested, waittime, file, line);
706			LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
707			    line);
708			WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
709			    line);
710			TD_LOCKS_INC(curthread);
711			TD_SLOCKS_INC(curthread);
712			STACK_SAVE(lk);
713		}
714		break;
715	case LK_UPGRADE:
716	case LK_TRYUPGRADE:
717		_lockmgr_assert(lk, KA_SLOCKED, file, line);
718		v = lk->lk_lock;
719		x = v & LK_ALL_WAITERS;
720		v &= LK_EXCLUSIVE_SPINNERS;
721
722		/*
723		 * Try to switch from one shared lock to an exclusive one.
724		 * We need to preserve waiters flags during the operation.
725		 */
726		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
727		    tid | x)) {
728			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
729			    line);
730			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
731			    LK_TRYWIT(flags), file, line);
732			TD_SLOCKS_DEC(curthread);
733			break;
734		}
735
736		/*
737		 * In LK_TRYUPGRADE mode, do not drop the lock,
738		 * returning EBUSY instead.
739		 */
740		if (op == LK_TRYUPGRADE) {
741			LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
742			    __func__, lk);
743			error = EBUSY;
744			break;
745		}
746
747		/*
748		 * We have been unable to upgrade, so just
749		 * give up the shared lock.
750		 */
751		wakeup_swapper |= wakeupshlk(lk, file, line);
752
753		/* FALLTHROUGH */
754	case LK_EXCLUSIVE:
755		if (LK_CAN_WITNESS(flags))
756			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
757			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
758			    ilk : NULL);
759
760		/*
761		 * If curthread already holds the lock and this one is
762		 * allowed to recurse, simply recurse on it.
763		 */
764		if (lockmgr_xlocked(lk)) {
765			if ((flags & LK_CANRECURSE) == 0 &&
766			    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
767
768				/*
769				 * If this is a try operation, fail with
770				 * EBUSY instead of panicking.
771				 */
772				if (LK_TRYOP(flags)) {
773					LOCK_LOG2(lk,
774					    "%s: %p fails the try operation",
775					    __func__, lk);
776					error = EBUSY;
777					break;
778				}
779				if (flags & LK_INTERLOCK)
780					class->lc_unlock(ilk);
781		panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
782				    __func__, iwmesg, file, line);
783			}
784			lk->lk_recurse++;
785			LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
786			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
787			    lk->lk_recurse, file, line);
788			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
789			    LK_TRYWIT(flags), file, line);
790			TD_LOCKS_INC(curthread);
791			break;
792		}
793
794		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
795		    tid)) {
796#ifdef HWPMC_HOOKS
797			PMC_SOFT_CALL( , , lock, failed);
798#endif
799			lock_profile_obtain_lock_failed(&lk->lock_object,
800			    &contested, &waittime);
801
802			/*
803			 * If the operation is not allowed to sleep, just give up
804			 * and return.
805			 */
806			if (LK_TRYOP(flags)) {
807				LOCK_LOG2(lk, "%s: %p fails the try operation",
808				    __func__, lk);
809				error = EBUSY;
810				break;
811			}
812
813#ifdef ADAPTIVE_LOCKMGRS
814			/*
815			 * If the owner is running on another CPU, spin until
816			 * the owner stops running or the state of the lock
817			 * changes.
818			 */
819			x = lk->lk_lock;
820			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
821			    LK_HOLDER(x) != LK_KERNPROC) {
822				owner = (struct thread *)LK_HOLDER(x);
823				if (LOCK_LOG_TEST(&lk->lock_object, 0))
824					CTR3(KTR_LOCK,
825					    "%s: spinning on %p held by %p",
826					    __func__, lk, owner);
827				KTR_STATE1(KTR_SCHED, "thread",
828				    sched_tdname(curthread), "spinning",
829				    "lockname:\"%s\"", lk->lock_object.lo_name);
830
831				/*
832				 * If we are also holding an interlock, drop it
833				 * in order to avoid a deadlock if the lockmgr
834				 * owner is adaptively spinning on the
835				 * interlock itself.
836				 */
837				if (flags & LK_INTERLOCK) {
838					class->lc_unlock(ilk);
839					flags &= ~LK_INTERLOCK;
840				}
841				GIANT_SAVE();
842				while (LK_HOLDER(lk->lk_lock) ==
843				    (uintptr_t)owner && TD_IS_RUNNING(owner))
844					cpu_spinwait();
845				KTR_STATE0(KTR_SCHED, "thread",
846				    sched_tdname(curthread), "running");
847				GIANT_RESTORE();
848				continue;
849			} else if (LK_CAN_ADAPT(lk, flags) &&
850			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
851			    spintries < alk_retries) {
852				if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
853				    !atomic_cmpset_ptr(&lk->lk_lock, x,
854				    x | LK_EXCLUSIVE_SPINNERS))
855					continue;
856				KTR_STATE1(KTR_SCHED, "thread",
857				    sched_tdname(curthread), "spinning",
858				    "lockname:\"%s\"", lk->lock_object.lo_name);
859				if (flags & LK_INTERLOCK) {
860					class->lc_unlock(ilk);
861					flags &= ~LK_INTERLOCK;
862				}
863				GIANT_SAVE();
864				spintries++;
865				for (i = 0; i < alk_loops; i++) {
866					if (LOCK_LOG_TEST(&lk->lock_object, 0))
867						CTR4(KTR_LOCK,
868				    "%s: shared spinning on %p with %u and %u",
869						    __func__, lk, spintries, i);
870					if ((lk->lk_lock &
871					    LK_EXCLUSIVE_SPINNERS) == 0)
872						break;
873					cpu_spinwait();
874				}
875				KTR_STATE0(KTR_SCHED, "thread",
876				    sched_tdname(curthread), "running");
877				GIANT_RESTORE();
878				if (i != alk_loops)
879					continue;
880			}
881#endif
882
883			/*
884			 * Acquire the sleepqueue chain lock because we
885			 * probably will need to manipulate the waiters flags.
886			 */
887			sleepq_lock(&lk->lock_object);
888			x = lk->lk_lock;
889
890			/*
891			 * If the lock has been released while we spun on
892			 * the sleepqueue chain lock, just try again.
893			 */
894			if (x == LK_UNLOCKED) {
895				sleepq_release(&lk->lock_object);
896				continue;
897			}
898
899#ifdef ADAPTIVE_LOCKMGRS
900			/*
901			 * The current lock owner might have started executing
902			 * on another CPU (or the lock could have changed
903			 * owner) while we were waiting on the sleepqueue
904			 * chain lock.  If so, drop the sleepqueue lock and try
905			 * again.
906			 */
907			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
908			    LK_HOLDER(x) != LK_KERNPROC) {
909				owner = (struct thread *)LK_HOLDER(x);
910				if (TD_IS_RUNNING(owner)) {
911					sleepq_release(&lk->lock_object);
912					continue;
913				}
914			}
915#endif
916
917			/*
918			 * The lock can be in a state where there is a
919			 * pending queue of waiters but still no owner.
920			 * This happens when the lock is contested and an
921			 * owner is going to claim the lock.
922			 * If curthread is the one that successfully acquires
923			 * it, claim lock ownership and return, preserving the
924			 * waiters flags.
925			 */
926			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
927			if ((x & ~v) == LK_UNLOCKED) {
928				v &= ~LK_EXCLUSIVE_SPINNERS;
929				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
930				    tid | v)) {
931					sleepq_release(&lk->lock_object);
932					LOCK_LOG2(lk,
933					    "%s: %p claimed by a new writer",
934					    __func__, lk);
935					break;
936				}
937				sleepq_release(&lk->lock_object);
938				continue;
939			}
940
941			/*
942			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
943			 * fail, loop back and retry.
944			 */
945			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
946				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
947				    x | LK_EXCLUSIVE_WAITERS)) {
948					sleepq_release(&lk->lock_object);
949					continue;
950				}
951				LOCK_LOG2(lk, "%s: %p set excl waiters flag",
952				    __func__, lk);
953			}
954
955			/*
956			 * Since we have been unable to acquire the
957			 * exclusive lock and the exclusive waiters flag
958			 * is set, we will sleep.
959			 */
960			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
961			    SQ_EXCLUSIVE_QUEUE);
962			flags &= ~LK_INTERLOCK;
963			if (error) {
964				LOCK_LOG3(lk,
965				    "%s: interrupted sleep for %p with %d",
966				    __func__, lk, error);
967				break;
968			}
969			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
970			    __func__, lk);
971		}
972		if (error == 0) {
973			lock_profile_obtain_lock_success(&lk->lock_object,
974			    contested, waittime, file, line);
975			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
976			    lk->lk_recurse, file, line);
977			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
978			    LK_TRYWIT(flags), file, line);
979			TD_LOCKS_INC(curthread);
980			STACK_SAVE(lk);
981		}
982		break;
983	case LK_DOWNGRADE:
984		_lockmgr_assert(lk, KA_XLOCKED, file, line);
985		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
986		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
987
988		/*
989		 * Panic if the lock is recursed.
990		 */
991		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
992			if (flags & LK_INTERLOCK)
993				class->lc_unlock(ilk);
994			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
995			    __func__, iwmesg, file, line);
996		}
997		TD_SLOCKS_INC(curthread);
998
999		/*
1000		 * In order to preserve waiters flags, just spin.
1001		 */
1002		for (;;) {
1003			x = lk->lk_lock;
1004			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1005			x &= LK_ALL_WAITERS;
1006			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1007			    LK_SHARERS_LOCK(1) | x))
1008				break;
1009			cpu_spinwait();
1010		}
1011		break;
1012	case LK_RELEASE:
1013		_lockmgr_assert(lk, KA_LOCKED, file, line);
1014		x = lk->lk_lock;
1015
1016		if ((x & LK_SHARE) == 0) {
1017
1018			/*
1019			 * As a first option, treat the lock as if it has no
1020			 * waiters.
1021			 * Fix up the tid variable if the lock has been disowned.
1022			 */
1023			if (LK_HOLDER(x) == LK_KERNPROC)
1024				tid = LK_KERNPROC;
1025			else {
1026				WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
1027				    file, line);
1028				TD_LOCKS_DEC(curthread);
1029			}
1030			LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
1031			    lk->lk_recurse, file, line);
1032
1033			/*
1034			 * The lock is held in exclusive mode.
1035			 * If the lock is recursed also, then unrecurse it.
1036			 */
1037			if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1038				LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
1039				    lk);
1040				lk->lk_recurse--;
1041				break;
1042			}
1043			if (tid != LK_KERNPROC)
1044				lock_profile_release_lock(&lk->lock_object);
1045
1046			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
1047			    LK_UNLOCKED))
1048				break;
1049
1050			sleepq_lock(&lk->lock_object);
1051			x = lk->lk_lock;
1052			v = LK_UNLOCKED;
1053
1054			/*
1055			 * If the lock has exclusive waiters, give them
1056			 * preference in order to avoid a deadlock with the
1057			 * shared runners-up.
1058			 * If interruptible sleeps left the exclusive queue
1059			 * empty, avoid starving the threads sleeping on the
1060			 * shared queue by giving them precedence and
1061			 * clearing the exclusive waiters bit anyway.
1062			 * Note that the lk_exslpfail count may overstate
1063			 * the real number of waiters with the LK_SLEEPFAIL
1064			 * flag set, because such waiters may also be using
1065			 * interruptible sleeps, so lk_exslpfail should be
1066			 * treated as an upper bound only, including the
1067			 * edge cases.
1068			 */
1069			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1070			realexslp = sleepq_sleepcnt(&lk->lock_object,
1071			    SQ_EXCLUSIVE_QUEUE);
1072			if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1073				if (lk->lk_exslpfail < realexslp) {
1074					lk->lk_exslpfail = 0;
1075					queue = SQ_EXCLUSIVE_QUEUE;
1076					v |= (x & LK_SHARED_WAITERS);
1077				} else {
1078					lk->lk_exslpfail = 0;
1079					LOCK_LOG2(lk,
1080					"%s: %p has only LK_SLEEPFAIL sleepers",
1081					    __func__, lk);
1082					LOCK_LOG2(lk,
1083			"%s: %p waking up threads on the exclusive queue",
1084					    __func__, lk);
1085					wakeup_swapper =
1086					    sleepq_broadcast(&lk->lock_object,
1087					    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1088					queue = SQ_SHARED_QUEUE;
1089				}
1090			} else {
1091
1092				/*
1093				 * Exclusive waiters sleeping with LK_SLEEPFAIL
1094				 * set and using interruptible sleeps/timeouts
1095				 * may have left spurious lk_exslpfail counts
1096				 * behind, so clean them up anyway.
1097				 */
1098				lk->lk_exslpfail = 0;
1099				queue = SQ_SHARED_QUEUE;
1100			}
1101
1102			LOCK_LOG3(lk,
1103			    "%s: %p waking up threads on the %s queue",
1104			    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1105			    "exclusive");
1106			atomic_store_rel_ptr(&lk->lk_lock, v);
1107			wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
1108			    SLEEPQ_LK, 0, queue);
1109			sleepq_release(&lk->lock_object);
1110			break;
1111		} else
1112			wakeup_swapper = wakeupshlk(lk, file, line);
1113		break;
1114	case LK_DRAIN:
1115		if (LK_CAN_WITNESS(flags))
1116			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1117			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1118			    ilk : NULL);
1119
1120		/*
1121		 * Trying to drain a lock we already own will result in a
1122		 * deadlock.
1123		 */
1124		if (lockmgr_xlocked(lk)) {
1125			if (flags & LK_INTERLOCK)
1126				class->lc_unlock(ilk);
1127			panic("%s: draining %s with the lock held @ %s:%d\n",
1128			    __func__, iwmesg, file, line);
1129		}
1130
1131		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1132#ifdef HWPMC_HOOKS
1133			PMC_SOFT_CALL( , , lock, failed);
1134#endif
1135			lock_profile_obtain_lock_failed(&lk->lock_object,
1136			    &contested, &waittime);
1137
1138			/*
1139			 * If the operation is not allowed to sleep, just give up
1140			 * and return.
1141			 */
1142			if (LK_TRYOP(flags)) {
1143				LOCK_LOG2(lk, "%s: %p fails the try operation",
1144				    __func__, lk);
1145				error = EBUSY;
1146				break;
1147			}
1148
1149			/*
1150			 * Acquire the sleepqueue chain lock because we
1151			 * probably will need to manipulate the waiters flags.
1152			 */
1153			sleepq_lock(&lk->lock_object);
1154			x = lk->lk_lock;
1155
1156			/*
1157			 * If the lock has been released while we spun on
1158			 * the sleepqueue chain lock, just try again.
1159			 */
1160			if (x == LK_UNLOCKED) {
1161				sleepq_release(&lk->lock_object);
1162				continue;
1163			}
1164
1165			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1166			if ((x & ~v) == LK_UNLOCKED) {
1167				v = (x & ~LK_EXCLUSIVE_SPINNERS);
1168
1169				/*
1170				 * If interruptible sleeps left the exclusive
1171				 * queue empty, avoid starving the threads
1172				 * sleeping on the shared queue by giving them
1173				 * precedence and clearing the exclusive
1174				 * waiters bit anyway.
1175				 * Note that the lk_exslpfail count may
1176				 * overstate the real number of waiters with
1177				 * the LK_SLEEPFAIL flag set, because such
1178				 * waiters may also be using interruptible
1179				 * sleeps, so lk_exslpfail should be treated
1180				 * as an upper bound only, including the edge
1181				 * cases.
1182				 */
1183				if (v & LK_EXCLUSIVE_WAITERS) {
1184					queue = SQ_EXCLUSIVE_QUEUE;
1185					v &= ~LK_EXCLUSIVE_WAITERS;
1186				} else {
1187
1188					/*
1189					 * Exclusive waiters sleeping with
1190					 * LK_SLEEPFAIL set and using
1191					 * interruptible sleeps/timeouts may
1192					 * have left spurious lk_exslpfail
1193					 * counts behind, so clean them up anyway.
1194					 */
1195					MPASS(v & LK_SHARED_WAITERS);
1196					lk->lk_exslpfail = 0;
1197					queue = SQ_SHARED_QUEUE;
1198					v &= ~LK_SHARED_WAITERS;
1199				}
1200				if (queue == SQ_EXCLUSIVE_QUEUE) {
1201					realexslp =
1202					    sleepq_sleepcnt(&lk->lock_object,
1203					    SQ_EXCLUSIVE_QUEUE);
1204					if (lk->lk_exslpfail >= realexslp) {
1205						lk->lk_exslpfail = 0;
1206						queue = SQ_SHARED_QUEUE;
1207						v &= ~LK_SHARED_WAITERS;
1208						if (realexslp != 0) {
1209							LOCK_LOG2(lk,
1210					"%s: %p has only LK_SLEEPFAIL sleepers",
1211							    __func__, lk);
1212							LOCK_LOG2(lk,
1213			"%s: %p waking up threads on the exclusive queue",
1214							    __func__, lk);
1215							wakeup_swapper =
1216							    sleepq_broadcast(
1217							    &lk->lock_object,
1218							    SLEEPQ_LK, 0,
1219							    SQ_EXCLUSIVE_QUEUE);
1220						}
1221					} else
1222						lk->lk_exslpfail = 0;
1223				}
1224				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1225					sleepq_release(&lk->lock_object);
1226					continue;
1227				}
1228				LOCK_LOG3(lk,
1229				"%s: %p waking up all threads on the %s queue",
1230				    __func__, lk, queue == SQ_SHARED_QUEUE ?
1231				    "shared" : "exclusive");
1232				wakeup_swapper |= sleepq_broadcast(
1233				    &lk->lock_object, SLEEPQ_LK, 0, queue);
1234
1235				/*
1236				 * If shared waiters have been woken up, we need
1237				 * to wait for one of them to acquire the lock
1238				 * before setting the exclusive waiters flag in
1239				 * order to avoid a deadlock.
1240				 */
1241				if (queue == SQ_SHARED_QUEUE) {
1242					for (v = lk->lk_lock;
1243					    (v & LK_SHARE) && !LK_SHARERS(v);
1244					    v = lk->lk_lock)
1245						cpu_spinwait();
1246				}
1247			}
1248
1249			/*
1250			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1251			 * fail, loop back and retry.
1252			 */
1253			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1254				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1255				    x | LK_EXCLUSIVE_WAITERS)) {
1256					sleepq_release(&lk->lock_object);
1257					continue;
1258				}
1259				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1260				    __func__, lk);
1261			}
1262
1263			/*
1264			 * Since we have been unable to acquire the
1265			 * exclusive lock and the exclusive waiters flag
1266			 * is set, we will sleep.
1267			 */
1268			if (flags & LK_INTERLOCK) {
1269				class->lc_unlock(ilk);
1270				flags &= ~LK_INTERLOCK;
1271			}
1272			GIANT_SAVE();
1273			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1274			    SQ_EXCLUSIVE_QUEUE);
1275			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1276			GIANT_RESTORE();
1277			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1278			    __func__, lk);
1279		}
1280
1281		if (error == 0) {
1282			lock_profile_obtain_lock_success(&lk->lock_object,
1283			    contested, waittime, file, line);
1284			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1285			    lk->lk_recurse, file, line);
1286			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1287			    LK_TRYWIT(flags), file, line);
1288			TD_LOCKS_INC(curthread);
1289			STACK_SAVE(lk);
1290		}
1291		break;
1292	default:
1293		if (flags & LK_INTERLOCK)
1294			class->lc_unlock(ilk);
1295		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1296	}
1297
1298	if (flags & LK_INTERLOCK)
1299		class->lc_unlock(ilk);
1300	if (wakeup_swapper)
1301		kick_proc0();
1302
1303	return (error);
1304}
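
/*
 * Example (illustrative only; foo_lock and foo_mtx are arbitrary names):
 * acquiring a lockmgr lock while atomically releasing a mutex interlock:
 *
 *	mtx_lock(&foo_mtx);
 *	...
 *	lockmgr(&foo_lock, LK_EXCLUSIVE | LK_INTERLOCK, &foo_mtx.lock_object);
 *
 * The interlock is dropped by lockmgr() before sleeping and is never
 * reacquired, so foo_mtx is unlocked once the call returns.
 */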
1305
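/*
 * Disown an exclusively held lock: ownership is passed to LK_KERNPROC so
 * that a different thread (or context) may release the lock later on.
 */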
1306void
1307_lockmgr_disown(struct lock *lk, const char *file, int line)
1308{
1309	uintptr_t tid, x;
1310
1311	if (SCHEDULER_STOPPED())
1312		return;
1313
1314	tid = (uintptr_t)curthread;
1315	_lockmgr_assert(lk, KA_XLOCKED, file, line);
1316
1317	/*
1318	 * Panic if the lock is recursed.
1319	 */
1320	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1321		panic("%s: disown a recursed lockmgr @ %s:%d\n",
1322		    __func__,  file, line);
1323
1324	/*
1325	 * If the owner is already LK_KERNPROC, just skip the whole operation.
1326	 */
1327	if (LK_HOLDER(lk->lk_lock) != tid)
1328		return;
1329	lock_profile_release_lock(&lk->lock_object);
1330	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1331	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1332	TD_LOCKS_DEC(curthread);
1333	STACK_SAVE(lk);
1334
1335	/*
1336	 * In order to preserve waiters flags, just spin.
1337	 */
1338	for (;;) {
1339		x = lk->lk_lock;
1340		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1341		x &= LK_ALL_WAITERS;
1342		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1343		    LK_KERNPROC | x))
1344			return;
1345		cpu_spinwait();
1346	}
1347}
1348
1349void
1350lockmgr_printinfo(const struct lock *lk)
1351{
1352	struct thread *td;
1353	uintptr_t x;
1354
1355	if (lk->lk_lock == LK_UNLOCKED)
1356		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1357	else if (lk->lk_lock & LK_SHARE)
1358		printf("lock type %s: SHARED (count %ju)\n",
1359		    lk->lock_object.lo_name,
1360		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1361	else {
1362		td = lockmgr_xholder(lk);
1363		if (td == (struct thread *)LK_KERNPROC)
1364			printf("lock type %s: EXCL by KERNPROC\n",
1365			    lk->lock_object.lo_name);
1366		else
1367			printf("lock type %s: EXCL by thread %p "
1368			    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1369			    td, td->td_proc->p_pid, td->td_proc->p_comm,
1370			    td->td_tid);
1371	}
1372
1373	x = lk->lk_lock;
1374	if (x & LK_EXCLUSIVE_WAITERS)
1375		printf(" with exclusive waiters pending\n");
1376	if (x & LK_SHARED_WAITERS)
1377		printf(" with shared waiters pending\n");
1378	if (x & LK_EXCLUSIVE_SPINNERS)
1379		printf(" with exclusive spinners pending\n");
1380
1381	STACK_PRINT(lk);
1382}
1383
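/*
 * Return LK_EXCLUSIVE if the lock is held exclusively by curthread (or
 * has been disowned to LK_KERNPROC), LK_EXCLOTHER if another thread holds
 * it exclusively, LK_SHARED if it is held in shared mode and 0 if it is
 * unlocked.
 */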
1384int
1385lockstatus(const struct lock *lk)
1386{
1387	uintptr_t v, x;
1388	int ret;
1389
1390	ret = LK_SHARED;
1391	x = lk->lk_lock;
1392	v = LK_HOLDER(x);
1393
1394	if ((x & LK_SHARE) == 0) {
1395		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1396			ret = LK_EXCLUSIVE;
1397		else
1398			ret = LK_EXCLOTHER;
1399	} else if (x == LK_UNLOCKED)
1400		ret = 0;
1401
1402	return (ret);
1403}
1404
1405#ifdef INVARIANT_SUPPORT
1406
1407FEATURE(invariant_support,
1408    "Support for modules compiled with INVARIANTS option");
1409
1410#ifndef INVARIANTS
1411#undef	_lockmgr_assert
1412#endif
1413
1414void
1415_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1416{
1417	int slocked = 0;
1418
1419	if (panicstr != NULL)
1420		return;
1421	switch (what) {
1422	case KA_SLOCKED:
1423	case KA_SLOCKED | KA_NOTRECURSED:
1424	case KA_SLOCKED | KA_RECURSED:
1425		slocked = 1;
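		/* FALLTHROUGH */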
1426	case KA_LOCKED:
1427	case KA_LOCKED | KA_NOTRECURSED:
1428	case KA_LOCKED | KA_RECURSED:
1429#ifdef WITNESS
1430
1431		/*
1432		 * We cannot trust WITNESS if the lock is held in exclusive
1433		 * mode and a call to lockmgr_disown() happened.
1434		 * Work around this by skipping the check if the lock is held
1435		 * in exclusive mode, even for the KA_LOCKED case.
1436		 */
1437		if (slocked || (lk->lk_lock & LK_SHARE)) {
1438			witness_assert(&lk->lock_object, what, file, line);
1439			break;
1440		}
1441#endif
1442		if (lk->lk_lock == LK_UNLOCKED ||
1443		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1444		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1445			panic("Lock %s not %slocked @ %s:%d\n",
1446			    lk->lock_object.lo_name, slocked ? "share" : "",
1447			    file, line);
1448
1449		if ((lk->lk_lock & LK_SHARE) == 0) {
1450			if (lockmgr_recursed(lk)) {
1451				if (what & KA_NOTRECURSED)
1452					panic("Lock %s recursed @ %s:%d\n",
1453					    lk->lock_object.lo_name, file,
1454					    line);
1455			} else if (what & KA_RECURSED)
1456				panic("Lock %s not recursed @ %s:%d\n",
1457				    lk->lock_object.lo_name, file, line);
1458		}
1459		break;
1460	case KA_XLOCKED:
1461	case KA_XLOCKED | KA_NOTRECURSED:
1462	case KA_XLOCKED | KA_RECURSED:
1463		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1464			panic("Lock %s not exclusively locked @ %s:%d\n",
1465			    lk->lock_object.lo_name, file, line);
1466		if (lockmgr_recursed(lk)) {
1467			if (what & KA_NOTRECURSED)
1468				panic("Lock %s recursed @ %s:%d\n",
1469				    lk->lock_object.lo_name, file, line);
1470		} else if (what & KA_RECURSED)
1471			panic("Lock %s not recursed @ %s:%d\n",
1472			    lk->lock_object.lo_name, file, line);
1473		break;
1474	case KA_UNLOCKED:
1475		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1476			panic("Lock %s exclusively locked @ %s:%d\n",
1477			    lk->lock_object.lo_name, file, line);
1478		break;
1479	default:
1480		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1481		    line);
1482	}
1483}
1484#endif
1485
1486#ifdef DDB
1487int
1488lockmgr_chain(struct thread *td, struct thread **ownerp)
1489{
1490	struct lock *lk;
1491
1492	lk = td->td_wchan;
1493
1494	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1495		return (0);
1496	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1497	if (lk->lk_lock & LK_SHARE)
1498		db_printf("SHARED (count %ju)\n",
1499		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1500	else
1501		db_printf("EXCL\n");
1502	*ownerp = lockmgr_xholder(lk);
1503
1504	return (1);
1505}
1506
1507static void
1508db_show_lockmgr(const struct lock_object *lock)
1509{
1510	struct thread *td;
1511	const struct lock *lk;
1512
1513	lk = (const struct lock *)lock;
1514
1515	db_printf(" state: ");
1516	if (lk->lk_lock == LK_UNLOCKED)
1517		db_printf("UNLOCKED\n");
1518	else if (lk->lk_lock & LK_SHARE)
1519		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1520	else {
1521		td = lockmgr_xholder(lk);
1522		if (td == (struct thread *)LK_KERNPROC)
1523			db_printf("XLOCK: LK_KERNPROC\n");
1524		else
1525			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1526			    td->td_tid, td->td_proc->p_pid,
1527			    td->td_proc->p_comm);
1528		if (lockmgr_recursed(lk))
1529			db_printf(" recursed: %d\n", lk->lk_recurse);
1530	}
1531	db_printf(" waiters: ");
1532	switch (lk->lk_lock & LK_ALL_WAITERS) {
1533	case LK_SHARED_WAITERS:
1534		db_printf("shared\n");
1535		break;
1536	case LK_EXCLUSIVE_WAITERS:
1537		db_printf("exclusive\n");
1538		break;
1539	case LK_ALL_WAITERS:
1540		db_printf("shared and exclusive\n");
1541		break;
1542	default:
1543		db_printf("none\n");
1544	}
1545	db_printf(" spinners: ");
1546	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1547		db_printf("exclusive\n");
1548	else
1549		db_printf("none\n");
1550}
1551#endif
1552