1/*-
2 * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice(s), this list of conditions and the following disclaimer as
10 *    the first lines of this file unmodified other than the possible
11 *    addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice(s), this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26 * DAMAGE.
27 */
28
29#include "opt_adaptive_lockmgrs.h"
30#include "opt_ddb.h"
31#include "opt_hwpmc_hooks.h"
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/kern/kern_lock.c 258541 2013-11-25 07:38:45Z attilio $");
35
36#include <sys/param.h>
37#include <sys/kdb.h>
38#include <sys/ktr.h>
39#include <sys/lock.h>
40#include <sys/lock_profile.h>
41#include <sys/lockmgr.h>
42#include <sys/mutex.h>
43#include <sys/proc.h>
44#include <sys/sleepqueue.h>
45#ifdef DEBUG_LOCKS
46#include <sys/stack.h>
47#endif
48#include <sys/sysctl.h>
49#include <sys/systm.h>
50
51#include <machine/cpu.h>
52
53#ifdef DDB
54#include <ddb/ddb.h>
55#endif
56
57#ifdef HWPMC_HOOKS
58#include <sys/pmckern.h>
59PMC_SOFT_DECLARE( , , lock, failed);
60#endif
61
62CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
63    (LK_ADAPTIVE | LK_NOSHARE));
64CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
65    ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
66
67#define	SQ_EXCLUSIVE_QUEUE	0
68#define	SQ_SHARED_QUEUE		1
69
70#ifndef INVARIANTS
71#define	_lockmgr_assert(lk, what, file, line)
72#define	TD_LOCKS_INC(td)
73#define	TD_LOCKS_DEC(td)
74#else
75#define	TD_LOCKS_INC(td)	((td)->td_locks++)
76#define	TD_LOCKS_DEC(td)	((td)->td_locks--)
77#endif
78#define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
79#define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
80
81#ifndef DEBUG_LOCKS
82#define	STACK_PRINT(lk)
83#define	STACK_SAVE(lk)
84#define	STACK_ZERO(lk)
85#else
86#define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
87#define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
88#define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
89#endif
90
91#define	LOCK_LOG2(lk, string, arg1, arg2)				\
92	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
93		CTR2(KTR_LOCK, (string), (arg1), (arg2))
94#define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
95	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
96		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
97
98#define	GIANT_DECLARE							\
99	int _i = 0;							\
100	WITNESS_SAVE_DECL(Giant)
101#define	GIANT_RESTORE() do {						\
102	if (_i > 0) {							\
103		while (_i--)						\
104			mtx_lock(&Giant);				\
105		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
106	}								\
107} while (0)
108#define	GIANT_SAVE() do {						\
109	if (mtx_owned(&Giant)) {					\
110		WITNESS_SAVE(&Giant.lock_object, Giant);		\
111		while (mtx_owned(&Giant)) {				\
112			_i++;						\
113			mtx_unlock(&Giant);				\
114		}							\
115	}								\
116} while (0)
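
/*
 * Note: GIANT_SAVE() fully releases Giant, which may be held recursively,
 * recording in _i how many times it was held, so that the thread can block
 * or spin without holding it; GIANT_RESTORE() then reacquires Giant the
 * same number of times and restores its WITNESS state.
 */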
117
118#define	LK_CAN_SHARE(x)							\
119	(((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 ||	\
120	((x) & LK_EXCLUSIVE_SPINNERS) == 0 ||				\
121	curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
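
/*
 * Note: LK_CAN_SHARE() encodes the sharing policy.  A shared request can
 * only be granted while the LK_SHARE bit is set (the lock is unlocked or
 * held in shared mode); it is additionally refused when both exclusive
 * waiters and exclusive spinners are pending, unless the requesting thread
 * already holds shared lockmgr locks or has TDP_DEADLKTREAT set, in which
 * case it may jump ahead of pending writers in order to avoid deadlocks.
 */
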
122#define	LK_TRYOP(x)							\
123	((x) & LK_NOWAIT)
124
125#define	LK_CAN_WITNESS(x)						\
126	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
127#define	LK_TRYWIT(x)							\
128	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
129
130#define	LK_CAN_ADAPT(lk, f)						\
131	(((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&		\
132	((f) & LK_SLEEPFAIL) == 0)
133
134#define	lockmgr_disowned(lk)						\
135	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
136
137#define	lockmgr_xlocked(lk)						\
138	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
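
/*
 * Note: the lk_lock word encodes either the owning thread pointer
 * (exclusive mode) or LK_SHARE plus a sharer count (shared mode), with the
 * low bits reserved for the waiters/spinners flags.  lockmgr_xlocked() and
 * lockmgr_disowned() clear every flag bit except LK_SHARE before comparing
 * the word against curthread or LK_KERNPROC.
 */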
139
140static void	assert_lockmgr(const struct lock_object *lock, int how);
141#ifdef DDB
142static void	db_show_lockmgr(const struct lock_object *lock);
143#endif
144static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
145#ifdef KDTRACE_HOOKS
146static int	owner_lockmgr(const struct lock_object *lock,
147		    struct thread **owner);
148#endif
149static uintptr_t unlock_lockmgr(struct lock_object *lock);
150
151struct lock_class lock_class_lockmgr = {
152	.lc_name = "lockmgr",
153	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
154	.lc_assert = assert_lockmgr,
155#ifdef DDB
156	.lc_ddb_show = db_show_lockmgr,
157#endif
158	.lc_lock = lock_lockmgr,
159	.lc_unlock = unlock_lockmgr,
160#ifdef KDTRACE_HOOKS
161	.lc_owner = owner_lockmgr,
162#endif
163};
164
165#ifdef ADAPTIVE_LOCKMGRS
166static u_int alk_retries = 10;
167static u_int alk_loops = 10000;
168static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
169    "lockmgr debugging");
170SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
171SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
172#endif
173
174static __inline struct thread *
175lockmgr_xholder(const struct lock *lk)
176{
177	uintptr_t x;
178
179	x = lk->lk_lock;
180	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
181}
182
183/*
184 * It assumes that the sleepqueue chain lock is held on entry and returns
185 * with that lock released.  It also assumes that the generic interlock is
186 * valid and has already been checked.  If LK_INTERLOCK is specified, the
187 * interlock is not reacquired after the sleep.
188 */
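/*
 * A rough summary of its return values (not a formal contract): 0 on a
 * normal wakeup, the error returned by the sleepqueue primitives for an
 * interrupted or timed-out sleep (typically EINTR, ERESTART or EWOULDBLOCK),
 * and ENOLCK substituted for a successful sleep when LK_SLEEPFAIL was
 * requested.
 */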
189static __inline int
190sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
191    const char *wmesg, int pri, int timo, int queue)
192{
193	GIANT_DECLARE;
194	struct lock_class *class;
195	int catch, error;
196
197	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
198	catch = pri & PCATCH;
199	pri &= PRIMASK;
200	error = 0;
201
202	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
203	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
204
205	if (flags & LK_INTERLOCK)
206		class->lc_unlock(ilk);
207	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
208		lk->lk_exslpfail++;
209	GIANT_SAVE();
210	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
211	    SLEEPQ_INTERRUPTIBLE : 0), queue);
212	if ((flags & LK_TIMELOCK) && timo)
213		sleepq_set_timeout(&lk->lock_object, timo);
214
215	/*
216	 * Decide how to sleep, honoring the timeout and the PCATCH setting.
217	 */
218	if ((flags & LK_TIMELOCK) && timo && catch)
219		error = sleepq_timedwait_sig(&lk->lock_object, pri);
220	else if ((flags & LK_TIMELOCK) && timo)
221		error = sleepq_timedwait(&lk->lock_object, pri);
222	else if (catch)
223		error = sleepq_wait_sig(&lk->lock_object, pri);
224	else
225		sleepq_wait(&lk->lock_object, pri);
226	GIANT_RESTORE();
227	if ((flags & LK_SLEEPFAIL) && error == 0)
228		error = ENOLCK;
229
230	return (error);
231}
232
233static __inline int
234wakeupshlk(struct lock *lk, const char *file, int line)
235{
236	uintptr_t v, x;
237	u_int realexslp;
238	int queue, wakeup_swapper;
239
240	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
241	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
242
243	wakeup_swapper = 0;
244	for (;;) {
245		x = lk->lk_lock;
246
247		/*
248		 * If there is more than one shared lock held, just drop one
249		 * and return.
250		 */
251		if (LK_SHARERS(x) > 1) {
252			if (atomic_cmpset_rel_ptr(&lk->lk_lock, x,
253			    x - LK_ONE_SHARER))
254				break;
255			continue;
256		}
257
258		/*
259		 * If there are no waiters on the exclusive queue, drop the
260		 * lock quickly.
261		 */
262		if ((x & LK_ALL_WAITERS) == 0) {
263			MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
264			    LK_SHARERS_LOCK(1));
265			if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, LK_UNLOCKED))
266				break;
267			continue;
268		}
269
270		/*
271		 * We should have a sharer with waiters, so enter the hard
272		 * path in order to handle wakeups correctly.
273		 */
274		sleepq_lock(&lk->lock_object);
275		x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
276		v = LK_UNLOCKED;
277
278		/*
279		 * If the lock has exclusive waiters, give them preference in
280		 * order to avoid a deadlock with runner-up shared requests.
281		 * If interruptible sleeps left the exclusive queue empty,
282		 * avoid starvation of the threads sleeping on the shared
283		 * queue by giving them precedence and clearing the
284		 * exclusive waiters bit anyway.
285		 * Please note that the lk_exslpfail count may be lying about
286		 * the real number of waiters with the LK_SLEEPFAIL flag on
287		 * because they may be used in conjunction with interruptible
288		 * sleeps, so lk_exslpfail should be considered an upper
289		 * bound, including the edge cases.
290		 */
291		realexslp = sleepq_sleepcnt(&lk->lock_object,
292		    SQ_EXCLUSIVE_QUEUE);
293		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
294			if (lk->lk_exslpfail < realexslp) {
295				lk->lk_exslpfail = 0;
296				queue = SQ_EXCLUSIVE_QUEUE;
297				v |= (x & LK_SHARED_WAITERS);
298			} else {
299				lk->lk_exslpfail = 0;
300				LOCK_LOG2(lk,
301				    "%s: %p has only LK_SLEEPFAIL sleepers",
302				    __func__, lk);
303				LOCK_LOG2(lk,
304			    "%s: %p waking up threads on the exclusive queue",
305				    __func__, lk);
306				wakeup_swapper =
307				    sleepq_broadcast(&lk->lock_object,
308				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
309				queue = SQ_SHARED_QUEUE;
310			}
311
312		} else {
313
314			/*
315			 * Exclusive waiters sleeping with LK_SLEEPFAIL on
316			 * and using interruptible sleeps/timeout may have
317			 * left spurious lk_exslpfail counts behind, so clean
318			 * them up anyway.
319			 */
320			lk->lk_exslpfail = 0;
321			queue = SQ_SHARED_QUEUE;
322		}
323
324		if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
325		    v)) {
326			sleepq_release(&lk->lock_object);
327			continue;
328		}
329		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
330		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
331		    "exclusive");
332		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
333		    0, queue);
334		sleepq_release(&lk->lock_object);
335		break;
336	}
337
338	lock_profile_release_lock(&lk->lock_object);
339	TD_LOCKS_DEC(curthread);
340	TD_SLOCKS_DEC(curthread);
341	return (wakeup_swapper);
342}
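
/*
 * Note: the wakeup_swapper value accumulated in wakeupshlk() is not acted
 * upon here; it is returned so that __lockmgr_args() can call kick_proc0()
 * once, after the sleepqueue lock and any interlock have been dropped.
 */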
343
344static void
345assert_lockmgr(const struct lock_object *lock, int what)
346{
347
348	panic("lockmgr locks do not support assertions");
349}
350
351static void
352lock_lockmgr(struct lock_object *lock, uintptr_t how)
353{
354
355	panic("lockmgr locks do not support sleep interlocking");
356}
357
358static uintptr_t
359unlock_lockmgr(struct lock_object *lock)
360{
361
362	panic("lockmgr locks do not support sleep interlocking");
363}
364
365#ifdef KDTRACE_HOOKS
366static int
367owner_lockmgr(const struct lock_object *lock, struct thread **owner)
368{
369
370	panic("lockmgr locks do not support owner inquiring");
371}
372#endif
373
374void
375lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
376{
377	int iflags;
378
379	MPASS((flags & ~LK_INIT_MASK) == 0);
380	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
381            ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
382            &lk->lk_lock));
383
384	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
385	if (flags & LK_CANRECURSE)
386		iflags |= LO_RECURSABLE;
387	if ((flags & LK_NODUP) == 0)
388		iflags |= LO_DUPOK;
389	if (flags & LK_NOPROFILE)
390		iflags |= LO_NOPROFILE;
391	if ((flags & LK_NOWITNESS) == 0)
392		iflags |= LO_WITNESS;
393	if (flags & LK_QUIET)
394		iflags |= LO_QUIET;
395	if (flags & LK_IS_VNODE)
396		iflags |= LO_IS_VNODE;
397	iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
398
399	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
400	lk->lk_lock = LK_UNLOCKED;
401	lk->lk_recurse = 0;
402	lk->lk_exslpfail = 0;
403	lk->lk_timo = timo;
404	lk->lk_pri = pri;
405	STACK_ZERO(lk);
406}
407
408/*
409 * XXX: Gross hacks to manipulate external lock flags after
410 * initialization.  Used for certain vnode and buf locks.
411 */
412void
413lockallowshare(struct lock *lk)
414{
415
416	lockmgr_assert(lk, KA_XLOCKED);
417	lk->lock_object.lo_flags &= ~LK_NOSHARE;
418}
419
420void
421lockallowrecurse(struct lock *lk)
422{
423
424	lockmgr_assert(lk, KA_XLOCKED);
425	lk->lock_object.lo_flags |= LO_RECURSABLE;
426}
427
428void
429lockdisablerecurse(struct lock *lk)
430{
431
432	lockmgr_assert(lk, KA_XLOCKED);
433	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
434}
435
436void
437lockdestroy(struct lock *lk)
438{
439
440	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
441	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
442	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
443	lock_destroy(&lk->lock_object);
444}
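
/*
 * Illustrative sketch (kept under "#if 0", so it is not compiled): a minimal
 * lockmgr life cycle using the interfaces defined above.  The lock name,
 * sleep priority and flags below are arbitrary example choices rather than
 * values mandated by this file.
 */
#if 0
static struct lock example_lk;

static void
example_lockmgr_lifecycle(void)
{

	lockinit(&example_lk, PRIBIO, "examplelk", 0, LK_CANRECURSE);

	/* Exclusive acquire and release; no interlock is passed. */
	(void)lockmgr(&example_lk, LK_EXCLUSIVE, NULL);
	lockmgr(&example_lk, LK_RELEASE, NULL);

	/* Shared acquire and release. */
	(void)lockmgr(&example_lk, LK_SHARED, NULL);
	lockmgr(&example_lk, LK_RELEASE, NULL);

	/* The lock must be unlocked and not recursed before destruction. */
	lockdestroy(&example_lk);
}
#endif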
445
446int
447__lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
448    const char *wmesg, int pri, int timo, const char *file, int line)
449{
450	GIANT_DECLARE;
451	struct lock_class *class;
452	const char *iwmesg;
453	uintptr_t tid, v, x;
454	u_int op, realexslp;
455	int error, ipri, itimo, queue, wakeup_swapper;
456#ifdef LOCK_PROFILING
457	uint64_t waittime = 0;
458	int contested = 0;
459#endif
460#ifdef ADAPTIVE_LOCKMGRS
461	volatile struct thread *owner;
462	u_int i, spintries = 0;
463#endif
464
465	error = 0;
466	tid = (uintptr_t)curthread;
467	op = (flags & LK_TYPE_MASK);
468	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
469	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
470	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
471
472	MPASS((flags & ~LK_TOTAL_MASK) == 0);
473	KASSERT((op & (op - 1)) == 0,
474	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
475	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
476	    (op != LK_DOWNGRADE && op != LK_RELEASE),
477	    ("%s: Invalid flags for the requested operation @ %s:%d",
478	    __func__, file, line));
479	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
480	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
481	    __func__, file, line));
482	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
483	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
484	    lk->lock_object.lo_name, file, line));
485
486	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
487	if (panicstr != NULL) {
488		if (flags & LK_INTERLOCK)
489			class->lc_unlock(ilk);
490		return (0);
491	}
492
493	if (lk->lock_object.lo_flags & LK_NOSHARE) {
494		switch (op) {
495		case LK_SHARED:
496			op = LK_EXCLUSIVE;
497			break;
498		case LK_UPGRADE:
499		case LK_TRYUPGRADE:
500		case LK_DOWNGRADE:
501			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
502			    file, line);
503			if (flags & LK_INTERLOCK)
504				class->lc_unlock(ilk);
505			return (0);
506		}
507	}
508
509	wakeup_swapper = 0;
510	switch (op) {
511	case LK_SHARED:
512		if (LK_CAN_WITNESS(flags))
513			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
514			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
515		for (;;) {
516			x = lk->lk_lock;
517
518			/*
519			 * If no other thread has an exclusive lock, or
520			 * no exclusive waiter is present, bump the count of
521			 * sharers.  Since we have to preserve the state of
522			 * waiters, if we fail to acquire the shared lock
523			 * loop back and retry.
524			 */
525			if (LK_CAN_SHARE(x)) {
526				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
527				    x + LK_ONE_SHARER))
528					break;
529				continue;
530			}
531#ifdef HWPMC_HOOKS
532			PMC_SOFT_CALL( , , lock, failed);
533#endif
534			lock_profile_obtain_lock_failed(&lk->lock_object,
535			    &contested, &waittime);
536
537			/*
538			 * If the lock is already held by curthread in
539			 * exclusive mode, avoid a deadlock.
540			 */
541			if (LK_HOLDER(x) == tid) {
542				LOCK_LOG2(lk,
543				    "%s: %p already held in exclusive mode",
544				    __func__, lk);
545				error = EDEADLK;
546				break;
547			}
548
549			/*
550			 * If the operation is not allowed to sleep, just give up
551			 * and return.
552			 */
553			if (LK_TRYOP(flags)) {
554				LOCK_LOG2(lk, "%s: %p fails the try operation",
555				    __func__, lk);
556				error = EBUSY;
557				break;
558			}
559
560#ifdef ADAPTIVE_LOCKMGRS
561			/*
562			 * If the owner is running on another CPU, spin until
563			 * the owner stops running or the state of the lock
564			 * changes.  We need to handle both possibilities here
565			 * because, after a failed acquisition, the lock can be
566			 * held either in exclusive mode or in shared mode
567			 * (due to the writer starvation avoidance technique).
568			 */
569			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
570			    LK_HOLDER(x) != LK_KERNPROC) {
571				owner = (struct thread *)LK_HOLDER(x);
572				if (LOCK_LOG_TEST(&lk->lock_object, 0))
573					CTR3(KTR_LOCK,
574					    "%s: spinning on %p held by %p",
575					    __func__, lk, owner);
576
577				/*
578				 * If we are also holding an interlock, drop it
579				 * in order to avoid a deadlock if the lockmgr
580				 * owner is adaptively spinning on the
581				 * interlock itself.
582				 */
583				if (flags & LK_INTERLOCK) {
584					class->lc_unlock(ilk);
585					flags &= ~LK_INTERLOCK;
586				}
587				GIANT_SAVE();
588				while (LK_HOLDER(lk->lk_lock) ==
589				    (uintptr_t)owner && TD_IS_RUNNING(owner))
590					cpu_spinwait();
591				GIANT_RESTORE();
592				continue;
593			} else if (LK_CAN_ADAPT(lk, flags) &&
594			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
595			    spintries < alk_retries) {
596				if (flags & LK_INTERLOCK) {
597					class->lc_unlock(ilk);
598					flags &= ~LK_INTERLOCK;
599				}
600				GIANT_SAVE();
601				spintries++;
602				for (i = 0; i < alk_loops; i++) {
603					if (LOCK_LOG_TEST(&lk->lock_object, 0))
604						CTR4(KTR_LOCK,
605				    "%s: shared spinning on %p with %u and %u",
606						    __func__, lk, spintries, i);
607					x = lk->lk_lock;
608					if ((x & LK_SHARE) == 0 ||
609					    LK_CAN_SHARE(x) != 0)
610						break;
611					cpu_spinwait();
612				}
613				GIANT_RESTORE();
614				if (i != alk_loops)
615					continue;
616			}
617#endif
618
619			/*
620			 * Acquire the sleepqueue chain lock because we
621			 * probably will need to manipulate the waiters flags.
622			 */
623			sleepq_lock(&lk->lock_object);
624			x = lk->lk_lock;
625
626			/*
627			 * if the lock can be acquired in shared mode, try
628			 * again.
629			 */
630			if (LK_CAN_SHARE(x)) {
631				sleepq_release(&lk->lock_object);
632				continue;
633			}
634
635#ifdef ADAPTIVE_LOCKMGRS
636			/*
637			 * The current lock owner might have started executing
638			 * on another CPU (or the lock could have changed
639			 * owner) while we were waiting on the sleepqueue
640			 * chain lock.  If so, drop the sleepqueue lock and try
641			 * again.
642			 */
643			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
644			    LK_HOLDER(x) != LK_KERNPROC) {
645				owner = (struct thread *)LK_HOLDER(x);
646				if (TD_IS_RUNNING(owner)) {
647					sleepq_release(&lk->lock_object);
648					continue;
649				}
650			}
651#endif
652
653			/*
654			 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
655			 * loop back and retry.
656			 */
657			if ((x & LK_SHARED_WAITERS) == 0) {
658				if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
659				    x | LK_SHARED_WAITERS)) {
660					sleepq_release(&lk->lock_object);
661					continue;
662				}
663				LOCK_LOG2(lk, "%s: %p set shared waiters flag",
664				    __func__, lk);
665			}
666
667			/*
668			 * Since we have been unable to acquire the
669			 * shared lock and the shared waiters flag is set,
670			 * we will sleep.
671			 */
672			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
673			    SQ_SHARED_QUEUE);
674			flags &= ~LK_INTERLOCK;
675			if (error) {
676				LOCK_LOG3(lk,
677				    "%s: interrupted sleep for %p with %d",
678				    __func__, lk, error);
679				break;
680			}
681			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
682			    __func__, lk);
683		}
684		if (error == 0) {
685			lock_profile_obtain_lock_success(&lk->lock_object,
686			    contested, waittime, file, line);
687			LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
688			    line);
689			WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
690			    line);
691			TD_LOCKS_INC(curthread);
692			TD_SLOCKS_INC(curthread);
693			STACK_SAVE(lk);
694		}
695		break;
696	case LK_UPGRADE:
697	case LK_TRYUPGRADE:
698		_lockmgr_assert(lk, KA_SLOCKED, file, line);
699		v = lk->lk_lock;
700		x = v & LK_ALL_WAITERS;
701		v &= LK_EXCLUSIVE_SPINNERS;
702
703		/*
704		 * Try to switch from one shared lock to an exclusive one.
705		 * We need to preserve waiters flags during the operation.
706		 */
707		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
708		    tid | x)) {
709			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
710			    line);
711			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
712			    LK_TRYWIT(flags), file, line);
713			TD_SLOCKS_DEC(curthread);
714			break;
715		}
716
717		/*
718		 * In LK_TRYUPGRADE mode, do not drop the lock,
719		 * returning EBUSY instead.
720		 */
721		if (op == LK_TRYUPGRADE) {
722			LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
723			    __func__, lk);
724			error = EBUSY;
725			break;
726		}
727
728		/*
729		 * We have been unable to upgrade, so just
730		 * give up the shared lock.
731		 */
732		wakeup_swapper |= wakeupshlk(lk, file, line);
733
734		/* FALLTHROUGH */
735	case LK_EXCLUSIVE:
736		if (LK_CAN_WITNESS(flags))
737			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
738			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
739			    ilk : NULL);
740
741		/*
742		 * If curthread already holds the lock and this one is
743		 * allowed to recurse, simply recurse on it.
744		 */
745		if (lockmgr_xlocked(lk)) {
746			if ((flags & LK_CANRECURSE) == 0 &&
747			    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
748
749				/*
750				 * If this is a try operation, just give up and
751				 * return instead of panicking.
752				 */
753				if (LK_TRYOP(flags)) {
754					LOCK_LOG2(lk,
755					    "%s: %p fails the try operation",
756					    __func__, lk);
757					error = EBUSY;
758					break;
759				}
760				if (flags & LK_INTERLOCK)
761					class->lc_unlock(ilk);
762		panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
763				    __func__, iwmesg, file, line);
764			}
765			lk->lk_recurse++;
766			LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
767			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
768			    lk->lk_recurse, file, line);
769			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
770			    LK_TRYWIT(flags), file, line);
771			TD_LOCKS_INC(curthread);
772			break;
773		}
774
775		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
776		    tid)) {
777#ifdef HWPMC_HOOKS
778			PMC_SOFT_CALL( , , lock, failed);
779#endif
780			lock_profile_obtain_lock_failed(&lk->lock_object,
781			    &contested, &waittime);
782
783			/*
784			 * If the operation is not allowed to sleep, just give up
785			 * and return.
786			 */
787			if (LK_TRYOP(flags)) {
788				LOCK_LOG2(lk, "%s: %p fails the try operation",
789				    __func__, lk);
790				error = EBUSY;
791				break;
792			}
793
794#ifdef ADAPTIVE_LOCKMGRS
795			/*
796			 * If the owner is running on another CPU, spin until
797			 * the owner stops running or the state of the lock
798			 * changes.
799			 */
800			x = lk->lk_lock;
801			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
802			    LK_HOLDER(x) != LK_KERNPROC) {
803				owner = (struct thread *)LK_HOLDER(x);
804				if (LOCK_LOG_TEST(&lk->lock_object, 0))
805					CTR3(KTR_LOCK,
806					    "%s: spinning on %p held by %p",
807					    __func__, lk, owner);
808
809				/*
810				 * If we are also holding an interlock, drop it
811				 * in order to avoid a deadlock if the lockmgr
812				 * owner is adaptively spinning on the
813				 * interlock itself.
814				 */
815				if (flags & LK_INTERLOCK) {
816					class->lc_unlock(ilk);
817					flags &= ~LK_INTERLOCK;
818				}
819				GIANT_SAVE();
820				while (LK_HOLDER(lk->lk_lock) ==
821				    (uintptr_t)owner && TD_IS_RUNNING(owner))
822					cpu_spinwait();
823				GIANT_RESTORE();
824				continue;
825			} else if (LK_CAN_ADAPT(lk, flags) &&
826			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
827			    spintries < alk_retries) {
828				if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
829				    !atomic_cmpset_ptr(&lk->lk_lock, x,
830				    x | LK_EXCLUSIVE_SPINNERS))
831					continue;
832				if (flags & LK_INTERLOCK) {
833					class->lc_unlock(ilk);
834					flags &= ~LK_INTERLOCK;
835				}
836				GIANT_SAVE();
837				spintries++;
838				for (i = 0; i < alk_loops; i++) {
839					if (LOCK_LOG_TEST(&lk->lock_object, 0))
840						CTR4(KTR_LOCK,
841				    "%s: shared spinning on %p with %u and %u",
842						    __func__, lk, spintries, i);
843					if ((lk->lk_lock &
844					    LK_EXCLUSIVE_SPINNERS) == 0)
845						break;
846					cpu_spinwait();
847				}
848				GIANT_RESTORE();
849				if (i != alk_loops)
850					continue;
851			}
852#endif
853
854			/*
855			 * Acquire the sleepqueue chain lock because we
856			 * probably will need to manipulate the waiters flags.
857			 */
858			sleepq_lock(&lk->lock_object);
859			x = lk->lk_lock;
860
861			/*
862			 * if the lock has been released while we spun on
863			 * the sleepqueue chain lock just try again.
864			 */
865			if (x == LK_UNLOCKED) {
866				sleepq_release(&lk->lock_object);
867				continue;
868			}
869
870#ifdef ADAPTIVE_LOCKMGRS
871			/*
872			 * The current lock owner might have started executing
873			 * on another CPU (or the lock could have changed
874			 * owner) while we were waiting on the sleepqueue
875			 * chain lock.  If so, drop the sleepqueue lock and try
876			 * again.
877			 */
878			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
879			    LK_HOLDER(x) != LK_KERNPROC) {
880				owner = (struct thread *)LK_HOLDER(x);
881				if (TD_IS_RUNNING(owner)) {
882					sleepq_release(&lk->lock_object);
883					continue;
884				}
885			}
886#endif
887
888			/*
889			 * The lock can be in the state where there is a
890			 * pending queue of waiters, but still no owner.
891			 * This happens when the lock is contested and a new
892			 * owner is about to claim it.
893			 * If curthread is the one that successfully acquires it,
894			 * claim lock ownership and return, preserving the waiters
895			 * flags.
896			 */
897			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
898			if ((x & ~v) == LK_UNLOCKED) {
899				v &= ~LK_EXCLUSIVE_SPINNERS;
900				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
901				    tid | v)) {
902					sleepq_release(&lk->lock_object);
903					LOCK_LOG2(lk,
904					    "%s: %p claimed by a new writer",
905					    __func__, lk);
906					break;
907				}
908				sleepq_release(&lk->lock_object);
909				continue;
910			}
911
912			/*
913			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
914			 * fail, loop back and retry.
915			 */
916			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
917				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
918				    x | LK_EXCLUSIVE_WAITERS)) {
919					sleepq_release(&lk->lock_object);
920					continue;
921				}
922				LOCK_LOG2(lk, "%s: %p set excl waiters flag",
923				    __func__, lk);
924			}
925
926			/*
927			 * Since we have been unable to acquire the
928			 * exclusive lock and the exclusive waiters flag
929			 * is set, we will sleep.
930			 */
931			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
932			    SQ_EXCLUSIVE_QUEUE);
933			flags &= ~LK_INTERLOCK;
934			if (error) {
935				LOCK_LOG3(lk,
936				    "%s: interrupted sleep for %p with %d",
937				    __func__, lk, error);
938				break;
939			}
940			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
941			    __func__, lk);
942		}
943		if (error == 0) {
944			lock_profile_obtain_lock_success(&lk->lock_object,
945			    contested, waittime, file, line);
946			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
947			    lk->lk_recurse, file, line);
948			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
949			    LK_TRYWIT(flags), file, line);
950			TD_LOCKS_INC(curthread);
951			STACK_SAVE(lk);
952		}
953		break;
954	case LK_DOWNGRADE:
955		_lockmgr_assert(lk, KA_XLOCKED, file, line);
956		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
957		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
958
959		/*
960		 * Panic if the lock is recursed.
961		 */
962		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
963			if (flags & LK_INTERLOCK)
964				class->lc_unlock(ilk);
965			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
966			    __func__, iwmesg, file, line);
967		}
968		TD_SLOCKS_INC(curthread);
969
970		/*
971		 * In order to preserve waiters flags, just spin.
972		 */
973		for (;;) {
974			x = lk->lk_lock;
975			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
976			x &= LK_ALL_WAITERS;
977			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
978			    LK_SHARERS_LOCK(1) | x))
979				break;
980			cpu_spinwait();
981		}
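		/*
		 * Note: the loop above atomically replaces the owner tid with
		 * a single-sharer count while preserving any waiters bits; no
		 * wakeup of sleeping shared waiters is issued at this point.
		 */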
982		break;
983	case LK_RELEASE:
984		_lockmgr_assert(lk, KA_LOCKED, file, line);
985		x = lk->lk_lock;
986
987		if ((x & LK_SHARE) == 0) {
988
989			/*
990			 * As a first option, treat the lock as if it has no
991			 * waiters.
992			 * Fix up the tid variable if the lock has been disowned.
993			 */
994			if (LK_HOLDER(x) == LK_KERNPROC)
995				tid = LK_KERNPROC;
996			else {
997				WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
998				    file, line);
999				TD_LOCKS_DEC(curthread);
1000			}
1001			LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
1002			    lk->lk_recurse, file, line);
1003
1004			/*
1005			 * The lock is held in exclusive mode.
1006			 * If the lock is recursed also, then unrecurse it.
1007			 */
1008			if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1009				LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
1010				    lk);
1011				lk->lk_recurse--;
1012				break;
1013			}
1014			if (tid != LK_KERNPROC)
1015				lock_profile_release_lock(&lk->lock_object);
1016
1017			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
1018			    LK_UNLOCKED))
1019				break;
1020
1021			sleepq_lock(&lk->lock_object);
1022			x = lk->lk_lock;
1023			v = LK_UNLOCKED;
1024
1025			/*
1026			 * If the lock has exclusive waiters, give them
1027			 * preference in order to avoid a deadlock with
1028			 * runner-up shared requests.
1029			 * If interruptible sleeps left the exclusive queue
1030			 * empty, avoid starvation of the threads sleeping
1031			 * on the shared queue by giving them precedence
1032			 * and clearing the exclusive waiters bit anyway.
1033			 * Please note that the lk_exslpfail count may be lying
1034			 * about the real number of waiters with the
1035			 * LK_SLEEPFAIL flag on because they may be used in
1036			 * conjunction with interruptible sleeps, so
1037			 * lk_exslpfail should be considered an upper
1038			 * bound, including the edge cases.
1039			 */
1040			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1041			realexslp = sleepq_sleepcnt(&lk->lock_object,
1042			    SQ_EXCLUSIVE_QUEUE);
1043			if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1044				if (lk->lk_exslpfail < realexslp) {
1045					lk->lk_exslpfail = 0;
1046					queue = SQ_EXCLUSIVE_QUEUE;
1047					v |= (x & LK_SHARED_WAITERS);
1048				} else {
1049					lk->lk_exslpfail = 0;
1050					LOCK_LOG2(lk,
1051					"%s: %p has only LK_SLEEPFAIL sleepers",
1052					    __func__, lk);
1053					LOCK_LOG2(lk,
1054			"%s: %p waking up threads on the exclusive queue",
1055					    __func__, lk);
1056					wakeup_swapper =
1057					    sleepq_broadcast(&lk->lock_object,
1058					    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1059					queue = SQ_SHARED_QUEUE;
1060				}
1061			} else {
1062
1063				/*
1064				 * Exclusive waiters sleeping with LK_SLEEPFAIL
1065				 * on and using interruptible sleeps/timeout
1066				 * may have left spurious lk_exslpfail counts
1067				 * behind, so clean them up anyway.
1068				 */
1069				lk->lk_exslpfail = 0;
1070				queue = SQ_SHARED_QUEUE;
1071			}
1072
1073			LOCK_LOG3(lk,
1074			    "%s: %p waking up threads on the %s queue",
1075			    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1076			    "exclusive");
1077			atomic_store_rel_ptr(&lk->lk_lock, v);
1078			wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
1079			    SLEEPQ_LK, 0, queue);
1080			sleepq_release(&lk->lock_object);
1081			break;
1082		} else
1083			wakeup_swapper = wakeupshlk(lk, file, line);
1084		break;
1085	case LK_DRAIN:
1086		if (LK_CAN_WITNESS(flags))
1087			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1088			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1089			    ilk : NULL);
1090
1091		/*
1092		 * Trying to drain a lock we already own will result in a
1093		 * deadlock.
1094		 */
1095		if (lockmgr_xlocked(lk)) {
1096			if (flags & LK_INTERLOCK)
1097				class->lc_unlock(ilk);
1098			panic("%s: draining %s with the lock held @ %s:%d\n",
1099			    __func__, iwmesg, file, line);
1100		}
1101
1102		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1103#ifdef HWPMC_HOOKS
1104			PMC_SOFT_CALL( , , lock, failed);
1105#endif
1106			lock_profile_obtain_lock_failed(&lk->lock_object,
1107			    &contested, &waittime);
1108
1109			/*
1110			 * If the operation is not allowed to sleep, just give up
1111			 * and return.
1112			 */
1113			if (LK_TRYOP(flags)) {
1114				LOCK_LOG2(lk, "%s: %p fails the try operation",
1115				    __func__, lk);
1116				error = EBUSY;
1117				break;
1118			}
1119
1120			/*
1121			 * Acquire the sleepqueue chain lock because we
1122			 * probably will need to manipulate the waiters flags.
1123			 */
1124			sleepq_lock(&lk->lock_object);
1125			x = lk->lk_lock;
1126
1127			/*
1128			 * if the lock has been released while we spun on
1129			 * the sleepqueue chain lock just try again.
1130			 */
1131			if (x == LK_UNLOCKED) {
1132				sleepq_release(&lk->lock_object);
1133				continue;
1134			}
1135
1136			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1137			if ((x & ~v) == LK_UNLOCKED) {
1138				v = (x & ~LK_EXCLUSIVE_SPINNERS);
1139
1140				/*
1141				 * If interruptible sleeps left the exclusive
1142				 * queue empty, avoid starvation of the
1143				 * threads sleeping on the shared queue by
1144				 * giving them precedence and clearing the
1145				 * exclusive waiters bit anyway.
1146				 * Please note that the lk_exslpfail count may be
1147				 * lying about the real number of waiters with
1148				 * the LK_SLEEPFAIL flag on because they may
1149				 * be used in conjunction with interruptible
1150				 * sleeps, so lk_exslpfail should be considered
1151				 * an upper bound, including the edge
1152				 * cases.
1153				 */
1154				if (v & LK_EXCLUSIVE_WAITERS) {
1155					queue = SQ_EXCLUSIVE_QUEUE;
1156					v &= ~LK_EXCLUSIVE_WAITERS;
1157				} else {
1158
1159					/*
1160					 * Exclusive waiters sleeping with
1161					 * LK_SLEEPFAIL on and using
1162					 * interruptible sleeps/timeout may
1163					 * have left spurious lk_exslpfail
1164					 * counts behind, so clean them up anyway.
1165					 */
1166					MPASS(v & LK_SHARED_WAITERS);
1167					lk->lk_exslpfail = 0;
1168					queue = SQ_SHARED_QUEUE;
1169					v &= ~LK_SHARED_WAITERS;
1170				}
1171				if (queue == SQ_EXCLUSIVE_QUEUE) {
1172					realexslp =
1173					    sleepq_sleepcnt(&lk->lock_object,
1174					    SQ_EXCLUSIVE_QUEUE);
1175					if (lk->lk_exslpfail >= realexslp) {
1176						lk->lk_exslpfail = 0;
1177						queue = SQ_SHARED_QUEUE;
1178						v &= ~LK_SHARED_WAITERS;
1179						if (realexslp != 0) {
1180							LOCK_LOG2(lk,
1181					"%s: %p has only LK_SLEEPFAIL sleepers",
1182							    __func__, lk);
1183							LOCK_LOG2(lk,
1184			"%s: %p waking up threads on the exclusive queue",
1185							    __func__, lk);
1186							wakeup_swapper =
1187							    sleepq_broadcast(
1188							    &lk->lock_object,
1189							    SLEEPQ_LK, 0,
1190							    SQ_EXCLUSIVE_QUEUE);
1191						}
1192					} else
1193						lk->lk_exslpfail = 0;
1194				}
1195				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1196					sleepq_release(&lk->lock_object);
1197					continue;
1198				}
1199				LOCK_LOG3(lk,
1200				"%s: %p waking up all threads on the %s queue",
1201				    __func__, lk, queue == SQ_SHARED_QUEUE ?
1202				    "shared" : "exclusive");
1203				wakeup_swapper |= sleepq_broadcast(
1204				    &lk->lock_object, SLEEPQ_LK, 0, queue);
1205
1206				/*
1207				 * If shared waiters have been woken up, we need
1208				 * to wait for one of them to acquire the lock
1209				 * before setting the exclusive waiters flag in
1210				 * order to avoid a deadlock.
1211				 */
1212				if (queue == SQ_SHARED_QUEUE) {
1213					for (v = lk->lk_lock;
1214					    (v & LK_SHARE) && !LK_SHARERS(v);
1215					    v = lk->lk_lock)
1216						cpu_spinwait();
1217				}
1218			}
1219
1220			/*
1221			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1222			 * fail, loop back and retry.
1223			 */
1224			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1225				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1226				    x | LK_EXCLUSIVE_WAITERS)) {
1227					sleepq_release(&lk->lock_object);
1228					continue;
1229				}
1230				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1231				    __func__, lk);
1232			}
1233
1234			/*
1235			 * Since we have been unable to acquire the
1236			 * exclusive lock and the exclusive waiters flag
1237			 * is set, we will sleep.
1238			 */
1239			if (flags & LK_INTERLOCK) {
1240				class->lc_unlock(ilk);
1241				flags &= ~LK_INTERLOCK;
1242			}
1243			GIANT_SAVE();
1244			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1245			    SQ_EXCLUSIVE_QUEUE);
1246			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1247			GIANT_RESTORE();
1248			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1249			    __func__, lk);
1250		}
1251
1252		if (error == 0) {
1253			lock_profile_obtain_lock_success(&lk->lock_object,
1254			    contested, waittime, file, line);
1255			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1256			    lk->lk_recurse, file, line);
1257			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1258			    LK_TRYWIT(flags), file, line);
1259			TD_LOCKS_INC(curthread);
1260			STACK_SAVE(lk);
1261		}
1262		break;
1263	default:
1264		if (flags & LK_INTERLOCK)
1265			class->lc_unlock(ilk);
1266		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1267	}
1268
1269	if (flags & LK_INTERLOCK)
1270		class->lc_unlock(ilk);
1271	if (wakeup_swapper)
1272		kick_proc0();
1273
1274	return (error);
1275}
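
/*
 * Illustrative sketch (kept under "#if 0", so it is not compiled) of the
 * LK_INTERLOCK convention handled above.  The mutex and lock names are
 * hypothetical; passing the mutex's embedded lock_object is simply one
 * type-correct way to hand an interlock to lockmgr.  The caller enters with
 * the interlock held and lockmgr drops it through the interlock's lock
 * class, whether or not it had to sleep, so the caller must not unlock it
 * again.
 */
#if 0
static struct mtx example_interlock;
static struct lock example_lk;

static int
example_interlocked_acquire(void)
{
	int error;

	mtx_lock(&example_interlock);
	/* ... examine state protected by the interlock ... */
	error = lockmgr(&example_lk, LK_EXCLUSIVE | LK_INTERLOCK,
	    &example_interlock.lock_object);
	/* The interlock has been released by lockmgr at this point. */
	return (error);
}
#endif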
1276
1277void
1278_lockmgr_disown(struct lock *lk, const char *file, int line)
1279{
1280	uintptr_t tid, x;
1281
1282	if (SCHEDULER_STOPPED())
1283		return;
1284
1285	tid = (uintptr_t)curthread;
1286	_lockmgr_assert(lk, KA_XLOCKED, file, line);
1287
1288	/*
1289	 * Panic if the lock is recursed.
1290	 */
1291	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1292		panic("%s: disown a recursed lockmgr @ %s:%d\n",
1293		    __func__,  file, line);
1294
1295	/*
1296	 * If the owner is already LK_KERNPROC just skip the whole operation.
1297	 */
1298	if (LK_HOLDER(lk->lk_lock) != tid)
1299		return;
1300	lock_profile_release_lock(&lk->lock_object);
1301	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1302	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1303	TD_LOCKS_DEC(curthread);
1304	STACK_SAVE(lk);
1305
1306	/*
1307	 * In order to preserve waiters flags, just spin.
1308	 */
1309	for (;;) {
1310		x = lk->lk_lock;
1311		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1312		x &= LK_ALL_WAITERS;
1313		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1314		    LK_KERNPROC | x))
1315			return;
1316		cpu_spinwait();
1317	}
1318}
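
/*
 * Note: after a successful disown the lock word records LK_KERNPROC as the
 * holder, so any thread may later perform the LK_RELEASE; the WITNESS and
 * per-thread lock accounting for curthread are dropped above because
 * curthread no longer owns the lock.
 */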
1319
1320void
1321lockmgr_printinfo(const struct lock *lk)
1322{
1323	struct thread *td;
1324	uintptr_t x;
1325
1326	if (lk->lk_lock == LK_UNLOCKED)
1327		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1328	else if (lk->lk_lock & LK_SHARE)
1329		printf("lock type %s: SHARED (count %ju)\n",
1330		    lk->lock_object.lo_name,
1331		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1332	else {
1333		td = lockmgr_xholder(lk);
1334		printf("lock type %s: EXCL by thread %p "
1335		    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name, td,
1336		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_tid);
1337	}
1338
1339	x = lk->lk_lock;
1340	if (x & LK_EXCLUSIVE_WAITERS)
1341		printf(" with exclusive waiters pending\n");
1342	if (x & LK_SHARED_WAITERS)
1343		printf(" with shared waiters pending\n");
1344	if (x & LK_EXCLUSIVE_SPINNERS)
1345		printf(" with exclusive spinners pending\n");
1346
1347	STACK_PRINT(lk);
1348}
1349
1350int
1351lockstatus(const struct lock *lk)
1352{
1353	uintptr_t v, x;
1354	int ret;
1355
1356	ret = LK_SHARED;
1357	x = lk->lk_lock;
1358	v = LK_HOLDER(x);
1359
1360	if ((x & LK_SHARE) == 0) {
1361		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1362			ret = LK_EXCLUSIVE;
1363		else
1364			ret = LK_EXCLOTHER;
1365	} else if (x == LK_UNLOCKED)
1366		ret = 0;
1367
1368	return (ret);
1369}
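
/*
 * Note: lockstatus() is an unlocked snapshot.  It reports LK_EXCLUSIVE when
 * curthread (or LK_KERNPROC, for a disowned lock) is the exclusive holder,
 * LK_EXCLOTHER when another thread is, LK_SHARED when the lock is held in
 * shared mode, and 0 when it is unlocked; unless the caller itself holds the
 * lock, the result may be stale by the time it is used.
 */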
1370
1371#ifdef INVARIANT_SUPPORT
1372
1373FEATURE(invariant_support,
1374    "Support for modules compiled with INVARIANTS option");
1375
1376#ifndef INVARIANTS
1377#undef	_lockmgr_assert
1378#endif
1379
1380void
1381_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1382{
1383	int slocked = 0;
1384
1385	if (panicstr != NULL)
1386		return;
1387	switch (what) {
1388	case KA_SLOCKED:
1389	case KA_SLOCKED | KA_NOTRECURSED:
1390	case KA_SLOCKED | KA_RECURSED:
1391		slocked = 1;
1392	case KA_LOCKED:
1393	case KA_LOCKED | KA_NOTRECURSED:
1394	case KA_LOCKED | KA_RECURSED:
1395#ifdef WITNESS
1396
1397		/*
1398		 * We cannot trust WITNESS if the lock is held in exclusive
1399		 * mode and a call to lockmgr_disown() happened.
1400		 * Work around this by skipping the check if the lock is held in
1401		 * exclusive mode even for the KA_LOCKED case.
1402		 */
1403		if (slocked || (lk->lk_lock & LK_SHARE)) {
1404			witness_assert(&lk->lock_object, what, file, line);
1405			break;
1406		}
1407#endif
1408		if (lk->lk_lock == LK_UNLOCKED ||
1409		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1410		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1411			panic("Lock %s not %slocked @ %s:%d\n",
1412			    lk->lock_object.lo_name, slocked ? "share" : "",
1413			    file, line);
1414
1415		if ((lk->lk_lock & LK_SHARE) == 0) {
1416			if (lockmgr_recursed(lk)) {
1417				if (what & KA_NOTRECURSED)
1418					panic("Lock %s recursed @ %s:%d\n",
1419					    lk->lock_object.lo_name, file,
1420					    line);
1421			} else if (what & KA_RECURSED)
1422				panic("Lock %s not recursed @ %s:%d\n",
1423				    lk->lock_object.lo_name, file, line);
1424		}
1425		break;
1426	case KA_XLOCKED:
1427	case KA_XLOCKED | KA_NOTRECURSED:
1428	case KA_XLOCKED | KA_RECURSED:
1429		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1430			panic("Lock %s not exclusively locked @ %s:%d\n",
1431			    lk->lock_object.lo_name, file, line);
1432		if (lockmgr_recursed(lk)) {
1433			if (what & KA_NOTRECURSED)
1434				panic("Lock %s recursed @ %s:%d\n",
1435				    lk->lock_object.lo_name, file, line);
1436		} else if (what & KA_RECURSED)
1437			panic("Lock %s not recursed @ %s:%d\n",
1438			    lk->lock_object.lo_name, file, line);
1439		break;
1440	case KA_UNLOCKED:
1441		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1442			panic("Lock %s exclusively locked @ %s:%d\n",
1443			    lk->lock_object.lo_name, file, line);
1444		break;
1445	default:
1446		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1447		    line);
1448	}
1449}
1450#endif
1451
1452#ifdef DDB
1453int
1454lockmgr_chain(struct thread *td, struct thread **ownerp)
1455{
1456	struct lock *lk;
1457
1458	lk = td->td_wchan;
1459
1460	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1461		return (0);
1462	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1463	if (lk->lk_lock & LK_SHARE)
1464		db_printf("SHARED (count %ju)\n",
1465		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1466	else
1467		db_printf("EXCL\n");
1468	*ownerp = lockmgr_xholder(lk);
1469
1470	return (1);
1471}
1472
1473static void
1474db_show_lockmgr(const struct lock_object *lock)
1475{
1476	struct thread *td;
1477	const struct lock *lk;
1478
1479	lk = (const struct lock *)lock;
1480
1481	db_printf(" state: ");
1482	if (lk->lk_lock == LK_UNLOCKED)
1483		db_printf("UNLOCKED\n");
1484	else if (lk->lk_lock & LK_SHARE)
1485		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1486	else {
1487		td = lockmgr_xholder(lk);
1488		if (td == (struct thread *)LK_KERNPROC)
1489			db_printf("XLOCK: LK_KERNPROC\n");
1490		else
1491			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1492			    td->td_tid, td->td_proc->p_pid,
1493			    td->td_proc->p_comm);
1494		if (lockmgr_recursed(lk))
1495			db_printf(" recursed: %d\n", lk->lk_recurse);
1496	}
1497	db_printf(" waiters: ");
1498	switch (lk->lk_lock & LK_ALL_WAITERS) {
1499	case LK_SHARED_WAITERS:
1500		db_printf("shared\n");
1501		break;
1502	case LK_EXCLUSIVE_WAITERS:
1503		db_printf("exclusive\n");
1504		break;
1505	case LK_ALL_WAITERS:
1506		db_printf("shared and exclusive\n");
1507		break;
1508	default:
1509		db_printf("none\n");
1510	}
1511	db_printf(" spinners: ");
1512	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1513		db_printf("exclusive\n");
1514	else
1515		db_printf("none\n");
1516}
1517#endif
1518