/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/lock_profile.h>
#include <sys/lockmgr.h>
#include <sys/lockstat.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepqueue.h>
#ifdef DEBUG_LOCKS
#include <sys/stack.h>
#endif
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
    (LK_ADAPTIVE | LK_NOSHARE));
CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
    ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));

#define	SQ_EXCLUSIVE_QUEUE	0
#define	SQ_SHARED_QUEUE		1

#ifndef INVARIANTS
#define	_lockmgr_assert(lk, what, file, line)
#endif

#define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
#define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)

#ifndef DEBUG_LOCKS
#define	STACK_PRINT(lk)
#define	STACK_SAVE(lk)
#define	STACK_ZERO(lk)
#else
#define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
#define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
#define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
#endif

#define	LOCK_LOG2(lk, string, arg1, arg2)				\
	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
		CTR2(KTR_LOCK, (string), (arg1), (arg2))
#define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))

#define	GIANT_DECLARE							\
	int _i = 0;							\
	WITNESS_SAVE_DECL(Giant)
#define	GIANT_RESTORE() do {						\
	if (_i > 0) {							\
		while (_i--)						\
			mtx_lock(&Giant);				\
		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
	}								\
} while (0)
#define	GIANT_SAVE() do {						\
	if (mtx_owned(&Giant)) {					\
		WITNESS_SAVE(&Giant.lock_object, Giant);		\
		while (mtx_owned(&Giant)) {				\
			_i++;						\
			mtx_unlock(&Giant);				\
		}							\
	}								\
} while (0)

static bool __always_inline
LK_CAN_SHARE(uintptr_t x, int flags, bool fp)
{

	if ((x & (LK_SHARE | LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) ==
	    LK_SHARE)
		return (true);
	if (fp || (!(x & LK_SHARE)))
		return (false);
	if ((curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||
	    (curthread->td_pflags & TDP_DEADLKTREAT))
		return (true);
	return (false);
}
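
/*
 * In outline: a shared request is granted immediately only when the lock
 * word still has LK_SHARE set (unlocked or share-locked) and no exclusive
 * waiters or spinners are queued.  The fast path (fp == true) never looks
 * any further.  In the slow path a thread that already holds shared locks
 * (unless LK_NODDLKTREAT is passed), or that has TDP_DEADLKTREAT set, may
 * still be granted the lock despite pending exclusive waiters; this trades
 * strict writer preference for protection against deadlocks involving a
 * thread that already holds the lock shared.
 */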

#define	LK_TRYOP(x)							\
	((x) & LK_NOWAIT)

#define	LK_CAN_WITNESS(x)						\
	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
#define	LK_TRYWIT(x)							\
	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)

#define	LK_CAN_ADAPT(lk, f)						\
	(((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&		\
	((f) & LK_SLEEPFAIL) == 0)

#define	lockmgr_disowned(lk)						\
	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)

#define	lockmgr_xlocked_v(v)						\
	(((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)

#define	lockmgr_xlocked(lk) lockmgr_xlocked_v((lk)->lk_lock)
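
/*
 * The whole lock state lives in the single lk_lock word: when held
 * exclusively it carries the owning thread pointer (or LK_KERNPROC after a
 * disown) ORed with the low flag bits, which is why the macros above mask
 * off everything in LK_FLAGMASK except LK_SHARE before comparing against
 * curthread or LK_KERNPROC; when held in shared mode it instead carries
 * LK_SHARE plus the sharer count decoded by LK_SHARERS().
 */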

static void	assert_lockmgr(const struct lock_object *lock, int how);
#ifdef DDB
static void	db_show_lockmgr(const struct lock_object *lock);
#endif
static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_lockmgr(const struct lock_object *lock,
		    struct thread **owner);
#endif
static uintptr_t unlock_lockmgr(struct lock_object *lock);

struct lock_class lock_class_lockmgr = {
	.lc_name = "lockmgr",
	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
	.lc_assert = assert_lockmgr,
#ifdef DDB
	.lc_ddb_show = db_show_lockmgr,
#endif
	.lc_lock = lock_lockmgr,
	.lc_unlock = unlock_lockmgr,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_lockmgr,
#endif
};

struct lockmgr_wait {
	const char *iwmesg;
	int ipri;
	int itimo;
};

static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
    int flags, bool fp);
static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);

static void
lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
{
	struct lock_class *class;

	if (flags & LK_INTERLOCK) {
		class = LOCK_CLASS(ilk);
		class->lc_unlock(ilk);
	}

	if (__predict_false(wakeup_swapper))
		kick_proc0();
}

static void
lockmgr_note_shared_acquire(struct lock *lk, int contested,
    uint64_t waittime, const char *file, int line, int flags)
{

	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
	    waittime, file, line, LOCKSTAT_READER);
	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
	WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
	TD_LOCKS_INC(curthread);
	TD_SLOCKS_INC(curthread);
	STACK_SAVE(lk);
}

static void
lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
{

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER);
	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
	TD_LOCKS_DEC(curthread);
	TD_SLOCKS_DEC(curthread);
}

static void
lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
    uint64_t waittime, const char *file, int line, int flags)
{

	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
	    waittime, file, line, LOCKSTAT_WRITER);
	LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
	WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
	    line);
	TD_LOCKS_INC(curthread);
	STACK_SAVE(lk);
}

static void
lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
{

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_WRITER);
	LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
	    line);
	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_DEC(curthread);
}

static __inline struct thread *
lockmgr_xholder(const struct lock *lk)
{
	uintptr_t x;

	x = lk->lk_lock;
	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
}

/*
 * Assumes the sleepqueue chain lock is held on entry and returns with it
 * released.  The generic interlock, if any, is assumed to be valid and
 * already checked by the caller.  If LK_INTERLOCK is specified, the
 * interlock is not reacquired after the sleep.
 */
static __inline int
sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *wmesg, int pri, int timo, int queue)
{
	GIANT_DECLARE;
	struct lock_class *class;
	int catch, error;

	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
	catch = pri & PCATCH;
	pri &= PRIMASK;
	error = 0;

	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");

	if (flags & LK_INTERLOCK)
		class->lc_unlock(ilk);
	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
		lk->lk_exslpfail++;
	GIANT_SAVE();
	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
	    SLEEPQ_INTERRUPTIBLE : 0), queue);
	if ((flags & LK_TIMELOCK) && timo)
		sleepq_set_timeout(&lk->lock_object, timo);

	/*
	 * Decide which flavor of sleep to perform.
	 */
	if ((flags & LK_TIMELOCK) && timo && catch)
		error = sleepq_timedwait_sig(&lk->lock_object, pri);
	else if ((flags & LK_TIMELOCK) && timo)
		error = sleepq_timedwait(&lk->lock_object, pri);
	else if (catch)
		error = sleepq_wait_sig(&lk->lock_object, pri);
	else
		sleepq_wait(&lk->lock_object, pri);
	GIANT_RESTORE();
	if ((flags & LK_SLEEPFAIL) && error == 0)
		error = ENOLCK;

	return (error);
}
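
/*
 * Note that sleeplk() returns 0 on a normal wakeup, the error reported by
 * the sleepqueue wait (e.g. for an interrupted or timed-out sleep), or
 * ENOLCK when LK_SLEEPFAIL was requested and the sleep itself completed
 * without error, so that callers fail the acquisition instead of retrying.
 */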

static __inline int
wakeupshlk(struct lock *lk, const char *file, int line)
{
	uintptr_t v, x, orig_x;
	u_int realexslp;
	int queue, wakeup_swapper;

	wakeup_swapper = 0;
	for (;;) {
		x = lk->lk_lock;
		if (lockmgr_sunlock_try(lk, &x))
			break;

		/*
		 * We should have a sharer with waiters, so enter the hard
		 * path in order to handle wakeups correctly.
		 */
		sleepq_lock(&lk->lock_object);
		orig_x = lk->lk_lock;
retry_sleepq:
		x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
		v = LK_UNLOCKED;

		/*
		 * If the lock has exclusive waiters, give them preference in
		 * order to avoid a deadlock with shared runners-up.
		 * If interruptible sleeps left the exclusive queue empty,
		 * avoid starving the threads sleeping on the shared queue by
		 * giving them precedence and clearing the exclusive waiters
		 * bit anyway.
		 * Note that the lk_exslpfail count may overstate the real
		 * number of waiters with the LK_SLEEPFAIL flag on, because
		 * such waiters may also be using interruptible sleeps, so
		 * lk_exslpfail is only an upper bound.
		 */
		realexslp = sleepq_sleepcnt(&lk->lock_object,
		    SQ_EXCLUSIVE_QUEUE);
		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
			if (lk->lk_exslpfail < realexslp) {
				lk->lk_exslpfail = 0;
				queue = SQ_EXCLUSIVE_QUEUE;
				v |= (x & LK_SHARED_WAITERS);
			} else {
				lk->lk_exslpfail = 0;
				LOCK_LOG2(lk,
				    "%s: %p has only LK_SLEEPFAIL sleepers",
				    __func__, lk);
				LOCK_LOG2(lk,
			    "%s: %p waking up threads on the exclusive queue",
				    __func__, lk);
				wakeup_swapper =
				    sleepq_broadcast(&lk->lock_object,
				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
				queue = SQ_SHARED_QUEUE;
			}

		} else {

			/*
			 * Exclusive waiters sleeping with LK_SLEEPFAIL on
			 * and using interruptible sleeps/timeout may have
			 * left spurious lk_exslpfail counts on, so clean
			 * it up anyway.
			 */
			lk->lk_exslpfail = 0;
			queue = SQ_SHARED_QUEUE;
		}

		if (lockmgr_sunlock_try(lk, &orig_x)) {
			sleepq_release(&lk->lock_object);
			break;
		}

		x |= LK_SHARERS_LOCK(1);
		if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
			orig_x = x;
			goto retry_sleepq;
		}
		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
		    "exclusive");
		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
		    0, queue);
		sleepq_release(&lk->lock_object);
		break;
	}

	lockmgr_note_shared_release(lk, file, line);
	return (wakeup_swapper);
}

static void
assert_lockmgr(const struct lock_object *lock, int what)
{

	panic("lockmgr locks do not support assertions");
}

static void
lock_lockmgr(struct lock_object *lock, uintptr_t how)
{

	panic("lockmgr locks do not support sleep interlocking");
}

static uintptr_t
unlock_lockmgr(struct lock_object *lock)
{

	panic("lockmgr locks do not support sleep interlocking");
}

#ifdef KDTRACE_HOOKS
static int
owner_lockmgr(const struct lock_object *lock, struct thread **owner)
{

	panic("lockmgr locks do not support owner inquiring");
}
#endif

void
lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
{
	int iflags;

	MPASS((flags & ~LK_INIT_MASK) == 0);
	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
	    ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
	    &lk->lk_lock));

	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
	if (flags & LK_CANRECURSE)
		iflags |= LO_RECURSABLE;
	if ((flags & LK_NODUP) == 0)
		iflags |= LO_DUPOK;
	if (flags & LK_NOPROFILE)
		iflags |= LO_NOPROFILE;
	if ((flags & LK_NOWITNESS) == 0)
		iflags |= LO_WITNESS;
	if (flags & LK_QUIET)
		iflags |= LO_QUIET;
	if (flags & LK_IS_VNODE)
		iflags |= LO_IS_VNODE;
	iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);

	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
	lk->lk_lock = LK_UNLOCKED;
	lk->lk_recurse = 0;
	lk->lk_exslpfail = 0;
	lk->lk_timo = timo;
	lk->lk_pri = pri;
	STACK_ZERO(lk);
}
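
/*
 * A minimal usage sketch (illustrative only; "struct foo"/"fp" are
 * hypothetical, the lockmgr(9) calls are the public interface wrapping the
 * routines in this file):
 *
 *	struct foo {
 *		struct lock	f_lock;
 *	};
 *	struct foo *fp;
 *
 *	lockinit(&fp->f_lock, PVFS, "foolk", 0, 0);
 *	lockmgr(&fp->f_lock, LK_EXCLUSIVE, NULL);
 *	...
 *	lockmgr(&fp->f_lock, LK_RELEASE, NULL);
 *	lockdestroy(&fp->f_lock);
 *
 * The pri, wmesg and timo passed to lockinit() are only defaults; a caller
 * may override them per-acquisition through lockmgr_args().
 */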

/*
 * XXX: Gross hacks to manipulate external lock flags after
 * initialization.  Used for certain vnode and buf locks.
 */
void
lockallowshare(struct lock *lk)
{

	lockmgr_assert(lk, KA_XLOCKED);
	lk->lock_object.lo_flags &= ~LK_NOSHARE;
}

void
lockdisableshare(struct lock *lk)
{

	lockmgr_assert(lk, KA_XLOCKED);
	lk->lock_object.lo_flags |= LK_NOSHARE;
}

void
lockallowrecurse(struct lock *lk)
{

	lockmgr_assert(lk, KA_XLOCKED);
	lk->lock_object.lo_flags |= LO_RECURSABLE;
}

void
lockdisablerecurse(struct lock *lk)
{

	lockmgr_assert(lk, KA_XLOCKED);
	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
}

void
lockdestroy(struct lock *lk)
{

	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
	lock_destroy(&lk->lock_object);
}

static bool __always_inline
lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp)
{

	/*
	 * If the lock is not held exclusively and, modulo the deadlock
	 * avoidance cases handled by LK_CAN_SHARE(), no exclusive waiter
	 * is pending, bump the count of sharers.  Since we have to
	 * preserve the state of the waiters flags, if we fail to acquire
	 * the shared lock loop back and retry.
	 */
	*xp = lk->lk_lock;
	while (LK_CAN_SHARE(*xp, flags, fp)) {
		if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
		    *xp + LK_ONE_SHARER)) {
			return (true);
		}
	}
	return (false);
}

static bool __always_inline
lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
{

	for (;;) {
		if (LK_SHARERS(*xp) > 1 || !(*xp & LK_ALL_WAITERS)) {
			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
			    *xp - LK_ONE_SHARER))
				return (true);
			continue;
		}
		break;
	}
	return (false);
}
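
/*
 * Both try routines above rely on the fcmpset contract: on failure
 * atomic_fcmpset_*_ptr() reloads the currently observed lock word into *xp,
 * so the loops retry against fresh state without an extra explicit read.
 * The generic shape of the pattern, for reference:
 *
 *	old = *wordp;
 *	do {
 *		new = compute(old);
 *	} while (!atomic_fcmpset_ptr(wordp, &old, new));
 */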

static __noinline int
lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line, struct lockmgr_wait *lwa)
{
	uintptr_t tid, x;
	int error = 0;
	const char *iwmesg;
	int ipri, itimo;

#ifdef KDTRACE_HOOKS
	uint64_t sleep_time = 0;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif

	if (__predict_false(panicstr != NULL))
		goto out;

	tid = (uintptr_t)curthread;

	if (LK_CAN_WITNESS(flags))
		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
		    file, line, flags & LK_INTERLOCK ? ilk : NULL);
	for (;;) {
		if (lockmgr_slock_try(lk, &x, flags, false))
			break;
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&lk->lock_object,
		    &contested, &waittime);

		/*
		 * If the lock is already held by curthread in
		 * exclusive mode, avoid a deadlock.
		 */
		if (LK_HOLDER(x) == tid) {
			LOCK_LOG2(lk,
			    "%s: %p already held in exclusive mode",
			    __func__, lk);
			error = EDEADLK;
			break;
		}

		/*
		 * If the operation is not allowed to sleep, just give
		 * up and return.
		 */
		if (LK_TRYOP(flags)) {
			LOCK_LOG2(lk, "%s: %p fails the try operation",
			    __func__, lk);
			error = EBUSY;
			break;
		}

		/*
		 * Acquire the sleepqueue chain lock because we
		 * probably will need to manipulate the waiters flags.
		 */
		sleepq_lock(&lk->lock_object);
		x = lk->lk_lock;
retry_sleepq:

		/*
		 * If the lock can be acquired in shared mode, try
		 * again.
		 */
		if (LK_CAN_SHARE(x, flags, false)) {
			sleepq_release(&lk->lock_object);
			continue;
		}

		/*
		 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
		 * loop back and retry.
		 */
		if ((x & LK_SHARED_WAITERS) == 0) {
			if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
			    x | LK_SHARED_WAITERS)) {
				goto retry_sleepq;
			}
			LOCK_LOG2(lk, "%s: %p set shared waiters flag",
			    __func__, lk);
		}

		if (lwa == NULL) {
			iwmesg = lk->lock_object.lo_name;
			ipri = lk->lk_pri;
			itimo = lk->lk_timo;
		} else {
			iwmesg = lwa->iwmesg;
			ipri = lwa->ipri;
			itimo = lwa->itimo;
		}

		/*
		 * Since we have been unable to acquire the shared lock
		 * and the shared waiters flag is set, we will sleep.
		 */
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&lk->lock_object);
#endif
		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
		    SQ_SHARED_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&lk->lock_object);
#endif
		flags &= ~LK_INTERLOCK;
		if (error) {
			LOCK_LOG3(lk,
			    "%s: interrupted sleep for %p with %d",
			    __func__, lk, error);
			break;
		}
		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
		    __func__, lk);
	}
	if (error == 0) {
#ifdef KDTRACE_HOOKS
		if (sleep_time != 0)
			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
			    LOCKSTAT_READER, (x & LK_SHARE) == 0,
			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
#endif
#ifdef LOCK_PROFILING
		lockmgr_note_shared_acquire(lk, contested, waittime,
		    file, line, flags);
#else
		lockmgr_note_shared_acquire(lk, 0, 0, file, line,
		    flags);
#endif
	}

out:
	lockmgr_exit(flags, ilk, 0);
	return (error);
}

static __noinline int
lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line, struct lockmgr_wait *lwa)
{
	struct lock_class *class;
	uintptr_t tid, x, v;
	int error = 0;
	const char *iwmesg;
	int ipri, itimo;

#ifdef KDTRACE_HOOKS
	uint64_t sleep_time = 0;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif

	if (__predict_false(panicstr != NULL))
		goto out;

	tid = (uintptr_t)curthread;

	if (LK_CAN_WITNESS(flags))
		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
		    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
		    ilk : NULL);

	/*
	 * If curthread already holds the lock and this one is
	 * allowed to recurse, simply recurse on it.
	 */
	if (lockmgr_xlocked(lk)) {
		if ((flags & LK_CANRECURSE) == 0 &&
		    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
			/*
			 * If this is a try operation, fail with EBUSY
			 * instead of panicking.
			 */
			if (LK_TRYOP(flags)) {
				LOCK_LOG2(lk,
				    "%s: %p fails the try operation",
				    __func__, lk);
				error = EBUSY;
				goto out;
			}
			if (flags & LK_INTERLOCK) {
				class = LOCK_CLASS(ilk);
				class->lc_unlock(ilk);
			}
			panic("%s: recursing on non recursive lockmgr %p "
			    "@ %s:%d\n", __func__, lk, file, line);
		}
		lk->lk_recurse++;
		LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
		LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
		    lk->lk_recurse, file, line);
		WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
		    LK_TRYWIT(flags), file, line);
		TD_LOCKS_INC(curthread);
		goto out;
	}

	for (;;) {
		if (lk->lk_lock == LK_UNLOCKED &&
		    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
			break;
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&lk->lock_object,
		    &contested, &waittime);

		/*
		 * If the operation is not allowed to sleep, just give
		 * up and return.
		 */
		if (LK_TRYOP(flags)) {
			LOCK_LOG2(lk, "%s: %p fails the try operation",
			    __func__, lk);
			error = EBUSY;
			break;
		}

		/*
		 * Acquire the sleepqueue chain lock because we
		 * probably will need to manipulate the waiters flags.
		 */
		sleepq_lock(&lk->lock_object);
		x = lk->lk_lock;
retry_sleepq:

		/*
		 * If the lock has been released while we spun on
		 * the sleepqueue chain lock, just try again.
		 */
		if (x == LK_UNLOCKED) {
			sleepq_release(&lk->lock_object);
			continue;
		}

		/*
		 * The lock can be in the state where there is a
		 * pending queue of waiters, but still no owner.
		 * This happens when the lock is contested and an
		 * owner is going to claim the lock.
		 * If curthread is the one successfully acquiring it,
		 * claim lock ownership and return, preserving the
		 * waiters flags.
		 */
		v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
		if ((x & ~v) == LK_UNLOCKED) {
			v &= ~LK_EXCLUSIVE_SPINNERS;
			if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
			    tid | v)) {
				sleepq_release(&lk->lock_object);
				LOCK_LOG2(lk,
				    "%s: %p claimed by a new writer",
				    __func__, lk);
				break;
			}
			goto retry_sleepq;
		}

		/*
		 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
		 * fail, loop back and retry.
		 */
		if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
			if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
			    x | LK_EXCLUSIVE_WAITERS)) {
				goto retry_sleepq;
			}
			LOCK_LOG2(lk, "%s: %p set excl waiters flag",
			    __func__, lk);
		}

		if (lwa == NULL) {
			iwmesg = lk->lock_object.lo_name;
			ipri = lk->lk_pri;
			itimo = lk->lk_timo;
		} else {
			iwmesg = lwa->iwmesg;
			ipri = lwa->ipri;
			itimo = lwa->itimo;
		}

		/*
		 * Since we have been unable to acquire the exclusive
		 * lock and the exclusive waiters flag is set, we will
		 * sleep.
		 */
#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&lk->lock_object);
#endif
		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
		    SQ_EXCLUSIVE_QUEUE);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&lk->lock_object);
#endif
		flags &= ~LK_INTERLOCK;
		if (error) {
			LOCK_LOG3(lk,
			    "%s: interrupted sleep for %p with %d",
			    __func__, lk, error);
			break;
		}
		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
		    __func__, lk);
	}
	if (error == 0) {
#ifdef KDTRACE_HOOKS
		if (sleep_time != 0)
			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
			    LOCKSTAT_WRITER, (x & LK_SHARE) == 0,
			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
#endif
#ifdef LOCK_PROFILING
		lockmgr_note_exclusive_acquire(lk, contested, waittime,
		    file, line, flags);
#else
		lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
		    flags);
#endif
	}

out:
	lockmgr_exit(flags, ilk, 0);
	return (error);
}

static __noinline int
lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line, struct lockmgr_wait *lwa)
{
	uintptr_t tid, x, v;
	int error = 0;
	int wakeup_swapper = 0;
	int op;

	if (__predict_false(panicstr != NULL))
		goto out;

	tid = (uintptr_t)curthread;

	_lockmgr_assert(lk, KA_SLOCKED, file, line);
	v = lk->lk_lock;
	x = v & LK_ALL_WAITERS;
	v &= LK_EXCLUSIVE_SPINNERS;

	/*
	 * Try to switch from one shared lock to an exclusive one.
	 * We need to preserve waiters flags during the operation.
	 */
	if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
	    tid | x)) {
		LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
		    line);
		WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
		    LK_TRYWIT(flags), file, line);
		LOCKSTAT_RECORD0(lockmgr__upgrade, lk);
		TD_SLOCKS_DEC(curthread);
		goto out;
	}

	op = flags & LK_TYPE_MASK;

	/*
	 * In LK_TRYUPGRADE mode, do not drop the lock,
	 * returning EBUSY instead.
	 */
	if (op == LK_TRYUPGRADE) {
		LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
		    __func__, lk);
		error = EBUSY;
		goto out;
	}

	/*
	 * The in-place upgrade failed, so give up the shared lock and
	 * fall back to acquiring the exclusive lock from scratch.
	 */
	wakeup_swapper |= wakeupshlk(lk, file, line);
	error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
	flags &= ~LK_INTERLOCK;
out:
	lockmgr_exit(flags, ilk, wakeup_swapper);
	return (error);
}
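
/*
 * In short: an upgrade succeeds in place only when the caller is the sole
 * sharer, in which case the single cmpset above swaps the one-sharer value
 * for the owning thread pointer while preserving the waiters bits.
 * Otherwise LK_TRYUPGRADE fails with EBUSY, while LK_UPGRADE drops the
 * shared lock and queues for an exclusive one, so callers of a plain
 * upgrade cannot assume the lock was held continuously.
 */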

int
lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *file, int line)
{
	struct lock_class *class;
	uintptr_t x, tid;
	u_int op;
	bool locked;

	if (__predict_false(panicstr != NULL))
		return (0);

	op = flags & LK_TYPE_MASK;
	locked = false;
	switch (op) {
	case LK_SHARED:
		if (LK_CAN_WITNESS(flags))
			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
		if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
			break;
		if (lockmgr_slock_try(lk, &x, flags, true)) {
			lockmgr_note_shared_acquire(lk, 0, 0,
			    file, line, flags);
			locked = true;
		} else {
			return (lockmgr_slock_hard(lk, flags, ilk, file, line,
			    NULL));
		}
		break;
	case LK_EXCLUSIVE:
		if (LK_CAN_WITNESS(flags))
			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
			    ilk : NULL);
		tid = (uintptr_t)curthread;
		if (lk->lk_lock == LK_UNLOCKED &&
		    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
			lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
			    flags);
			locked = true;
		} else {
			return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
			    NULL));
		}
		break;
	case LK_UPGRADE:
	case LK_TRYUPGRADE:
		return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
	default:
		break;
	}
	if (__predict_true(locked)) {
		if (__predict_false(flags & LK_INTERLOCK)) {
			class = LOCK_CLASS(ilk);
			class->lc_unlock(ilk);
		}
		return (0);
	} else {
		return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
		    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
	}
}

static __noinline int
lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
    const char *file, int line)
{
	int wakeup_swapper = 0;

	if (__predict_false(panicstr != NULL))
		goto out;

	wakeup_swapper = wakeupshlk(lk, file, line);

out:
	lockmgr_exit(flags, ilk, wakeup_swapper);
	return (0);
}

static __noinline int
lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
    const char *file, int line)
{
	uintptr_t tid, v;
	int wakeup_swapper = 0;
	u_int realexslp;
	int queue;

	if (__predict_false(panicstr != NULL))
		goto out;

	tid = (uintptr_t)curthread;

	/*
	 * As a first pass, treat the lock as if it had no waiters.
	 * Fix up the tid variable if the lock has been disowned.
	 */
	if (LK_HOLDER(x) == LK_KERNPROC)
		tid = LK_KERNPROC;
	else {
		WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
		TD_LOCKS_DEC(curthread);
	}
	LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);

	/*
	 * The lock is held in exclusive mode.
	 * If the lock is recursed also, then unrecurse it.
	 */
	if (lockmgr_xlocked_v(x) && lockmgr_recursed(lk)) {
		LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
		lk->lk_recurse--;
		goto out;
	}
	if (tid != LK_KERNPROC)
		LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
		    LOCKSTAT_WRITER);

	if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
		goto out;

	sleepq_lock(&lk->lock_object);
	x = lk->lk_lock;
	v = LK_UNLOCKED;

	/*
	 * If the lock has exclusive waiters, give them preference in
	 * order to avoid a deadlock with shared runners-up.
	 * If interruptible sleeps left the exclusive queue empty,
	 * avoid starving the threads sleeping on the shared queue by
	 * giving them precedence and clearing the exclusive waiters
	 * bit anyway.
	 * Note that the lk_exslpfail count may overstate the real
	 * number of waiters with the LK_SLEEPFAIL flag on, because
	 * such waiters may also be using interruptible sleeps, so
	 * lk_exslpfail is only an upper bound.
	 */
	MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
	realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
	if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
		if (lk->lk_exslpfail < realexslp) {
			lk->lk_exslpfail = 0;
			queue = SQ_EXCLUSIVE_QUEUE;
			v |= (x & LK_SHARED_WAITERS);
		} else {
			lk->lk_exslpfail = 0;
			LOCK_LOG2(lk,
			    "%s: %p has only LK_SLEEPFAIL sleepers",
			    __func__, lk);
			LOCK_LOG2(lk,
			    "%s: %p waking up threads on the exclusive queue",
			    __func__, lk);
			wakeup_swapper = sleepq_broadcast(&lk->lock_object,
			    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
			queue = SQ_SHARED_QUEUE;
		}
	} else {

		/*
		 * Exclusive waiters sleeping with LK_SLEEPFAIL
		 * on and using interruptible sleeps/timeout
		 * may have left spurious lk_exslpfail counts
		 * on, so clean it up anyway.
		 */
		lk->lk_exslpfail = 0;
		queue = SQ_SHARED_QUEUE;
	}

	LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
	    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
	    "exclusive");
	atomic_store_rel_ptr(&lk->lk_lock, v);
	wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
	sleepq_release(&lk->lock_object);

out:
	lockmgr_exit(flags, ilk, wakeup_swapper);
	return (0);
}

int
lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
{
	struct lock_class *class;
	uintptr_t x, tid;
	const char *file;
	int line;

	if (__predict_false(panicstr != NULL))
		return (0);

	file = __FILE__;
	line = __LINE__;

	_lockmgr_assert(lk, KA_LOCKED, file, line);
	x = lk->lk_lock;
	if (__predict_true((x & LK_SHARE) != 0)) {
		if (lockmgr_sunlock_try(lk, &x)) {
			lockmgr_note_shared_release(lk, file, line);
		} else {
			return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
		}
	} else {
		tid = (uintptr_t)curthread;
		if (!lockmgr_recursed(lk) &&
		    atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
			lockmgr_note_exclusive_release(lk, file, line);
		} else {
			return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
		}
	}
	if (__predict_false(flags & LK_INTERLOCK)) {
		class = LOCK_CLASS(ilk);
		class->lc_unlock(ilk);
	}
	return (0);
}

int
__lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *wmesg, int pri, int timo, const char *file, int line)
{
	GIANT_DECLARE;
	struct lockmgr_wait lwa;
	struct lock_class *class;
	const char *iwmesg;
	uintptr_t tid, v, x;
	u_int op, realexslp;
	int error, ipri, itimo, queue, wakeup_swapper;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif

	if (panicstr != NULL)
		return (0);

	error = 0;
	tid = (uintptr_t)curthread;
	op = (flags & LK_TYPE_MASK);
	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;

	lwa.iwmesg = iwmesg;
	lwa.ipri = ipri;
	lwa.itimo = itimo;

	MPASS((flags & ~LK_TOTAL_MASK) == 0);
	KASSERT((op & (op - 1)) == 0,
	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
	    (op != LK_DOWNGRADE && op != LK_RELEASE),
	    ("%s: Invalid flags in regard of the operation desired @ %s:%d",
	    __func__, file, line));
	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
	    __func__, file, line));
	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
	    lk->lock_object.lo_name, file, line));

	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;

	if (lk->lock_object.lo_flags & LK_NOSHARE) {
		switch (op) {
		case LK_SHARED:
			op = LK_EXCLUSIVE;
			break;
		case LK_UPGRADE:
		case LK_TRYUPGRADE:
		case LK_DOWNGRADE:
			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
			    file, line);
			if (flags & LK_INTERLOCK)
				class->lc_unlock(ilk);
			return (0);
		}
	}

	wakeup_swapper = 0;
	switch (op) {
	case LK_SHARED:
		return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
		break;
	case LK_UPGRADE:
	case LK_TRYUPGRADE:
		return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
		break;
	case LK_EXCLUSIVE:
		return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
		break;
	case LK_DOWNGRADE:
		_lockmgr_assert(lk, KA_XLOCKED, file, line);
		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);

		/*
		 * Panic if the lock is recursed.
		 */
		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
			if (flags & LK_INTERLOCK)
				class->lc_unlock(ilk);
			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
			    __func__, iwmesg, file, line);
		}
		TD_SLOCKS_INC(curthread);

		/*
		 * In order to preserve waiters flags, just spin.
		 */
		for (;;) {
			x = lk->lk_lock;
			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
			x &= LK_ALL_WAITERS;
			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
			    LK_SHARERS_LOCK(1) | x))
				break;
			cpu_spinwait();
		}
		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
		LOCKSTAT_RECORD0(lockmgr__downgrade, lk);
		break;
	case LK_RELEASE:
		_lockmgr_assert(lk, KA_LOCKED, file, line);
		x = lk->lk_lock;

		if (__predict_true((x & LK_SHARE) != 0)) {
			return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
		} else {
			return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
		}
		break;
	case LK_DRAIN:
		if (LK_CAN_WITNESS(flags))
			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
			    ilk : NULL);

		/*
		 * Trying to drain a lock we already own will result in a
		 * deadlock.
		 */
		if (lockmgr_xlocked(lk)) {
			if (flags & LK_INTERLOCK)
				class->lc_unlock(ilk);
			panic("%s: draining %s with the lock held @ %s:%d\n",
			    __func__, iwmesg, file, line);
		}

		for (;;) {
			if (lk->lk_lock == LK_UNLOCKED &&
			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
				break;

#ifdef HWPMC_HOOKS
			PMC_SOFT_CALL( , , lock, failed);
#endif
			lock_profile_obtain_lock_failed(&lk->lock_object,
			    &contested, &waittime);

			/*
			 * If the operation is not allowed to sleep, just
			 * give up and return.
			 */
			if (LK_TRYOP(flags)) {
				LOCK_LOG2(lk, "%s: %p fails the try operation",
				    __func__, lk);
				error = EBUSY;
				break;
			}

			/*
			 * Acquire the sleepqueue chain lock because we
			 * probably will need to manipulate the waiters flags.
			 */
			sleepq_lock(&lk->lock_object);
			x = lk->lk_lock;

			/*
			 * If the lock has been released while we spun on
			 * the sleepqueue chain lock, just try again.
			 */
			if (x == LK_UNLOCKED) {
				sleepq_release(&lk->lock_object);
				continue;
			}

			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
			if ((x & ~v) == LK_UNLOCKED) {
				v = (x & ~LK_EXCLUSIVE_SPINNERS);

				/*
				 * If interruptible sleeps left the exclusive
				 * queue empty, avoid starving the threads
				 * sleeping on the shared queue by giving
				 * them precedence and clearing the exclusive
				 * waiters bit anyway.
				 * Note that the lk_exslpfail count may
				 * overstate the real number of waiters with
				 * the LK_SLEEPFAIL flag on, because such
				 * waiters may also be using interruptible
				 * sleeps, so lk_exslpfail is only an upper
				 * bound.
				 */
				if (v & LK_EXCLUSIVE_WAITERS) {
					queue = SQ_EXCLUSIVE_QUEUE;
					v &= ~LK_EXCLUSIVE_WAITERS;
				} else {

					/*
					 * Exclusive waiters sleeping with
					 * LK_SLEEPFAIL on and using
					 * interruptible sleeps/timeout may
					 * have left spurious lk_exslpfail
					 * counts on, so clean it up anyway.
					 */
					MPASS(v & LK_SHARED_WAITERS);
					lk->lk_exslpfail = 0;
					queue = SQ_SHARED_QUEUE;
					v &= ~LK_SHARED_WAITERS;
				}
				if (queue == SQ_EXCLUSIVE_QUEUE) {
					realexslp =
					    sleepq_sleepcnt(&lk->lock_object,
					    SQ_EXCLUSIVE_QUEUE);
					if (lk->lk_exslpfail >= realexslp) {
						lk->lk_exslpfail = 0;
						queue = SQ_SHARED_QUEUE;
						v &= ~LK_SHARED_WAITERS;
						if (realexslp != 0) {
							LOCK_LOG2(lk,
					"%s: %p has only LK_SLEEPFAIL sleepers",
							    __func__, lk);
							LOCK_LOG2(lk,
			"%s: %p waking up threads on the exclusive queue",
							    __func__, lk);
							wakeup_swapper =
							    sleepq_broadcast(
							    &lk->lock_object,
							    SLEEPQ_LK, 0,
							    SQ_EXCLUSIVE_QUEUE);
						}
					} else
						lk->lk_exslpfail = 0;
				}
				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
					sleepq_release(&lk->lock_object);
					continue;
				}
				LOCK_LOG3(lk,
				"%s: %p waking up all threads on the %s queue",
				    __func__, lk, queue == SQ_SHARED_QUEUE ?
				    "shared" : "exclusive");
				wakeup_swapper |= sleepq_broadcast(
				    &lk->lock_object, SLEEPQ_LK, 0, queue);

				/*
				 * If shared waiters have been woken up we
				 * need to wait for one of them to acquire
				 * the lock before setting the exclusive
				 * waiters bit, in order to avoid a deadlock.
				 */
				if (queue == SQ_SHARED_QUEUE) {
					for (v = lk->lk_lock;
					    (v & LK_SHARE) && !LK_SHARERS(v);
					    v = lk->lk_lock)
						cpu_spinwait();
				}
			}

			/*
			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
			 * fail, loop back and retry.
			 */
			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
				    x | LK_EXCLUSIVE_WAITERS)) {
					sleepq_release(&lk->lock_object);
					continue;
				}
				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
				    __func__, lk);
			}

			/*
			 * Since we have been unable to acquire the
			 * exclusive lock and the exclusive waiters flag
			 * is set, we will sleep.
			 */
			if (flags & LK_INTERLOCK) {
				class->lc_unlock(ilk);
				flags &= ~LK_INTERLOCK;
			}
			GIANT_SAVE();
			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
			    SQ_EXCLUSIVE_QUEUE);
			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
			GIANT_RESTORE();
			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
			    __func__, lk);
		}

		if (error == 0) {
			lock_profile_obtain_lock_success(&lk->lock_object,
			    contested, waittime, file, line);
			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
			    lk->lk_recurse, file, line);
			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
			    LK_TRYWIT(flags), file, line);
			TD_LOCKS_INC(curthread);
			STACK_SAVE(lk);
		}
		break;
	default:
		if (flags & LK_INTERLOCK)
			class->lc_unlock(ilk);
		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
	}

	if (flags & LK_INTERLOCK)
		class->lc_unlock(ilk);
	if (wakeup_swapper)
		kick_proc0();

	return (error);
}

void
_lockmgr_disown(struct lock *lk, const char *file, int line)
{
	uintptr_t tid, x;

	if (SCHEDULER_STOPPED())
		return;

	tid = (uintptr_t)curthread;
	_lockmgr_assert(lk, KA_XLOCKED, file, line);

	/*
	 * Panic if the lock is recursed.
	 */
	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
		panic("%s: disown a recursed lockmgr @ %s:%d\n",
		    __func__, file, line);

	/*
	 * If the owner is already LK_KERNPROC just skip the whole operation.
	 */
	if (LK_HOLDER(lk->lk_lock) != tid)
		return;
	lock_profile_release_lock(&lk->lock_object);
	LOCKSTAT_RECORD1(lockmgr__disown, lk, LOCKSTAT_WRITER);
	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_DEC(curthread);
	STACK_SAVE(lk);

	/*
	 * In order to preserve waiters flags, just spin.
	 */
	for (;;) {
		x = lk->lk_lock;
		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
		x &= LK_ALL_WAITERS;
		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
		    LK_KERNPROC | x))
			return;
		cpu_spinwait();
	}
}

void
lockmgr_printinfo(const struct lock *lk)
{
	struct thread *td;
	uintptr_t x;

	if (lk->lk_lock == LK_UNLOCKED)
		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
	else if (lk->lk_lock & LK_SHARE)
		printf("lock type %s: SHARED (count %ju)\n",
		    lk->lock_object.lo_name,
		    (uintmax_t)LK_SHARERS(lk->lk_lock));
	else {
		td = lockmgr_xholder(lk);
		if (td == (struct thread *)LK_KERNPROC)
			printf("lock type %s: EXCL by KERNPROC\n",
			    lk->lock_object.lo_name);
		else
			printf("lock type %s: EXCL by thread %p "
			    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
			    td, td->td_proc->p_pid, td->td_proc->p_comm,
			    td->td_tid);
	}

	x = lk->lk_lock;
	if (x & LK_EXCLUSIVE_WAITERS)
		printf(" with exclusive waiters pending\n");
	if (x & LK_SHARED_WAITERS)
		printf(" with shared waiters pending\n");
	if (x & LK_EXCLUSIVE_SPINNERS)
		printf(" with exclusive spinners pending\n");

	STACK_PRINT(lk);
}

int
lockstatus(const struct lock *lk)
{
	uintptr_t v, x;
	int ret;

	ret = LK_SHARED;
	x = lk->lk_lock;
	v = LK_HOLDER(x);

	if ((x & LK_SHARE) == 0) {
		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
			ret = LK_EXCLUSIVE;
		else
			ret = LK_EXCLOTHER;
	} else if (x == LK_UNLOCKED)
		ret = 0;

	return (ret);
}
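
/*
 * A sketch of a typical lockstatus() consumer (illustrative only; "lkp" is
 * an arbitrary struct lock pointer):
 *
 *	switch (lockstatus(lkp)) {
 *	case LK_EXCLUSIVE:	held exclusively by curthread or KERNPROC
 *		break;
 *	case LK_EXCLOTHER:	held exclusively by another thread
 *		break;
 *	case LK_SHARED:		held in shared mode
 *		break;
 *	case 0:			not held
 *		break;
 *	}
 */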

#ifdef INVARIANT_SUPPORT

FEATURE(invariant_support,
    "Support for modules compiled with INVARIANTS option");

#ifndef INVARIANTS
#undef	_lockmgr_assert
#endif

void
_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
{
	int slocked = 0;

	if (panicstr != NULL)
		return;
	switch (what) {
	case KA_SLOCKED:
	case KA_SLOCKED | KA_NOTRECURSED:
	case KA_SLOCKED | KA_RECURSED:
		slocked = 1;
		/* FALLTHROUGH */
	case KA_LOCKED:
	case KA_LOCKED | KA_NOTRECURSED:
	case KA_LOCKED | KA_RECURSED:
#ifdef WITNESS

		/*
		 * We cannot trust WITNESS if the lock is held in exclusive
		 * mode and a call to lockmgr_disown() happened.
		 * Work around this by skipping the check whenever the lock
		 * is held in exclusive mode, even for the KA_LOCKED case.
		 */
		if (slocked || (lk->lk_lock & LK_SHARE)) {
			witness_assert(&lk->lock_object, what, file, line);
			break;
		}
#endif
		if (lk->lk_lock == LK_UNLOCKED ||
		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
			panic("Lock %s not %slocked @ %s:%d\n",
			    lk->lock_object.lo_name, slocked ? "share" : "",
			    file, line);

		if ((lk->lk_lock & LK_SHARE) == 0) {
			if (lockmgr_recursed(lk)) {
				if (what & KA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    lk->lock_object.lo_name, file,
					    line);
			} else if (what & KA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    lk->lock_object.lo_name, file, line);
		}
		break;
	case KA_XLOCKED:
	case KA_XLOCKED | KA_NOTRECURSED:
	case KA_XLOCKED | KA_RECURSED:
		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		if (lockmgr_recursed(lk)) {
			if (what & KA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    lk->lock_object.lo_name, file, line);
		} else if (what & KA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		break;
	case KA_UNLOCKED:
		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
			panic("Lock %s exclusively locked @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		break;
	default:
		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
		    line);
	}
}
#endif

#ifdef DDB
int
lockmgr_chain(struct thread *td, struct thread **ownerp)
{
	struct lock *lk;

	lk = td->td_wchan;

	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
		return (0);
	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
	if (lk->lk_lock & LK_SHARE)
		db_printf("SHARED (count %ju)\n",
		    (uintmax_t)LK_SHARERS(lk->lk_lock));
	else
		db_printf("EXCL\n");
	*ownerp = lockmgr_xholder(lk);

	return (1);
}

static void
db_show_lockmgr(const struct lock_object *lock)
{
	struct thread *td;
	const struct lock *lk;

	lk = (const struct lock *)lock;

	db_printf(" state: ");
	if (lk->lk_lock == LK_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (lk->lk_lock & LK_SHARE)
		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
	else {
		td = lockmgr_xholder(lk);
		if (td == (struct thread *)LK_KERNPROC)
			db_printf("XLOCK: LK_KERNPROC\n");
		else
			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
			    td->td_tid, td->td_proc->p_pid,
			    td->td_proc->p_comm);
		if (lockmgr_recursed(lk))
			db_printf(" recursed: %d\n", lk->lk_recurse);
	}
	db_printf(" waiters: ");
	switch (lk->lk_lock & LK_ALL_WAITERS) {
	case LK_SHARED_WAITERS:
		db_printf("shared\n");
		break;
	case LK_EXCLUSIVE_WAITERS:
		db_printf("exclusive\n");
		break;
	case LK_ALL_WAITERS:
		db_printf("shared and exclusive\n");
		break;
	default:
		db_printf("none\n");
	}
	db_printf(" spinners: ");
	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
		db_printf("exclusive\n");
	else
		db_printf("none\n");
}
#endif