1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice(s), this list of conditions and the following disclaimer as
12 *    the first lines of this file unmodified other than the possible
13 *    addition of one or more copyright notices.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice(s), this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28 * DAMAGE.
29 */
30
31#include "opt_ddb.h"
32#include "opt_hwpmc_hooks.h"
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD$");
36
37#include <sys/param.h>
38#include <sys/kdb.h>
39#include <sys/ktr.h>
40#include <sys/lock.h>
41#include <sys/lock_profile.h>
42#include <sys/lockmgr.h>
43#include <sys/lockstat.h>
44#include <sys/mutex.h>
45#include <sys/proc.h>
46#include <sys/sleepqueue.h>
47#ifdef DEBUG_LOCKS
48#include <sys/stack.h>
49#endif
50#include <sys/sysctl.h>
51#include <sys/systm.h>
52
53#include <machine/cpu.h>
54
55#ifdef DDB
56#include <ddb/ddb.h>
57#endif
58
59#ifdef HWPMC_HOOKS
60#include <sys/pmckern.h>
61PMC_SOFT_DECLARE( , , lock, failed);
62#endif
63
64CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
65    ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
66
67#define	SQ_EXCLUSIVE_QUEUE	0
68#define	SQ_SHARED_QUEUE		1
69
70#ifndef INVARIANTS
71#define	_lockmgr_assert(lk, what, file, line)
72#endif
73
74#define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
75#define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
76
77#ifndef DEBUG_LOCKS
78#define	STACK_PRINT(lk)
79#define	STACK_SAVE(lk)
80#define	STACK_ZERO(lk)
81#else
82#define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
83#define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
84#define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
85#endif
86
87#define	LOCK_LOG2(lk, string, arg1, arg2)				\
88	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
89		CTR2(KTR_LOCK, (string), (arg1), (arg2))
90#define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
91	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
92		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
93
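/*
 * Giant may be held (even recursively) by callers that end up sleeping on a
 * lockmgr lock.  These helpers fully drop Giant before the sleep, remember
 * how many times it was held, and reacquire it the same number of times
 * afterwards, saving and restoring the WITNESS state along the way.
 */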
94#define	GIANT_DECLARE							\
95	int _i = 0;							\
96	WITNESS_SAVE_DECL(Giant)
97#define	GIANT_RESTORE() do {						\
98	if (__predict_false(_i > 0)) {					\
99		while (_i--)						\
100			mtx_lock(&Giant);				\
101		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
102	}								\
103} while (0)
104#define	GIANT_SAVE() do {						\
105	if (__predict_false(mtx_owned(&Giant))) {			\
106		WITNESS_SAVE(&Giant.lock_object, Giant);		\
107		while (mtx_owned(&Giant)) {				\
108			_i++;						\
109			mtx_unlock(&Giant);				\
110		}							\
111	}								\
112} while (0)
113
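/*
 * Return true if a shared acquisition may proceed: either the lock is
 * share-locked (or unlocked) with no exclusive waiters or spinners, or, on
 * the slow path only, the lock is share-locked and the caller may jump ahead
 * of the exclusive waiters because it already holds shared lockmgr locks
 * (and LK_NODDLKTREAT is not set) or has TDP_DEADLKTREAT set; this is the
 * deadlock-avoidance treatment for recursive shared acquisitions.
 * 'x' is the current lock word and 'fp' denotes the fast path.
 */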
114static bool __always_inline
115LK_CAN_SHARE(uintptr_t x, int flags, bool fp)
116{
117
118	if ((x & (LK_SHARE | LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) ==
119	    LK_SHARE)
120		return (true);
121	if (fp || (!(x & LK_SHARE)))
122		return (false);
123	if ((curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||
124	    (curthread->td_pflags & TDP_DEADLKTREAT))
125		return (true);
126	return (false);
127}
128
129#define	LK_TRYOP(x)							\
130	((x) & LK_NOWAIT)
131
132#define	LK_CAN_WITNESS(x)						\
133	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
134#define	LK_TRYWIT(x)							\
135	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
136
137#define	lockmgr_disowned(lk)						\
138	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
139
140#define	lockmgr_xlocked_v(v)						\
141	(((v) & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
142
143#define	lockmgr_xlocked(lk) lockmgr_xlocked_v(lockmgr_read_value(lk))
144
145static void	assert_lockmgr(const struct lock_object *lock, int how);
146#ifdef DDB
147static void	db_show_lockmgr(const struct lock_object *lock);
148#endif
149static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
150#ifdef KDTRACE_HOOKS
151static int	owner_lockmgr(const struct lock_object *lock,
152		    struct thread **owner);
153#endif
154static uintptr_t unlock_lockmgr(struct lock_object *lock);
155
156struct lock_class lock_class_lockmgr = {
157	.lc_name = "lockmgr",
158	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
159	.lc_assert = assert_lockmgr,
160#ifdef DDB
161	.lc_ddb_show = db_show_lockmgr,
162#endif
163	.lc_lock = lock_lockmgr,
164	.lc_unlock = unlock_lockmgr,
165#ifdef KDTRACE_HOOKS
166	.lc_owner = owner_lockmgr,
167#endif
168};
169
170static __read_mostly bool lk_adaptive = true;
static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
    "lockmgr debugging");
SYSCTL_BOOL(_debug_lockmgr, OID_AUTO, adaptive_spinning, CTLFLAG_RW,
    &lk_adaptive, 0, "Use adaptive spinning before blocking");
174#define lockmgr_delay  locks_delay
175
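/*
 * Sleep parameters (wait message, priority and timeout) passed down from
 * __lockmgr_args() to the slow-path acquisition functions, overriding the
 * defaults stored in the lock itself.
 */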
176struct lockmgr_wait {
177	const char *iwmesg;
178	int ipri;
179	int itimo;
180};
181
182static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
183    int flags, bool fp);
184static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp);
185
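/*
 * Common exit path for the lock operations below: drop the interlock if
 * LK_INTERLOCK was requested and kick the swapper (proc0) if a sleepqueue
 * wakeup made that necessary.
 */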
186static void
187lockmgr_exit(u_int flags, struct lock_object *ilk, int wakeup_swapper)
188{
189	struct lock_class *class;
190
191	if (flags & LK_INTERLOCK) {
192		class = LOCK_CLASS(ilk);
193		class->lc_unlock(ilk);
194	}
195
196	if (__predict_false(wakeup_swapper))
197		kick_proc0();
198}
199
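/*
 * The lockmgr_note_*() helpers centralize the bookkeeping done on
 * acquisition and release: lockstat/lock profiling records, KTR logging,
 * WITNESS updates and the per-thread lock counters.
 */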
200static void
201lockmgr_note_shared_acquire(struct lock *lk, int contested,
202    uint64_t waittime, const char *file, int line, int flags)
203{
204
205	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
206	    waittime, file, line, LOCKSTAT_READER);
207	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
208	WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
209	TD_LOCKS_INC(curthread);
210	TD_SLOCKS_INC(curthread);
211	STACK_SAVE(lk);
212}
213
214static void
215lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
216{
217
218	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
219	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
220	TD_LOCKS_DEC(curthread);
221	TD_SLOCKS_DEC(curthread);
222}
223
224static void
225lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
226    uint64_t waittime, const char *file, int line, int flags)
227{
228
229	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(lockmgr__acquire, lk, contested,
230	    waittime, file, line, LOCKSTAT_WRITER);
231	LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
232	WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
233	    line);
234	TD_LOCKS_INC(curthread);
235	STACK_SAVE(lk);
236}
237
238static void
239lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
240{
241
242	if (LK_HOLDER(lockmgr_read_value(lk)) != LK_KERNPROC) {
243		WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
244		TD_LOCKS_DEC(curthread);
245	}
246	LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
247	    line);
248}
249
250static __inline struct thread *
251lockmgr_xholder(const struct lock *lk)
252{
253	uintptr_t x;
254
255	x = lockmgr_read_value(lk);
256	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
257}
258
259/*
 * Assumes the sleepqueue chain lock is held on entry and returns with it
 * released.  It also assumes the generic interlock, if any, is valid and
 * was checked by the caller beforehand.
262 * If LK_INTERLOCK is specified the interlock is not reacquired after the
263 * sleep.
264 */
265static __inline int
266sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
267    const char *wmesg, int pri, int timo, int queue)
268{
269	GIANT_DECLARE;
270	struct lock_class *class;
271	int catch, error;
272
273	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
274	catch = pri & PCATCH;
275	pri &= PRIMASK;
276	error = 0;
277
278	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
279	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
280
281	if (flags & LK_INTERLOCK)
282		class->lc_unlock(ilk);
283	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
284		lk->lk_exslpfail++;
285	GIANT_SAVE();
286	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
287	    SLEEPQ_INTERRUPTIBLE : 0), queue);
288	if ((flags & LK_TIMELOCK) && timo)
289		sleepq_set_timeout(&lk->lock_object, timo);
290
291	/*
	 * Choose the sleep primitive depending on whether a timeout
	 * and/or an interruptible (PCATCH) sleep was requested.
293	 */
294	if ((flags & LK_TIMELOCK) && timo && catch)
295		error = sleepq_timedwait_sig(&lk->lock_object, pri);
296	else if ((flags & LK_TIMELOCK) && timo)
297		error = sleepq_timedwait(&lk->lock_object, pri);
298	else if (catch)
299		error = sleepq_wait_sig(&lk->lock_object, pri);
300	else
301		sleepq_wait(&lk->lock_object, pri);
302	GIANT_RESTORE();
303	if ((flags & LK_SLEEPFAIL) && error == 0)
304		error = ENOLCK;
305
306	return (error);
307}
308
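/*
 * Release a shared lock, taking the slow path if waiters need to be woken
 * up.  Exclusive waiters are preferred unless only LK_SLEEPFAIL sleepers are
 * left on the exclusive queue.  Returns non-zero if the swapper must be
 * kicked.
 */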
309static __inline int
310wakeupshlk(struct lock *lk, const char *file, int line)
311{
312	uintptr_t v, x, orig_x;
313	u_int realexslp;
314	int queue, wakeup_swapper;
315
316	wakeup_swapper = 0;
317	for (;;) {
318		x = lockmgr_read_value(lk);
319		if (lockmgr_sunlock_try(lk, &x))
320			break;
321
322		/*
323		 * We should have a sharer with waiters, so enter the hard
324		 * path in order to handle wakeups correctly.
325		 */
326		sleepq_lock(&lk->lock_object);
327		orig_x = lockmgr_read_value(lk);
328retry_sleepq:
329		x = orig_x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
330		v = LK_UNLOCKED;
331
332		/*
		 * If the lock has exclusive waiters, give them preference in
		 * order to avoid deadlock with shared runners-up.
		 * If interruptible sleeps left the exclusive queue empty,
		 * avoid starving the threads sleeping on the shared queue by
		 * giving them precedence and clearing the exclusive waiters
		 * bit anyway.
		 * Note that lk_exslpfail may overstate the real number of
		 * waiters sleeping with LK_SLEEPFAIL, because such sleeps may
		 * also be interruptible, so treat it as an upper bound.
344		 */
345		realexslp = sleepq_sleepcnt(&lk->lock_object,
346		    SQ_EXCLUSIVE_QUEUE);
347		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
348			if (lk->lk_exslpfail < realexslp) {
349				lk->lk_exslpfail = 0;
350				queue = SQ_EXCLUSIVE_QUEUE;
351				v |= (x & LK_SHARED_WAITERS);
352			} else {
353				lk->lk_exslpfail = 0;
354				LOCK_LOG2(lk,
355				    "%s: %p has only LK_SLEEPFAIL sleepers",
356				    __func__, lk);
357				LOCK_LOG2(lk,
358			    "%s: %p waking up threads on the exclusive queue",
359				    __func__, lk);
360				wakeup_swapper =
361				    sleepq_broadcast(&lk->lock_object,
362				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
363				queue = SQ_SHARED_QUEUE;
364			}
365
366		} else {
367			/*
			 * Exclusive waiters sleeping with LK_SLEEPFAIL on
			 * and using interruptible sleeps/timeouts may have
			 * left spurious lk_exslpfail counts behind, so clean
			 * them up anyway.
372			 */
373			lk->lk_exslpfail = 0;
374			queue = SQ_SHARED_QUEUE;
375		}
376
377		if (lockmgr_sunlock_try(lk, &orig_x)) {
378			sleepq_release(&lk->lock_object);
379			break;
380		}
381
382		x |= LK_SHARERS_LOCK(1);
383		if (!atomic_fcmpset_rel_ptr(&lk->lk_lock, &x, v)) {
384			orig_x = x;
385			goto retry_sleepq;
386		}
387		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
388		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
389		    "exclusive");
390		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
391		    0, queue);
392		sleepq_release(&lk->lock_object);
393		break;
394	}
395
396	LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk, LOCKSTAT_READER);
397	return (wakeup_swapper);
398}
399
400static void
401assert_lockmgr(const struct lock_object *lock, int what)
402{
403
404	panic("lockmgr locks do not support assertions");
405}
406
407static void
408lock_lockmgr(struct lock_object *lock, uintptr_t how)
409{
410
411	panic("lockmgr locks do not support sleep interlocking");
412}
413
414static uintptr_t
415unlock_lockmgr(struct lock_object *lock)
416{
417
418	panic("lockmgr locks do not support sleep interlocking");
419}
420
421#ifdef KDTRACE_HOOKS
422static int
423owner_lockmgr(const struct lock_object *lock, struct thread **owner)
424{
425
426	panic("lockmgr locks do not support owner inquiring");
427}
428#endif
429
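/*
 * Initialize a lockmgr lock.  A typical life cycle looks like the sketch
 * below (illustrative only: "foo_lock" and the "foolk" wait message are
 * made-up names, and PVFS is just an example priority):
 *
 *	struct lock foo_lock;
 *
 *	lockinit(&foo_lock, PVFS, "foolk", 0, LK_CANRECURSE);
 *	lockmgr(&foo_lock, LK_EXCLUSIVE, NULL);
 *	...
 *	lockmgr(&foo_lock, LK_RELEASE, NULL);
 *	lockdestroy(&foo_lock);
 */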
430void
431lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
432{
433	int iflags;
434
435	MPASS((flags & ~LK_INIT_MASK) == 0);
436	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
	    ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
	    &lk->lk_lock));
439
440	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
441	if (flags & LK_CANRECURSE)
442		iflags |= LO_RECURSABLE;
443	if ((flags & LK_NODUP) == 0)
444		iflags |= LO_DUPOK;
445	if (flags & LK_NOPROFILE)
446		iflags |= LO_NOPROFILE;
447	if ((flags & LK_NOWITNESS) == 0)
448		iflags |= LO_WITNESS;
449	if (flags & LK_QUIET)
450		iflags |= LO_QUIET;
451	if (flags & LK_IS_VNODE)
452		iflags |= LO_IS_VNODE;
453	if (flags & LK_NEW)
454		iflags |= LO_NEW;
455	iflags |= flags & LK_NOSHARE;
456
457	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
458	lk->lk_lock = LK_UNLOCKED;
459	lk->lk_recurse = 0;
460	lk->lk_exslpfail = 0;
461	lk->lk_timo = timo;
462	lk->lk_pri = pri;
463	STACK_ZERO(lk);
464}
465
466/*
467 * XXX: Gross hacks to manipulate external lock flags after
468 * initialization.  Used for certain vnode and buf locks.
469 */
470void
471lockallowshare(struct lock *lk)
472{
473
474	lockmgr_assert(lk, KA_XLOCKED);
475	lk->lock_object.lo_flags &= ~LK_NOSHARE;
476}
477
478void
479lockdisableshare(struct lock *lk)
480{
481
482	lockmgr_assert(lk, KA_XLOCKED);
483	lk->lock_object.lo_flags |= LK_NOSHARE;
484}
485
486void
487lockallowrecurse(struct lock *lk)
488{
489
490	lockmgr_assert(lk, KA_XLOCKED);
491	lk->lock_object.lo_flags |= LO_RECURSABLE;
492}
493
494void
495lockdisablerecurse(struct lock *lk)
496{
497
498	lockmgr_assert(lk, KA_XLOCKED);
499	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
500}
501
502void
503lockdestroy(struct lock *lk)
504{
505
506	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
507	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
508	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
509	lock_destroy(&lk->lock_object);
510}
511
512static bool __always_inline
513lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags, bool fp)
514{
515
516	/*
	 * If no other thread holds an exclusive lock and
	 * no exclusive waiter is present, bump the count of
519	 * sharers.  Since we have to preserve the state of
520	 * waiters, if we fail to acquire the shared lock
521	 * loop back and retry.
522	 */
523	while (LK_CAN_SHARE(*xp, flags, fp)) {
524		if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
525		    *xp + LK_ONE_SHARER)) {
526			return (true);
527		}
528	}
529	return (false);
530}
531
532static bool __always_inline
533lockmgr_sunlock_try(struct lock *lk, uintptr_t *xp)
534{
535
536	for (;;) {
537		if (LK_SHARERS(*xp) > 1 || !(*xp & LK_ALL_WAITERS)) {
538			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, xp,
539			    *xp - LK_ONE_SHARER))
540				return (true);
541			continue;
542		}
543		break;
544	}
545	return (false);
546}
547
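/*
 * Adaptive spinning for the shared path: while the exclusive owner is
 * running on another CPU and no waiters are queued, spin with lock_delay()
 * in the hope that the owner releases the lock shortly.  Return true once
 * the lock becomes sharable again, false if spinning no longer looks
 * profitable (e.g. the owner is sleeping, the lock was disowned to
 * LK_KERNPROC, or waiters are already queued).
 */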
548static bool
549lockmgr_slock_adaptive(struct lock_delay_arg *lda, struct lock *lk, uintptr_t *xp,
550    int flags)
551{
552	struct thread *owner;
553	uintptr_t x;
554
555	x = *xp;
556	MPASS(x != LK_UNLOCKED);
557	owner = (struct thread *)LK_HOLDER(x);
558	for (;;) {
559		MPASS(owner != curthread);
560		if (owner == (struct thread *)LK_KERNPROC)
561			return (false);
562		if ((x & LK_SHARE) && LK_SHARERS(x) > 0)
563			return (false);
564		if (owner == NULL)
565			return (false);
566		if (!TD_IS_RUNNING(owner))
567			return (false);
568		if ((x & LK_ALL_WAITERS) != 0)
569			return (false);
570		lock_delay(lda);
571		x = lockmgr_read_value(lk);
572		if (LK_CAN_SHARE(x, flags, false)) {
573			*xp = x;
574			return (true);
575		}
576		owner = (struct thread *)LK_HOLDER(x);
577	}
578}
579
580static __noinline int
581lockmgr_slock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
582    const char *file, int line, struct lockmgr_wait *lwa)
583{
584	uintptr_t tid, x;
585	int error = 0;
586	const char *iwmesg;
587	int ipri, itimo;
588
589#ifdef KDTRACE_HOOKS
590	uint64_t sleep_time = 0;
591#endif
592#ifdef LOCK_PROFILING
593	uint64_t waittime = 0;
594	int contested = 0;
595#endif
596	struct lock_delay_arg lda;
597
598	if (KERNEL_PANICKED())
599		goto out;
600
601	tid = (uintptr_t)curthread;
602
603	if (LK_CAN_WITNESS(flags))
604		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
605		    file, line, flags & LK_INTERLOCK ? ilk : NULL);
606	x = lockmgr_read_value(lk);
607	lock_delay_arg_init(&lda, &lockmgr_delay);
608	if (!lk_adaptive)
609		flags &= ~LK_ADAPTIVE;
610	/*
611	 * The lock may already be locked exclusive by curthread,
612	 * avoid deadlock.
613	 */
614	if (LK_HOLDER(x) == tid) {
615		LOCK_LOG2(lk,
616		    "%s: %p already held in exclusive mode",
617		    __func__, lk);
618		error = EDEADLK;
619		goto out;
620	}
621
622	for (;;) {
623		if (lockmgr_slock_try(lk, &x, flags, false))
624			break;
625
626		if ((flags & (LK_ADAPTIVE | LK_INTERLOCK)) == LK_ADAPTIVE) {
627			if (lockmgr_slock_adaptive(&lda, lk, &x, flags))
628				continue;
629		}
630
631#ifdef HWPMC_HOOKS
632		PMC_SOFT_CALL( , , lock, failed);
633#endif
634		lock_profile_obtain_lock_failed(&lk->lock_object, false,
635		    &contested, &waittime);
636
637		/*
		 * If the operation is not allowed to sleep, just give
		 * up and return.
640		 */
641		if (LK_TRYOP(flags)) {
642			LOCK_LOG2(lk, "%s: %p fails the try operation",
643			    __func__, lk);
644			error = EBUSY;
645			break;
646		}
647
648		/*
649		 * Acquire the sleepqueue chain lock because we
		 * probably will need to manipulate waiters flags.
651		 */
652		sleepq_lock(&lk->lock_object);
653		x = lockmgr_read_value(lk);
654retry_sleepq:
655
656		/*
657		 * if the lock can be acquired in shared mode, try
658		 * again.
659		 */
660		if (LK_CAN_SHARE(x, flags, false)) {
661			sleepq_release(&lk->lock_object);
662			continue;
663		}
664
665		/*
666		 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
667		 * loop back and retry.
668		 */
669		if ((x & LK_SHARED_WAITERS) == 0) {
670			if (!atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
671			    x | LK_SHARED_WAITERS)) {
672				goto retry_sleepq;
673			}
674			LOCK_LOG2(lk, "%s: %p set shared waiters flag",
675			    __func__, lk);
676		}
677
678		if (lwa == NULL) {
679			iwmesg = lk->lock_object.lo_name;
680			ipri = lk->lk_pri;
681			itimo = lk->lk_timo;
682		} else {
683			iwmesg = lwa->iwmesg;
684			ipri = lwa->ipri;
685			itimo = lwa->itimo;
686		}
687
688		/*
		 * Since we have been unable to acquire the
690		 * shared lock and the shared waiters flag is set,
691		 * we will sleep.
692		 */
693#ifdef KDTRACE_HOOKS
694		sleep_time -= lockstat_nsecs(&lk->lock_object);
695#endif
696		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
697		    SQ_SHARED_QUEUE);
698#ifdef KDTRACE_HOOKS
699		sleep_time += lockstat_nsecs(&lk->lock_object);
700#endif
701		flags &= ~LK_INTERLOCK;
702		if (error) {
703			LOCK_LOG3(lk,
704			    "%s: interrupted sleep for %p with %d",
705			    __func__, lk, error);
706			break;
707		}
708		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
709		    __func__, lk);
710		x = lockmgr_read_value(lk);
711	}
712	if (error == 0) {
713#ifdef KDTRACE_HOOKS
714		if (sleep_time != 0)
715			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
716			    LOCKSTAT_READER, (x & LK_SHARE) == 0,
717			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
718#endif
719#ifdef LOCK_PROFILING
720		lockmgr_note_shared_acquire(lk, contested, waittime,
721		    file, line, flags);
722#else
723		lockmgr_note_shared_acquire(lk, 0, 0, file, line,
724		    flags);
725#endif
726	}
727
728out:
729	lockmgr_exit(flags, ilk, 0);
730	return (error);
731}
732
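/*
 * Adaptive spinning for the exclusive path: same idea as
 * lockmgr_slock_adaptive(), but success is only reported once the lock is
 * observed fully unlocked.
 */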
733static bool
734lockmgr_xlock_adaptive(struct lock_delay_arg *lda, struct lock *lk, uintptr_t *xp)
735{
736	struct thread *owner;
737	uintptr_t x;
738
739	x = *xp;
740	MPASS(x != LK_UNLOCKED);
741	owner = (struct thread *)LK_HOLDER(x);
742	for (;;) {
743		MPASS(owner != curthread);
744		if (owner == NULL)
745			return (false);
746		if ((x & LK_SHARE) && LK_SHARERS(x) > 0)
747			return (false);
748		if (owner == (struct thread *)LK_KERNPROC)
749			return (false);
750		if (!TD_IS_RUNNING(owner))
751			return (false);
752		if ((x & LK_ALL_WAITERS) != 0)
753			return (false);
754		lock_delay(lda);
755		x = lockmgr_read_value(lk);
756		if (x == LK_UNLOCKED) {
757			*xp = x;
758			return (true);
759		}
760		owner = (struct thread *)LK_HOLDER(x);
761	}
762}
763
764static __noinline int
765lockmgr_xlock_hard(struct lock *lk, u_int flags, struct lock_object *ilk,
766    const char *file, int line, struct lockmgr_wait *lwa)
767{
768	struct lock_class *class;
769	uintptr_t tid, x, v;
770	int error = 0;
771	const char *iwmesg;
772	int ipri, itimo;
773
774#ifdef KDTRACE_HOOKS
775	uint64_t sleep_time = 0;
776#endif
777#ifdef LOCK_PROFILING
778	uint64_t waittime = 0;
779	int contested = 0;
780#endif
781	struct lock_delay_arg lda;
782
783	if (KERNEL_PANICKED())
784		goto out;
785
786	tid = (uintptr_t)curthread;
787
788	if (LK_CAN_WITNESS(flags))
789		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
790		    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
791		    ilk : NULL);
792
793	/*
794	 * If curthread already holds the lock and this one is
795	 * allowed to recurse, simply recurse on it.
796	 */
797	if (lockmgr_xlocked(lk)) {
798		if ((flags & LK_CANRECURSE) == 0 &&
799		    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
800			/*
			 * If this is a try operation, give up and
			 * return instead of panicking.
803			 */
804			if (LK_TRYOP(flags)) {
805				LOCK_LOG2(lk,
806				    "%s: %p fails the try operation",
807				    __func__, lk);
808				error = EBUSY;
809				goto out;
810			}
811			if (flags & LK_INTERLOCK) {
812				class = LOCK_CLASS(ilk);
813				class->lc_unlock(ilk);
814			}
815			STACK_PRINT(lk);
816			panic("%s: recursing on non recursive lockmgr %p "
817			    "@ %s:%d\n", __func__, lk, file, line);
818		}
819		atomic_set_ptr(&lk->lk_lock, LK_WRITER_RECURSED);
820		lk->lk_recurse++;
821		LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
822		LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
823		    lk->lk_recurse, file, line);
824		WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
825		    LK_TRYWIT(flags), file, line);
826		TD_LOCKS_INC(curthread);
827		goto out;
828	}
829
830	x = LK_UNLOCKED;
831	lock_delay_arg_init(&lda, &lockmgr_delay);
832	if (!lk_adaptive)
833		flags &= ~LK_ADAPTIVE;
834	for (;;) {
835		if (x == LK_UNLOCKED) {
836			if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x, tid))
837				break;
838			continue;
839		}
840		if ((flags & (LK_ADAPTIVE | LK_INTERLOCK)) == LK_ADAPTIVE) {
841			if (lockmgr_xlock_adaptive(&lda, lk, &x))
842				continue;
843		}
844#ifdef HWPMC_HOOKS
845		PMC_SOFT_CALL( , , lock, failed);
846#endif
847		lock_profile_obtain_lock_failed(&lk->lock_object, false,
848		    &contested, &waittime);
849
850		/*
		 * If the operation is not allowed to sleep, just give
		 * up and return.
853		 */
854		if (LK_TRYOP(flags)) {
855			LOCK_LOG2(lk, "%s: %p fails the try operation",
856			    __func__, lk);
857			error = EBUSY;
858			break;
859		}
860
861		/*
862		 * Acquire the sleepqueue chain lock because we
		 * probably will need to manipulate waiters flags.
864		 */
865		sleepq_lock(&lk->lock_object);
866		x = lockmgr_read_value(lk);
867retry_sleepq:
868
869		/*
870		 * if the lock has been released while we spun on
871		 * the sleepqueue chain lock just try again.
872		 */
873		if (x == LK_UNLOCKED) {
874			sleepq_release(&lk->lock_object);
875			continue;
876		}
877
878		/*
879		 * The lock can be in the state where there is a
880		 * pending queue of waiters, but still no owner.
881		 * This happens when the lock is contested and an
882		 * owner is going to claim the lock.
883		 * If curthread is the one successfully acquiring it
884		 * claim lock ownership and return, preserving waiters
885		 * flags.
886		 */
887		v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
888		if ((x & ~v) == LK_UNLOCKED) {
889			v &= ~LK_EXCLUSIVE_SPINNERS;
890			if (atomic_fcmpset_acq_ptr(&lk->lk_lock, &x,
891			    tid | v)) {
892				sleepq_release(&lk->lock_object);
893				LOCK_LOG2(lk,
894				    "%s: %p claimed by a new writer",
895				    __func__, lk);
896				break;
897			}
898			goto retry_sleepq;
899		}
900
901		/*
902		 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
903		 * fail, loop back and retry.
904		 */
905		if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
906			if (!atomic_fcmpset_ptr(&lk->lk_lock, &x,
907			    x | LK_EXCLUSIVE_WAITERS)) {
908				goto retry_sleepq;
909			}
910			LOCK_LOG2(lk, "%s: %p set excl waiters flag",
911			    __func__, lk);
912		}
913
914		if (lwa == NULL) {
915			iwmesg = lk->lock_object.lo_name;
916			ipri = lk->lk_pri;
917			itimo = lk->lk_timo;
918		} else {
919			iwmesg = lwa->iwmesg;
920			ipri = lwa->ipri;
921			itimo = lwa->itimo;
922		}
923
924		/*
		 * Since we have been unable to acquire the
926		 * exclusive lock and the exclusive waiters flag
927		 * is set, we will sleep.
928		 */
929#ifdef KDTRACE_HOOKS
930		sleep_time -= lockstat_nsecs(&lk->lock_object);
931#endif
932		error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
933		    SQ_EXCLUSIVE_QUEUE);
934#ifdef KDTRACE_HOOKS
935		sleep_time += lockstat_nsecs(&lk->lock_object);
936#endif
937		flags &= ~LK_INTERLOCK;
938		if (error) {
939			LOCK_LOG3(lk,
940			    "%s: interrupted sleep for %p with %d",
941			    __func__, lk, error);
942			break;
943		}
944		LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
945		    __func__, lk);
946		x = lockmgr_read_value(lk);
947	}
948	if (error == 0) {
949#ifdef KDTRACE_HOOKS
950		if (sleep_time != 0)
951			LOCKSTAT_RECORD4(lockmgr__block, lk, sleep_time,
952			    LOCKSTAT_WRITER, (x & LK_SHARE) == 0,
953			    (x & LK_SHARE) == 0 ? 0 : LK_SHARERS(x));
954#endif
955#ifdef LOCK_PROFILING
956		lockmgr_note_exclusive_acquire(lk, contested, waittime,
957		    file, line, flags);
958#else
959		lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
960		    flags);
961#endif
962	}
963
964out:
965	lockmgr_exit(flags, ilk, 0);
966	return (error);
967}
968
969static __noinline int
970lockmgr_upgrade(struct lock *lk, u_int flags, struct lock_object *ilk,
971    const char *file, int line, struct lockmgr_wait *lwa)
972{
973	uintptr_t tid, v, setv;
974	int error = 0;
975	int op;
976
977	if (KERNEL_PANICKED())
978		goto out;
979
980	tid = (uintptr_t)curthread;
981
982	_lockmgr_assert(lk, KA_SLOCKED, file, line);
983
984	op = flags & LK_TYPE_MASK;
985	v = lockmgr_read_value(lk);
986	for (;;) {
987		if (LK_SHARERS(v) > 1) {
988			if (op == LK_TRYUPGRADE) {
989				LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
990				    __func__, lk);
991				error = EBUSY;
992				goto out;
993			}
994			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, &v,
995			    v - LK_ONE_SHARER)) {
996				lockmgr_note_shared_release(lk, file, line);
997				goto out_xlock;
998			}
999			continue;
1000		}
1001		MPASS((v & ~LK_ALL_WAITERS) == LK_SHARERS_LOCK(1));
1002
1003		setv = tid;
1004		setv |= (v & LK_ALL_WAITERS);
1005
1006		/*
1007		 * Try to switch from one shared lock to an exclusive one.
1008		 * We need to preserve waiters flags during the operation.
1009		 */
1010		if (atomic_fcmpset_ptr(&lk->lk_lock, &v, setv)) {
1011			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
1012			    line);
1013			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
1014			    LK_TRYWIT(flags), file, line);
1015			LOCKSTAT_RECORD0(lockmgr__upgrade, lk);
1016			TD_SLOCKS_DEC(curthread);
1017			goto out;
1018		}
1019	}
1020
1021out_xlock:
1022	error = lockmgr_xlock_hard(lk, flags, ilk, file, line, lwa);
1023	flags &= ~LK_INTERLOCK;
1024out:
1025	lockmgr_exit(flags, ilk, 0);
1026	return (error);
1027}
1028
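/*
 * Main flag-driven entry point for lock acquisition: the uncontended
 * LK_SHARED and LK_EXCLUSIVE cases are handled inline, contended ones are
 * punted to the hard-path functions, upgrades go to lockmgr_upgrade() and
 * anything else (including shared requests on LK_NOSHARE locks) falls back
 * to __lockmgr_args().
 */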
1029int
1030lockmgr_lock_flags(struct lock *lk, u_int flags, struct lock_object *ilk,
1031    const char *file, int line)
1032{
1033	struct lock_class *class;
1034	uintptr_t x, tid;
1035	u_int op;
1036	bool locked;
1037
1038	if (KERNEL_PANICKED())
1039		return (0);
1040
1041	op = flags & LK_TYPE_MASK;
1042	locked = false;
1043	switch (op) {
1044	case LK_SHARED:
1045		if (LK_CAN_WITNESS(flags))
1046			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
1047			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
1048		if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
1049			break;
1050		x = lockmgr_read_value(lk);
1051		if (lockmgr_slock_try(lk, &x, flags, true)) {
1052			lockmgr_note_shared_acquire(lk, 0, 0,
1053			    file, line, flags);
1054			locked = true;
1055		} else {
1056			return (lockmgr_slock_hard(lk, flags, ilk, file, line,
1057			    NULL));
1058		}
1059		break;
1060	case LK_EXCLUSIVE:
1061		if (LK_CAN_WITNESS(flags))
1062			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1063			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1064			    ilk : NULL);
1065		tid = (uintptr_t)curthread;
1066		if (lockmgr_read_value(lk) == LK_UNLOCKED &&
1067		    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1068			lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
1069			    flags);
1070			locked = true;
1071		} else {
1072			return (lockmgr_xlock_hard(lk, flags, ilk, file, line,
1073			    NULL));
1074		}
1075		break;
1076	case LK_UPGRADE:
1077	case LK_TRYUPGRADE:
1078		return (lockmgr_upgrade(lk, flags, ilk, file, line, NULL));
1079	default:
1080		break;
1081	}
1082	if (__predict_true(locked)) {
1083		if (__predict_false(flags & LK_INTERLOCK)) {
1084			class = LOCK_CLASS(ilk);
1085			class->lc_unlock(ilk);
1086		}
1087		return (0);
1088	} else {
1089		return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
1090		    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
1091	}
1092}
1093
1094static __noinline int
1095lockmgr_sunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1096    const char *file, int line)
{
1099	int wakeup_swapper = 0;
1100
1101	if (KERNEL_PANICKED())
1102		goto out;
1103
1104	wakeup_swapper = wakeupshlk(lk, file, line);
1105
1106out:
1107	lockmgr_exit(flags, ilk, wakeup_swapper);
1108	return (0);
1109}
1110
1111static __noinline int
1112lockmgr_xunlock_hard(struct lock *lk, uintptr_t x, u_int flags, struct lock_object *ilk,
1113    const char *file, int line)
1114{
1115	uintptr_t tid, v;
1116	int wakeup_swapper = 0;
1117	u_int realexslp;
1118	int queue;
1119
1120	if (KERNEL_PANICKED())
1121		goto out;
1122
1123	tid = (uintptr_t)curthread;
1124
1125	/*
	 * As a first attempt, treat the lock as if it has no
	 * waiters.
	 * Fix up the tid variable if the lock has been disowned.
1129	 */
1130	if (LK_HOLDER(x) == LK_KERNPROC)
1131		tid = LK_KERNPROC;
1132
1133	/*
1134	 * The lock is held in exclusive mode.
1135	 * If the lock is recursed also, then unrecurse it.
1136	 */
1137	if (lockmgr_recursed_v(x)) {
1138		LOCK_LOG2(lk, "%s: %p unrecursing", __func__, lk);
1139		lk->lk_recurse--;
1140		if (lk->lk_recurse == 0)
1141			atomic_clear_ptr(&lk->lk_lock, LK_WRITER_RECURSED);
1142		goto out;
1143	}
1144	if (tid != LK_KERNPROC)
1145		LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
1146		    LOCKSTAT_WRITER);
1147
1148	if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED))
1149		goto out;
1150
1151	sleepq_lock(&lk->lock_object);
1152	x = lockmgr_read_value(lk);
1153	v = LK_UNLOCKED;
1154
1155	/*
	 * If the lock has exclusive waiters, give them
	 * preference in order to avoid deadlock with
	 * shared runners-up.
	 * If interruptible sleeps left the exclusive queue
	 * empty, avoid starving the threads sleeping
	 * on the shared queue by giving them precedence
	 * and clearing the exclusive waiters bit anyway.
	 * Note that lk_exslpfail may overstate the real
	 * number of waiters sleeping with LK_SLEEPFAIL,
	 * because such sleeps may also be interruptible,
	 * so treat it as an upper bound.
1169	 */
1170	MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1171	realexslp = sleepq_sleepcnt(&lk->lock_object, SQ_EXCLUSIVE_QUEUE);
1172	if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1173		if (lk->lk_exslpfail < realexslp) {
1174			lk->lk_exslpfail = 0;
1175			queue = SQ_EXCLUSIVE_QUEUE;
1176			v |= (x & LK_SHARED_WAITERS);
1177		} else {
1178			lk->lk_exslpfail = 0;
1179			LOCK_LOG2(lk,
1180			    "%s: %p has only LK_SLEEPFAIL sleepers",
1181			    __func__, lk);
1182			LOCK_LOG2(lk,
1183			    "%s: %p waking up threads on the exclusive queue",
1184			    __func__, lk);
1185			wakeup_swapper = sleepq_broadcast(&lk->lock_object,
1186			    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1187			queue = SQ_SHARED_QUEUE;
1188		}
1189	} else {
1190		/*
		 * Exclusive waiters sleeping with LK_SLEEPFAIL
		 * on and using interruptible sleeps/timeouts
		 * may have left spurious lk_exslpfail counts
		 * behind, so clean them up anyway.
1195		 */
1196		lk->lk_exslpfail = 0;
1197		queue = SQ_SHARED_QUEUE;
1198	}
1199
1200	LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
1201	    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1202	    "exclusive");
1203	atomic_store_rel_ptr(&lk->lk_lock, v);
1204	wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
1205	sleepq_release(&lk->lock_object);
1206
1207out:
1208	lockmgr_exit(flags, ilk, wakeup_swapper);
1209	return (0);
1210}
1211
1212/*
1213 * Lightweight entry points for common operations.
1214 *
1215 * Functionality is similar to sx locks, in that none of the additional lockmgr
1216 * features are supported. To be clear, these are NOT supported:
1217 * 1. shared locking disablement
1218 * 2. returning with an error after sleep
1219 * 3. unlocking the interlock
1220 *
1221 * If in doubt, use lockmgr_lock_flags.
1222 */
1223int
1224lockmgr_slock(struct lock *lk, u_int flags, const char *file, int line)
1225{
1226	uintptr_t x;
1227
1228	MPASS((flags & LK_TYPE_MASK) == LK_SHARED);
1229	MPASS((flags & LK_INTERLOCK) == 0);
1230	MPASS((lk->lock_object.lo_flags & LK_NOSHARE) == 0);
1231
1232	if (LK_CAN_WITNESS(flags))
1233		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
1234		    file, line, NULL);
1235	x = lockmgr_read_value(lk);
1236	if (__predict_true(lockmgr_slock_try(lk, &x, flags, true))) {
1237		lockmgr_note_shared_acquire(lk, 0, 0, file, line, flags);
1238		return (0);
1239	}
1240
1241	return (lockmgr_slock_hard(lk, flags | LK_ADAPTIVE, NULL, file, line, NULL));
1242}
1243
1244int
1245lockmgr_xlock(struct lock *lk, u_int flags, const char *file, int line)
1246{
1247	uintptr_t tid;
1248
1249	MPASS((flags & LK_TYPE_MASK) == LK_EXCLUSIVE);
1250	MPASS((flags & LK_INTERLOCK) == 0);
1251
1252	if (LK_CAN_WITNESS(flags))
1253		WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1254		    LOP_EXCLUSIVE, file, line, NULL);
1255	tid = (uintptr_t)curthread;
1256	if (atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1257		lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
1258		    flags);
1259		return (0);
1260	}
1261
1262	return (lockmgr_xlock_hard(lk, flags | LK_ADAPTIVE, NULL, file, line, NULL));
1263}
1264
1265int
1266lockmgr_unlock(struct lock *lk)
1267{
1268	uintptr_t x, tid;
1269	const char *file;
1270	int line;
1271
1272	file = __FILE__;
1273	line = __LINE__;
1274
1275	_lockmgr_assert(lk, KA_LOCKED, file, line);
1276	x = lockmgr_read_value(lk);
	if (__predict_true((x & LK_SHARE) != 0)) {
1278		lockmgr_note_shared_release(lk, file, line);
1279		if (lockmgr_sunlock_try(lk, &x)) {
			LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
			    LOCKSTAT_READER);
1281		} else {
1282			return (lockmgr_sunlock_hard(lk, x, LK_RELEASE, NULL, file, line));
1283		}
1284	} else {
1285		tid = (uintptr_t)curthread;
1286		lockmgr_note_exclusive_release(lk, file, line);
1287		if (x == tid && atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
			LOCKSTAT_PROFILE_RELEASE_RWLOCK(lockmgr__release, lk,
			    LOCKSTAT_WRITER);
1289		} else {
1290			return (lockmgr_xunlock_hard(lk, x, LK_RELEASE, NULL, file, line));
1291		}
1292	}
1293	return (0);
1294}
1295
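/*
 * Full-featured entry point: implements every lockmgr operation (LK_SHARED,
 * LK_EXCLUSIVE, LK_UPGRADE, LK_TRYUPGRADE, LK_DOWNGRADE, LK_RELEASE and
 * LK_DRAIN), honoring per-call wmesg/priority/timeout overrides and the
 * optional interlock.
 */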
1296int
1297__lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
1298    const char *wmesg, int pri, int timo, const char *file, int line)
1299{
1300	GIANT_DECLARE;
1301	struct lockmgr_wait lwa;
1302	struct lock_class *class;
1303	const char *iwmesg;
1304	uintptr_t tid, v, x;
1305	u_int op, realexslp;
1306	int error, ipri, itimo, queue, wakeup_swapper;
1307#ifdef LOCK_PROFILING
1308	uint64_t waittime = 0;
1309	int contested = 0;
1310#endif
1311
1312	if (KERNEL_PANICKED())
1313		return (0);
1314
1315	error = 0;
1316	tid = (uintptr_t)curthread;
1317	op = (flags & LK_TYPE_MASK);
1318	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
1319	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
1320	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
1321
1322	lwa.iwmesg = iwmesg;
1323	lwa.ipri = ipri;
1324	lwa.itimo = itimo;
1325
1326	MPASS((flags & ~LK_TOTAL_MASK) == 0);
1327	KASSERT((op & (op - 1)) == 0,
1328	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
1329	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
1330	    (op != LK_DOWNGRADE && op != LK_RELEASE),
1331	    ("%s: Invalid flags in regard of the operation desired @ %s:%d",
1332	    __func__, file, line));
1333	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
1334	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
1335	    __func__, file, line));
1336	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
1337	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
1338	    lk->lock_object.lo_name, file, line));
1339
1340	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
1341
1342	if (lk->lock_object.lo_flags & LK_NOSHARE) {
1343		switch (op) {
1344		case LK_SHARED:
1345			op = LK_EXCLUSIVE;
1346			break;
1347		case LK_UPGRADE:
1348		case LK_TRYUPGRADE:
1349		case LK_DOWNGRADE:
1350			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
1351			    file, line);
1352			if (flags & LK_INTERLOCK)
1353				class->lc_unlock(ilk);
1354			return (0);
1355		}
1356	}
1357
1358	wakeup_swapper = 0;
1359	switch (op) {
1360	case LK_SHARED:
1361		return (lockmgr_slock_hard(lk, flags, ilk, file, line, &lwa));
1362		break;
1363	case LK_UPGRADE:
1364	case LK_TRYUPGRADE:
1365		return (lockmgr_upgrade(lk, flags, ilk, file, line, &lwa));
1366		break;
1367	case LK_EXCLUSIVE:
1368		return (lockmgr_xlock_hard(lk, flags, ilk, file, line, &lwa));
1369		break;
1370	case LK_DOWNGRADE:
1371		_lockmgr_assert(lk, KA_XLOCKED, file, line);
1372		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1373
1374		/*
1375		 * Panic if the lock is recursed.
1376		 */
1377		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1378			if (flags & LK_INTERLOCK)
1379				class->lc_unlock(ilk);
1380			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1381			    __func__, iwmesg, file, line);
1382		}
1383		TD_SLOCKS_INC(curthread);
1384
1385		/*
1386		 * In order to preserve waiters flags, just spin.
1387		 */
1388		for (;;) {
1389			x = lockmgr_read_value(lk);
1390			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1391			x &= LK_ALL_WAITERS;
1392			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1393			    LK_SHARERS_LOCK(1) | x))
1394				break;
1395			cpu_spinwait();
1396		}
1397		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1398		LOCKSTAT_RECORD0(lockmgr__downgrade, lk);
1399		break;
1400	case LK_RELEASE:
1401		_lockmgr_assert(lk, KA_LOCKED, file, line);
1402		x = lockmgr_read_value(lk);
1403
		if (__predict_true((x & LK_SHARE) != 0)) {
1405			lockmgr_note_shared_release(lk, file, line);
1406			return (lockmgr_sunlock_hard(lk, x, flags, ilk, file, line));
1407		} else {
1408			lockmgr_note_exclusive_release(lk, file, line);
1409			return (lockmgr_xunlock_hard(lk, x, flags, ilk, file, line));
1410		}
1411		break;
1412	case LK_DRAIN:
1413		if (LK_CAN_WITNESS(flags))
1414			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1415			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1416			    ilk : NULL);
1417
1418		/*
1419		 * Trying to drain a lock we already own will result in a
1420		 * deadlock.
1421		 */
1422		if (lockmgr_xlocked(lk)) {
1423			if (flags & LK_INTERLOCK)
1424				class->lc_unlock(ilk);
1425			panic("%s: draining %s with the lock held @ %s:%d\n",
1426			    __func__, iwmesg, file, line);
1427		}
1428
1429		for (;;) {
1430			if (lk->lk_lock == LK_UNLOCKED &&
1431			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1432				break;
1433
1434#ifdef HWPMC_HOOKS
1435			PMC_SOFT_CALL( , , lock, failed);
1436#endif
1437			lock_profile_obtain_lock_failed(&lk->lock_object, false,
1438			    &contested, &waittime);
1439
1440			/*
			 * If the operation is not allowed to sleep, just give
			 * up and return.
1443			 */
1444			if (LK_TRYOP(flags)) {
1445				LOCK_LOG2(lk, "%s: %p fails the try operation",
1446				    __func__, lk);
1447				error = EBUSY;
1448				break;
1449			}
1450
1451			/*
1452			 * Acquire the sleepqueue chain lock because we
			 * probably will need to manipulate waiters flags.
1454			 */
1455			sleepq_lock(&lk->lock_object);
1456			x = lockmgr_read_value(lk);
1457
1458			/*
1459			 * if the lock has been released while we spun on
1460			 * the sleepqueue chain lock just try again.
1461			 */
1462			if (x == LK_UNLOCKED) {
1463				sleepq_release(&lk->lock_object);
1464				continue;
1465			}
1466
1467			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1468			if ((x & ~v) == LK_UNLOCKED) {
1469				v = (x & ~LK_EXCLUSIVE_SPINNERS);
1470
1471				/*
				 * If interruptible sleeps left the exclusive
				 * queue empty, avoid starving the threads
				 * sleeping on the shared queue by giving them
				 * precedence and clearing the exclusive
				 * waiters bit anyway.
				 * Note that lk_exslpfail may overstate the
				 * real number of waiters sleeping with
				 * LK_SLEEPFAIL, because such sleeps may also
				 * be interruptible, so treat it as an upper
				 * bound.
1484				 */
1485				if (v & LK_EXCLUSIVE_WAITERS) {
1486					queue = SQ_EXCLUSIVE_QUEUE;
1487					v &= ~LK_EXCLUSIVE_WAITERS;
1488				} else {
1489					/*
					 * Exclusive waiters sleeping with
					 * LK_SLEEPFAIL on and using
					 * interruptible sleeps/timeouts may
					 * have left spurious lk_exslpfail
					 * counts behind, so clean them up
					 * anyway.
1495					 */
1496					MPASS(v & LK_SHARED_WAITERS);
1497					lk->lk_exslpfail = 0;
1498					queue = SQ_SHARED_QUEUE;
1499					v &= ~LK_SHARED_WAITERS;
1500				}
1501				if (queue == SQ_EXCLUSIVE_QUEUE) {
1502					realexslp =
1503					    sleepq_sleepcnt(&lk->lock_object,
1504					    SQ_EXCLUSIVE_QUEUE);
1505					if (lk->lk_exslpfail >= realexslp) {
1506						lk->lk_exslpfail = 0;
1507						queue = SQ_SHARED_QUEUE;
1508						v &= ~LK_SHARED_WAITERS;
1509						if (realexslp != 0) {
1510							LOCK_LOG2(lk,
1511					"%s: %p has only LK_SLEEPFAIL sleepers",
1512							    __func__, lk);
1513							LOCK_LOG2(lk,
1514			"%s: %p waking up threads on the exclusive queue",
1515							    __func__, lk);
1516							wakeup_swapper =
1517							    sleepq_broadcast(
1518							    &lk->lock_object,
1519							    SLEEPQ_LK, 0,
1520							    SQ_EXCLUSIVE_QUEUE);
1521						}
1522					} else
1523						lk->lk_exslpfail = 0;
1524				}
1525				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1526					sleepq_release(&lk->lock_object);
1527					continue;
1528				}
1529				LOCK_LOG3(lk,
1530				"%s: %p waking up all threads on the %s queue",
1531				    __func__, lk, queue == SQ_SHARED_QUEUE ?
1532				    "shared" : "exclusive");
1533				wakeup_swapper |= sleepq_broadcast(
1534				    &lk->lock_object, SLEEPQ_LK, 0, queue);
1535
1536				/*
1537				 * If shared waiters have been woken up we need
1538				 * to wait for one of them to acquire the lock
				 * before setting the exclusive waiters flag in
1540				 * order to avoid a deadlock.
1541				 */
1542				if (queue == SQ_SHARED_QUEUE) {
1543					for (v = lk->lk_lock;
1544					    (v & LK_SHARE) && !LK_SHARERS(v);
1545					    v = lk->lk_lock)
1546						cpu_spinwait();
1547				}
1548			}
1549
1550			/*
1551			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1552			 * fail, loop back and retry.
1553			 */
1554			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1555				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1556				    x | LK_EXCLUSIVE_WAITERS)) {
1557					sleepq_release(&lk->lock_object);
1558					continue;
1559				}
1560				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1561				    __func__, lk);
1562			}
1563
1564			/*
			 * Since we have been unable to acquire the
1566			 * exclusive lock and the exclusive waiters flag
1567			 * is set, we will sleep.
1568			 */
1569			if (flags & LK_INTERLOCK) {
1570				class->lc_unlock(ilk);
1571				flags &= ~LK_INTERLOCK;
1572			}
1573			GIANT_SAVE();
1574			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1575			    SQ_EXCLUSIVE_QUEUE);
1576			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1577			GIANT_RESTORE();
1578			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1579			    __func__, lk);
1580		}
1581
1582		if (error == 0) {
1583			lock_profile_obtain_lock_success(&lk->lock_object,
1584			    false, contested, waittime, file, line);
1585			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1586			    lk->lk_recurse, file, line);
1587			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1588			    LK_TRYWIT(flags), file, line);
1589			TD_LOCKS_INC(curthread);
1590			STACK_SAVE(lk);
1591		}
1592		break;
1593	default:
1594		if (flags & LK_INTERLOCK)
1595			class->lc_unlock(ilk);
1596		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1597	}
1598
1599	if (flags & LK_INTERLOCK)
1600		class->lc_unlock(ilk);
1601	if (wakeup_swapper)
1602		kick_proc0();
1603
1604	return (error);
1605}
1606
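/*
 * Give up ownership of an exclusively held lock to LK_KERNPROC, so that it
 * can later be released by a thread other than the current owner.  Waiters
 * flags are preserved.
 */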
1607void
1608_lockmgr_disown(struct lock *lk, const char *file, int line)
1609{
1610	uintptr_t tid, x;
1611
1612	if (SCHEDULER_STOPPED())
1613		return;
1614
1615	tid = (uintptr_t)curthread;
1616	_lockmgr_assert(lk, KA_XLOCKED, file, line);
1617
1618	/*
1619	 * Panic if the lock is recursed.
1620	 */
1621	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1622		panic("%s: disown a recursed lockmgr @ %s:%d\n",
1623		    __func__,  file, line);
1624
1625	/*
1626	 * If the owner is already LK_KERNPROC just skip the whole operation.
1627	 */
1628	if (LK_HOLDER(lk->lk_lock) != tid)
1629		return;
1630	lock_profile_release_lock(&lk->lock_object, false);
1631	LOCKSTAT_RECORD1(lockmgr__disown, lk, LOCKSTAT_WRITER);
1632	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1633	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1634	TD_LOCKS_DEC(curthread);
1635	STACK_SAVE(lk);
1636
1637	/*
1638	 * In order to preserve waiters flags, just spin.
1639	 */
1640	for (;;) {
1641		x = lockmgr_read_value(lk);
1642		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1643		x &= LK_ALL_WAITERS;
1644		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1645		    LK_KERNPROC | x))
1646			return;
1647		cpu_spinwait();
1648	}
1649}
1650
1651void
1652lockmgr_printinfo(const struct lock *lk)
1653{
1654	struct thread *td;
1655	uintptr_t x;
1656
1657	if (lk->lk_lock == LK_UNLOCKED)
1658		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1659	else if (lk->lk_lock & LK_SHARE)
1660		printf("lock type %s: SHARED (count %ju)\n",
1661		    lk->lock_object.lo_name,
1662		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1663	else {
1664		td = lockmgr_xholder(lk);
1665		if (td == (struct thread *)LK_KERNPROC)
1666			printf("lock type %s: EXCL by KERNPROC\n",
1667			    lk->lock_object.lo_name);
1668		else
1669			printf("lock type %s: EXCL by thread %p "
1670			    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1671			    td, td->td_proc->p_pid, td->td_proc->p_comm,
1672			    td->td_tid);
1673	}
1674
1675	x = lk->lk_lock;
1676	if (x & LK_EXCLUSIVE_WAITERS)
1677		printf(" with exclusive waiters pending\n");
1678	if (x & LK_SHARED_WAITERS)
1679		printf(" with shared waiters pending\n");
1680	if (x & LK_EXCLUSIVE_SPINNERS)
1681		printf(" with exclusive spinners pending\n");
1682
1683	STACK_PRINT(lk);
1684}
1685
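/*
 * Report the state of the lock with respect to the calling thread: 0 if
 * unlocked, LK_SHARED if share-locked, LK_EXCLUSIVE if exclusively owned by
 * curthread (or disowned to LK_KERNPROC), LK_EXCLOTHER if exclusively owned
 * by some other thread.
 */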
1686int
1687lockstatus(const struct lock *lk)
1688{
1689	uintptr_t v, x;
1690	int ret;
1691
1692	ret = LK_SHARED;
1693	x = lockmgr_read_value(lk);
1694	v = LK_HOLDER(x);
1695
1696	if ((x & LK_SHARE) == 0) {
1697		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1698			ret = LK_EXCLUSIVE;
1699		else
1700			ret = LK_EXCLOTHER;
1701	} else if (x == LK_UNLOCKED)
1702		ret = 0;
1703
1704	return (ret);
1705}
1706
1707#ifdef INVARIANT_SUPPORT
1708
1709FEATURE(invariant_support,
1710    "Support for modules compiled with INVARIANTS option");
1711
1712#ifndef INVARIANTS
1713#undef	_lockmgr_assert
1714#endif
1715
1716void
1717_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1718{
1719	int slocked = 0;
1720
1721	if (KERNEL_PANICKED())
1722		return;
1723	switch (what) {
1724	case KA_SLOCKED:
1725	case KA_SLOCKED | KA_NOTRECURSED:
1726	case KA_SLOCKED | KA_RECURSED:
1727		slocked = 1;
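		/* FALLTHROUGH */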
1728	case KA_LOCKED:
1729	case KA_LOCKED | KA_NOTRECURSED:
1730	case KA_LOCKED | KA_RECURSED:
1731#ifdef WITNESS
1732
1733		/*
1734		 * We cannot trust WITNESS if the lock is held in exclusive
1735		 * mode and a call to lockmgr_disown() happened.
		 * Work around this by skipping the check if the lock is held
		 * in exclusive mode, even for the KA_LOCKED case.
1738		 */
1739		if (slocked || (lk->lk_lock & LK_SHARE)) {
1740			witness_assert(&lk->lock_object, what, file, line);
1741			break;
1742		}
1743#endif
1744		if (lk->lk_lock == LK_UNLOCKED ||
1745		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1746		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1747			panic("Lock %s not %slocked @ %s:%d\n",
1748			    lk->lock_object.lo_name, slocked ? "share" : "",
1749			    file, line);
1750
1751		if ((lk->lk_lock & LK_SHARE) == 0) {
1752			if (lockmgr_recursed(lk)) {
1753				if (what & KA_NOTRECURSED)
1754					panic("Lock %s recursed @ %s:%d\n",
1755					    lk->lock_object.lo_name, file,
1756					    line);
1757			} else if (what & KA_RECURSED)
1758				panic("Lock %s not recursed @ %s:%d\n",
1759				    lk->lock_object.lo_name, file, line);
1760		}
1761		break;
1762	case KA_XLOCKED:
1763	case KA_XLOCKED | KA_NOTRECURSED:
1764	case KA_XLOCKED | KA_RECURSED:
1765		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1766			panic("Lock %s not exclusively locked @ %s:%d\n",
1767			    lk->lock_object.lo_name, file, line);
1768		if (lockmgr_recursed(lk)) {
1769			if (what & KA_NOTRECURSED)
1770				panic("Lock %s recursed @ %s:%d\n",
1771				    lk->lock_object.lo_name, file, line);
1772		} else if (what & KA_RECURSED)
1773			panic("Lock %s not recursed @ %s:%d\n",
1774			    lk->lock_object.lo_name, file, line);
1775		break;
1776	case KA_UNLOCKED:
1777		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1778			panic("Lock %s exclusively locked @ %s:%d\n",
1779			    lk->lock_object.lo_name, file, line);
1780		break;
1781	default:
1782		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1783		    line);
1784	}
1785}
1786#endif
1787
1788#ifdef DDB
1789int
1790lockmgr_chain(struct thread *td, struct thread **ownerp)
1791{
1792	const struct lock *lk;
1793
1794	lk = td->td_wchan;
1795
1796	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1797		return (0);
1798	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1799	if (lk->lk_lock & LK_SHARE)
1800		db_printf("SHARED (count %ju)\n",
1801		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1802	else
1803		db_printf("EXCL\n");
1804	*ownerp = lockmgr_xholder(lk);
1805
1806	return (1);
1807}
1808
1809static void
1810db_show_lockmgr(const struct lock_object *lock)
1811{
1812	struct thread *td;
1813	const struct lock *lk;
1814
1815	lk = (const struct lock *)lock;
1816
1817	db_printf(" state: ");
1818	if (lk->lk_lock == LK_UNLOCKED)
1819		db_printf("UNLOCKED\n");
1820	else if (lk->lk_lock & LK_SHARE)
1821		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1822	else {
1823		td = lockmgr_xholder(lk);
1824		if (td == (struct thread *)LK_KERNPROC)
1825			db_printf("XLOCK: LK_KERNPROC\n");
1826		else
1827			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1828			    td->td_tid, td->td_proc->p_pid,
1829			    td->td_proc->p_comm);
1830		if (lockmgr_recursed(lk))
1831			db_printf(" recursed: %d\n", lk->lk_recurse);
1832	}
1833	db_printf(" waiters: ");
1834	switch (lk->lk_lock & LK_ALL_WAITERS) {
1835	case LK_SHARED_WAITERS:
1836		db_printf("shared\n");
1837		break;
1838	case LK_EXCLUSIVE_WAITERS:
1839		db_printf("exclusive\n");
1840		break;
1841	case LK_ALL_WAITERS:
1842		db_printf("shared and exclusive\n");
1843		break;
1844	default:
1845		db_printf("none\n");
1846	}
1847	db_printf(" spinners: ");
1848	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1849		db_printf("exclusive\n");
1850	else
1851		db_printf("none\n");
1852}
1853#endif
1854