/*-
 * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
 * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

/*
 * Shared/exclusive locks.  This implementation attempts to ensure
 * deterministic lock granting behavior, so that slocks and xlocks are
 * interleaved.
 *
 * Priority propagation will not generally raise the priority of lock holders,
 * so should not be relied upon in combination with sx locks.
 */
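
/*
 * Illustrative usage sketch (not part of this file's logic; "foo_sx" and
 * "foo_data" are hypothetical names):
 *
 *	static struct sx foo_sx;
 *
 *	sx_init(&foo_sx, "foo");
 *	sx_slock(&foo_sx);	... read foo_data ...	sx_sunlock(&foo_sx);
 *	sx_xlock(&foo_sx);	... modify foo_data ...	sx_xunlock(&foo_sx);
 *	sx_destroy(&foo_sx);
 */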

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_no_adaptive_sx.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_sx.c 341100 2018-11-27 22:33:58Z vangyzen $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
#include <machine/cpu.h>
#endif

#ifdef DDB
#include <ddb/ddb.h>
#endif

#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
#define	ADAPTIVE_SX
#endif

CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE);

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/* Handy macros for sleep queues. */
#define	SQ_EXCLUSIVE_QUEUE	0
#define	SQ_SHARED_QUEUE		1

/*
 * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
 * drop Giant anytime we have to sleep or if we adaptively spin.
 */
#define	GIANT_DECLARE							\
	int _giantcnt = 0;						\
	WITNESS_SAVE_DECL(Giant)					\

#define	GIANT_SAVE(work) do {						\
	if (__predict_false(mtx_owned(&Giant))) {			\
		work++;							\
		WITNESS_SAVE(&Giant.lock_object, Giant);		\
		while (mtx_owned(&Giant)) {				\
			_giantcnt++;					\
			mtx_unlock(&Giant);				\
		}							\
	}								\
} while (0)

#define GIANT_RESTORE() do {						\
	if (_giantcnt > 0) {						\
		mtx_assert(&Giant, MA_NOTOWNED);			\
		while (_giantcnt--)					\
			mtx_lock(&Giant);				\
		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
	}								\
} while (0)

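/*
 * Minimal sketch of how the Giant helpers above are meant to be used
 * around a blocking section (illustrative only; "extra_work" is whatever
 * counter the caller wants bumped when Giant was actually dropped):
 *
 *	GIANT_DECLARE;
 *	...
 *	GIANT_SAVE(extra_work);		// drop Giant before sleeping/spinning
 *	... sleep or spin ...
 *	GIANT_RESTORE();		// reacquire Giant before returning
 */
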
/*
 * Returns true if an exclusive lock is recursed.  It assumes
 * curthread currently has an exclusive lock.
 */
#define	sx_recursed(sx)		((sx)->sx_recurse != 0)

static void	assert_sx(const struct lock_object *lock, int what);
#ifdef DDB
static void	db_show_sx(const struct lock_object *lock);
#endif
static void	lock_sx(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_sx(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_sx(struct lock_object *lock);

struct lock_class lock_class_sx = {
	.lc_name = "sx",
	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_sx,
#ifdef DDB
	.lc_ddb_show = db_show_sx,
#endif
	.lc_lock = lock_sx,
	.lc_unlock = unlock_sx,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_sx,
#endif
};

#ifndef INVARIANTS
#define	_sx_assert(sx, what, file, line)
#endif

#ifdef ADAPTIVE_SX
static __read_frequently u_int asx_retries;
static __read_frequently u_int asx_loops;
static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");

static struct lock_delay_config __read_frequently sx_delay;

SYSCTL_INT(_debug_sx, OID_AUTO, delay_base, CTLFLAG_RW, &sx_delay.base,
    0, "");
SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
    0, "");

static void
sx_lock_delay_init(void *arg __unused)
{

	lock_delay_default_init(&sx_delay);
	asx_retries = 10;
	asx_loops = max(10000, sx_delay.max);
}
LOCK_DELAY_SYSINIT(sx_lock_delay_init);
#endif

void
assert_sx(const struct lock_object *lock, int what)
{

	sx_assert((const struct sx *)lock, what);
}

void
lock_sx(struct lock_object *lock, uintptr_t how)
{
	struct sx *sx;

	sx = (struct sx *)lock;
	if (how)
		sx_slock(sx);
	else
		sx_xlock(sx);
}

uintptr_t
unlock_sx(struct lock_object *lock)
{
	struct sx *sx;

	sx = (struct sx *)lock;
	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
	if (sx_xlocked(sx)) {
		sx_xunlock(sx);
		return (0);
	} else {
		sx_sunlock(sx);
		return (1);
	}
}
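
/*
 * The "how" cookie produced by unlock_sx() and consumed by lock_sx() lets
 * generic code (e.g. the sleep(9) machinery, going through the lock class
 * table) drop and reacquire an sx lock in the mode it was held.  A rough
 * sketch of that round trip:
 *
 *	how = LOCK_CLASS(&sx->lock_object)->lc_unlock(&sx->lock_object);
 *	... block ...
 *	LOCK_CLASS(&sx->lock_object)->lc_lock(&sx->lock_object, how);
 *
 * Here 0 means "was held exclusive" and nonzero means "was held shared".
 */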

#ifdef KDTRACE_HOOKS
int
owner_sx(const struct lock_object *lock, struct thread **owner)
{
	const struct sx *sx;
	uintptr_t x;

	sx = (const struct sx *)lock;
	x = sx->sx_lock;
	*owner = NULL;
	return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) :
	    ((*owner = (struct thread *)SX_OWNER(x)) != NULL));
}
#endif

void
sx_sysinit(void *arg)
{
	struct sx_args *sargs = arg;

	sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
}

void
sx_init_flags(struct sx *sx, const char *description, int opts)
{
	int flags;

	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
	    SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock,
	    ("%s: sx_lock not aligned for %s: %p", __func__, description,
	    &sx->sx_lock));

	flags = LO_SLEEPABLE | LO_UPGRADABLE;
	if (opts & SX_DUPOK)
		flags |= LO_DUPOK;
	if (opts & SX_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & SX_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & SX_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & SX_QUIET)
		flags |= LO_QUIET;
	if (opts & SX_NEW)
		flags |= LO_NEW;

	flags |= opts & SX_NOADAPTIVE;
	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
	sx->sx_lock = SX_LOCK_UNLOCKED;
	sx->sx_recurse = 0;
}
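
/*
 * Example initializations (illustrative sketch; "foo_sx" is hypothetical):
 *
 *	sx_init_flags(&foo_sx, "foo", SX_RECURSE | SX_DUPOK);
 *
 * or, for a lock that must exist before any explicit init code runs, via
 * the sysinit glue that ends up calling sx_sysinit() above:
 *
 *	static struct sx foo_sx;
 *	SX_SYSINIT(foo_sx_init, &foo_sx, "foo");
 */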

void
sx_destroy(struct sx *sx)
{

	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
	sx->sx_lock = SX_LOCK_DESTROYED;
	lock_destroy(&sx->lock_object);
}

int
sx_try_slock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("sx_try_slock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));

	x = sx->sx_lock;
	for (;;) {
		KASSERT(x != SX_LOCK_DESTROYED,
		    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
		if (!(x & SX_LOCK_SHARED))
			break;
		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, x + SX_ONE_SHARER)) {
			LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
			WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
			    sx, 0, 0, file, line, LOCKSTAT_READER);
			TD_LOCKS_INC(curthread);
			return (1);
		}
	}

	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
	return (0);
}

int
sx_try_slock_(struct sx *sx, const char *file, int line)
{

	return (sx_try_slock_int(sx LOCK_FILE_LINE_ARG));
}
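
/*
 * Typical try-lock pattern from a caller's point of view (sketch only;
 * "foo_sx" is hypothetical).  sx_try_slock() never sleeps, so it is usable
 * in contexts that must not block:
 *
 *	if (sx_try_slock(&foo_sx)) {
 *		... read shared state ...
 *		sx_sunlock(&foo_sx);
 *	} else {
 *		... fall back or retry later ...
 *	}
 */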

int
_sx_xlock(struct sx *sx, int opts, const char *file, int line)
{
	uintptr_t tid, x;
	int error = 0;

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(curthread),
	    ("sx_xlock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	tid = (uintptr_t)curthread;
	x = SX_LOCK_UNLOCKED;
	if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
		error = _sx_xlock_hard(sx, x, opts LOCK_FILE_LINE_ARG);
	else
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
		    0, 0, file, line, LOCKSTAT_WRITER);
	if (!error) {
		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
		    file, line);
		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
		TD_LOCKS_INC(curthread);
	}

	return (error);
}
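
/*
 * The "opts" argument is how SX_INTERRUPTIBLE reaches the hard path below.
 * From a consumer's perspective that looks roughly like this (sketch only;
 * "foo_sx" is hypothetical):
 *
 *	error = sx_xlock_sig(&foo_sx);
 *	if (error != 0)
 *		return (error);		// interrupted by a signal, no lock held
 *	...
 *	sx_xunlock(&foo_sx);
 */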

int
sx_try_xlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
{
	struct thread *td;
	uintptr_t tid, x;
	int rval;
	bool recursed;

	td = curthread;
	tid = (uintptr_t)td;
	if (SCHEDULER_STOPPED_TD(td))
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
	    ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));

	rval = 1;
	recursed = false;
	x = SX_LOCK_UNLOCKED;
	for (;;) {
		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
			break;
		if (x == SX_LOCK_UNLOCKED)
			continue;
		if (x == tid && (sx->lock_object.lo_flags & LO_RECURSABLE)) {
			sx->sx_recurse++;
			atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
			break;
		}
		rval = 0;
		break;
	}

	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		if (!recursed)
			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
			    sx, 0, 0, file, line, LOCKSTAT_WRITER);
		TD_LOCKS_INC(curthread);
	}

	return (rval);
}

int
sx_try_xlock_(struct sx *sx, const char *file, int line)
{

	return (sx_try_xlock_int(sx LOCK_FILE_LINE_ARG));
}

void
_sx_xunlock(struct sx *sx, const char *file, int line)
{

	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_XLOCKED, file, line);
	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
	    line);
#if LOCK_DEBUG > 0
	_sx_xunlock_hard(sx, (uintptr_t)curthread, file, line);
#else
	__sx_xunlock(sx, curthread, file, line);
#endif
	TD_LOCKS_DEC(curthread);
}
/*
 * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
 * This will only succeed if this thread holds a single shared lock.
 * Return 1 if the upgrade succeeded, 0 otherwise.
 */
int
sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;
	uintptr_t waiters;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_SLOCKED, file, line);

	/*
	 * Try to switch from one shared lock to an exclusive lock.  We need
	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
	 * we will wake up the exclusive waiters when we drop the lock.
	 */
	success = 0;
	x = SX_READ_VALUE(sx);
	for (;;) {
		if (SX_SHARERS(x) > 1)
			break;
		waiters = (x & SX_LOCK_EXCLUSIVE_WAITERS);
		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
		    (uintptr_t)curthread | waiters)) {
			success = 1;
			break;
		}
	}
	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
	if (success) {
		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(sx__upgrade, sx);
	}
	return (success);
}

int
sx_try_upgrade_(struct sx *sx, const char *file, int line)
{

	return (sx_try_upgrade_int(sx LOCK_FILE_LINE_ARG));
}
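
/*
 * Common caller pattern for the upgrade primitive (sketch only; "foo_sx"
 * is hypothetical).  Note that the fallback path drops the shared lock
 * entirely, so any state read under it must be revalidated:
 *
 *	sx_slock(&foo_sx);
 *	... decide an update is needed ...
 *	if (!sx_try_upgrade(&foo_sx)) {
 *		sx_sunlock(&foo_sx);
 *		sx_xlock(&foo_sx);
 *		... revalidate ...
 *	}
 *	... modify ...
 *	sx_xunlock(&foo_sx);
 */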

/*
 * Downgrade an unrecursed exclusive lock into a single shared lock.
 */
void
sx_downgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;
	int wakeup_swapper;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (sx_recursed(sx))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);

	/*
	 * Try to switch from an exclusive lock with no shared waiters
	 * to one sharer with no shared waiters.  If there are
	 * exclusive waiters, we don't need to lock the sleep queue so
	 * long as we preserve the flag.  We do one quick try and if
	 * that fails we grab the sleepq lock to keep the flags from
	 * changing and do it the slow way.
	 *
	 * We have to lock the sleep queue if there are shared waiters
	 * so we can wake them up.
	 */
	x = sx->sx_lock;
	if (!(x & SX_LOCK_SHARED_WAITERS) &&
	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
	    (x & SX_LOCK_EXCLUSIVE_WAITERS)))
		goto out;

	/*
	 * Lock the sleep queue so we can read the waiters bits
	 * without any races and wakeup any shared waiters.
	 */
	sleepq_lock(&sx->lock_object);

	/*
	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
	 * shared lock.  If there are any shared waiters, wake them up.
	 */
	wakeup_swapper = 0;
	x = sx->sx_lock;
	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
	if (x & SX_LOCK_SHARED_WAITERS)
		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
		    0, SQ_SHARED_QUEUE);
	sleepq_release(&sx->lock_object);

	if (wakeup_swapper)
		kick_proc0();

out:
	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(sx__downgrade, sx);
}

void
sx_downgrade_(struct sx *sx, const char *file, int line)
{

	sx_downgrade_int(sx LOCK_FILE_LINE_ARG);
}
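
/*
 * Typical downgrade usage (sketch only; "foo_sx" is hypothetical): set up
 * state under the exclusive lock, then keep only read access while letting
 * other readers in, without ever dropping the lock completely:
 *
 *	sx_xlock(&foo_sx);
 *	... initialize or update state ...
 *	sx_downgrade(&foo_sx);
 *	... continue using the state read-only ...
 *	sx_sunlock(&foo_sx);
 */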

/*
 * This function represents the so-called 'hard case' for sx_xlock
 * operation.  All 'easy case' failures are redirected to this.  Note
 * that ideally this would be a static function, but it needs to be
 * accessible from at least sx.h.
 */
int
_sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
{
	GIANT_DECLARE;
	uintptr_t tid;
#ifdef ADAPTIVE_SX
	volatile struct thread *owner;
	u_int i, n, spintries = 0;
	enum { READERS, WRITER } sleep_reason = READERS;
	bool adaptive;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	int error = 0;
#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef	KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state = 0;
#endif
	int extra_work = 0;

	tid = (uintptr_t)curthread;

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
		while (x == SX_LOCK_UNLOCKED) {
			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
				goto out_lockstat;
		}
		extra_work = 1;
		all_time -= lockstat_nsecs(&sx->lock_object);
		state = x;
	}
#endif
#ifdef LOCK_PROFILING
	extra_work = 1;
	state = x;
#endif

	if (SCHEDULER_STOPPED())
		return (0);

#if defined(ADAPTIVE_SX)
	lock_delay_arg_init(&lda, &sx_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif

	if (__predict_false(x == SX_LOCK_UNLOCKED))
		x = SX_READ_VALUE(sx);

	/* If we already hold an exclusive lock, then recurse. */
	if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) {
		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
		    sx->lock_object.lo_name, file, line));
		sx->sx_recurse++;
		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
		return (0);
	}

	if (LOCK_LOG_TEST(&sx->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);

#ifdef ADAPTIVE_SX
	adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
	    &waittime);

#ifndef INVARIANTS
	GIANT_SAVE(extra_work);
#endif

	for (;;) {
		if (x == SX_LOCK_UNLOCKED) {
			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
				break;
			continue;
		}
#ifdef INVARIANTS
		GIANT_SAVE(extra_work);
#endif
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_SX
		if (__predict_false(!adaptive))
			goto sleepq;
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		if ((x & SX_LOCK_SHARED) == 0) {
			sleep_reason = WRITER;
			owner = lv_sx_owner(x);
			if (!TD_IS_RUNNING(owner))
				goto sleepq;
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, sx, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    sx->lock_object.lo_name);
			do {
				lock_delay(&lda);
				x = SX_READ_VALUE(sx);
				owner = lv_sx_owner(x);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		} else if (SX_SHARERS(x) > 0) {
			sleep_reason = READERS;
			if (spintries == asx_retries)
				goto sleepq;
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    sx->lock_object.lo_name);
			for (i = 0; i < asx_loops; i += n) {
				n = SX_SHARERS(x);
				lock_delay_spin(n);
				x = SX_READ_VALUE(sx);
				if ((x & SX_LOCK_SHARED) == 0 ||
				    SX_SHARERS(x) == 0)
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			if (i < asx_loops)
				continue;
		}
sleepq:
#endif
		sleepq_lock(&sx->lock_object);
		x = SX_READ_VALUE(sx);
retry_sleepq:

		/*
		 * If the lock was released while spinning on the
		 * sleep queue chain lock, try again.
		 */
		if (x == SX_LOCK_UNLOCKED) {
			sleepq_release(&sx->lock_object);
			continue;
		}

#ifdef ADAPTIVE_SX
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the sleep queue
		 * chain lock.  If so, drop the sleep queue lock and try
		 * again.
		 */
		if (adaptive) {
			if (!(x & SX_LOCK_SHARED)) {
				owner = (struct thread *)SX_OWNER(x);
				if (TD_IS_RUNNING(owner)) {
					sleepq_release(&sx->lock_object);
					continue;
				}
			} else if (SX_SHARERS(x) > 0 && sleep_reason == WRITER) {
				sleepq_release(&sx->lock_object);
				continue;
			}
		}
#endif

		/*
		 * If an exclusive lock was released with both shared
		 * and exclusive waiters and a shared waiter hasn't
		 * woken up and acquired the lock yet, sx_lock will be
		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
		 * If we see that value, try to acquire it once.  Note
		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
		 * as there are other exclusive waiters still.  If we
		 * fail, restart the loop.
		 */
		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
			if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
			    tid | SX_LOCK_EXCLUSIVE_WAITERS))
				goto retry_sleepq;
			sleepq_release(&sx->lock_object);
			CTR2(KTR_LOCK, "%s: %p claimed by new writer",
			    __func__, sx);
			break;
		}

		/*
		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS.  If we fail,
		 * then loop back and retry.
		 */
		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
			if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
				goto retry_sleepq;
			}
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
				    __func__, sx);
		}

		/*
		 * Since we have been unable to acquire the exclusive
		 * lock and the exclusive waiters flag is set, we have
		 * to sleep.
		 */
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
			    __func__, sx);

#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&sx->lock_object);
#endif
		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
		if (!(opts & SX_INTERRUPTIBLE))
			sleepq_wait(&sx->lock_object, 0);
		else
			error = sleepq_wait_sig(&sx->lock_object, 0);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&sx->lock_object);
		sleep_cnt++;
#endif
		if (error) {
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR2(KTR_LOCK,
			"%s: interruptible sleep by %p suspended by signal",
				    __func__, sx);
			break;
		}
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
			    __func__, sx);
		x = SX_READ_VALUE(sx);
	}
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!extra_work))
		return (error);
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&sx->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
out_lockstat:
#endif
	if (!error)
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
		    contested, waittime, file, line, LOCKSTAT_WRITER);
	GIANT_RESTORE();
	return (error);
}

/*
 * This function represents the so-called 'hard case' for sx_xunlock
 * operation.  All 'easy case' failures are redirected to this.  Note
 * that ideally this would be a static function, but it needs to be
 * accessible from at least sx.h.
 */
void
_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t tid, setx;
	int queue, wakeup_swapper;

	if (SCHEDULER_STOPPED())
		return;

	tid = (uintptr_t)curthread;

	if (__predict_false(x == tid))
		x = SX_READ_VALUE(sx);

	MPASS(!(x & SX_LOCK_SHARED));

	if (__predict_false(x & SX_LOCK_RECURSED)) {
		/* The lock is recursed, unrecurse one level. */
		if ((--sx->sx_recurse) == 0)
			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
		return;
	}

	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_WRITER);
	if (x == tid &&
	    atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
		return;

	if (LOCK_LOG_TEST(&sx->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);

	sleepq_lock(&sx->lock_object);
	x = SX_READ_VALUE(sx);
	MPASS(x & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS));

	/*
	 * The wakeup algorithm here is simple and probably not ideal.
	 * It gives precedence to shared waiters if they are present,
	 * in which case we have to preserve the state of the exclusive
	 * waiters flag.
	 * If interruptible sleeps left the shared queue empty, avoid
	 * starving the threads sleeping on the exclusive queue by giving
	 * them precedence and clearing the shared waiters bit anyway.
	 */
	setx = SX_LOCK_UNLOCKED;
	queue = SQ_EXCLUSIVE_QUEUE;
	if ((x & SX_LOCK_SHARED_WAITERS) != 0 &&
	    sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
		queue = SQ_SHARED_QUEUE;
		setx |= (x & SX_LOCK_EXCLUSIVE_WAITERS);
	}
	atomic_store_rel_ptr(&sx->sx_lock, setx);

	/* Wake up all the waiters for the specific queue. */
	if (LOCK_LOG_TEST(&sx->lock_object, 0))
		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
		    "exclusive");

	wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
	    queue);
	sleepq_release(&sx->lock_object);
	if (wakeup_swapper)
		kick_proc0();
}

static bool __always_inline
__sx_slock_try(struct sx *sx, uintptr_t *xp LOCK_FILE_LINE_ARG_DEF)
{

	/*
	 * If no other thread has an exclusive lock then try to bump up
	 * the count of sharers.  Since we have to preserve the state
	 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
	 * shared lock loop back and retry.
	 */
	while (*xp & SX_LOCK_SHARED) {
		MPASS(!(*xp & SX_LOCK_SHARED_WAITERS));
		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, xp,
		    *xp + SX_ONE_SHARER)) {
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR4(KTR_LOCK, "%s: %p succeed %p -> %p",
				    __func__, sx, (void *)*xp,
				    (void *)(*xp + SX_ONE_SHARER));
			return (true);
		}
	}
	return (false);
}

static int __noinline
_sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
{
	GIANT_DECLARE;
#ifdef ADAPTIVE_SX
	volatile struct thread *owner;
	bool adaptive;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	int error = 0;
#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state = 0;
#endif
	int extra_work = 0;

#ifdef KDTRACE_HOOKS
	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
		if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
			goto out_lockstat;
		extra_work = 1;
		all_time -= lockstat_nsecs(&sx->lock_object);
		state = x;
	}
#endif
#ifdef LOCK_PROFILING
	extra_work = 1;
	state = x;
#endif

	if (SCHEDULER_STOPPED())
		return (0);

#if defined(ADAPTIVE_SX)
	lock_delay_arg_init(&lda, &sx_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif

#ifdef ADAPTIVE_SX
	adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
	    &waittime);

#ifndef INVARIANTS
	GIANT_SAVE(extra_work);
#endif

	/*
	 * As with rwlocks, we don't make any attempt to try to block
	 * shared locks once there is an exclusive waiter.
	 */
	for (;;) {
		if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
			break;
#ifdef INVARIANTS
		GIANT_SAVE(extra_work);
#endif
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif

#ifdef ADAPTIVE_SX
		if (__predict_false(!adaptive))
			goto sleepq;
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		owner = lv_sx_owner(x);
		if (TD_IS_RUNNING(owner)) {
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR3(KTR_LOCK,
				    "%s: spinning on %p held by %p",
				    __func__, sx, owner);
			KTR_STATE1(KTR_SCHED, "thread",
			    sched_tdname(curthread), "spinning",
			    "lockname:\"%s\"", sx->lock_object.lo_name);
			do {
				lock_delay(&lda);
				x = SX_READ_VALUE(sx);
				owner = lv_sx_owner(x);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread",
			    sched_tdname(curthread), "running");
			continue;
		}
sleepq:
#endif

		/*
		 * Some other thread already has an exclusive lock, so
		 * start the process of blocking.
		 */
		sleepq_lock(&sx->lock_object);
		x = SX_READ_VALUE(sx);
retry_sleepq:
		/*
		 * The lock could have been released while we spun.
		 * In this case loop back and retry.
		 */
		if (x & SX_LOCK_SHARED) {
			sleepq_release(&sx->lock_object);
			continue;
		}

#ifdef ADAPTIVE_SX
		/*
		 * If the owner is running on another CPU, spin until
		 * the owner stops running or the state of the lock
		 * changes.
		 */
		if (!(x & SX_LOCK_SHARED) && adaptive) {
			owner = (struct thread *)SX_OWNER(x);
			if (TD_IS_RUNNING(owner)) {
				sleepq_release(&sx->lock_object);
				x = SX_READ_VALUE(sx);
				continue;
			}
		}
#endif

		/*
		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
		 * fail to set it drop the sleep queue lock and loop
		 * back.
		 */
		if (!(x & SX_LOCK_SHARED_WAITERS)) {
			if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
			    x | SX_LOCK_SHARED_WAITERS))
				goto retry_sleepq;
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
				    __func__, sx);
		}

		/*
		 * Since we have been unable to acquire the shared lock,
		 * we have to sleep.
		 */
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
			    __func__, sx);

#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&sx->lock_object);
#endif
		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
		if (!(opts & SX_INTERRUPTIBLE))
			sleepq_wait(&sx->lock_object, 0);
		else
			error = sleepq_wait_sig(&sx->lock_object, 0);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&sx->lock_object);
		sleep_cnt++;
#endif
		if (error) {
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR2(KTR_LOCK,
			"%s: interruptible sleep by %p suspended by signal",
				    __func__, sx);
			break;
		}
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
			    __func__, sx);
		x = SX_READ_VALUE(sx);
	}
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!extra_work))
		return (error);
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&sx->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
out_lockstat:
#endif
	if (error == 0) {
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
		    contested, waittime, file, line, LOCKSTAT_READER);
	}
	GIANT_RESTORE();
	return (error);
}

int
_sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;
	int error;

	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
	    !TD_IS_IDLETHREAD(curthread),
	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);

	error = 0;
	x = SX_READ_VALUE(sx);
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) ||
	    !__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG)))
		error = _sx_slock_hard(sx, opts, x LOCK_FILE_LINE_ARG);
	else
		lock_profile_obtain_lock_success(&sx->lock_object, 0, 0,
		    file, line);
	if (error == 0) {
		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
		WITNESS_LOCK(&sx->lock_object, 0, file, line);
		TD_LOCKS_INC(curthread);
	}
	return (error);
}

int
_sx_slock(struct sx *sx, int opts, const char *file, int line)
{

	return (_sx_slock_int(sx, opts LOCK_FILE_LINE_ARG));
}

static bool __always_inline
_sx_sunlock_try(struct sx *sx, uintptr_t *xp)
{

	for (;;) {
		/*
		 * We should never have shared waiters while at least one
		 * thread holds a shared lock.
		 */
		KASSERT(!(*xp & SX_LOCK_SHARED_WAITERS),
		    ("%s: waiting sharers", __func__));

		/*
		 * See if there is more than one shared lock held.  If
		 * so, just drop one and return.
		 */
		if (SX_SHARERS(*xp) > 1) {
			if (atomic_fcmpset_rel_ptr(&sx->sx_lock, xp,
			    *xp - SX_ONE_SHARER)) {
				if (LOCK_LOG_TEST(&sx->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, sx, (void *)*xp,
					    (void *)(*xp - SX_ONE_SHARER));
				return (true);
			}
			continue;
		}

		/*
		 * If there aren't any waiters for an exclusive lock,
		 * then try to drop it quickly.
		 */
		if (!(*xp & SX_LOCK_EXCLUSIVE_WAITERS)) {
			MPASS(*xp == SX_SHARERS_LOCK(1));
			*xp = SX_SHARERS_LOCK(1);
			if (atomic_fcmpset_rel_ptr(&sx->sx_lock,
			    xp, SX_LOCK_UNLOCKED)) {
				if (LOCK_LOG_TEST(&sx->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, sx);
				return (true);
			}
			continue;
		}
		break;
	}
	return (false);
}

static void __noinline
_sx_sunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
{
	int wakeup_swapper = 0;
	uintptr_t setx;

	if (SCHEDULER_STOPPED())
		return;

	if (_sx_sunlock_try(sx, &x))
		goto out_lockstat;

	/*
	 * At this point, there should just be one sharer with
	 * exclusive waiters.
	 */
	MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));

	sleepq_lock(&sx->lock_object);
	x = SX_READ_VALUE(sx);
	for (;;) {
		MPASS(x & SX_LOCK_EXCLUSIVE_WAITERS);
		MPASS(!(x & SX_LOCK_SHARED_WAITERS));
		if (_sx_sunlock_try(sx, &x))
			break;

		/*
		 * The wakeup semantics here are simple: just wake up
		 * all the exclusive waiters.
		 * Note that the state of the lock could have changed,
		 * so if the cmpset fails, loop back and retry.
		 */
		setx = x - SX_ONE_SHARER;
		setx &= ~SX_LOCK_EXCLUSIVE_WAITERS;
		if (!atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, setx))
			continue;
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p waking up all threads on "
			    "exclusive queue", __func__, sx);
		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
		    0, SQ_EXCLUSIVE_QUEUE);
		break;
	}
	sleepq_release(&sx->lock_object);
	if (wakeup_swapper)
		kick_proc0();
out_lockstat:
	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
}

void
_sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
{
	uintptr_t x;

	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_SLOCKED, file, line);
	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);

	x = SX_READ_VALUE(sx);
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
	    !_sx_sunlock_try(sx, &x)))
		_sx_sunlock_hard(sx, x LOCK_FILE_LINE_ARG);
	else
		lock_profile_release_lock(&sx->lock_object);

	TD_LOCKS_DEC(curthread);
}

void
_sx_sunlock(struct sx *sx, const char *file, int line)
{

	_sx_sunlock_int(sx LOCK_FILE_LINE_ARG);
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef	_sx_assert
#endif

/*
 * In the non-WITNESS case, sx_assert() can only detect that at least
 * *some* thread owns an slock, but it cannot guarantee that *this*
 * thread owns an slock.
 */
void
_sx_assert(const struct sx *sx, int what, const char *file, int line)
{
#ifndef WITNESS
	int slocked = 0;
#endif

	if (SCHEDULER_STOPPED())
		return;
	switch (what) {
	case SA_SLOCKED:
	case SA_SLOCKED | SA_NOTRECURSED:
	case SA_SLOCKED | SA_RECURSED:
#ifndef WITNESS
		slocked = 1;
		/* FALLTHROUGH */
#endif
	case SA_LOCKED:
	case SA_LOCKED | SA_NOTRECURSED:
	case SA_LOCKED | SA_RECURSED:
#ifdef WITNESS
		witness_assert(&sx->lock_object, what, file, line);
#else
		/*
		 * If some other thread has an exclusive lock or we
		 * have one and are asserting a shared lock, fail.
		 * Also, if no one has a lock at all, fail.
		 */
		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
		    sx_xholder(sx) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    sx->lock_object.lo_name, slocked ? "share " : "",
			    file, line);

		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
			if (sx_recursed(sx)) {
				if (what & SA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    sx->lock_object.lo_name, file,
					    line);
			} else if (what & SA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    sx->lock_object.lo_name, file, line);
		}
#endif
		break;
	case SA_XLOCKED:
	case SA_XLOCKED | SA_NOTRECURSED:
	case SA_XLOCKED | SA_RECURSED:
		if (sx_xholder(sx) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    sx->lock_object.lo_name, file, line);
		if (sx_recursed(sx)) {
			if (what & SA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    sx->lock_object.lo_name, file, line);
		} else if (what & SA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    sx->lock_object.lo_name, file, line);
		break;
	case SA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&sx->lock_object, what, file, line);
#else
		/*
		 * If we hold an exclusive lock, fail.  We can't
		 * reliably check whether we hold a shared lock or
		 * not.
		 */
		if (sx_xholder(sx) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    sx->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif	/* INVARIANT_SUPPORT */
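
/*
 * Typical consumer of sx_assert() (sketch only; "foo_sx" and foo_modify()
 * are hypothetical): functions that require their caller to hold the lock
 * document and enforce that with an assertion.
 *
 *	static void
 *	foo_modify(void)
 *	{
 *
 *		sx_assert(&foo_sx, SA_XLOCKED);
 *		... modify state protected by foo_sx ...
 *	}
 */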

#ifdef DDB
static void
db_show_sx(const struct lock_object *lock)
{
	struct thread *td;
	const struct sx *sx;

	sx = (const struct sx *)lock;

	db_printf(" state: ");
	if (sx->sx_lock == SX_LOCK_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (sx->sx_lock == SX_LOCK_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (sx->sx_lock & SX_LOCK_SHARED)
		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
	else {
		td = sx_xholder(sx);
		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (sx_recursed(sx))
			db_printf(" recursed: %d\n", sx->sx_recurse);
	}

	db_printf(" waiters: ");
	switch (sx->sx_lock &
	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
	case SX_LOCK_SHARED_WAITERS:
		db_printf("shared\n");
		break;
	case SX_LOCK_EXCLUSIVE_WAITERS:
		db_printf("exclusive\n");
		break;
	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
		db_printf("exclusive and shared\n");
		break;
	default:
		db_printf("none\n");
	}
}

/*
 * Check to see if a thread that is blocked on a sleep queue is actually
 * blocked on an sx lock.  If so, output some details and return true.
 * If the lock has an exclusive owner, return that in *ownerp.
 */
int
sx_chain(struct thread *td, struct thread **ownerp)
{
	struct sx *sx;

	/*
	 * Check to see if this thread is blocked on an sx lock.
	 * First, we check the lock class.  If that is ok, then we
	 * compare the lock name against the wait message.
	 */
	sx = td->td_wchan;
	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
	    sx->lock_object.lo_name != td->td_wmesg)
		return (0);

	/* We think we have an sx lock, so output some details. */
	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
	*ownerp = sx_xholder(sx);
	if (sx->sx_lock & SX_LOCK_SHARED)
		db_printf("SLOCK (count %ju)\n",
		    (uintmax_t)SX_SHARERS(sx->sx_lock));
	else
		db_printf("XLOCK\n");
	return (1);
}
#endif
