1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
5 * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice(s), this list of conditions and the following disclaimer as
13 *    the first lines of this file unmodified other than the possible
14 *    addition of one or more copyright notices.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice(s), this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 * DAMAGE.
30 */
31
32/*
33 * Shared/exclusive locks.  This implementation attempts to ensure
34 * deterministic lock granting behavior, so that slocks and xlocks are
35 * interleaved.
36 *
37 * Priority propagation will not generally raise the priority of lock holders,
38 * so should not be relied upon in combination with sx locks.
39 */
40
41#include "opt_ddb.h"
42#include "opt_hwpmc_hooks.h"
43#include "opt_no_adaptive_sx.h"
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD$");
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/kdb.h>
51#include <sys/kernel.h>
52#include <sys/ktr.h>
53#include <sys/lock.h>
54#include <sys/mutex.h>
55#include <sys/proc.h>
56#include <sys/sched.h>
57#include <sys/sleepqueue.h>
58#include <sys/sx.h>
59#include <sys/smp.h>
60#include <sys/sysctl.h>
61
62#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
63#include <machine/cpu.h>
64#endif
65
66#ifdef DDB
67#include <ddb/ddb.h>
68#endif
69
70#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
71#define	ADAPTIVE_SX
72#endif
73
74CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE);
75
76#ifdef HWPMC_HOOKS
77#include <sys/pmckern.h>
78PMC_SOFT_DECLARE( , , lock, failed);
79#endif
80
/*
 * Handy macros for sleep queues.  Threads waiting for an exclusive lock
 * are queued on SQ_EXCLUSIVE_QUEUE; shared waiters are woken from
 * SQ_SHARED_QUEUE.
 */
#define	SQ_EXCLUSIVE_QUEUE	0
#define	SQ_SHARED_QUEUE		1
84
/*
 * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
 * drop Giant anytime we have to sleep or if we adaptively spin.
 *
 * GIANT_DECLARE declares the function-local state shared by GIANT_SAVE()
 * and GIANT_RESTORE(): _giantcnt counts how many times Giant was unlocked,
 * and WITNESS_SAVE_DECL() provides the storage used by WITNESS_SAVE().
 */
#define	GIANT_DECLARE							\
	int _giantcnt = 0;						\
	WITNESS_SAVE_DECL(Giant)					\

/*
 * If curthread owns Giant: increment 'work', save the WITNESS state and
 * unlock Giant repeatedly until it is no longer owned, counting every
 * unlock in _giantcnt.
 */
#define	GIANT_SAVE(work) do {						\
	if (__predict_false(mtx_owned(&Giant))) {			\
		work++;							\
		WITNESS_SAVE(&Giant.lock_object, Giant);		\
		while (mtx_owned(&Giant)) {				\
			_giantcnt++;					\
			mtx_unlock(&Giant);				\
		}							\
	}								\
} while (0)

/*
 * Undo GIANT_SAVE(): lock Giant once for every unlock that was counted and
 * restore the saved WITNESS state.  A no-op when _giantcnt is zero.
 */
#define GIANT_RESTORE() do {						\
	if (_giantcnt > 0) {						\
		mtx_assert(&Giant, MA_NOTOWNED);			\
		while (_giantcnt--)					\
			mtx_lock(&Giant);				\
		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
	}								\
} while (0)
112
/*
 * Returns true if an exclusive lock is recursed.  It assumes
 * curthread currently has an exclusive lock.  The test only looks at the
 * sx_recurse counter, not at the lock word.
 */
#define	sx_recursed(sx)		((sx)->sx_recurse != 0)
118
/*
 * Forward declarations of the lock_class methods installed in
 * lock_class_sx below.  The DDB and KDTRACE hooks are only declared when
 * the corresponding kernel option is configured.
 */
static void	assert_sx(const struct lock_object *lock, int what);
#ifdef DDB
static void	db_show_sx(const struct lock_object *lock);
#endif
static void	lock_sx(struct lock_object *lock, uintptr_t how);
#ifdef KDTRACE_HOOKS
static int	owner_sx(const struct lock_object *lock, struct thread **owner);
#endif
static uintptr_t unlock_sx(struct lock_object *lock);
128
/*
 * Lock class descriptor for sx locks: names the class, declares it a
 * sleepable, recursable and upgradable sleep lock, and wires in the
 * generic assert/lock/unlock methods (plus the optional DDB "show" and
 * DTrace owner hooks).
 */
struct lock_class lock_class_sx = {
	.lc_name = "sx",
	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_sx,
#ifdef DDB
	.lc_ddb_show = db_show_sx,
#endif
	.lc_lock = lock_sx,
	.lc_unlock = unlock_sx,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_sx,
#endif
};
142
/* Without INVARIANTS the internal assertion calls in this file vanish. */
#ifndef INVARIANTS
#define	_sx_assert(sx, what, file, line)
#endif
146
147#ifdef ADAPTIVE_SX
/*
 * Adaptive spinning knobs, exported read-write under the debug.sx sysctl
 * node.  asx_retries caps the number of spin rounds attempted against a
 * share-locked sx and asx_loops caps the length of each round; sx_delay is
 * the lock_delay() configuration used while spinning on a running owner.
 */
static __read_frequently u_int asx_retries;
static __read_frequently u_int asx_loops;
static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");

static struct lock_delay_config __read_frequently sx_delay;

SYSCTL_INT(_debug_sx, OID_AUTO, delay_base, CTLFLAG_RW, &sx_delay.base,
    0, "");
SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
    0, "");
160
161static void
162sx_lock_delay_init(void *arg __unused)
163{
164
165	lock_delay_default_init(&sx_delay);
166	asx_retries = 10;
167	asx_loops = max(10000, sx_delay.max);
168}
169LOCK_DELAY_SYSINIT(sx_lock_delay_init);
170#endif
171
/*
 * lock_class assert method: reinterpret the generic lock object as an sx
 * lock and hand the request to sx_assert().
 */
void
assert_sx(const struct lock_object *lock, int what)
{

	/* No local is used here: sx_assert() may compile away to nothing. */
	sx_assert((const struct sx *)lock, what);
}
178
179void
180lock_sx(struct lock_object *lock, uintptr_t how)
181{
182	struct sx *sx;
183
184	sx = (struct sx *)lock;
185	if (how)
186		sx_slock(sx);
187	else
188		sx_xlock(sx);
189}
190
191uintptr_t
192unlock_sx(struct lock_object *lock)
193{
194	struct sx *sx;
195
196	sx = (struct sx *)lock;
197	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
198	if (sx_xlocked(sx)) {
199		sx_xunlock(sx);
200		return (0);
201	} else {
202		sx_sunlock(sx);
203		return (1);
204	}
205}
206
207#ifdef KDTRACE_HOOKS
208int
209owner_sx(const struct lock_object *lock, struct thread **owner)
210{
211	const struct sx *sx;
212	uintptr_t x;
213
214	sx = (const struct sx *)lock;
215	x = sx->sx_lock;
216	*owner = NULL;
217	return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) :
218	    ((*owner = (struct thread *)SX_OWNER(x)) != NULL));
219}
220#endif
221
222void
223sx_sysinit(void *arg)
224{
225	struct sx_args *sargs = arg;
226
227	sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
228}
229
230void
231sx_init_flags(struct sx *sx, const char *description, int opts)
232{
233	int flags;
234
235	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
236	    SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0);
237	ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock,
238	    ("%s: sx_lock not aligned for %s: %p", __func__, description,
239	    &sx->sx_lock));
240
241	flags = LO_SLEEPABLE | LO_UPGRADABLE;
242	if (opts & SX_DUPOK)
243		flags |= LO_DUPOK;
244	if (opts & SX_NOPROFILE)
245		flags |= LO_NOPROFILE;
246	if (!(opts & SX_NOWITNESS))
247		flags |= LO_WITNESS;
248	if (opts & SX_RECURSE)
249		flags |= LO_RECURSABLE;
250	if (opts & SX_QUIET)
251		flags |= LO_QUIET;
252	if (opts & SX_NEW)
253		flags |= LO_NEW;
254
255	flags |= opts & SX_NOADAPTIVE;
256	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
257	sx->sx_lock = SX_LOCK_UNLOCKED;
258	sx->sx_recurse = 0;
259}
260
261void
262sx_destroy(struct sx *sx)
263{
264
265	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
266	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
267	sx->sx_lock = SX_LOCK_DESTROYED;
268	lock_destroy(&sx->lock_object);
269}
270
271int
272sx_try_slock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
273{
274	uintptr_t x;
275
276	if (SCHEDULER_STOPPED())
277		return (1);
278
279	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
280	    ("sx_try_slock() by idle thread %p on sx %s @ %s:%d",
281	    curthread, sx->lock_object.lo_name, file, line));
282
283	x = sx->sx_lock;
284	for (;;) {
285		KASSERT(x != SX_LOCK_DESTROYED,
286		    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
287		if (!(x & SX_LOCK_SHARED))
288			break;
289		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, x + SX_ONE_SHARER)) {
290			LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
291			WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
292			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
293			    sx, 0, 0, file, line, LOCKSTAT_READER);
294			TD_LOCKS_INC(curthread);
295			curthread->td_sx_slocks++;
296			return (1);
297		}
298	}
299
300	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
301	return (0);
302}
303
304int
305sx_try_slock_(struct sx *sx, const char *file, int line)
306{
307
308	return (sx_try_slock_int(sx LOCK_FILE_LINE_ARG));
309}
310
311int
312_sx_xlock(struct sx *sx, int opts, const char *file, int line)
313{
314	uintptr_t tid, x;
315	int error = 0;
316
317	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
318	    !TD_IS_IDLETHREAD(curthread),
319	    ("sx_xlock() by idle thread %p on sx %s @ %s:%d",
320	    curthread, sx->lock_object.lo_name, file, line));
321	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
322	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
323	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
324	    line, NULL);
325	tid = (uintptr_t)curthread;
326	x = SX_LOCK_UNLOCKED;
327	if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
328		error = _sx_xlock_hard(sx, x, opts LOCK_FILE_LINE_ARG);
329	else
330		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
331		    0, 0, file, line, LOCKSTAT_WRITER);
332	if (!error) {
333		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
334		    file, line);
335		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
336		TD_LOCKS_INC(curthread);
337	}
338
339	return (error);
340}
341
342int
343sx_try_xlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
344{
345	struct thread *td;
346	uintptr_t tid, x;
347	int rval;
348	bool recursed;
349
350	td = curthread;
351	tid = (uintptr_t)td;
352	if (SCHEDULER_STOPPED_TD(td))
353		return (1);
354
355	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
356	    ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d",
357	    curthread, sx->lock_object.lo_name, file, line));
358	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
359	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));
360
361	rval = 1;
362	recursed = false;
363	x = SX_LOCK_UNLOCKED;
364	for (;;) {
365		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
366			break;
367		if (x == SX_LOCK_UNLOCKED)
368			continue;
369		if (x == tid && (sx->lock_object.lo_flags & LO_RECURSABLE)) {
370			sx->sx_recurse++;
371			atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
372			break;
373		}
374		rval = 0;
375		break;
376	}
377
378	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
379	if (rval) {
380		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
381		    file, line);
382		if (!recursed)
383			LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire,
384			    sx, 0, 0, file, line, LOCKSTAT_WRITER);
385		TD_LOCKS_INC(curthread);
386	}
387
388	return (rval);
389}
390
391int
392sx_try_xlock_(struct sx *sx, const char *file, int line)
393{
394
395	return (sx_try_xlock_int(sx LOCK_FILE_LINE_ARG));
396}
397
398void
399_sx_xunlock(struct sx *sx, const char *file, int line)
400{
401
402	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
403	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
404	_sx_assert(sx, SA_XLOCKED, file, line);
405	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
406	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
407	    line);
408#if LOCK_DEBUG > 0
409	_sx_xunlock_hard(sx, (uintptr_t)curthread, file, line);
410#else
411	__sx_xunlock(sx, curthread, file, line);
412#endif
413	TD_LOCKS_DEC(curthread);
414}
415
416/*
417 * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
418 * This will only succeed if this thread holds a single shared lock.
 * Return 1 if the upgrade succeeds, 0 otherwise.
420 */
421int
422sx_try_upgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
423{
424	uintptr_t x;
425	uintptr_t waiters;
426	int success;
427
428	if (SCHEDULER_STOPPED())
429		return (1);
430
431	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
432	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
433	_sx_assert(sx, SA_SLOCKED, file, line);
434
435	/*
436	 * Try to switch from one shared lock to an exclusive lock.  We need
437	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
438	 * we will wake up the exclusive waiters when we drop the lock.
439	 */
440	success = 0;
441	x = SX_READ_VALUE(sx);
442	for (;;) {
443		if (SX_SHARERS(x) > 1)
444			break;
445		waiters = (x & SX_LOCK_WAITERS);
446		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
447		    (uintptr_t)curthread | waiters)) {
448			success = 1;
449			break;
450		}
451	}
452	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
453	if (success) {
454		curthread->td_sx_slocks--;
455		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
456		    file, line);
457		LOCKSTAT_RECORD0(sx__upgrade, sx);
458	}
459	return (success);
460}
461
462int
463sx_try_upgrade_(struct sx *sx, const char *file, int line)
464{
465
466	return (sx_try_upgrade_int(sx LOCK_FILE_LINE_ARG));
467}
468
469/*
470 * Downgrade an unrecursed exclusive lock into a single shared lock.
471 */
472void
473sx_downgrade_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
474{
475	uintptr_t x;
476	int wakeup_swapper;
477
478	if (SCHEDULER_STOPPED())
479		return;
480
481	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
482	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
483	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
484#ifndef INVARIANTS
485	if (sx_recursed(sx))
486		panic("downgrade of a recursed lock");
487#endif
488
489	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);
490
491	/*
492	 * Try to switch from an exclusive lock with no shared waiters
493	 * to one sharer with no shared waiters.  If there are
494	 * exclusive waiters, we don't need to lock the sleep queue so
495	 * long as we preserve the flag.  We do one quick try and if
496	 * that fails we grab the sleepq lock to keep the flags from
497	 * changing and do it the slow way.
498	 *
499	 * We have to lock the sleep queue if there are shared waiters
500	 * so we can wake them up.
501	 */
502	x = sx->sx_lock;
503	if (!(x & SX_LOCK_SHARED_WAITERS) &&
504	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
505	    (x & SX_LOCK_EXCLUSIVE_WAITERS)))
506		goto out;
507
508	/*
509	 * Lock the sleep queue so we can read the waiters bits
510	 * without any races and wakeup any shared waiters.
511	 */
512	sleepq_lock(&sx->lock_object);
513
514	/*
515	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
516	 * shared lock.  If there are any shared waiters, wake them up.
517	 */
518	wakeup_swapper = 0;
519	x = sx->sx_lock;
520	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
521	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
522	if (x & SX_LOCK_SHARED_WAITERS)
523		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
524		    0, SQ_SHARED_QUEUE);
525	sleepq_release(&sx->lock_object);
526
527	if (wakeup_swapper)
528		kick_proc0();
529
530out:
531	curthread->td_sx_slocks++;
532	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
533	LOCKSTAT_RECORD0(sx__downgrade, sx);
534}
535
536void
537sx_downgrade_(struct sx *sx, const char *file, int line)
538{
539
540	sx_downgrade_int(sx LOCK_FILE_LINE_ARG);
541}
542
#ifdef	ADAPTIVE_SX
/*
 * Leave the critical section entered while advertising a write spinner,
 * unless lock word 'x' still has SX_LOCK_WRITE_SPINNER set.  On exit the
 * caller's in_critical flag is cleared and its extra_work count dropped by
 * one.
 */
static inline void
sx_drop_critical(uintptr_t x, bool *in_critical, int *extra_work)
{

	if ((x & SX_LOCK_WRITE_SPINNER) != 0 || !*in_critical)
		return;

	critical_exit();
	*in_critical = false;
	(*extra_work)--;
}
#else
/* Without adaptive spinning there is never a critical section to drop. */
#define sx_drop_critical(x, in_critical, extra_work) do { } while(0)
#endif
559
/*
 * This function represents the so-called 'hard case' for sx_xlock
 * operation.  All 'easy case' failures are redirected to this.  Note
 * that ideally this would be a static function, but it needs to be
 * accessible from at least sx.h.
 *
 * 'x' is the lock word value the caller observed when its fast path
 * failed.  'opts' may contain SX_INTERRUPTIBLE, in which case the sleep is
 * done with sleepq_wait_sig().
 *
 * Returns 0 when the exclusive lock has been acquired, when it was
 * recursed on, or when the scheduler is stopped.  Otherwise it returns the
 * non-zero error from sleepq_wait_sig(), and the lock has not been acquired
 * by this call.
 */
int
_sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
{
	GIANT_DECLARE;
	uintptr_t tid, setx;
#ifdef ADAPTIVE_SX
	volatile struct thread *owner;
	u_int i, n, spintries = 0;
	enum { READERS, WRITER } sleep_reason = READERS;
	bool adaptive;
	bool in_critical = false;
#endif
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	int error = 0;
#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
	struct lock_delay_arg lda;
#endif
#ifdef	KDTRACE_HOOKS
	u_int sleep_cnt = 0;
	int64_t sleep_time = 0;
	int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	uintptr_t state = 0;
	int doing_lockprof = 0;
#endif
	/*
	 * Non-zero when the slow exit path after the loop is needed
	 * (profiling active, Giant dropped, or a critical section entered).
	 */
	int extra_work = 0;

	tid = (uintptr_t)curthread;

#ifdef KDTRACE_HOOKS
	/*
	 * With the acquire probe enabled, retry the uncontended acquisition
	 * here so a success is reported through out_lockstat; otherwise
	 * start timing and remember the initial lock state for the probes.
	 */
	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
		while (x == SX_LOCK_UNLOCKED) {
			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
				goto out_lockstat;
		}
		extra_work = 1;
		doing_lockprof = 1;
		all_time -= lockstat_nsecs(&sx->lock_object);
		state = x;
	}
#endif
#ifdef LOCK_PROFILING
	extra_work = 1;
	doing_lockprof = 1;
	state = x;
#endif

	if (SCHEDULER_STOPPED())
		return (0);

#if defined(ADAPTIVE_SX)
	lock_delay_arg_init(&lda, &sx_delay);
#elif defined(KDTRACE_HOOKS)
	lock_delay_arg_init(&lda, NULL);
#endif

	/* Refresh a stale "unlocked" snapshot before inspecting the owner. */
	if (__predict_false(x == SX_LOCK_UNLOCKED))
		x = SX_READ_VALUE(sx);

	/* If we already hold an exclusive lock, then recurse. */
	if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) {
		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
		    sx->lock_object.lo_name, file, line));
		sx->sx_recurse++;
		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
		return (0);
	}

	if (LOCK_LOG_TEST(&sx->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);

#ifdef ADAPTIVE_SX
	/* Spinning is allowed unless the lock was created SX_NOADAPTIVE. */
	adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0);
#endif

#ifdef HWPMC_HOOKS
	PMC_SOFT_CALL( , , lock, failed);
#endif
	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
	    &waittime);

	/*
	 * Without INVARIANTS Giant is dropped once, up front.  With
	 * INVARIANTS it is dropped inside the loop instead, after the
	 * unlocked-retry check.
	 */
#ifndef INVARIANTS
	GIANT_SAVE(extra_work);
#endif

	for (;;) {
		/* The lock looks free: try to grab it, re-evaluate on failure. */
		if (x == SX_LOCK_UNLOCKED) {
			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
				break;
			continue;
		}
#ifdef INVARIANTS
		GIANT_SAVE(extra_work);
#endif
#ifdef KDTRACE_HOOKS
		lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_SX
		if (__predict_false(!adaptive))
			goto sleepq;
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		if ((x & SX_LOCK_SHARED) == 0) {
			sx_drop_critical(x, &in_critical, &extra_work);
			sleep_reason = WRITER;
			owner = lv_sx_owner(x);
			if (!TD_IS_RUNNING(owner))
				goto sleepq;
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
				    __func__, sx, owner);
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    sx->lock_object.lo_name);
			do {
				lock_delay(&lda);
				x = SX_READ_VALUE(sx);
				owner = lv_sx_owner(x);
			} while (owner != NULL && TD_IS_RUNNING(owner));
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			continue;
		} else if (SX_SHARERS(x) > 0) {
			/*
			 * Share-locked: spin for a bounded number of rounds
			 * (asx_retries) of at most asx_loops iterations,
			 * with SX_LOCK_WRITE_SPINNER set in the lock word
			 * and this thread inside a critical section.
			 */
			sleep_reason = READERS;
			if (spintries == asx_retries)
				goto sleepq;
			if (!(x & SX_LOCK_WRITE_SPINNER)) {
				if (!in_critical) {
					critical_enter();
					in_critical = true;
					extra_work++;
				}
				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
				    x | SX_LOCK_WRITE_SPINNER)) {
					critical_exit();
					in_critical = false;
					extra_work--;
					continue;
				}
			}
			spintries++;
			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
			    "spinning", "lockname:\"%s\"",
			    sx->lock_object.lo_name);
			n = SX_SHARERS(x);
			for (i = 0; i < asx_loops; i += n) {
				lock_delay_spin(n);
				x = SX_READ_VALUE(sx);
				if (!(x & SX_LOCK_WRITE_SPINNER))
					break;
				if (!(x & SX_LOCK_SHARED))
					break;
				n = SX_SHARERS(x);
				if (n == 0)
					break;
			}
#ifdef KDTRACE_HOOKS
			lda.spin_cnt += i;
#endif
			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
			    "running");
			/* The lock state changed before the budget ran out. */
			if (i < asx_loops)
				continue;
		}
sleepq:
#endif
		sleepq_lock(&sx->lock_object);
		x = SX_READ_VALUE(sx);
retry_sleepq:

		/*
		 * If the lock was released while spinning on the
		 * sleep queue chain lock, try again.
		 */
		if (x == SX_LOCK_UNLOCKED) {
			sleepq_release(&sx->lock_object);
			sx_drop_critical(x, &in_critical, &extra_work);
			continue;
		}

#ifdef ADAPTIVE_SX
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the sleep queue
		 * chain lock.  If so, drop the sleep queue lock and try
		 * again.
		 */
		if (adaptive) {
			if (!(x & SX_LOCK_SHARED)) {
				owner = (struct thread *)SX_OWNER(x);
				if (TD_IS_RUNNING(owner)) {
					sleepq_release(&sx->lock_object);
					sx_drop_critical(x, &in_critical,
					    &extra_work);
					continue;
				}
			} else if (SX_SHARERS(x) > 0 && sleep_reason == WRITER) {
				sleepq_release(&sx->lock_object);
				sx_drop_critical(x, &in_critical, &extra_work);
				continue;
			}
		}
#endif

		/*
		 * If an exclusive lock was released with both shared
		 * and exclusive waiters and a shared waiter hasn't
		 * woken up and acquired the lock yet, sx_lock will be
		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
		 * If we see that value, try to acquire it once.  Note
		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
		 * as there are other exclusive waiters still.  If we
		 * fail, restart the loop.
		 */
		setx = x & (SX_LOCK_WAITERS | SX_LOCK_WRITE_SPINNER);
		if ((x & ~setx) == SX_LOCK_SHARED) {
			/* Keep the waiter bits but not the spinner bit. */
			setx &= ~SX_LOCK_WRITE_SPINNER;
			if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid | setx))
				goto retry_sleepq;
			sleepq_release(&sx->lock_object);
			CTR2(KTR_LOCK, "%s: %p claimed by new writer",
			    __func__, sx);
			break;
		}

#ifdef ADAPTIVE_SX
		/*
		 * It is possible we set the SX_LOCK_WRITE_SPINNER bit.
		 * It is an invariant that when the bit is set, there is
		 * a writer ready to grab the lock. Thus clear the bit since
		 * we are going to sleep.
		 */
		if (in_critical) {
			if ((x & SX_LOCK_WRITE_SPINNER) ||
			    !((x & SX_LOCK_EXCLUSIVE_WAITERS))) {
				setx = x & ~SX_LOCK_WRITE_SPINNER;
				setx |= SX_LOCK_EXCLUSIVE_WAITERS;
				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
				    setx)) {
					goto retry_sleepq;
				}
			}
			critical_exit();
			in_critical = false;
		} else {
#endif
			/*
			 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS.  If we fail,
			 * then loop back and retry.
			 */
			if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
				if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
				    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
					goto retry_sleepq;
				}
				if (LOCK_LOG_TEST(&sx->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
					    __func__, sx);
			}
#ifdef ADAPTIVE_SX
		}
#endif

		/*
		 * Since we have been unable to acquire the exclusive
		 * lock and the exclusive waiters flag is set, we have
		 * to sleep.
		 */
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
			    __func__, sx);

#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs(&sx->lock_object);
#endif
		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
		if (!(opts & SX_INTERRUPTIBLE))
			sleepq_wait(&sx->lock_object, 0);
		else
			error = sleepq_wait_sig(&sx->lock_object, 0);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs(&sx->lock_object);
		sleep_cnt++;
#endif
		/* An interrupted sleep ends the attempt without the lock. */
		if (error) {
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR2(KTR_LOCK,
			"%s: interruptible sleep by %p suspended by signal",
				    __func__, sx);
			break;
		}
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
			    __func__, sx);
		x = SX_READ_VALUE(sx);
	}
	/* Common case: nothing to undo and nothing to report. */
	if (__predict_true(!extra_work))
		return (error);
#ifdef ADAPTIVE_SX
	if (in_critical)
		critical_exit();
#endif
	GIANT_RESTORE();
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
	if (__predict_true(!doing_lockprof))
		return (error);
#endif
#ifdef KDTRACE_HOOKS
	all_time += lockstat_nsecs(&sx->lock_object);
	if (sleep_time)
		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
	if (lda.spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
		    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
out_lockstat:
#endif
	if (!error)
		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
		    contested, waittime, file, line, LOCKSTAT_WRITER);
	return (error);
}
904
905/*
906 * This function represents the so-called 'hard case' for sx_xunlock
907 * operation.  All 'easy case' failures are redirected to this.  Note
908 * that ideally this would be a static function, but it needs to be
909 * accessible from at least sx.h.
910 */
911void
912_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
913{
914	uintptr_t tid, setx;
915	int queue, wakeup_swapper;
916
917	if (SCHEDULER_STOPPED())
918		return;
919
920	tid = (uintptr_t)curthread;
921
922	if (__predict_false(x == tid))
923		x = SX_READ_VALUE(sx);
924
925	MPASS(!(x & SX_LOCK_SHARED));
926
927	if (__predict_false(x & SX_LOCK_RECURSED)) {
928		/* The lock is recursed, unrecurse one level. */
929		if ((--sx->sx_recurse) == 0)
930			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
931		if (LOCK_LOG_TEST(&sx->lock_object, 0))
932			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
933		return;
934	}
935
936	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_WRITER);
937	if (x == tid &&
938	    atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
939		return;
940
941	if (LOCK_LOG_TEST(&sx->lock_object, 0))
942		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
943
944	sleepq_lock(&sx->lock_object);
945	x = SX_READ_VALUE(sx);
946	MPASS(x & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS));
947
948	/*
949	 * The wake up algorithm here is quite simple and probably not
950	 * ideal.  It gives precedence to shared waiters if they are
951	 * present.  For this condition, we have to preserve the
952	 * state of the exclusive waiters flag.
953	 * If interruptible sleeps left the shared queue empty avoid a
954	 * starvation for the threads sleeping on the exclusive queue by giving
955	 * them precedence and cleaning up the shared waiters bit anyway.
956	 */
957	setx = SX_LOCK_UNLOCKED;
958	queue = SQ_SHARED_QUEUE;
959	if ((x & SX_LOCK_EXCLUSIVE_WAITERS) != 0 &&
960	    sleepq_sleepcnt(&sx->lock_object, SQ_EXCLUSIVE_QUEUE) != 0) {
961		queue = SQ_EXCLUSIVE_QUEUE;
962		setx |= (x & SX_LOCK_SHARED_WAITERS);
963	}
964	atomic_store_rel_ptr(&sx->sx_lock, setx);
965
966	/* Wake up all the waiters for the specific queue. */
967	if (LOCK_LOG_TEST(&sx->lock_object, 0))
968		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
969		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
970		    "exclusive");
971
972	wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
973	    queue);
974	sleepq_release(&sx->lock_object);
975	if (wakeup_swapper)
976		kick_proc0();
977}
978
979static bool __always_inline
980__sx_can_read(struct thread *td, uintptr_t x, bool fp)
981{
982
983	if ((x & (SX_LOCK_SHARED | SX_LOCK_EXCLUSIVE_WAITERS | SX_LOCK_WRITE_SPINNER))
984			== SX_LOCK_SHARED)
985		return (true);
986	if (!fp && td->td_sx_slocks && (x & SX_LOCK_SHARED))
987		return (true);
988	return (false);
989}
990
991static bool __always_inline
992__sx_slock_try(struct sx *sx, struct thread *td, uintptr_t *xp, bool fp
993    LOCK_FILE_LINE_ARG_DEF)
994{
995
996	/*
997	 * If no other thread has an exclusive lock then try to bump up
998	 * the count of sharers.  Since we have to preserve the state
999	 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
1000	 * shared lock loop back and retry.
1001	 */
1002	while (__sx_can_read(td, *xp, fp)) {
1003		if (atomic_fcmpset_acq_ptr(&sx->sx_lock, xp,
1004		    *xp + SX_ONE_SHARER)) {
1005			if (LOCK_LOG_TEST(&sx->lock_object, 0))
1006				CTR4(KTR_LOCK, "%s: %p succeed %p -> %p",
1007				    __func__, sx, (void *)*xp,
1008				    (void *)(*xp + SX_ONE_SHARER));
1009			td->td_sx_slocks++;
1010			return (true);
1011		}
1012	}
1013	return (false);
1014}
1015
1016static int __noinline
1017_sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
1018{
1019	GIANT_DECLARE;
1020	struct thread *td;
1021#ifdef ADAPTIVE_SX
1022	volatile struct thread *owner;
1023	u_int i, n, spintries = 0;
1024	bool adaptive;
1025#endif
1026#ifdef LOCK_PROFILING
1027	uint64_t waittime = 0;
1028	int contested = 0;
1029#endif
1030	int error = 0;
1031#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
1032	struct lock_delay_arg lda;
1033#endif
1034#ifdef KDTRACE_HOOKS
1035	u_int sleep_cnt = 0;
1036	int64_t sleep_time = 0;
1037	int64_t all_time = 0;
1038#endif
1039#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
1040	uintptr_t state = 0;
1041#endif
1042	int extra_work = 0;
1043
1044	td = curthread;
1045
1046#ifdef KDTRACE_HOOKS
1047	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
1048		if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG))
1049			goto out_lockstat;
1050		extra_work = 1;
1051		all_time -= lockstat_nsecs(&sx->lock_object);
1052		state = x;
1053	}
1054#endif
1055#ifdef LOCK_PROFILING
1056	extra_work = 1;
1057	state = x;
1058#endif
1059
1060	if (SCHEDULER_STOPPED())
1061		return (0);
1062
1063#if defined(ADAPTIVE_SX)
1064	lock_delay_arg_init(&lda, &sx_delay);
1065#elif defined(KDTRACE_HOOKS)
1066	lock_delay_arg_init(&lda, NULL);
1067#endif
1068
1069#ifdef ADAPTIVE_SX
1070	adaptive = ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0);
1071#endif
1072
1073#ifdef HWPMC_HOOKS
1074	PMC_SOFT_CALL( , , lock, failed);
1075#endif
1076	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
1077	    &waittime);
1078
1079#ifndef INVARIANTS
1080	GIANT_SAVE(extra_work);
1081#endif
1082
1083	/*
1084	 * As with rwlocks, we don't make any attempt to try to block
1085	 * shared locks once there is an exclusive waiter.
1086	 */
1087	for (;;) {
1088		if (__sx_slock_try(sx, td, &x, false LOCK_FILE_LINE_ARG))
1089			break;
1090#ifdef INVARIANTS
1091		GIANT_SAVE(extra_work);
1092#endif
1093#ifdef KDTRACE_HOOKS
1094		lda.spin_cnt++;
1095#endif
1096
1097#ifdef ADAPTIVE_SX
1098		if (__predict_false(!adaptive))
1099			goto sleepq;
1100
1101		/*
1102		 * If the owner is running on another CPU, spin until
1103		 * the owner stops running or the state of the lock
1104		 * changes.
1105		 */
1106		if ((x & SX_LOCK_SHARED) == 0) {
1107			owner = lv_sx_owner(x);
1108			if (TD_IS_RUNNING(owner)) {
1109				if (LOCK_LOG_TEST(&sx->lock_object, 0))
1110					CTR3(KTR_LOCK,
1111					    "%s: spinning on %p held by %p",
1112					    __func__, sx, owner);
1113				KTR_STATE1(KTR_SCHED, "thread",
1114				    sched_tdname(curthread), "spinning",
1115				    "lockname:\"%s\"", sx->lock_object.lo_name);
1116				do {
1117					lock_delay(&lda);
1118					x = SX_READ_VALUE(sx);
1119					owner = lv_sx_owner(x);
1120				} while (owner != NULL && TD_IS_RUNNING(owner));
1121				KTR_STATE0(KTR_SCHED, "thread",
1122				    sched_tdname(curthread), "running");
1123				continue;
1124			}
1125		} else {
1126			if ((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) {
1127				MPASS(!__sx_can_read(td, x, false));
1128				lock_delay_spin(2);
1129				x = SX_READ_VALUE(sx);
1130				continue;
1131			}
1132			if (spintries < asx_retries) {
1133				KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
1134				    "spinning", "lockname:\"%s\"",
1135				    sx->lock_object.lo_name);
1136				n = SX_SHARERS(x);
1137				for (i = 0; i < asx_loops; i += n) {
1138					lock_delay_spin(n);
1139					x = SX_READ_VALUE(sx);
1140					if (!(x & SX_LOCK_SHARED))
1141						break;
1142					n = SX_SHARERS(x);
1143					if (n == 0)
1144						break;
1145					if (__sx_can_read(td, x, false))
1146						break;
1147				}
1148#ifdef KDTRACE_HOOKS
1149				lda.spin_cnt += i;
1150#endif
1151				KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
1152				    "running");
1153				if (i < asx_loops)
1154					continue;
1155			}
1156		}
1157sleepq:
1158#endif
1159
1160		/*
1161		 * Some other thread already has an exclusive lock, so
1162		 * start the process of blocking.
1163		 */
1164		sleepq_lock(&sx->lock_object);
1165		x = SX_READ_VALUE(sx);
1166retry_sleepq:
1167		if (((x & SX_LOCK_WRITE_SPINNER) && SX_SHARERS(x) == 0) ||
1168		    __sx_can_read(td, x, false)) {
1169			sleepq_release(&sx->lock_object);
1170			continue;
1171		}
1172
1173#ifdef ADAPTIVE_SX
1174		/*
1175		 * If the owner is running on another CPU, spin until
1176		 * the owner stops running or the state of the lock
1177		 * changes.
1178		 */
1179		if (!(x & SX_LOCK_SHARED) && adaptive) {
1180			owner = (struct thread *)SX_OWNER(x);
1181			if (TD_IS_RUNNING(owner)) {
1182				sleepq_release(&sx->lock_object);
1183				x = SX_READ_VALUE(sx);
1184				continue;
1185			}
1186		}
1187#endif
1188
1189		/*
1190		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
1191		 * fail to set it drop the sleep queue lock and loop
1192		 * back.
1193		 */
1194		if (!(x & SX_LOCK_SHARED_WAITERS)) {
1195			if (!atomic_fcmpset_ptr(&sx->sx_lock, &x,
1196			    x | SX_LOCK_SHARED_WAITERS))
1197				goto retry_sleepq;
1198			if (LOCK_LOG_TEST(&sx->lock_object, 0))
1199				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
1200				    __func__, sx);
1201		}
1202
1203		/*
1204		 * Since we have been unable to acquire the shared lock,
1205		 * we have to sleep.
1206		 */
1207		if (LOCK_LOG_TEST(&sx->lock_object, 0))
1208			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
1209			    __func__, sx);
1210
1211#ifdef KDTRACE_HOOKS
1212		sleep_time -= lockstat_nsecs(&sx->lock_object);
1213#endif
1214		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
1215		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
1216		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
1217		if (!(opts & SX_INTERRUPTIBLE))
1218			sleepq_wait(&sx->lock_object, 0);
1219		else
1220			error = sleepq_wait_sig(&sx->lock_object, 0);
1221#ifdef KDTRACE_HOOKS
1222		sleep_time += lockstat_nsecs(&sx->lock_object);
1223		sleep_cnt++;
1224#endif
1225		if (error) {
1226			if (LOCK_LOG_TEST(&sx->lock_object, 0))
1227				CTR2(KTR_LOCK,
1228			"%s: interruptible sleep by %p suspended by signal",
1229				    __func__, sx);
1230			break;
1231		}
1232		if (LOCK_LOG_TEST(&sx->lock_object, 0))
1233			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
1234			    __func__, sx);
1235		x = SX_READ_VALUE(sx);
1236	}
1237#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
1238	if (__predict_true(!extra_work))
1239		return (error);
1240#endif
1241#ifdef KDTRACE_HOOKS
1242	all_time += lockstat_nsecs(&sx->lock_object);
1243	if (sleep_time)
1244		LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
1245		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
1246		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
1247	if (lda.spin_cnt > sleep_cnt)
1248		LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
1249		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
1250		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
1251out_lockstat:
1252#endif
1253	if (error == 0) {
1254		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
1255		    contested, waittime, file, line, LOCKSTAT_READER);
1256	}
1257	GIANT_RESTORE();
1258	return (error);
1259}
1260
1261int
1262_sx_slock_int(struct sx *sx, int opts LOCK_FILE_LINE_ARG_DEF)
1263{
1264	struct thread *td;
1265	uintptr_t x;
1266	int error;
1267
1268	KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
1269	    !TD_IS_IDLETHREAD(curthread),
1270	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
1271	    curthread, sx->lock_object.lo_name, file, line));
1272	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
1273	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
1274	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
1275
1276	error = 0;
1277	td = curthread;
1278	x = SX_READ_VALUE(sx);
1279	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) ||
1280	    !__sx_slock_try(sx, td, &x, true LOCK_FILE_LINE_ARG)))
1281		error = _sx_slock_hard(sx, opts, x LOCK_FILE_LINE_ARG);
1282	else
1283		lock_profile_obtain_lock_success(&sx->lock_object, 0, 0,
1284		    file, line);
1285	if (error == 0) {
1286		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
1287		WITNESS_LOCK(&sx->lock_object, 0, file, line);
1288		TD_LOCKS_INC(curthread);
1289	}
1290	return (error);
1291}
1292
1293int
1294_sx_slock(struct sx *sx, int opts, const char *file, int line)
1295{
1296
1297	return (_sx_slock_int(sx, opts LOCK_FILE_LINE_ARG));
1298}
1299
1300static bool __always_inline
1301_sx_sunlock_try(struct sx *sx, struct thread *td, uintptr_t *xp)
1302{
1303
1304	for (;;) {
1305		if (SX_SHARERS(*xp) > 1 || !(*xp & SX_LOCK_WAITERS)) {
1306			if (atomic_fcmpset_rel_ptr(&sx->sx_lock, xp,
1307			    *xp - SX_ONE_SHARER)) {
1308				if (LOCK_LOG_TEST(&sx->lock_object, 0))
1309					CTR4(KTR_LOCK,
1310					    "%s: %p succeeded %p -> %p",
1311					    __func__, sx, (void *)*xp,
1312					    (void *)(*xp - SX_ONE_SHARER));
1313				td->td_sx_slocks--;
1314				return (true);
1315			}
1316			continue;
1317		}
1318		break;
1319	}
1320	return (false);
1321}
1322
1323static void __noinline
1324_sx_sunlock_hard(struct sx *sx, struct thread *td, uintptr_t x
1325    LOCK_FILE_LINE_ARG_DEF)
1326{
1327	int wakeup_swapper = 0;
1328	uintptr_t setx, queue;
1329
1330	if (SCHEDULER_STOPPED())
1331		return;
1332
1333	if (_sx_sunlock_try(sx, td, &x))
1334		goto out_lockstat;
1335
1336	sleepq_lock(&sx->lock_object);
1337	x = SX_READ_VALUE(sx);
1338	for (;;) {
1339		if (_sx_sunlock_try(sx, td, &x))
1340			break;
1341
1342		/*
1343		 * Wake up semantic here is quite simple:
1344		 * Just wake up all the exclusive waiters.
1345		 * Note that the state of the lock could have changed,
1346		 * so if it fails loop back and retry.
1347		 */
1348		setx = SX_LOCK_UNLOCKED;
1349		queue = SQ_SHARED_QUEUE;
1350		if (x & SX_LOCK_EXCLUSIVE_WAITERS) {
1351			setx |= (x & SX_LOCK_SHARED_WAITERS);
1352			queue = SQ_EXCLUSIVE_QUEUE;
1353		}
1354		setx |= (x & SX_LOCK_WRITE_SPINNER);
1355		if (!atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, setx))
1356			continue;
1357		if (LOCK_LOG_TEST(&sx->lock_object, 0))
1358			CTR2(KTR_LOCK, "%s: %p waking up all thread on"
1359			    "exclusive queue", __func__, sx);
1360		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
1361		    0, queue);
1362		td->td_sx_slocks--;
1363		break;
1364	}
1365	sleepq_release(&sx->lock_object);
1366	if (wakeup_swapper)
1367		kick_proc0();
1368out_lockstat:
1369	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
1370}
1371
1372void
1373_sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF)
1374{
1375	struct thread *td;
1376	uintptr_t x;
1377
1378	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
1379	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
1380	_sx_assert(sx, SA_SLOCKED, file, line);
1381	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
1382	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
1383
1384	td = curthread;
1385	x = SX_READ_VALUE(sx);
1386	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
1387	    !_sx_sunlock_try(sx, td, &x)))
1388		_sx_sunlock_hard(sx, td, x LOCK_FILE_LINE_ARG);
1389	else
1390		lock_profile_release_lock(&sx->lock_object);
1391
1392	TD_LOCKS_DEC(curthread);
1393}
1394
1395void
1396_sx_sunlock(struct sx *sx, const char *file, int line)
1397{
1398
1399	_sx_sunlock_int(sx LOCK_FILE_LINE_ARG);
1400}
1401
1402#ifdef INVARIANT_SUPPORT
1403#ifndef INVARIANTS
1404#undef	_sx_assert
1405#endif
1406
1407/*
1408 * In the non-WITNESS case, sx_assert() can only detect that at least
1409 * *some* thread owns an slock, but it cannot guarantee that *this*
1410 * thread owns an slock.
1411 */
1412void
1413_sx_assert(const struct sx *sx, int what, const char *file, int line)
1414{
1415#ifndef WITNESS
1416	int slocked = 0;
1417#endif
1418
1419	if (SCHEDULER_STOPPED())
1420		return;
1421	switch (what) {
1422	case SA_SLOCKED:
1423	case SA_SLOCKED | SA_NOTRECURSED:
1424	case SA_SLOCKED | SA_RECURSED:
1425#ifndef WITNESS
1426		slocked = 1;
1427		/* FALLTHROUGH */
1428#endif
1429	case SA_LOCKED:
1430	case SA_LOCKED | SA_NOTRECURSED:
1431	case SA_LOCKED | SA_RECURSED:
1432#ifdef WITNESS
1433		witness_assert(&sx->lock_object, what, file, line);
1434#else
1435		/*
1436		 * If some other thread has an exclusive lock or we
1437		 * have one and are asserting a shared lock, fail.
1438		 * Also, if no one has a lock at all, fail.
1439		 */
1440		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
1441		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
1442		    sx_xholder(sx) != curthread)))
1443			panic("Lock %s not %slocked @ %s:%d\n",
1444			    sx->lock_object.lo_name, slocked ? "share " : "",
1445			    file, line);
1446
1447		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
1448			if (sx_recursed(sx)) {
1449				if (what & SA_NOTRECURSED)
1450					panic("Lock %s recursed @ %s:%d\n",
1451					    sx->lock_object.lo_name, file,
1452					    line);
1453			} else if (what & SA_RECURSED)
1454				panic("Lock %s not recursed @ %s:%d\n",
1455				    sx->lock_object.lo_name, file, line);
1456		}
1457#endif
1458		break;
1459	case SA_XLOCKED:
1460	case SA_XLOCKED | SA_NOTRECURSED:
1461	case SA_XLOCKED | SA_RECURSED:
1462		if (sx_xholder(sx) != curthread)
1463			panic("Lock %s not exclusively locked @ %s:%d\n",
1464			    sx->lock_object.lo_name, file, line);
1465		if (sx_recursed(sx)) {
1466			if (what & SA_NOTRECURSED)
1467				panic("Lock %s recursed @ %s:%d\n",
1468				    sx->lock_object.lo_name, file, line);
1469		} else if (what & SA_RECURSED)
1470			panic("Lock %s not recursed @ %s:%d\n",
1471			    sx->lock_object.lo_name, file, line);
1472		break;
1473	case SA_UNLOCKED:
1474#ifdef WITNESS
1475		witness_assert(&sx->lock_object, what, file, line);
1476#else
1477		/*
1478		 * If we hold an exclusve lock fail.  We can't
1479		 * reliably check to see if we hold a shared lock or
1480		 * not.
1481		 */
1482		if (sx_xholder(sx) == curthread)
1483			panic("Lock %s exclusively locked @ %s:%d\n",
1484			    sx->lock_object.lo_name, file, line);
1485#endif
1486		break;
1487	default:
1488		panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
1489		    line);
1490	}
1491}
1492#endif	/* INVARIANT_SUPPORT */
1493
1494#ifdef DDB
1495static void
1496db_show_sx(const struct lock_object *lock)
1497{
1498	struct thread *td;
1499	const struct sx *sx;
1500
1501	sx = (const struct sx *)lock;
1502
1503	db_printf(" state: ");
1504	if (sx->sx_lock == SX_LOCK_UNLOCKED)
1505		db_printf("UNLOCKED\n");
1506	else if (sx->sx_lock == SX_LOCK_DESTROYED) {
1507		db_printf("DESTROYED\n");
1508		return;
1509	} else if (sx->sx_lock & SX_LOCK_SHARED)
1510		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
1511	else {
1512		td = sx_xholder(sx);
1513		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1514		    td->td_tid, td->td_proc->p_pid, td->td_name);
1515		if (sx_recursed(sx))
1516			db_printf(" recursed: %d\n", sx->sx_recurse);
1517	}
1518
1519	db_printf(" waiters: ");
1520	switch(sx->sx_lock &
1521	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
1522	case SX_LOCK_SHARED_WAITERS:
1523		db_printf("shared\n");
1524		break;
1525	case SX_LOCK_EXCLUSIVE_WAITERS:
1526		db_printf("exclusive\n");
1527		break;
1528	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
1529		db_printf("exclusive and shared\n");
1530		break;
1531	default:
1532		db_printf("none\n");
1533	}
1534}
1535
1536/*
1537 * Check to see if a thread that is blocked on a sleep queue is actually
1538 * blocked on an sx lock.  If so, output some details and return true.
1539 * If the lock has an exclusive owner, return that in *ownerp.
1540 */
1541int
1542sx_chain(struct thread *td, struct thread **ownerp)
1543{
1544	struct sx *sx;
1545
1546	/*
1547	 * Check to see if this thread is blocked on an sx lock.
1548	 * First, we check the lock class.  If that is ok, then we
1549	 * compare the lock name against the wait message.
1550	 */
1551	sx = td->td_wchan;
1552	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
1553	    sx->lock_object.lo_name != td->td_wmesg)
1554		return (0);
1555
1556	/* We think we have an sx lock, so output some details. */
1557	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
1558	*ownerp = sx_xholder(sx);
1559	if (sx->sx_lock & SX_LOCK_SHARED)
1560		db_printf("SLOCK (count %ju)\n",
1561		    (uintmax_t)SX_SHARERS(sx->sx_lock));
1562	else
1563		db_printf("XLOCK\n");
1564	return (1);
1565}
1566#endif
1567