/*-
 * Copyright (c) 2007 Attilio Rao <attilio@freebsd.org>
 * Copyright (c) 2001 Jason Evans <jasone@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

/*
 * Shared/exclusive locks.  This implementation attempts to ensure
 * deterministic lock granting behavior, so that slocks and xlocks are
 * interleaved.
 *
 * Priority propagation will not generally raise the priority of lock holders,
 * so should not be relied upon in combination with sx locks.
 */
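
/*
 * Illustrative usage sketch (an assumption for exposition, not part of
 * this file): a subsystem protecting a hypothetical 'foo' list with an
 * sx lock.  Readers take the shared lock, writers the exclusive lock,
 * and either side may sleep while the lock is held.
 *
 *	static struct sx foo_lock;
 *	sx_init(&foo_lock, "foo list");
 *
 *	sx_slock(&foo_lock);		read-only traversal
 *	sx_sunlock(&foo_lock);
 *
 *	sx_xlock(&foo_lock);		insertion and removal
 *	sx_xunlock(&foo_lock);
 */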

#include "opt_ddb.h"
#include "opt_hwpmc_hooks.h"
#include "opt_kdtrace.h"
#include "opt_no_adaptive_sx.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
#include <machine/cpu.h>
#endif

#ifdef DDB
#include <ddb/ddb.h>
#endif

#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
#define	ADAPTIVE_SX
#endif

CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE);

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
PMC_SOFT_DECLARE( , , lock, failed);
#endif

/* Handy macros for sleep queues. */
#define	SQ_EXCLUSIVE_QUEUE	0
#define	SQ_SHARED_QUEUE		1

#ifdef ADAPTIVE_SX
#define	ASX_RETRIES		10
#define	ASX_LOOPS		10000
#endif

/*
 * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file.  We
 * drop Giant anytime we have to sleep or if we adaptively spin.
 */
#define	GIANT_DECLARE							\
	int _giantcnt = 0;						\
	WITNESS_SAVE_DECL(Giant)					\

#define	GIANT_SAVE() do {						\
	if (mtx_owned(&Giant)) {					\
		WITNESS_SAVE(&Giant.lock_object, Giant);		\
		while (mtx_owned(&Giant)) {				\
			_giantcnt++;					\
			mtx_unlock(&Giant);				\
		}							\
	}								\
} while (0)

#define GIANT_RESTORE() do {						\
	if (_giantcnt > 0) {						\
		mtx_assert(&Giant, MA_NOTOWNED);			\
		while (_giantcnt--)					\
			mtx_lock(&Giant);				\
		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
	}								\
} while (0)
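
/*
 * Sketch of how the Giant helpers above are used by the hard-case
 * routines below: Giant is fully released (and its WITNESS state saved)
 * before spinning or sleeping, then reacquired the same number of times
 * on the way out.
 *
 *	GIANT_DECLARE;
 *	...
 *	GIANT_SAVE();
 *	sleepq_wait(...);		or adaptive spinning
 *	...
 *	GIANT_RESTORE();
 */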

/*
 * Returns true if an exclusive lock is recursed.  It assumes
 * curthread currently has an exclusive lock.
 */
#define	sx_recurse		lock_object.lo_data
#define	sx_recursed(sx)		((sx)->sx_recurse != 0)

static void	assert_sx(struct lock_object *lock, int what);
#ifdef DDB
static void	db_show_sx(struct lock_object *lock);
#endif
static void	lock_sx(struct lock_object *lock, int how);
#ifdef KDTRACE_HOOKS
static int	owner_sx(struct lock_object *lock, struct thread **owner);
#endif
static int	unlock_sx(struct lock_object *lock);

struct lock_class lock_class_sx = {
	.lc_name = "sx",
	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
	.lc_assert = assert_sx,
#ifdef DDB
	.lc_ddb_show = db_show_sx,
#endif
	.lc_lock = lock_sx,
	.lc_unlock = unlock_sx,
#ifdef KDTRACE_HOOKS
	.lc_owner = owner_sx,
#endif
};

#ifndef INVARIANTS
#define	_sx_assert(sx, what, file, line)
#endif

void
assert_sx(struct lock_object *lock, int what)
{

	sx_assert((struct sx *)lock, what);
}

void
lock_sx(struct lock_object *lock, int how)
{
	struct sx *sx;

	sx = (struct sx *)lock;
	if (how)
		sx_xlock(sx);
	else
		sx_slock(sx);
}

int
unlock_sx(struct lock_object *lock)
{
	struct sx *sx;

	sx = (struct sx *)lock;
	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
	if (sx_xlocked(sx)) {
		sx_xunlock(sx);
		return (1);
	} else {
		sx_sunlock(sx);
		return (0);
	}
}

#ifdef KDTRACE_HOOKS
int
owner_sx(struct lock_object *lock, struct thread **owner)
{
	struct sx *sx = (struct sx *)lock;
	uintptr_t x = sx->sx_lock;

	*owner = (struct thread *)SX_OWNER(x);
	return ((x & SX_LOCK_SHARED) != 0 ? (SX_SHARERS(x) != 0) :
	    (*owner != NULL));
}
#endif

void
sx_sysinit(void *arg)
{
	struct sx_args *sargs = arg;

	sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags);
}
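
/*
 * sx_sysinit() is normally reached through the SX_SYSINIT() macro from
 * sys/sx.h.  An illustrative sketch for a hypothetical lock initialized
 * at boot time:
 *
 *	static struct sx foo_lock;
 *	SX_SYSINIT(foo_lock_init, &foo_lock, "foo lock");
 */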

void
sx_init_flags(struct sx *sx, const char *description, int opts)
{
	int flags;

	MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK |
	    SX_NOPROFILE | SX_NOADAPTIVE)) == 0);
	ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock,
	    ("%s: sx_lock not aligned for %s: %p", __func__, description,
	    &sx->sx_lock));

	flags = LO_SLEEPABLE | LO_UPGRADABLE;
	if (opts & SX_DUPOK)
		flags |= LO_DUPOK;
	if (opts & SX_NOPROFILE)
		flags |= LO_NOPROFILE;
	if (!(opts & SX_NOWITNESS))
		flags |= LO_WITNESS;
	if (opts & SX_RECURSE)
		flags |= LO_RECURSABLE;
	if (opts & SX_QUIET)
		flags |= LO_QUIET;

	flags |= opts & SX_NOADAPTIVE;
	sx->sx_lock = SX_LOCK_UNLOCKED;
	sx->sx_recurse = 0;
	lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags);
}

void
sx_destroy(struct sx *sx)
{

	KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held"));
	KASSERT(sx->sx_recurse == 0, ("sx lock still recursed"));
	sx->sx_lock = SX_LOCK_DESTROYED;
	lock_destroy(&sx->lock_object);
}

int
_sx_slock(struct sx *sx, int opts, const char *file, int line)
{
	int error = 0;

	if (SCHEDULER_STOPPED())
		return (0);
	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
	error = __sx_slock(sx, opts, file, line);
	if (!error) {
		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
		WITNESS_LOCK(&sx->lock_object, 0, file, line);
		curthread->td_locks++;
	}

	return (error);
}

int
_sx_try_slock(struct sx *sx, const char *file, int line)
{
	uintptr_t x;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("sx_try_slock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));

	for (;;) {
		x = sx->sx_lock;
		KASSERT(x != SX_LOCK_DESTROYED,
		    ("sx_try_slock() of destroyed sx @ %s:%d", file, line));
		if (!(x & SX_LOCK_SHARED))
			break;
		if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) {
			LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line);
			WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line);
			curthread->td_locks++;
			return (1);
		}
	}

	LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line);
	return (0);
}
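
/*
 * Illustrative sketch of a typical caller of the try-lock above: fall
 * back to another strategy instead of sleeping when the lock is held
 * exclusively (the error value here is the caller's choice).
 *
 *	if (sx_try_slock(&foo_lock) == 0)
 *		return (EWOULDBLOCK);
 *	...
 *	sx_sunlock(&foo_lock);
 */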

int
_sx_xlock(struct sx *sx, int opts, const char *file, int line)
{
	int error = 0;

	if (SCHEDULER_STOPPED())
		return (0);
	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("sx_xlock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_xlock() of destroyed sx @ %s:%d", file, line));
	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
	    line, NULL);
	error = __sx_xlock(sx, curthread, opts, file, line);
	if (!error) {
		LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse,
		    file, line);
		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
		curthread->td_locks++;
	}

	return (error);
}

int
_sx_try_xlock(struct sx *sx, const char *file, int line)
{
	int rval;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
	    ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d",
	    curthread, sx->lock_object.lo_name, file, line));
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_try_xlock() of destroyed sx @ %s:%d", file, line));

	if (sx_xlocked(sx) &&
	    (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) {
		sx->sx_recurse++;
		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
		rval = 1;
	} else
		rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED,
		    (uintptr_t)curthread);
	LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line);
	if (rval) {
		WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		curthread->td_locks++;
	}

	return (rval);
}

void
_sx_sunlock(struct sx *sx, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_SLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
	__sx_sunlock(sx, file, line);
	LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_SUNLOCK_RELEASE, sx);
}

void
_sx_xunlock(struct sx *sx, const char *file, int line)
{

	if (SCHEDULER_STOPPED())
		return;
	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_xunlock() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_XLOCKED, file, line);
	curthread->td_locks--;
	WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line);
	LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file,
	    line);
	if (!sx_recursed(sx))
		LOCKSTAT_PROFILE_RELEASE_LOCK(LS_SX_XUNLOCK_RELEASE, sx);
	__sx_xunlock(sx, curthread, file, line);
}

/*
 * Try to do a non-blocking upgrade from a shared lock to an exclusive lock.
 * This will only succeed if this thread holds a single shared lock.
390168191Sjhb * Return 1 if if the upgrade succeed, 0 otherwise.
 */
int
_sx_try_upgrade(struct sx *sx, const char *file, int line)
{
	uintptr_t x;
	int success;

	if (SCHEDULER_STOPPED())
		return (1);

	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_SLOCKED, file, line);

	/*
	 * Try to switch from one shared lock to an exclusive lock.  We need
	 * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that
	 * we will wake up the exclusive waiters when we drop the lock.
	 */
	x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
	success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
	    (uintptr_t)curthread | x);
	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
	if (success) {
		WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
		    file, line);
		LOCKSTAT_RECORD0(LS_SX_TRYUPGRADE_UPGRADE, sx);
	}
	return (success);
}
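
/*
 * Illustrative sketch of the usual upgrade pattern: take the shared
 * lock for the common read-only case and upgrade in place only when a
 * modification turns out to be needed, falling back to a full exclusive
 * acquire (and revalidating) if the upgrade fails.
 *
 *	sx_slock(&foo_lock);
 *	if (need_change && !sx_try_upgrade(&foo_lock)) {
 *		sx_sunlock(&foo_lock);
 *		sx_xlock(&foo_lock);
 *		(revalidate: the lock was dropped in between)
 *	}
 */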

/*
 * Downgrade an unrecursed exclusive lock into a single shared lock.
 */
void
_sx_downgrade(struct sx *sx, const char *file, int line)
{
	uintptr_t x;
	int wakeup_swapper;

	if (SCHEDULER_STOPPED())
		return;

	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
	    ("sx_downgrade() of destroyed sx @ %s:%d", file, line));
	_sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line);
#ifndef INVARIANTS
	if (sx_recursed(sx))
		panic("downgrade of a recursed lock");
#endif

	WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line);

	/*
	 * Try to switch from an exclusive lock with no shared waiters
	 * to one sharer with no shared waiters.  If there are
	 * exclusive waiters, we don't need to lock the sleep queue so
	 * long as we preserve the flag.  We do one quick try and if
	 * that fails we grab the sleepq lock to keep the flags from
	 * changing and do it the slow way.
	 *
	 * We have to lock the sleep queue if there are shared waiters
	 * so we can wake them up.
	 */
	x = sx->sx_lock;
	if (!(x & SX_LOCK_SHARED_WAITERS) &&
	    atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) |
	    (x & SX_LOCK_EXCLUSIVE_WAITERS))) {
		LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
		return;
	}

	/*
	 * Lock the sleep queue so we can read the waiters bits
	 * without any races and wakeup any shared waiters.
	 */
	sleepq_lock(&sx->lock_object);

	/*
	 * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single
	 * shared lock.  If there are any shared waiters, wake them up.
	 */
	wakeup_swapper = 0;
	x = sx->sx_lock;
	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
	if (x & SX_LOCK_SHARED_WAITERS)
		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
		    0, SQ_SHARED_QUEUE);
	sleepq_release(&sx->lock_object);

	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
	LOCKSTAT_RECORD0(LS_SX_DOWNGRADE_DOWNGRADE, sx);

	if (wakeup_swapper)
		kick_proc0();
}
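
/*
 * Illustrative sketch of the matching downgrade pattern: perform the
 * update under the exclusive lock, then downgrade so that concurrent
 * readers may proceed while the caller keeps a consistent view.
 *
 *	sx_xlock(&foo_lock);
 *	(modify the protected data)
 *	sx_downgrade(&foo_lock);
 *	(keep reading under the shared lock)
 *	sx_sunlock(&foo_lock);
 */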

/*
 * This function represents the so-called 'hard case' for sx_xlock
 * operation.  All 'easy case' failures are redirected to this.  Note
 * that ideally this would be a static function, but it needs to be
 * accessible from at least sx.h.
 */
int
_sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
    int line)
{
	GIANT_DECLARE;
#ifdef ADAPTIVE_SX
	volatile struct thread *owner;
	u_int i, spintries = 0;
#endif
	uintptr_t x;
#ifdef LOCK_PROFILING
	uint64_t waittime = 0;
	int contested = 0;
#endif
	int error = 0;
#ifdef	KDTRACE_HOOKS
	uint64_t spin_cnt = 0;
	uint64_t sleep_cnt = 0;
	int64_t sleep_time = 0;
#endif

	if (SCHEDULER_STOPPED())
		return (0);

	/* If we already hold an exclusive lock, then recurse. */
	if (sx_xlocked(sx)) {
		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
	    ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n",
		    sx->lock_object.lo_name, file, line));
		sx->sx_recurse++;
		atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx);
		return (0);
	}

	if (LOCK_LOG_TEST(&sx->lock_object, 0))
		CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
		    sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);

	while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
#ifdef KDTRACE_HOOKS
		spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
		PMC_SOFT_CALL( , , lock, failed);
#endif
		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
		    &waittime);
#ifdef ADAPTIVE_SX
		/*
		 * If the lock is write locked and the owner is
		 * running on another CPU, spin until the owner stops
		 * running or the state of the lock changes.
		 */
		x = sx->sx_lock;
		if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
			if ((x & SX_LOCK_SHARED) == 0) {
				x = SX_OWNER(x);
				owner = (struct thread *)x;
				if (TD_IS_RUNNING(owner)) {
					if (LOCK_LOG_TEST(&sx->lock_object, 0))
						CTR3(KTR_LOCK,
					    "%s: spinning on %p held by %p",
						    __func__, sx, owner);
					GIANT_SAVE();
					while (SX_OWNER(sx->sx_lock) == x &&
					    TD_IS_RUNNING(owner)) {
						cpu_spinwait();
#ifdef KDTRACE_HOOKS
						spin_cnt++;
#endif
					}
					continue;
				}
			} else if (SX_SHARERS(x) && spintries < ASX_RETRIES) {
				GIANT_SAVE();
				spintries++;
				for (i = 0; i < ASX_LOOPS; i++) {
					if (LOCK_LOG_TEST(&sx->lock_object, 0))
						CTR4(KTR_LOCK,
				    "%s: shared spinning on %p with %u and %u",
						    __func__, sx, spintries, i);
					x = sx->sx_lock;
					if ((x & SX_LOCK_SHARED) == 0 ||
					    SX_SHARERS(x) == 0)
						break;
					cpu_spinwait();
#ifdef KDTRACE_HOOKS
					spin_cnt++;
#endif
				}
				if (i != ASX_LOOPS)
					continue;
			}
		}
#endif

		sleepq_lock(&sx->lock_object);
		x = sx->sx_lock;

		/*
		 * If the lock was released while spinning on the
		 * sleep queue chain lock, try again.
		 */
		if (x == SX_LOCK_UNLOCKED) {
			sleepq_release(&sx->lock_object);
			continue;
		}

#ifdef ADAPTIVE_SX
		/*
		 * The current lock owner might have started executing
		 * on another CPU (or the lock could have changed
		 * owners) while we were waiting on the sleep queue
		 * chain lock.  If so, drop the sleep queue lock and try
		 * again.
		 */
		if (!(x & SX_LOCK_SHARED) &&
		    (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
			owner = (struct thread *)SX_OWNER(x);
			if (TD_IS_RUNNING(owner)) {
				sleepq_release(&sx->lock_object);
				continue;
			}
		}
#endif

		/*
		 * If an exclusive lock was released with both shared
		 * and exclusive waiters and a shared waiter hasn't
		 * woken up and acquired the lock yet, sx_lock will be
		 * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS.
		 * If we see that value, try to acquire it once.  Note
		 * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS
		 * as there are other exclusive waiters still.  If we
		 * fail, restart the loop.
		 */
		if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) {
			if (atomic_cmpset_acq_ptr(&sx->sx_lock,
			    SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS,
			    tid | SX_LOCK_EXCLUSIVE_WAITERS)) {
				sleepq_release(&sx->lock_object);
				CTR2(KTR_LOCK, "%s: %p claimed by new writer",
				    __func__, sx);
				break;
			}
			sleepq_release(&sx->lock_object);
			continue;
		}

		/*
		 * Try to set the SX_LOCK_EXCLUSIVE_WAITERS flag.  If we fail,
		 * then loop back and retry.
		 */
		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
			    x | SX_LOCK_EXCLUSIVE_WAITERS)) {
				sleepq_release(&sx->lock_object);
				continue;
			}
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR2(KTR_LOCK, "%s: %p set excl waiters flag",
				    __func__, sx);
		}

		/*
		 * Since we have been unable to acquire the exclusive
		 * lock and the exclusive waiters flag is set, we have
		 * to sleep.
		 */
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
			    __func__, sx);

#ifdef KDTRACE_HOOKS
		sleep_time -= lockstat_nsecs();
#endif
		GIANT_SAVE();
		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
		if (!(opts & SX_INTERRUPTIBLE))
			sleepq_wait(&sx->lock_object, 0);
		else
			error = sleepq_wait_sig(&sx->lock_object, 0);
#ifdef KDTRACE_HOOKS
		sleep_time += lockstat_nsecs();
		sleep_cnt++;
#endif
		if (error) {
			if (LOCK_LOG_TEST(&sx->lock_object, 0))
				CTR2(KTR_LOCK,
			"%s: interruptible sleep by %p suspended by signal",
				    __func__, sx);
			break;
		}
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
			    __func__, sx);
	}

	GIANT_RESTORE();
	if (!error)
		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx,
		    contested, waittime, file, line);
#ifdef KDTRACE_HOOKS
	if (sleep_time)
		LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time);
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
#endif
	return (error);
}

/*
 * This function represents the so-called 'hard case' for sx_xunlock
 * operation.  All 'easy case' failures are redirected to this.  Note
 * that ideally this would be a static function, but it needs to be
 * accessible from at least sx.h.
 */
void
_sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
{
	uintptr_t x;
	int queue, wakeup_swapper;

	if (SCHEDULER_STOPPED())
		return;

	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));

	/* If the lock is recursed, then unrecurse one level. */
	if (sx_xlocked(sx) && sx_recursed(sx)) {
		if ((--sx->sx_recurse) == 0)
			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx);
		return;
	}
	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
	    SX_LOCK_EXCLUSIVE_WAITERS));
	if (LOCK_LOG_TEST(&sx->lock_object, 0))
		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);

	sleepq_lock(&sx->lock_object);
	x = SX_LOCK_UNLOCKED;

	/*
	 * The wake up algorithm here is quite simple and probably not
	 * ideal.  It gives precedence to shared waiters if they are
	 * present.  For this condition, we have to preserve the
	 * state of the exclusive waiters flag.
	 * If interruptible sleeps left the shared queue empty, avoid
	 * starving the threads sleeping on the exclusive queue by giving
	 * them precedence and cleaning up the shared waiters bit anyway.
75173782Sjasone	 */
752200447Sattilio	if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 &&
753200447Sattilio	    sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) {
754168191Sjhb		queue = SQ_SHARED_QUEUE;
755168191Sjhb		x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS);
756168191Sjhb	} else
757168191Sjhb		queue = SQ_EXCLUSIVE_QUEUE;
75873782Sjasone
759168191Sjhb	/* Wake up all the waiters for the specific queue. */
760168191Sjhb	if (LOCK_LOG_TEST(&sx->lock_object, 0))
761168191Sjhb		CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue",
762168191Sjhb		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
763168191Sjhb		    "exclusive");
764168191Sjhb	atomic_store_rel_ptr(&sx->sx_lock, x);
765181334Sjhb	wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
766181334Sjhb	    queue);
767177085Sjeff	sleepq_release(&sx->lock_object);
768181334Sjhb	if (wakeup_swapper)
769181334Sjhb		kick_proc0();
77073782Sjasone}
77181599Sjasone
772168191Sjhb/*
773168191Sjhb * This function represents the so-called 'hard case' for sx_slock
774168191Sjhb * operation.  All 'easy case' failures are redirected to this.  Note
775168191Sjhb * that ideally this would be a static function, but it needs to be
776168191Sjhb * accessible from at least sx.h.
777168191Sjhb */
778170149Sattilioint
779170149Sattilio_sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
78081599Sjasone{
781168191Sjhb	GIANT_DECLARE;
782168191Sjhb#ifdef ADAPTIVE_SX
783168191Sjhb	volatile struct thread *owner;
784168191Sjhb#endif
785189846Sjeff#ifdef LOCK_PROFILING
786171277Sattilio	uint64_t waittime = 0;
787171277Sattilio	int contested = 0;
788189846Sjeff#endif
789168191Sjhb	uintptr_t x;
790171277Sattilio	int error = 0;
791192853Ssson#ifdef KDTRACE_HOOKS
792192853Ssson	uint64_t spin_cnt = 0;
793192853Ssson	uint64_t sleep_cnt = 0;
794192853Ssson	int64_t sleep_time = 0;
795192853Ssson#endif
796171277Sattilio
797235404Savg	if (SCHEDULER_STOPPED())
798235404Savg		return (0);
799235404Savg
800168191Sjhb	/*
801168191Sjhb	 * As with rwlocks, we don't make any attempt to try to block
802168191Sjhb	 * shared locks once there is an exclusive waiter.
803168191Sjhb	 */
804168191Sjhb	for (;;) {
805192853Ssson#ifdef KDTRACE_HOOKS
806192853Ssson		spin_cnt++;
807192853Ssson#endif
808168191Sjhb		x = sx->sx_lock;
80981599Sjasone
810168191Sjhb		/*
811168191Sjhb		 * If no other thread has an exclusive lock then try to bump up
812168191Sjhb		 * the count of sharers.  Since we have to preserve the state
813168191Sjhb		 * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the
814168191Sjhb		 * shared lock loop back and retry.
815168191Sjhb		 */
816168191Sjhb		if (x & SX_LOCK_SHARED) {
817168191Sjhb			MPASS(!(x & SX_LOCK_SHARED_WAITERS));
818168191Sjhb			if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
819168191Sjhb			    x + SX_ONE_SHARER)) {
820168191Sjhb				if (LOCK_LOG_TEST(&sx->lock_object, 0))
821168191Sjhb					CTR4(KTR_LOCK,
822168191Sjhb					    "%s: %p succeed %p -> %p", __func__,
823168191Sjhb					    sx, (void *)x,
824168191Sjhb					    (void *)(x + SX_ONE_SHARER));
825168191Sjhb				break;
826168191Sjhb			}
827168191Sjhb			continue;
828168191Sjhb		}
829236238Sfabient#ifdef HWPMC_HOOKS
830236238Sfabient		PMC_SOFT_CALL( , , lock, failed);
831236238Sfabient#endif
832174629Sjeff		lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
833174629Sjeff		    &waittime);
83481599Sjasone
835168191Sjhb#ifdef ADAPTIVE_SX
836168191Sjhb		/*
837168191Sjhb		 * If the owner is running on another CPU, spin until
838168191Sjhb		 * the owner stops running or the state of the lock
839168191Sjhb		 * changes.
840168191Sjhb		 */
841193011Sattilio		if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
842168191Sjhb			x = SX_OWNER(x);
843168191Sjhb			owner = (struct thread *)x;
844168191Sjhb			if (TD_IS_RUNNING(owner)) {
845168191Sjhb				if (LOCK_LOG_TEST(&sx->lock_object, 0))
846168191Sjhb					CTR3(KTR_LOCK,
847168191Sjhb					    "%s: spinning on %p held by %p",
848168191Sjhb					    __func__, sx, owner);
849168191Sjhb				GIANT_SAVE();
850168191Sjhb				while (SX_OWNER(sx->sx_lock) == x &&
851192853Ssson				    TD_IS_RUNNING(owner)) {
852192853Ssson#ifdef KDTRACE_HOOKS
853192853Ssson					spin_cnt++;
854192853Ssson#endif
855168191Sjhb					cpu_spinwait();
856192853Ssson				}
857168191Sjhb				continue;
858168191Sjhb			}
859171277Sattilio		}
860168191Sjhb#endif
86181599Sjasone
862168191Sjhb		/*
863168191Sjhb		 * Some other thread already has an exclusive lock, so
864168191Sjhb		 * start the process of blocking.
865168191Sjhb		 */
866168191Sjhb		sleepq_lock(&sx->lock_object);
867168191Sjhb		x = sx->sx_lock;
868168191Sjhb
869168191Sjhb		/*
870168191Sjhb		 * The lock could have been released while we spun.
871168191Sjhb		 * In this case loop back and retry.
872168191Sjhb		 */
873168191Sjhb		if (x & SX_LOCK_SHARED) {
874168191Sjhb			sleepq_release(&sx->lock_object);
875168191Sjhb			continue;
876168191Sjhb		}
877168191Sjhb
878168191Sjhb#ifdef ADAPTIVE_SX
879168191Sjhb		/*
880168191Sjhb		 * If the owner is running on another CPU, spin until
881168191Sjhb		 * the owner stops running or the state of the lock
882168191Sjhb		 * changes.
883168191Sjhb		 */
884168191Sjhb		if (!(x & SX_LOCK_SHARED) &&
885193011Sattilio		    (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) {
886168191Sjhb			owner = (struct thread *)SX_OWNER(x);
887168191Sjhb			if (TD_IS_RUNNING(owner)) {
888168191Sjhb				sleepq_release(&sx->lock_object);
889168191Sjhb				continue;
890168191Sjhb			}
891168191Sjhb		}
892168191Sjhb#endif
893168191Sjhb
894168191Sjhb		/*
895168191Sjhb		 * Try to set the SX_LOCK_SHARED_WAITERS flag.  If we
896168191Sjhb		 * fail to set it drop the sleep queue lock and loop
897168191Sjhb		 * back.
898168191Sjhb		 */
899168191Sjhb		if (!(x & SX_LOCK_SHARED_WAITERS)) {
900168191Sjhb			if (!atomic_cmpset_ptr(&sx->sx_lock, x,
901168191Sjhb			    x | SX_LOCK_SHARED_WAITERS)) {
902168191Sjhb				sleepq_release(&sx->lock_object);
903168191Sjhb				continue;
904168191Sjhb			}
905168191Sjhb			if (LOCK_LOG_TEST(&sx->lock_object, 0))
906168191Sjhb				CTR2(KTR_LOCK, "%s: %p set shared waiters flag",
907168191Sjhb				    __func__, sx);
908168191Sjhb		}
909168191Sjhb
910168191Sjhb		/*
911168191Sjhb		 * Since we have been unable to acquire the shared lock,
912168191Sjhb		 * we have to sleep.
913168191Sjhb		 */
914168191Sjhb		if (LOCK_LOG_TEST(&sx->lock_object, 0))
915168191Sjhb			CTR2(KTR_LOCK, "%s: %p blocking on sleep queue",
916168191Sjhb			    __func__, sx);
917171277Sattilio
918192853Ssson#ifdef KDTRACE_HOOKS
919192853Ssson		sleep_time -= lockstat_nsecs();
920192853Ssson#endif
921168191Sjhb		GIANT_SAVE();
922168191Sjhb		sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
923170149Sattilio		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
924170149Sattilio		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
925170149Sattilio		if (!(opts & SX_INTERRUPTIBLE))
926177085Sjeff			sleepq_wait(&sx->lock_object, 0);
927170149Sattilio		else
928177085Sjeff			error = sleepq_wait_sig(&sx->lock_object, 0);
929192853Ssson#ifdef KDTRACE_HOOKS
930192853Ssson		sleep_time += lockstat_nsecs();
931192853Ssson		sleep_cnt++;
932192853Ssson#endif
933170149Sattilio		if (error) {
934170149Sattilio			if (LOCK_LOG_TEST(&sx->lock_object, 0))
935170149Sattilio				CTR2(KTR_LOCK,
936170149Sattilio			"%s: interruptible sleep by %p suspended by signal",
937170149Sattilio				    __func__, sx);
938170149Sattilio			break;
939170149Sattilio		}
940168191Sjhb		if (LOCK_LOG_TEST(&sx->lock_object, 0))
941168191Sjhb			CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
942168191Sjhb			    __func__, sx);
94381599Sjasone	}
944174629Sjeff	if (error == 0)
945192853Ssson		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx,
946192853Ssson		    contested, waittime, file, line);
947192853Ssson#ifdef KDTRACE_HOOKS
948192853Ssson	if (sleep_time)
		LOCKSTAT_RECORD1(LS_SX_SLOCK_BLOCK, sx, sleep_time);
	if (spin_cnt > sleep_cnt)
		LOCKSTAT_RECORD1(LS_SX_SLOCK_SPIN, sx, (spin_cnt - sleep_cnt));
#endif
	GIANT_RESTORE();
	return (error);
}

/*
 * This function represents the so-called 'hard case' for sx_sunlock
 * operation.  All 'easy case' failures are redirected to this.  Note
 * that ideally this would be a static function, but it needs to be
 * accessible from at least sx.h.
 */
void
_sx_sunlock_hard(struct sx *sx, const char *file, int line)
{
	uintptr_t x;
	int wakeup_swapper;

	if (SCHEDULER_STOPPED())
		return;

	for (;;) {
		x = sx->sx_lock;

		/*
		 * We should never have shared waiters while at least one
		 * thread holds a shared lock.
		 */
		KASSERT(!(x & SX_LOCK_SHARED_WAITERS),
		    ("%s: waiting sharers", __func__));

		/*
		 * See if there is more than one shared lock held.  If
		 * so, just drop one and return.
		 */
		if (SX_SHARERS(x) > 1) {
			if (atomic_cmpset_rel_ptr(&sx->sx_lock, x,
			    x - SX_ONE_SHARER)) {
				if (LOCK_LOG_TEST(&sx->lock_object, 0))
					CTR4(KTR_LOCK,
					    "%s: %p succeeded %p -> %p",
					    __func__, sx, (void *)x,
					    (void *)(x - SX_ONE_SHARER));
				break;
			}
			continue;
		}

		/*
		 * If there aren't any waiters for an exclusive lock,
		 * then try to drop it quickly.
		 */
		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
			MPASS(x == SX_SHARERS_LOCK(1));
			if (atomic_cmpset_rel_ptr(&sx->sx_lock,
			    SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) {
				if (LOCK_LOG_TEST(&sx->lock_object, 0))
					CTR2(KTR_LOCK, "%s: %p last succeeded",
					    __func__, sx);
				break;
			}
			continue;
		}

		/*
		 * At this point, there should just be one sharer with
		 * exclusive waiters.
		 */
		MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS));

		sleepq_lock(&sx->lock_object);

		/*
		 * Wake up semantic here is quite simple:
		 * Just wake up all the exclusive waiters.
		 * Note that the state of the lock could have changed,
		 * so if it fails loop back and retry.
		 */
		if (!atomic_cmpset_rel_ptr(&sx->sx_lock,
		    SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS,
		    SX_LOCK_UNLOCKED)) {
			sleepq_release(&sx->lock_object);
			continue;
		}
		if (LOCK_LOG_TEST(&sx->lock_object, 0))
			CTR2(KTR_LOCK, "%s: %p waking up all threads on"
			    " exclusive queue", __func__, sx);
		wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX,
		    0, SQ_EXCLUSIVE_QUEUE);
		sleepq_release(&sx->lock_object);
		if (wakeup_swapper)
			kick_proc0();
		break;
	}
}

#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef	_sx_assert
#endif

/*
 * In the non-WITNESS case, sx_assert() can only detect that at least
 * *some* thread owns an slock, but it cannot guarantee that *this*
 * thread owns an slock.
 */
void
_sx_assert(struct sx *sx, int what, const char *file, int line)
{
#ifndef WITNESS
	int slocked = 0;
#endif

	if (panicstr != NULL)
		return;
	switch (what) {
	case SA_SLOCKED:
	case SA_SLOCKED | SA_NOTRECURSED:
	case SA_SLOCKED | SA_RECURSED:
#ifndef WITNESS
		slocked = 1;
		/* FALLTHROUGH */
#endif
	case SA_LOCKED:
	case SA_LOCKED | SA_NOTRECURSED:
	case SA_LOCKED | SA_RECURSED:
#ifdef WITNESS
		witness_assert(&sx->lock_object, what, file, line);
#else
		/*
		 * If some other thread has an exclusive lock or we
		 * have one and are asserting a shared lock, fail.
		 * Also, if no one has a lock at all, fail.
		 */
		if (sx->sx_lock == SX_LOCK_UNLOCKED ||
		    (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked ||
		    sx_xholder(sx) != curthread)))
			panic("Lock %s not %slocked @ %s:%d\n",
			    sx->lock_object.lo_name, slocked ? "share " : "",
			    file, line);

		if (!(sx->sx_lock & SX_LOCK_SHARED)) {
			if (sx_recursed(sx)) {
				if (what & SA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    sx->lock_object.lo_name, file,
					    line);
			} else if (what & SA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    sx->lock_object.lo_name, file, line);
		}
#endif
		break;
	case SA_XLOCKED:
	case SA_XLOCKED | SA_NOTRECURSED:
	case SA_XLOCKED | SA_RECURSED:
		if (sx_xholder(sx) != curthread)
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    sx->lock_object.lo_name, file, line);
		if (sx_recursed(sx)) {
			if (what & SA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    sx->lock_object.lo_name, file, line);
		} else if (what & SA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    sx->lock_object.lo_name, file, line);
		break;
	case SA_UNLOCKED:
#ifdef WITNESS
		witness_assert(&sx->lock_object, what, file, line);
#else
		/*
		 * If we hold an exclusive lock, fail.  We can't
		 * reliably check to see if we hold a shared lock or
		 * not.
		 */
		if (sx_xholder(sx) == curthread)
			panic("Lock %s exclusively locked @ %s:%d\n",
			    sx->lock_object.lo_name, file, line);
#endif
		break;
	default:
		panic("Unknown sx lock assertion: %d @ %s:%d", what, file,
		    line);
	}
}
#endif	/* INVARIANT_SUPPORT */
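
/*
 * Illustrative sketch: internal helpers that require the caller to hold
 * the lock typically document and enforce that with sx_assert(), e.g.
 * for a hypothetical helper:
 *
 *	static void
 *	foo_remove_locked(struct foo *f)
 *	{
 *
 *		sx_assert(&foo_lock, SA_XLOCKED);
 *		...
 *	}
 */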

#ifdef DDB
static void
db_show_sx(struct lock_object *lock)
{
	struct thread *td;
	struct sx *sx;

	sx = (struct sx *)lock;

	db_printf(" state: ");
	if (sx->sx_lock == SX_LOCK_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (sx->sx_lock == SX_LOCK_DESTROYED) {
		db_printf("DESTROYED\n");
		return;
	} else if (sx->sx_lock & SX_LOCK_SHARED)
		db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock));
	else {
		td = sx_xholder(sx);
		db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
		    td->td_tid, td->td_proc->p_pid, td->td_name);
		if (sx_recursed(sx))
			db_printf(" recursed: %d\n", sx->sx_recurse);
	}

	db_printf(" waiters: ");
	switch (sx->sx_lock &
	    (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) {
	case SX_LOCK_SHARED_WAITERS:
		db_printf("shared\n");
		break;
	case SX_LOCK_EXCLUSIVE_WAITERS:
		db_printf("exclusive\n");
		break;
	case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS:
		db_printf("exclusive and shared\n");
		break;
	default:
		db_printf("none\n");
	}
}

/*
 * Check to see if a thread that is blocked on a sleep queue is actually
 * blocked on an sx lock.  If so, output some details and return true.
 * If the lock has an exclusive owner, return that in *ownerp.
 */
int
sx_chain(struct thread *td, struct thread **ownerp)
{
	struct sx *sx;

	/*
	 * Check to see if this thread is blocked on an sx lock.
	 * First, we check the lock class.  If that is ok, then we
	 * compare the lock name against the wait message.
	 */
	sx = td->td_wchan;
	if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx ||
	    sx->lock_object.lo_name != td->td_wmesg)
		return (0);

	/* We think we have an sx lock, so output some details. */
	db_printf("blocked on sx \"%s\" ", td->td_wmesg);
	*ownerp = sx_xholder(sx);
	if (sx->sx_lock & SX_LOCK_SHARED)
		db_printf("SLOCK (count %ju)\n",
		    (uintmax_t)SX_SHARERS(sx->sx_lock));
	else
		db_printf("XLOCK\n");
	return (1);
}
#endif