1139804Simp/*-
2126324Sjhb * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
3126324Sjhb *
4126324Sjhb * Redistribution and use in source and binary forms, with or without
5126324Sjhb * modification, are permitted provided that the following conditions
6126324Sjhb * are met:
7126324Sjhb * 1. Redistributions of source code must retain the above copyright
8126324Sjhb *    notice, this list of conditions and the following disclaimer.
9126324Sjhb * 2. Redistributions in binary form must reproduce the above copyright
10126324Sjhb *    notice, this list of conditions and the following disclaimer in the
11126324Sjhb *    documentation and/or other materials provided with the distribution.
12126324Sjhb *
13126324Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14126324Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15126324Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16126324Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17126324Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18126324Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19126324Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20126324Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21126324Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22126324Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23126324Sjhb * SUCH DAMAGE.
24126324Sjhb */
25126324Sjhb
26126324Sjhb/*
27126324Sjhb * Implementation of sleep queues used to hold queue of threads blocked on
28315027Svangyzen * a wait channel.  Sleep queues are different from turnstiles in that wait
29126324Sjhb * channels are not owned by anyone, so there is no priority propagation.
30126324Sjhb * Sleep queues can also provide a timeout and can also be interrupted by
31126324Sjhb * signals.  That said, there are several similarities between the turnstile
32126324Sjhb * and sleep queue implementations.  (Note: turnstiles were implemented
33126324Sjhb * first.)  For example, both use a hash table of the same size where each
34126324Sjhb * bucket is referred to as a "chain" that contains both a spin lock and
35126324Sjhb * a linked list of queues.  An individual queue is located by using a hash
36126324Sjhb * to pick a chain, locking the chain, and then walking the chain searching
37126324Sjhb * for the queue.  This means that a wait channel object does not need to
38315027Svangyzen * embed its queue head just as locks do not embed their turnstile queue
39126324Sjhb * head.  Threads also carry around a sleep queue that they lend to the
40126324Sjhb * wait channel when blocking.  Just as in turnstiles, the queue includes
41126324Sjhb * a free list of the sleep queues of other threads blocked on the same
42126324Sjhb * wait channel in the case of multiple waiters.
43126324Sjhb *
 * Some additional functionality provided by sleep queues includes the
 * ability to set a timeout.  The timeout is managed using a per-thread
46126324Sjhb * callout that resumes a thread if it is asleep.  A thread may also
47126324Sjhb * catch signals while it is asleep (aka an interruptible sleep).  The
48126324Sjhb * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
49126324Sjhb * sleep queues also provide some extra assertions.  One is not allowed to
50126324Sjhb * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
51126324Sjhb * must consistently use the same lock to synchronize with a wait channel,
52126324Sjhb * though this check is currently only a warning for sleep/wakeup due to
53126324Sjhb * pre-existing abuse of that API.  The same lock must also be held when
54126324Sjhb * awakening threads, though that is currently only enforced for condition
55126324Sjhb * variables.
56126324Sjhb */
57126324Sjhb
58126324Sjhb#include <sys/cdefs.h>
59126324Sjhb__FBSDID("$FreeBSD: stable/11/sys/kern/subr_sleepqueue.c 367457 2020-11-07 18:10:59Z dim $");
60126324Sjhb
61154936Sjhb#include "opt_sleepqueue_profiling.h"
62154936Sjhb#include "opt_ddb.h"
63170640Sjeff#include "opt_sched.h"
64296973Scem#include "opt_stack.h"
65154936Sjhb
66126324Sjhb#include <sys/param.h>
67126324Sjhb#include <sys/systm.h>
68126324Sjhb#include <sys/lock.h>
69126324Sjhb#include <sys/kernel.h>
70126324Sjhb#include <sys/ktr.h>
71126324Sjhb#include <sys/mutex.h>
72126324Sjhb#include <sys/proc.h>
73177372Sjeff#include <sys/sbuf.h>
74126324Sjhb#include <sys/sched.h>
75235459Srstone#include <sys/sdt.h>
76126324Sjhb#include <sys/signalvar.h>
77126324Sjhb#include <sys/sleepqueue.h>
78296927Scem#include <sys/stack.h>
79131259Sjhb#include <sys/sysctl.h>
80316120Svangyzen#include <sys/time.h>
81126324Sjhb
82316120Svangyzen#include <machine/atomic.h>
83316120Svangyzen
84169666Sjeff#include <vm/uma.h>
85169666Sjeff
86154936Sjhb#ifdef DDB
87154936Sjhb#include <ddb/ddb.h>
88154936Sjhb#endif
89154936Sjhb
90296927Scem
/*
 * Constants for the hash table of sleep queue chains.
 * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
 *
 * SC_HASH() folds a wait channel address (the address shifted right by
 * SC_SHIFT, XORed with itself) into a chain index.  SC_LOOKUP() yields a
 * pointer to the corresponding entry of sleepq_chains[]; its expansion is
 * fully parenthesized so that it stays a single primary expression when
 * followed by postfix operators such as "->".
 *
 * NR_SLEEPQS is the number of blocked-thread sub-queues kept per sleep queue.
 */
#define	SC_TABLESIZE	256			/* Must be power of 2. */
#define	SC_MASK		(SC_TABLESIZE - 1)
#define	SC_SHIFT	8
#define	SC_HASH(wc)	((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
			    SC_MASK)
#define	SC_LOOKUP(wc)	(&sleepq_chains[SC_HASH(wc)])
#define	NR_SLEEPQS	2
102126324Sjhb/*
103315027Svangyzen * There are two different lists of sleep queues.  Both lists are connected
104126324Sjhb * via the sq_hash entries.  The first list is the sleep queue chain list
105126324Sjhb * that a sleep queue is on when it is attached to a wait channel.  The
106126324Sjhb * second list is the free list hung off of a sleep queue that is attached
107126324Sjhb * to a wait channel.
108126324Sjhb *
109126324Sjhb * Each sleep queue also contains the wait channel it is attached to, the
110126324Sjhb * list of threads blocked on that wait channel, flags specific to the
111126324Sjhb * wait channel, and the lock used to synchronize with a wait channel.
112126324Sjhb * The flags are used to catch mismatches between the various consumers
113126324Sjhb * of the sleep queue API (e.g. sleep/wakeup and condition variables).
114126324Sjhb * The lock pointer is only used when invariants are enabled for various
115126324Sjhb * debugging checks.
116126324Sjhb *
117126324Sjhb * Locking key:
118126324Sjhb *  c - sleep queue chain lock
119126324Sjhb */
120126324Sjhbstruct sleepqueue {
121354405Smav	struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
122200447Sattilio	u_int sq_blockedcnt[NR_SLEEPQS];	/* (c) N. of blocked threads. */
123126324Sjhb	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
124126324Sjhb	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
125126324Sjhb	void	*sq_wchan;			/* (c) Wait channel. */
126201879Sattilio	int	sq_type;			/* (c) Queue type. */
127136445Sjhb#ifdef INVARIANTS
128164325Spjd	struct lock_object *sq_lock;		/* (c) Associated lock. */
129126324Sjhb#endif
130126324Sjhb};
131126324Sjhb
132126324Sjhbstruct sleepqueue_chain {
133126324Sjhb	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
134126324Sjhb	struct mtx sc_lock;			/* Spin lock for this chain. */
135131259Sjhb#ifdef SLEEPQUEUE_PROFILING
136131259Sjhb	u_int	sc_depth;			/* Length of sc_queues. */
137131259Sjhb	u_int	sc_max_depth;			/* Max length of sc_queues. */
138131259Sjhb#endif
139126324Sjhb};
140126324Sjhb
141131259Sjhb#ifdef SLEEPQUEUE_PROFILING
142131259Sjhbu_int sleepq_max_depth;
143227309Sedstatic SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
144227309Sedstatic SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
145131259Sjhb    "sleepq chain stats");
146131259SjhbSYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
147131259Sjhb    0, "maxmimum depth achieved of a single chain");
148177372Sjeff
149177372Sjeffstatic void	sleepq_profile(const char *wmesg);
150177372Sjeffstatic int	prof_enabled;
151131259Sjhb#endif
152126324Sjhbstatic struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
153169666Sjeffstatic uma_zone_t sleepq_zone;
154126324Sjhb
155126324Sjhb/*
156126324Sjhb * Prototypes for non-exported routines.
157126324Sjhb */
158177085Sjeffstatic int	sleepq_catch_signals(void *wchan, int pri);
159165272Skmacystatic int	sleepq_check_signals(void);
160277528Shselaskystatic int	sleepq_check_timeout(void);
161169666Sjeff#ifdef INVARIANTS
162169666Sjeffstatic void	sleepq_dtor(void *mem, int size, void *arg);
163169666Sjeff#endif
164169666Sjeffstatic int	sleepq_init(void *mem, int size, int flags);
165181334Sjhbstatic int	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
166169666Sjeff		    int pri);
167177085Sjeffstatic void	sleepq_switch(void *wchan, int pri);
168126324Sjhbstatic void	sleepq_timeout(void *arg);
169126324Sjhb
170235459SrstoneSDT_PROBE_DECLARE(sched, , , sleep);
171235459SrstoneSDT_PROBE_DECLARE(sched, , , wakeup);
172235459Srstone
/*
 * Create the SLEEPQUEUE_PROFILING sysctl nodes: one child per chain under
 * the debug.sleepq.chains node, each exposing that chain's "depth" and
 * "max_depth" counters read-only.
 *
 * This has to run after sleepinit() has completed, i.e. after the
 * SI_SUB_KMEM SYSINIT() stage.
 */
#ifdef SLEEPQUEUE_PROFILING
static void
init_sleepqueue_profiling(void)
{
	struct sleepqueue_chain *sc;
	struct sysctl_oid *node;
	char name[10];
	u_int idx;

	for (idx = 0; idx < SC_TABLESIZE; idx++) {
		sc = &sleepq_chains[idx];

		/* The node is named after the chain's table index. */
		snprintf(name, sizeof(name), "%u", idx);
		node = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "sleepq chain stats");
		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
		    "depth", CTLFLAG_RD, &sc->sc_depth, 0, NULL);
		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
		    "max_depth", CTLFLAG_RD, &sc->sc_max_depth, 0, NULL);
	}
}

SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
    init_sleepqueue_profiling, NULL);
#endif
202267820Sattilio
203267820Sattilio/*
204267820Sattilio * Early initialization of sleep queues that is called from the sleepinit()
205267820Sattilio * SYSINIT.
206267820Sattilio */
207267820Sattiliovoid
208267820Sattilioinit_sleepqueues(void)
209267820Sattilio{
210267820Sattilio	int i;
211267820Sattilio
212267820Sattilio	for (i = 0; i < SC_TABLESIZE; i++) {
213267820Sattilio		LIST_INIT(&sleepq_chains[i].sc_queues);
214267820Sattilio		mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
215267820Sattilio		    MTX_SPIN | MTX_RECURSE);
216126324Sjhb	}
217169666Sjeff	sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
218169666Sjeff#ifdef INVARIANTS
219169666Sjeff	    NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
220169666Sjeff#else
221169666Sjeff	    NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
222169666Sjeff#endif
223315027Svangyzen
224126324Sjhb	thread0.td_sleepqueue = sleepq_alloc();
225126324Sjhb}
226126324Sjhb
227126324Sjhb/*
228169666Sjeff * Get a sleep queue for a new thread.
229126324Sjhb */
230126324Sjhbstruct sleepqueue *
231126324Sjhbsleepq_alloc(void)
232126324Sjhb{
233126324Sjhb
234169666Sjeff	return (uma_zalloc(sleepq_zone, M_WAITOK));
235126324Sjhb}
236126324Sjhb
237126324Sjhb/*
238126324Sjhb * Free a sleep queue when a thread is destroyed.
239126324Sjhb */
240126324Sjhbvoid
241126324Sjhbsleepq_free(struct sleepqueue *sq)
242126324Sjhb{
243126324Sjhb
244169666Sjeff	uma_zfree(sleepq_zone, sq);
245126324Sjhb}
246126324Sjhb
247126324Sjhb/*
248136445Sjhb * Lock the sleep queue chain associated with the specified wait channel.
249136445Sjhb */
250136445Sjhbvoid
251136445Sjhbsleepq_lock(void *wchan)
252136445Sjhb{
253136445Sjhb	struct sleepqueue_chain *sc;
254136445Sjhb
255136445Sjhb	sc = SC_LOOKUP(wchan);
256136445Sjhb	mtx_lock_spin(&sc->sc_lock);
257136445Sjhb}
258136445Sjhb
259136445Sjhb/*
260126324Sjhb * Look up the sleep queue associated with a given wait channel in the hash
261136445Sjhb * table locking the associated sleep queue chain.  If no queue is found in
262136445Sjhb * the table, NULL is returned.
263126324Sjhb */
264126324Sjhbstruct sleepqueue *
265126324Sjhbsleepq_lookup(void *wchan)
266126324Sjhb{
267126324Sjhb	struct sleepqueue_chain *sc;
268126324Sjhb	struct sleepqueue *sq;
269126324Sjhb
270126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
271126324Sjhb	sc = SC_LOOKUP(wchan);
272136445Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
273126324Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
274126324Sjhb		if (sq->sq_wchan == wchan)
275126324Sjhb			return (sq);
276126324Sjhb	return (NULL);
277126324Sjhb}
278126324Sjhb
279126324Sjhb/*
280126324Sjhb * Unlock the sleep queue chain associated with a given wait channel.
281126324Sjhb */
282126324Sjhbvoid
283126324Sjhbsleepq_release(void *wchan)
284126324Sjhb{
285126324Sjhb	struct sleepqueue_chain *sc;
286126324Sjhb
287126324Sjhb	sc = SC_LOOKUP(wchan);
288126324Sjhb	mtx_unlock_spin(&sc->sc_lock);
289126324Sjhb}
290126324Sjhb
291126324Sjhb/*
292137277Sjhb * Places the current thread on the sleep queue for the specified wait
293126324Sjhb * channel.  If INVARIANTS is enabled, then it associates the passed in
294126324Sjhb * lock with the sleepq to make sure it is held when that sleep queue is
295126324Sjhb * woken up.
296126324Sjhb */
297126324Sjhbvoid
298165272Skmacysleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
299165272Skmacy    int queue)
300126324Sjhb{
301126324Sjhb	struct sleepqueue_chain *sc;
302136445Sjhb	struct sleepqueue *sq;
303137277Sjhb	struct thread *td;
304126324Sjhb
305126324Sjhb	td = curthread;
306126324Sjhb	sc = SC_LOOKUP(wchan);
307126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
308126324Sjhb	MPASS(td->td_sleepqueue != NULL);
309126324Sjhb	MPASS(wchan != NULL);
310165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
311126324Sjhb
312150177Sjhb	/* If this thread is not allowed to sleep, die a horrible death. */
313247588Sjhb	KASSERT(td->td_no_sleeping == 0,
314247588Sjhb	    ("%s: td %p to sleep on wchan %p with sleeping prohibited",
315240423Sattilio	    __func__, td, wchan));
316150177Sjhb
317136445Sjhb	/* Look up the sleep queue associated with the wait channel 'wchan'. */
318136445Sjhb	sq = sleepq_lookup(wchan);
319136445Sjhb
320136445Sjhb	/*
321136445Sjhb	 * If the wait channel does not already have a sleep queue, use
322136445Sjhb	 * this thread's sleep queue.  Otherwise, insert the current thread
323136445Sjhb	 * into the sleep queue already in use by this wait channel.
324136445Sjhb	 */
325126324Sjhb	if (sq == NULL) {
326165272Skmacy#ifdef INVARIANTS
327165292Skmacy		int i;
328165291Sache
329165292Skmacy		sq = td->td_sleepqueue;
330200447Sattilio		for (i = 0; i < NR_SLEEPQS; i++) {
331165292Skmacy			KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
332200447Sattilio			    ("thread's sleep queue %d is not empty", i));
333200447Sattilio			KASSERT(sq->sq_blockedcnt[i] == 0,
334200447Sattilio			    ("thread's sleep queue %d count mismatches", i));
335200447Sattilio		}
336165272Skmacy		KASSERT(LIST_EMPTY(&sq->sq_free),
337165272Skmacy		    ("thread's sleep queue has a non-empty free list"));
338165272Skmacy		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
339165292Skmacy		sq->sq_lock = lock;
340165272Skmacy#endif
341131259Sjhb#ifdef SLEEPQUEUE_PROFILING
342131259Sjhb		sc->sc_depth++;
343131259Sjhb		if (sc->sc_depth > sc->sc_max_depth) {
344131259Sjhb			sc->sc_max_depth = sc->sc_depth;
345131259Sjhb			if (sc->sc_max_depth > sleepq_max_depth)
346131259Sjhb				sleepq_max_depth = sc->sc_max_depth;
347131259Sjhb		}
348131259Sjhb#endif
349165292Skmacy		sq = td->td_sleepqueue;
350126324Sjhb		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
351126324Sjhb		sq->sq_wchan = wchan;
352201879Sattilio		sq->sq_type = flags & SLEEPQ_TYPE;
353126324Sjhb	} else {
354126324Sjhb		MPASS(wchan == sq->sq_wchan);
355126488Sjhb		MPASS(lock == sq->sq_lock);
356136445Sjhb		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
357126324Sjhb		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
358126324Sjhb	}
359172155Sattilio	thread_lock(td);
360165272Skmacy	TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
361200447Sattilio	sq->sq_blockedcnt[queue]++;
362126324Sjhb	td->td_sleepqueue = NULL;
363165272Skmacy	td->td_sqqueue = queue;
364126324Sjhb	td->td_wchan = wchan;
365126324Sjhb	td->td_wmesg = wmesg;
366155741Sdavidxu	if (flags & SLEEPQ_INTERRUPTIBLE) {
367134013Sjhb		td->td_flags |= TDF_SINTR;
368155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
369155741Sdavidxu	}
370172155Sattilio	thread_unlock(td);
371126324Sjhb}
372126324Sjhb
373126324Sjhb/*
374126324Sjhb * Sets a timeout that will remove the current thread from the specified
375126324Sjhb * sleep queue after timo ticks if the thread has not already been awakened.
376126324Sjhb */
377126324Sjhbvoid
378247783Sdavidesleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
379247783Sdavide    int flags)
380126324Sjhb{
381277528Shselasky	struct sleepqueue_chain *sc;
382126324Sjhb	struct thread *td;
383304883Skib	sbintime_t pr1;
384126324Sjhb
385126324Sjhb	td = curthread;
386277528Shselasky	sc = SC_LOOKUP(wchan);
387277528Shselasky	mtx_assert(&sc->sc_lock, MA_OWNED);
388277528Shselasky	MPASS(TD_ON_SLEEPQ(td));
389277528Shselasky	MPASS(td->td_sleepqueue == NULL);
390277528Shselasky	MPASS(wchan != NULL);
391310439Sjhb	if (cold && td == &thread0)
392297466Sjhb		panic("timed sleep before timers are working");
393304883Skib	KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
394304883Skib	    td->td_tid, td, (uintmax_t)td->td_sleeptimo));
395304883Skib	thread_lock(td);
396304883Skib	callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
397304883Skib	thread_unlock(td);
398304883Skib	callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
399304883Skib	    sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
400304883Skib	    C_DIRECT_EXEC);
401126324Sjhb}
402126324Sjhb
403126324Sjhb/*
404200447Sattilio * Return the number of actual sleepers for the specified queue.
405200447Sattilio */
406200447Sattiliou_int
407200447Sattiliosleepq_sleepcnt(void *wchan, int queue)
408200447Sattilio{
409200447Sattilio	struct sleepqueue *sq;
410200447Sattilio
411200447Sattilio	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
412200447Sattilio	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
413200447Sattilio	sq = sleepq_lookup(wchan);
414200447Sattilio	if (sq == NULL)
415200447Sattilio		return (0);
416200447Sattilio	return (sq->sq_blockedcnt[queue]);
417200447Sattilio}
418200447Sattilio
419200447Sattilio/*
420126324Sjhb * Marks the pending sleep of the current thread as interruptible and
421126324Sjhb * makes an initial check for pending signals before putting a thread
422170294Sjeff * to sleep. Enters and exits with the thread lock held.  Thread lock
423170294Sjeff * may have transitioned from the sleepq lock to a run lock.
424126324Sjhb */
425155741Sdavidxustatic int
426177085Sjeffsleepq_catch_signals(void *wchan, int pri)
427126324Sjhb{
428126324Sjhb	struct sleepqueue_chain *sc;
429126324Sjhb	struct sleepqueue *sq;
430126324Sjhb	struct thread *td;
431126324Sjhb	struct proc *p;
432155741Sdavidxu	struct sigacts *ps;
433248470Sjhb	int sig, ret;
434126324Sjhb
435314719Sbadger	ret = 0;
436126324Sjhb	td = curthread;
437155741Sdavidxu	p = curproc;
438126324Sjhb	sc = SC_LOOKUP(wchan);
439126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
440126324Sjhb	MPASS(wchan != NULL);
441211523Sdavidxu	if ((td->td_pflags & TDP_WAKEUP) != 0) {
442211523Sdavidxu		td->td_pflags &= ~TDP_WAKEUP;
443211523Sdavidxu		ret = EINTR;
444211534Sdavidxu		thread_lock(td);
445211523Sdavidxu		goto out;
446211523Sdavidxu	}
447211523Sdavidxu
448177375Sjeff	/*
449314719Sbadger	 * See if there are any pending signals or suspension requests for this
450314719Sbadger	 * thread.  If not, we can switch immediately.
451177375Sjeff	 */
452177375Sjeff	thread_lock(td);
453314719Sbadger	if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) {
454314719Sbadger		thread_unlock(td);
455314719Sbadger		mtx_unlock_spin(&sc->sc_lock);
456314719Sbadger		CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
457314719Sbadger			(void *)td, (long)p->p_pid, td->td_name);
458314719Sbadger		PROC_LOCK(p);
459314719Sbadger		/*
460314719Sbadger		 * Check for suspension first. Checking for signals and then
461314719Sbadger		 * suspending could result in a missed signal, since a signal
462314719Sbadger		 * can be delivered while this thread is suspended.
463314719Sbadger		 */
464314719Sbadger		if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
465314719Sbadger			ret = thread_suspend_check(1);
466314719Sbadger			MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
467314719Sbadger			if (ret != 0) {
468314719Sbadger				PROC_UNLOCK(p);
469314719Sbadger				mtx_lock_spin(&sc->sc_lock);
470314719Sbadger				thread_lock(td);
471314719Sbadger				goto out;
472314719Sbadger			}
473314719Sbadger		}
474314719Sbadger		if ((td->td_flags & TDF_NEEDSIGCHK) != 0) {
475314719Sbadger			ps = p->p_sigacts;
476314719Sbadger			mtx_lock(&ps->ps_mtx);
477314719Sbadger			sig = cursig(td);
478314719Sbadger			if (sig == -1) {
479314719Sbadger				mtx_unlock(&ps->ps_mtx);
480314719Sbadger				KASSERT((td->td_flags & TDF_SBDRY) != 0,
481314719Sbadger				    ("lost TDF_SBDRY"));
482314719Sbadger				KASSERT(TD_SBDRY_INTR(td),
483314719Sbadger				    ("lost TDF_SERESTART of TDF_SEINTR"));
484314719Sbadger				KASSERT((td->td_flags &
485314719Sbadger				    (TDF_SEINTR | TDF_SERESTART)) !=
486314719Sbadger				    (TDF_SEINTR | TDF_SERESTART),
487314719Sbadger				    ("both TDF_SEINTR and TDF_SERESTART"));
488314719Sbadger				ret = TD_SBDRY_ERRNO(td);
489314719Sbadger			} else if (sig != 0) {
490314719Sbadger				ret = SIGISMEMBER(ps->ps_sigintr, sig) ?
491314719Sbadger				    EINTR : ERESTART;
492314719Sbadger				mtx_unlock(&ps->ps_mtx);
493314719Sbadger			} else {
494314719Sbadger				mtx_unlock(&ps->ps_mtx);
495314719Sbadger			}
496350357Skib
497350357Skib			/*
498350357Skib			 * Do not go into sleep if this thread was the
499350357Skib			 * ptrace(2) attach leader.  cursig() consumed
500350357Skib			 * SIGSTOP from PT_ATTACH, but we usually act
501350357Skib			 * on the signal by interrupting sleep, and
502350357Skib			 * should do that here as well.
503350357Skib			 */
504350357Skib			if ((td->td_dbgflags & TDB_FSTP) != 0) {
505350357Skib				if (ret == 0)
506350357Skib					ret = EINTR;
507350357Skib				td->td_dbgflags &= ~TDB_FSTP;
508350357Skib			}
509314719Sbadger		}
510314719Sbadger		/*
511314719Sbadger		 * Lock the per-process spinlock prior to dropping the PROC_LOCK
512314719Sbadger		 * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
513314719Sbadger		 * thread_lock() are currently held in tdsendsignal().
514314719Sbadger		 */
515314719Sbadger		PROC_SLOCK(p);
516314719Sbadger		mtx_lock_spin(&sc->sc_lock);
517314719Sbadger		PROC_UNLOCK(p);
518314719Sbadger		thread_lock(td);
519314719Sbadger		PROC_SUNLOCK(p);
520177375Sjeff	}
521185502Sdavidxu	if (ret == 0) {
522185502Sdavidxu		sleepq_switch(wchan, pri);
523185502Sdavidxu		return (0);
524185502Sdavidxu	}
525211523Sdavidxuout:
526155936Sdavidxu	/*
527155936Sdavidxu	 * There were pending signals and this thread is still
528155936Sdavidxu	 * on the sleep queue, remove it from the sleep queue.
529155936Sdavidxu	 */
530170294Sjeff	if (TD_ON_SLEEPQ(td)) {
531170294Sjeff		sq = sleepq_lookup(wchan);
532181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
533181334Sjhb#ifdef INVARIANTS
534181334Sjhb			/*
535181334Sjhb			 * This thread hasn't gone to sleep yet, so it
536181334Sjhb			 * should not be swapped out.
537181334Sjhb			 */
538181334Sjhb			panic("not waking up swapper");
539181334Sjhb#endif
540181334Sjhb		}
541170294Sjeff	}
542170294Sjeff	mtx_unlock_spin(&sc->sc_lock);
543170294Sjeff	MPASS(td->td_lock != &sc->sc_lock);
544155741Sdavidxu	return (ret);
545126324Sjhb}
546126324Sjhb
547126324Sjhb/*
548170294Sjeff * Switches to another thread if we are still asleep on a sleep queue.
549170294Sjeff * Returns with thread lock.
550126324Sjhb */
551126324Sjhbstatic void
552177085Sjeffsleepq_switch(void *wchan, int pri)
553126324Sjhb{
554126324Sjhb	struct sleepqueue_chain *sc;
555175654Sjhb	struct sleepqueue *sq;
556126324Sjhb	struct thread *td;
557316120Svangyzen	bool rtc_changed;
558126324Sjhb
559126324Sjhb	td = curthread;
560126324Sjhb	sc = SC_LOOKUP(wchan);
561126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
562170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
563175654Sjhb
564315027Svangyzen	/*
565175654Sjhb	 * If we have a sleep queue, then we've already been woken up, so
566175654Sjhb	 * just return.
567175654Sjhb	 */
568126324Sjhb	if (td->td_sleepqueue != NULL) {
569126324Sjhb		mtx_unlock_spin(&sc->sc_lock);
570126324Sjhb		return;
571126324Sjhb	}
572175654Sjhb
573175654Sjhb	/*
574175654Sjhb	 * If TDF_TIMEOUT is set, then our sleep has been timed out
575175654Sjhb	 * already but we are still on the sleep queue, so dequeue the
576175654Sjhb	 * thread and return.
577316120Svangyzen	 *
578316120Svangyzen	 * Do the same if the real-time clock has been adjusted since this
579316120Svangyzen	 * thread calculated its timeout based on that clock.  This handles
580316120Svangyzen	 * the following race:
581316120Svangyzen	 * - The Ts thread needs to sleep until an absolute real-clock time.
582316120Svangyzen	 *   It copies the global rtc_generation into curthread->td_rtcgen,
583316120Svangyzen	 *   reads the RTC, and calculates a sleep duration based on that time.
584316120Svangyzen	 *   See umtxq_sleep() for an example.
585316120Svangyzen	 * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
586316120Svangyzen	 *   threads that are sleeping until an absolute real-clock time.
587316120Svangyzen	 *   See tc_setclock() and the POSIX specification of clock_settime().
588316120Svangyzen	 * - Ts reaches the code below.  It holds the sleepqueue chain lock,
589316120Svangyzen	 *   so Tc has finished waking, so this thread must test td_rtcgen.
590316120Svangyzen	 * (The declaration of td_rtcgen refers to this comment.)
591175654Sjhb	 */
592316120Svangyzen	rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
593316120Svangyzen	if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
594316120Svangyzen		if (rtc_changed) {
595316120Svangyzen			td->td_rtcgen = 0;
596316120Svangyzen		}
597175654Sjhb		MPASS(TD_ON_SLEEPQ(td));
598175654Sjhb		sq = sleepq_lookup(wchan);
599181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
600181334Sjhb#ifdef INVARIANTS
601181334Sjhb			/*
602181334Sjhb			 * This thread hasn't gone to sleep yet, so it
603181334Sjhb			 * should not be swapped out.
604181334Sjhb			 */
605181334Sjhb			panic("not waking up swapper");
606181334Sjhb#endif
607181334Sjhb		}
608175654Sjhb		mtx_unlock_spin(&sc->sc_lock);
609315027Svangyzen		return;
610175654Sjhb	}
611177372Sjeff#ifdef SLEEPQUEUE_PROFILING
612177372Sjeff	if (prof_enabled)
613177372Sjeff		sleepq_profile(td->td_wmesg);
614177372Sjeff#endif
615177085Sjeff	MPASS(td->td_sleepqueue == NULL);
616177085Sjeff	sched_sleep(td, pri);
617170294Sjeff	thread_lock_set(td, &sc->sc_lock);
618235459Srstone	SDT_PROBE0(sched, , , sleep);
619126324Sjhb	TD_SET_SLEEPING(td);
620178272Sjeff	mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
621126324Sjhb	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
622129241Sbde	CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
623173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
624126324Sjhb}
625126324Sjhb
626126324Sjhb/*
627126324Sjhb * Check to see if we timed out.
628126324Sjhb */
629126324Sjhbstatic int
630277528Shselaskysleepq_check_timeout(void)
631126324Sjhb{
632277528Shselasky	struct thread *td;
633304883Skib	int res;
634277528Shselasky
635277528Shselasky	td = curthread;
636170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
637126324Sjhb
638126324Sjhb	/*
639304883Skib	 * If TDF_TIMEOUT is set, we timed out.  But recheck
640304883Skib	 * td_sleeptimo anyway.
641126324Sjhb	 */
642304883Skib	res = 0;
643304883Skib	if (td->td_sleeptimo != 0) {
644304883Skib		if (td->td_sleeptimo <= sbinuptime())
645304883Skib			res = EWOULDBLOCK;
646304883Skib		td->td_sleeptimo = 0;
647304883Skib	}
648304883Skib	if (td->td_flags & TDF_TIMEOUT)
649126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
650304883Skib	else
651304883Skib		/*
652304883Skib		 * We ignore the situation where timeout subsystem was
653304883Skib		 * unable to stop our callout.  The struct thread is
654304883Skib		 * type-stable, the callout will use the correct
655304883Skib		 * memory when running.  The checks of the
656304883Skib		 * td_sleeptimo value in this function and in
657304883Skib		 * sleepq_timeout() ensure that the thread does not
658304883Skib		 * get spurious wakeups, even if the callout was reset
659304883Skib		 * or thread reused.
660304883Skib		 */
661304883Skib		callout_stop(&td->td_slpcallout);
662304883Skib	return (res);
663126324Sjhb}
664126324Sjhb
665126324Sjhb/*
666126324Sjhb * Check to see if we were awoken by a signal.
667126324Sjhb */
668126324Sjhbstatic int
669126324Sjhbsleepq_check_signals(void)
670126324Sjhb{
671126324Sjhb	struct thread *td;
672126324Sjhb
673126324Sjhb	td = curthread;
674170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
675126324Sjhb
676126324Sjhb	/* We are no longer in an interruptible sleep. */
677155741Sdavidxu	if (td->td_flags & TDF_SINTR)
678246417Sjhb		td->td_flags &= ~TDF_SINTR;
679126324Sjhb
680155741Sdavidxu	if (td->td_flags & TDF_SLEEPABORT) {
681155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
682155741Sdavidxu		return (td->td_intrval);
683155741Sdavidxu	}
684155741Sdavidxu
685126324Sjhb	return (0);
686126324Sjhb}
687126324Sjhb
688126324Sjhb/*
689126324Sjhb * Block the current thread until it is awakened from its sleep queue.
690126324Sjhb */
691126324Sjhbvoid
692177085Sjeffsleepq_wait(void *wchan, int pri)
693126324Sjhb{
694170294Sjeff	struct thread *td;
695126324Sjhb
696170294Sjeff	td = curthread;
697170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
698170294Sjeff	thread_lock(td);
699177085Sjeff	sleepq_switch(wchan, pri);
700170294Sjeff	thread_unlock(td);
701126324Sjhb}
702126324Sjhb
703126324Sjhb/*
704126324Sjhb * Block the current thread until it is awakened from its sleep queue
705126324Sjhb * or it is interrupted by a signal.
706126324Sjhb */
707126324Sjhbint
708177085Sjeffsleepq_wait_sig(void *wchan, int pri)
709126324Sjhb{
710155741Sdavidxu	int rcatch;
711126324Sjhb	int rval;
712126324Sjhb
713177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
714126324Sjhb	rval = sleepq_check_signals();
715170294Sjeff	thread_unlock(curthread);
716155741Sdavidxu	if (rcatch)
717155741Sdavidxu		return (rcatch);
718126324Sjhb	return (rval);
719126324Sjhb}
720126324Sjhb
721126324Sjhb/*
722126324Sjhb * Block the current thread until it is awakened from its sleep queue
723126324Sjhb * or it times out while waiting.
724126324Sjhb */
725126324Sjhbint
726177085Sjeffsleepq_timedwait(void *wchan, int pri)
727126324Sjhb{
728170294Sjeff	struct thread *td;
729126324Sjhb	int rval;
730126324Sjhb
731170294Sjeff	td = curthread;
732170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
733170294Sjeff	thread_lock(td);
734177085Sjeff	sleepq_switch(wchan, pri);
735277528Shselasky	rval = sleepq_check_timeout();
736170294Sjeff	thread_unlock(td);
737170294Sjeff
738131249Sjhb	return (rval);
739126324Sjhb}
740126324Sjhb
741126324Sjhb/*
742126324Sjhb * Block the current thread until it is awakened from its sleep queue,
743126324Sjhb * it is interrupted by a signal, or it times out waiting to be awakened.
744126324Sjhb */
745126324Sjhbint
746177085Sjeffsleepq_timedwait_sig(void *wchan, int pri)
747126324Sjhb{
748155741Sdavidxu	int rcatch, rvalt, rvals;
749126324Sjhb
750177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
751277528Shselasky	rvalt = sleepq_check_timeout();
752126324Sjhb	rvals = sleepq_check_signals();
753277528Shselasky	thread_unlock(curthread);
754155741Sdavidxu	if (rcatch)
755155741Sdavidxu		return (rcatch);
756155741Sdavidxu	if (rvals)
757126324Sjhb		return (rvals);
758155741Sdavidxu	return (rvalt);
759126324Sjhb}
760126324Sjhb
761126324Sjhb/*
762201879Sattilio * Returns the type of sleepqueue given a waitchannel.
763201879Sattilio */
764201879Sattilioint
765201879Sattiliosleepq_type(void *wchan)
766201879Sattilio{
767201879Sattilio	struct sleepqueue *sq;
768201879Sattilio	int type;
769201879Sattilio
770201879Sattilio	MPASS(wchan != NULL);
771201879Sattilio
772201879Sattilio	sleepq_lock(wchan);
773201879Sattilio	sq = sleepq_lookup(wchan);
774201879Sattilio	if (sq == NULL) {
775201879Sattilio		sleepq_release(wchan);
776201879Sattilio		return (-1);
777201879Sattilio	}
778201879Sattilio	type = sq->sq_type;
779201879Sattilio	sleepq_release(wchan);
780201879Sattilio	return (type);
781201879Sattilio}
782201879Sattilio
783201879Sattilio/*
784145056Sjhb * Removes a thread from a sleep queue and makes it
785145056Sjhb * runnable.
786126324Sjhb */
787181334Sjhbstatic int
788145056Sjhbsleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
789126324Sjhb{
790126324Sjhb	struct sleepqueue_chain *sc;
791126324Sjhb
792126324Sjhb	MPASS(td != NULL);
793126324Sjhb	MPASS(sq->sq_wchan != NULL);
794126324Sjhb	MPASS(td->td_wchan == sq->sq_wchan);
795165272Skmacy	MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
796170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
797126324Sjhb	sc = SC_LOOKUP(sq->sq_wchan);
798126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
799126324Sjhb
800235459Srstone	SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
801235459Srstone
802126324Sjhb	/* Remove the thread from the queue. */
803200447Sattilio	sq->sq_blockedcnt[td->td_sqqueue]--;
804165272Skmacy	TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
805126324Sjhb
806126324Sjhb	/*
807126324Sjhb	 * Get a sleep queue for this thread.  If this is the last waiter,
808126324Sjhb	 * use the queue itself and take it out of the chain, otherwise,
809126324Sjhb	 * remove a queue from the free list.
810126324Sjhb	 */
811126324Sjhb	if (LIST_EMPTY(&sq->sq_free)) {
812126324Sjhb		td->td_sleepqueue = sq;
813126324Sjhb#ifdef INVARIANTS
814126324Sjhb		sq->sq_wchan = NULL;
815126324Sjhb#endif
816131259Sjhb#ifdef SLEEPQUEUE_PROFILING
817131259Sjhb		sc->sc_depth--;
818131259Sjhb#endif
819126324Sjhb	} else
820126324Sjhb		td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
821126324Sjhb	LIST_REMOVE(td->td_sleepqueue, sq_hash);
822126324Sjhb
823129188Sjhb	td->td_wmesg = NULL;
824129188Sjhb	td->td_wchan = NULL;
825246417Sjhb	td->td_flags &= ~TDF_SINTR;
826129188Sjhb
827129241Sbde	CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
828173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, td->td_name);
829126324Sjhb
830126324Sjhb	/* Adjust priority if requested. */
831177085Sjeff	MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
832217410Sjhb	if (pri != 0 && td->td_priority > pri &&
833217410Sjhb	    PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
834136439Sups		sched_prio(td, pri);
835184653Sjhb
836184653Sjhb	/*
837184653Sjhb	 * Note that thread td might not be sleeping if it is running
838184653Sjhb	 * sleepq_catch_signals() on another CPU or is blocked on its
839184653Sjhb	 * proc lock to check signals.  There's no need to mark the
840184653Sjhb	 * thread runnable in that case.
841184653Sjhb	 */
842184653Sjhb	if (TD_IS_SLEEPING(td)) {
843184653Sjhb		TD_CLR_SLEEPING(td);
844184653Sjhb		return (setrunnable(td));
845184653Sjhb	}
846184653Sjhb	return (0);
847126324Sjhb}
848126324Sjhb
#ifdef INVARIANTS
/*
 * UMA zone item destructor (INVARIANTS kernels only).
 *
 * Asserts that a sleep queue being released has no blocked threads on
 * any of its queues and that every blocked-thread counter is zero.
 * size and arg are not used.
 */
static void
sleepq_dtor(void *mem, int size, void *arg)
{
	struct sleepqueue *sq = mem;
	int i;

	i = 0;
	while (i < NR_SLEEPQS) {
		MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
		MPASS(sq->sq_blockedcnt[i] == 0);
		i++;
	}
}
#endif
866169666Sjeff
867169666Sjeff/*
868169666Sjeff * UMA zone item initializer.
869169666Sjeff */
870169666Sjeffstatic int
871169666Sjeffsleepq_init(void *mem, int size, int flags)
872169666Sjeff{
873169666Sjeff	struct sleepqueue *sq;
874169666Sjeff	int i;
875169666Sjeff
876169666Sjeff	bzero(mem, size);
877169666Sjeff	sq = mem;
878200447Sattilio	for (i = 0; i < NR_SLEEPQS; i++) {
879169666Sjeff		TAILQ_INIT(&sq->sq_blocked[i]);
880200447Sattilio		sq->sq_blockedcnt[i] = 0;
881200447Sattilio	}
882169666Sjeff	LIST_INIT(&sq->sq_free);
883169666Sjeff	return (0);
884169666Sjeff}
885169666Sjeff
886169666Sjeff/*
887354405Smav * Find thread sleeping on a wait channel and resume it.
888126324Sjhb */
889181334Sjhbint
890165272Skmacysleepq_signal(void *wchan, int flags, int pri, int queue)
891126324Sjhb{
892354405Smav	struct sleepqueue_chain *sc;
893126324Sjhb	struct sleepqueue *sq;
894354405Smav	struct threadqueue *head;
895137277Sjhb	struct thread *td, *besttd;
896181334Sjhb	int wakeup_swapper;
897126324Sjhb
898126324Sjhb	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
899126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
900165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
901126324Sjhb	sq = sleepq_lookup(wchan);
902170294Sjeff	if (sq == NULL)
903181334Sjhb		return (0);
904134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
905126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
906129188Sjhb
907354405Smav	head = &sq->sq_blocked[queue];
908354405Smav	if (flags & SLEEPQ_UNFAIR) {
909354405Smav		/*
910354405Smav		 * Find the most recently sleeping thread, but try to
911354405Smav		 * skip threads still in process of context switch to
912354405Smav		 * avoid spinning on the thread lock.
913354405Smav		 */
914354405Smav		sc = SC_LOOKUP(wchan);
915354405Smav		besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
916354405Smav		while (besttd->td_lock != &sc->sc_lock) {
917354405Smav			td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
918354405Smav			if (td == NULL)
919354405Smav				break;
920137277Sjhb			besttd = td;
921354405Smav		}
922354405Smav	} else {
923354405Smav		/*
924354405Smav		 * Find the highest priority thread on the queue.  If there
925354405Smav		 * is a tie, use the thread that first appears in the queue
926354405Smav		 * as it has been sleeping the longest since threads are
927354405Smav		 * always added to the tail of sleep queues.
928354405Smav		 */
929354405Smav		besttd = td = TAILQ_FIRST(head);
930354405Smav		while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
931354405Smav			if (td->td_priority < besttd->td_priority)
932354405Smav				besttd = td;
933354405Smav		}
934137277Sjhb	}
935137277Sjhb	MPASS(besttd != NULL);
936170294Sjeff	thread_lock(besttd);
937181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
938170294Sjeff	thread_unlock(besttd);
939181334Sjhb	return (wakeup_swapper);
940126324Sjhb}
941126324Sjhb
942316120Svangyzenstatic bool
943316120Svangyzenmatch_any(struct thread *td __unused)
944316120Svangyzen{
945316120Svangyzen
946316120Svangyzen	return (true);
947316120Svangyzen}
948316120Svangyzen
949126324Sjhb/*
950126324Sjhb * Resume all threads sleeping on a specified wait channel.
951126324Sjhb */
952181334Sjhbint
953165272Skmacysleepq_broadcast(void *wchan, int flags, int pri, int queue)
954126324Sjhb{
955126324Sjhb	struct sleepqueue *sq;
956126324Sjhb
957126324Sjhb	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
958126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
959165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
960126324Sjhb	sq = sleepq_lookup(wchan);
961177085Sjeff	if (sq == NULL)
962181334Sjhb		return (0);
963134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
964126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
965129188Sjhb
966316120Svangyzen	return (sleepq_remove_matching(sq, queue, match_any, pri));
967316120Svangyzen}
968316120Svangyzen
969316120Svangyzen/*
970316120Svangyzen * Resume threads on the sleep queue that match the given predicate.
971316120Svangyzen */
972316120Svangyzenint
973316120Svangyzensleepq_remove_matching(struct sleepqueue *sq, int queue,
974316120Svangyzen    bool (*matches)(struct thread *), int pri)
975316120Svangyzen{
976316120Svangyzen	struct thread *td, *tdn;
977316120Svangyzen	int wakeup_swapper;
978316120Svangyzen
979310531Smarkj	/*
980316120Svangyzen	 * The last thread will be given ownership of sq and may
981316120Svangyzen	 * re-enqueue itself before sleepq_resume_thread() returns,
982316120Svangyzen	 * so we must cache the "next" queue item at the beginning
983316120Svangyzen	 * of the final iteration.
984310531Smarkj	 */
985181334Sjhb	wakeup_swapper = 0;
986310531Smarkj	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
987170294Sjeff		thread_lock(td);
988316120Svangyzen		if (matches(td))
989316120Svangyzen			wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
990170294Sjeff		thread_unlock(td);
991170294Sjeff	}
992316120Svangyzen
993181334Sjhb	return (wakeup_swapper);
994126324Sjhb}
995126324Sjhb
996126324Sjhb/*
997126324Sjhb * Time sleeping threads out.  When the timeout expires, the thread is
998126324Sjhb * removed from the sleep queue and made runnable if it is still asleep.
999126324Sjhb */
1000126324Sjhbstatic void
1001126324Sjhbsleepq_timeout(void *arg)
1002126324Sjhb{
1003277528Shselasky	struct sleepqueue_chain *sc;
1004277528Shselasky	struct sleepqueue *sq;
1005277528Shselasky	struct thread *td;
1006277528Shselasky	void *wchan;
1007277528Shselasky	int wakeup_swapper;
1008126324Sjhb
1009277528Shselasky	td = arg;
1010277528Shselasky	wakeup_swapper = 0;
1011129241Sbde	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
1012173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
1013126324Sjhb
1014277528Shselasky	thread_lock(td);
1015304883Skib
1016304883Skib	if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
1017304883Skib		/*
1018304883Skib		 * The thread does not want a timeout (yet).
1019304883Skib		 */
1020304883Skib	} else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
1021304883Skib		/*
1022304883Skib		 * See if the thread is asleep and get the wait
1023304883Skib		 * channel if it is.
1024304883Skib		 */
1025277528Shselasky		wchan = td->td_wchan;
1026277528Shselasky		sc = SC_LOOKUP(wchan);
1027277528Shselasky		THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
1028277528Shselasky		sq = sleepq_lookup(wchan);
1029277528Shselasky		MPASS(sq != NULL);
1030277528Shselasky		td->td_flags |= TDF_TIMEOUT;
1031277528Shselasky		wakeup_swapper = sleepq_resume_thread(sq, td, 0);
1032304883Skib	} else if (TD_ON_SLEEPQ(td)) {
1033304883Skib		/*
1034304883Skib		 * If the thread is on the SLEEPQ but isn't sleeping
1035304883Skib		 * yet, it can either be on another CPU in between
1036304883Skib		 * sleepq_add() and one of the sleepq_*wait*()
1037304883Skib		 * routines or it can be in sleepq_catch_signals().
1038304883Skib		 */
1039277528Shselasky		td->td_flags |= TDF_TIMEOUT;
1040277528Shselasky	}
1041175654Sjhb
1042170294Sjeff	thread_unlock(td);
1043181334Sjhb	if (wakeup_swapper)
1044181334Sjhb		kick_proc0();
1045126324Sjhb}
1046126324Sjhb
1047126324Sjhb/*
1048126324Sjhb * Resumes a specific thread from the sleep queue associated with a specific
1049126324Sjhb * wait channel if it is on that queue.
1050126324Sjhb */
1051126324Sjhbvoid
1052126324Sjhbsleepq_remove(struct thread *td, void *wchan)
1053126324Sjhb{
1054126324Sjhb	struct sleepqueue *sq;
1055181334Sjhb	int wakeup_swapper;
1056126324Sjhb
1057126324Sjhb	/*
1058126324Sjhb	 * Look up the sleep queue for this wait channel, then re-check
1059126324Sjhb	 * that the thread is asleep on that channel, if it is not, then
1060126324Sjhb	 * bail.
1061126324Sjhb	 */
1062126324Sjhb	MPASS(wchan != NULL);
1063136445Sjhb	sleepq_lock(wchan);
1064126324Sjhb	sq = sleepq_lookup(wchan);
1065170294Sjeff	/*
1066170294Sjeff	 * We can not lock the thread here as it may be sleeping on a
1067170294Sjeff	 * different sleepq.  However, holding the sleepq lock for this
1068170294Sjeff	 * wchan can guarantee that we do not miss a wakeup for this
1069170294Sjeff	 * channel.  The asserts below will catch any false positives.
1070170294Sjeff	 */
1071126324Sjhb	if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
1072126324Sjhb		sleepq_release(wchan);
1073126324Sjhb		return;
1074126324Sjhb	}
1075170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
1076170294Sjeff	thread_lock(td);
1077126324Sjhb	MPASS(sq != NULL);
1078170294Sjeff	MPASS(td->td_wchan == wchan);
1079181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, td, 0);
1080170294Sjeff	thread_unlock(td);
1081126324Sjhb	sleepq_release(wchan);
1082181334Sjhb	if (wakeup_swapper)
1083181334Sjhb		kick_proc0();
1084126324Sjhb}
1085126324Sjhb
1086126324Sjhb/*
1087129241Sbde * Abort a thread as if an interrupt had occurred.  Only abort
1088129241Sbde * interruptible waits (unfortunately it isn't safe to abort others).
1089126324Sjhb */
1090181334Sjhbint
1091155741Sdavidxusleepq_abort(struct thread *td, int intrval)
1092126324Sjhb{
1093170294Sjeff	struct sleepqueue *sq;
1094126324Sjhb	void *wchan;
1095126324Sjhb
1096170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
1097126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
1098126324Sjhb	MPASS(td->td_flags & TDF_SINTR);
1099155741Sdavidxu	MPASS(intrval == EINTR || intrval == ERESTART);
1100126324Sjhb
1101126324Sjhb	/*
1102126324Sjhb	 * If the TDF_TIMEOUT flag is set, just leave. A
1103126324Sjhb	 * timeout is scheduled anyhow.
1104126324Sjhb	 */
1105126324Sjhb	if (td->td_flags & TDF_TIMEOUT)
1106181334Sjhb		return (0);
1107126324Sjhb
1108129241Sbde	CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
1109173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
1110170294Sjeff	td->td_intrval = intrval;
1111170294Sjeff	td->td_flags |= TDF_SLEEPABORT;
1112170294Sjeff	/*
1113170294Sjeff	 * If the thread has not slept yet it will find the signal in
1114170294Sjeff	 * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
1115170294Sjeff	 * we have to do it here.
1116170294Sjeff	 */
1117170294Sjeff	if (!TD_IS_SLEEPING(td))
1118181334Sjhb		return (0);
1119126324Sjhb	wchan = td->td_wchan;
1120170294Sjeff	MPASS(wchan != NULL);
1121170294Sjeff	sq = sleepq_lookup(wchan);
1122170294Sjeff	MPASS(sq != NULL);
1123170294Sjeff
1124170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
1125181334Sjhb	return (sleepq_resume_thread(sq, td, 0));
1126126324Sjhb}
1127154936Sjhb
1128316120Svangyzenvoid
1129316120Svangyzensleepq_chains_remove_matching(bool (*matches)(struct thread *))
1130316120Svangyzen{
1131316120Svangyzen	struct sleepqueue_chain *sc;
1132331672Smarkj	struct sleepqueue *sq, *sq1;
1133316120Svangyzen	int i, wakeup_swapper;
1134316120Svangyzen
1135316120Svangyzen	wakeup_swapper = 0;
1136316120Svangyzen	for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
1137316120Svangyzen		if (LIST_EMPTY(&sc->sc_queues)) {
1138316120Svangyzen			continue;
1139316120Svangyzen		}
1140316120Svangyzen		mtx_lock_spin(&sc->sc_lock);
1141331672Smarkj		LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) {
1142316120Svangyzen			for (i = 0; i < NR_SLEEPQS; ++i) {
1143316120Svangyzen				wakeup_swapper |= sleepq_remove_matching(sq, i,
1144316120Svangyzen				    matches, 0);
1145316120Svangyzen			}
1146316120Svangyzen		}
1147316120Svangyzen		mtx_unlock_spin(&sc->sc_lock);
1148316120Svangyzen	}
1149316120Svangyzen	if (wakeup_swapper) {
1150316120Svangyzen		kick_proc0();
1151316120Svangyzen	}
1152316120Svangyzen}
1153316120Svangyzen
/*
 * Prints the stacks of all threads presently sleeping on wchan/queue to
 * the sbuf sb.
 *
 * count_stacks_printed, when non-NULL, is always set on return to the
 * number of stacks that were printed without sb reporting an error.
 * Typically this equals the number of threads sleeping on the queue, but
 * it is smaller if sb overflowed before all stacks were printed, and it
 * is 0 when nothing was printed.
 *
 * Returns 0 on success, ENOENT if no sleep queue exists for wchan,
 * ENOMEM if the queue kept outgrowing the preallocated storage on every
 * attempt, or the error reported by sbuf_error(sb).
 */
#ifdef STACK
int
sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue,
    int *count_stacks_printed)
{
	struct thread *td;
	struct sleepqueue *sq;
	struct stack **st;
	struct sbuf **td_infos;
	int attempt, idx, nsaved, printed, error, stacks_to_allocate;
	bool finished;

	error = 0;
	finished = false;
	printed = 0;

	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
	MPASS((queue >= 0) && (queue < NR_SLEEPQS));

	stacks_to_allocate = 10;
	for (attempt = 0; attempt < 3 && !finished; attempt++) {
		/*
		 * We cannot malloc while holding the queue's spinlock, so
		 * we do our mallocs now, and hope it is enough.  If it
		 * isn't, we will free these, drop the lock, malloc more,
		 * and try again, up to a point.  After that point we will
		 * give up and report ENOMEM.  We also cannot write to sb
		 * during this time since the client may have set the
		 * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
		 * malloc as we print to it.  So we defer actually printing
		 * to sb until after we drop the spinlock.
		 */

		/* Where we will store the stacks. */
		st = malloc(sizeof(struct stack *) * stacks_to_allocate,
		    M_TEMP, M_WAITOK);
		for (idx = 0; idx < stacks_to_allocate; idx++)
			st[idx] = stack_create();

		/* Where we will store the td name, tid, etc. */
		td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
		    M_TEMP, M_WAITOK);
		for (idx = 0; idx < stacks_to_allocate; idx++)
			td_infos[idx] = sbuf_new(NULL, NULL,
			    MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
			    SBUF_FIXEDLEN);

		nsaved = 0;
		sleepq_lock(wchan);
		sq = sleepq_lookup(wchan);
		if (sq == NULL) {
			/* This sleepq does not exist; report ENOENT. */
			error = ENOENT;
			finished = true;
		} else {
			/* Save thread info. */
			finished = true;
			TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
				if (nsaved >= stacks_to_allocate) {
					/*
					 * More sleepers than slots: retry
					 * with a larger allocation.
					 */
					finished = false;
					break;
				}

				/*
				 * Note the td_lock is equal to the
				 * sleepq_lock here.
				 */
				stack_save_td(st[nsaved], td);

				sbuf_printf(td_infos[nsaved], "%d: %s %p",
				    td->td_tid, td->td_name, td);

				++nsaved;
			}
		}
		sleepq_release(wchan);

		/*
		 * Print the stacks, now that the spinlock is dropped.  A
		 * dedicated index is used so the retry counter is left
		 * alone, and the printed count only advances for stacks
		 * that made it into sb without error.
		 */
		if (finished && error == 0) {
			for (idx = 0; idx < nsaved; idx++) {
				sbuf_finish(td_infos[idx]);
				sbuf_printf(sb, "--- thread %s: ---\n",
				    sbuf_data(td_infos[idx]));
				stack_sbuf_print(sb, st[idx]);
				sbuf_printf(sb, "\n");

				error = sbuf_error(sb);
				if (error != 0)
					break;
				printed = idx + 1;
			}
		}

		for (idx = 0; idx < stacks_to_allocate; idx++)
			stack_destroy(st[idx]);
		for (idx = 0; idx < stacks_to_allocate; idx++)
			sbuf_delete(td_infos[idx]);
		free(st, M_TEMP);
		free(td_infos, M_TEMP);
		stacks_to_allocate *= 10;
	}

	if (!finished && error == 0)
		error = ENOMEM;

	if (count_stacks_printed != NULL)
		*count_stacks_printed = printed;

	return (error);
}
#endif
1269296927Scem
1270177372Sjeff#ifdef SLEEPQUEUE_PROFILING
1271177372Sjeff#define	SLEEPQ_PROF_LOCATIONS	1024
1272212750Smdf#define	SLEEPQ_SBUFSIZE		512
1273177372Sjeffstruct sleepq_prof {
1274177372Sjeff	LIST_ENTRY(sleepq_prof) sp_link;
1275177372Sjeff	const char	*sp_wmesg;
1276177372Sjeff	long		sp_count;
1277177372Sjeff};
1278177372Sjeff
1279177372SjeffLIST_HEAD(sqphead, sleepq_prof);
1280177372Sjeff
1281177372Sjeffstruct sqphead sleepq_prof_free;
1282177372Sjeffstruct sqphead sleepq_hash[SC_TABLESIZE];
1283177372Sjeffstatic struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
1284177372Sjeffstatic struct mtx sleepq_prof_lock;
1285177372SjeffMTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
1286177372Sjeff
1287177372Sjeffstatic void
1288177372Sjeffsleepq_profile(const char *wmesg)
1289177372Sjeff{
1290177372Sjeff	struct sleepq_prof *sp;
1291177372Sjeff
1292177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1293177372Sjeff	if (prof_enabled == 0)
1294177372Sjeff		goto unlock;
1295177372Sjeff	LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
1296177372Sjeff		if (sp->sp_wmesg == wmesg)
1297177372Sjeff			goto done;
1298177372Sjeff	sp = LIST_FIRST(&sleepq_prof_free);
1299177372Sjeff	if (sp == NULL)
1300177372Sjeff		goto unlock;
1301177372Sjeff	sp->sp_wmesg = wmesg;
1302177372Sjeff	LIST_REMOVE(sp, sp_link);
1303177372Sjeff	LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
1304177372Sjeffdone:
1305177372Sjeff	sp->sp_count++;
1306177372Sjeffunlock:
1307177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1308177372Sjeff	return;
1309177372Sjeff}
1310177372Sjeff
1311177372Sjeffstatic void
1312177372Sjeffsleepq_prof_reset(void)
1313177372Sjeff{
1314177372Sjeff	struct sleepq_prof *sp;
1315177372Sjeff	int enabled;
1316177372Sjeff	int i;
1317177372Sjeff
1318177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1319177372Sjeff	enabled = prof_enabled;
1320177372Sjeff	prof_enabled = 0;
1321177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++)
1322177372Sjeff		LIST_INIT(&sleepq_hash[i]);
1323177372Sjeff	LIST_INIT(&sleepq_prof_free);
1324177372Sjeff	for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
1325177372Sjeff		sp = &sleepq_profent[i];
1326177372Sjeff		sp->sp_wmesg = NULL;
1327177372Sjeff		sp->sp_count = 0;
1328177372Sjeff		LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
1329177372Sjeff	}
1330177372Sjeff	prof_enabled = enabled;
1331177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1332177372Sjeff}
1333177372Sjeff
1334177372Sjeffstatic int
1335177372Sjeffenable_sleepq_prof(SYSCTL_HANDLER_ARGS)
1336177372Sjeff{
1337177372Sjeff	int error, v;
1338177372Sjeff
1339177372Sjeff	v = prof_enabled;
1340177372Sjeff	error = sysctl_handle_int(oidp, &v, v, req);
1341177372Sjeff	if (error)
1342177372Sjeff		return (error);
1343177372Sjeff	if (req->newptr == NULL)
1344177372Sjeff		return (error);
1345177372Sjeff	if (v == prof_enabled)
1346177372Sjeff		return (0);
1347177372Sjeff	if (v == 1)
1348177372Sjeff		sleepq_prof_reset();
1349177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1350177372Sjeff	prof_enabled = !!v;
1351177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1352177372Sjeff
1353177372Sjeff	return (0);
1354177372Sjeff}
1355177372Sjeff
1356177372Sjeffstatic int
1357177372Sjeffreset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1358177372Sjeff{
1359177372Sjeff	int error, v;
1360177372Sjeff
1361177372Sjeff	v = 0;
1362177372Sjeff	error = sysctl_handle_int(oidp, &v, 0, req);
1363177372Sjeff	if (error)
1364177372Sjeff		return (error);
1365177372Sjeff	if (req->newptr == NULL)
1366177372Sjeff		return (error);
1367177372Sjeff	if (v == 0)
1368177372Sjeff		return (0);
1369177372Sjeff	sleepq_prof_reset();
1370177372Sjeff
1371177372Sjeff	return (0);
1372177372Sjeff}
1373177372Sjeff
1374177372Sjeffstatic int
1375177372Sjeffdump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1376177372Sjeff{
1377177372Sjeff	struct sleepq_prof *sp;
1378177372Sjeff	struct sbuf *sb;
1379177372Sjeff	int enabled;
1380177372Sjeff	int error;
1381177372Sjeff	int i;
1382177372Sjeff
1383217916Smdf	error = sysctl_wire_old_buffer(req, 0);
1384217916Smdf	if (error != 0)
1385217916Smdf		return (error);
1386212750Smdf	sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
1387177372Sjeff	sbuf_printf(sb, "\nwmesg\tcount\n");
1388177372Sjeff	enabled = prof_enabled;
1389177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1390177372Sjeff	prof_enabled = 0;
1391177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1392177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++) {
1393177372Sjeff		LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
1394177372Sjeff			sbuf_printf(sb, "%s\t%ld\n",
1395177372Sjeff			    sp->sp_wmesg, sp->sp_count);
1396177372Sjeff		}
1397177372Sjeff	}
1398177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1399177372Sjeff	prof_enabled = enabled;
1400177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1401177372Sjeff
1402212750Smdf	error = sbuf_finish(sb);
1403177372Sjeff	sbuf_delete(sb);
1404177372Sjeff	return (error);
1405177372Sjeff}
1406177372Sjeff
1407177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
1408177372Sjeff    NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
1409177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
1410177372Sjeff    NULL, 0, reset_sleepq_prof_stats, "I",
1411177372Sjeff    "Reset sleepqueue profiling statistics");
1412177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
1413177372Sjeff    NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
1414177372Sjeff#endif
1415177372Sjeff
#ifdef DDB
/*
 * DDB "show sleepq <addr>" command.
 *
 * addr is tried first as a wait channel and then as the address of an
 * active sleep queue.  For the queue found, the wait channel, queue
 * type, (under INVARIANTS) the associated interlock, and the threads
 * blocked on each queue are printed.
 */
DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
{
	struct sleepqueue_chain *sc;
	struct sleepqueue *sq;
#ifdef INVARIANTS
	struct lock_object *lock;
#endif
	struct thread *td;
	void *wchan;
	int i;

	if (!have_addr)
		return;

	/*
	 * First, see if there is an active sleep queue for the wait
	 * channel indicated by the address.  sq is NULL after the loop
	 * when nothing matched.
	 */
	wchan = (void *)addr;
	sc = SC_LOOKUP(wchan);
	LIST_FOREACH(sq, &sc->sc_queues, sq_hash) {
		if (sq->sq_wchan == wchan)
			break;
	}

	/*
	 * Second, see if there is an active sleep queue at the address
	 * indicated.  The scan stops as soon as sq is non-NULL.
	 */
	for (i = 0; sq == NULL && i < SC_TABLESIZE; i++) {
		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
			if (sq == (struct sleepqueue *)addr)
				break;
		}
	}

	if (sq == NULL) {
		db_printf("Unable to locate a sleep queue via %p\n",
		    (void *)addr);
		return;
	}

	db_printf("Wait channel: %p\n", sq->sq_wchan);
	db_printf("Queue type: %d\n", sq->sq_type);
#ifdef INVARIANTS
	lock = sq->sq_lock;
	if (lock != NULL) {
		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
	}
#endif
	db_printf("Blocked threads:\n");
	for (i = 0; i < NR_SLEEPQS; i++) {
		db_printf("\nQueue[%d]:\n", i);
		if (TAILQ_EMPTY(&sq->sq_blocked[i]))
			db_printf("\tempty\n");
		/* Visits nothing when the queue is empty. */
		TAILQ_FOREACH(td, &sq->sq_blocked[i], td_slpq) {
			db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
			    td->td_tid, td->td_proc->p_pid, td->td_name);
		}
		db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
	}
}

/* Alias 'show sleepqueue' to 'show sleepq'. */
DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
#endif
1482