subr_sleepqueue.c revision 277213
1139804Simp/*-
2126324Sjhb * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
3126324Sjhb * All rights reserved.
4126324Sjhb *
5126324Sjhb * Redistribution and use in source and binary forms, with or without
6126324Sjhb * modification, are permitted provided that the following conditions
7126324Sjhb * are met:
8126324Sjhb * 1. Redistributions of source code must retain the above copyright
9126324Sjhb *    notice, this list of conditions and the following disclaimer.
10126324Sjhb * 2. Redistributions in binary form must reproduce the above copyright
11126324Sjhb *    notice, this list of conditions and the following disclaimer in the
12126324Sjhb *    documentation and/or other materials provided with the distribution.
13126324Sjhb *
14126324Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15126324Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16126324Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17126324Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18126324Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19126324Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20126324Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21126324Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22126324Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23126324Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24126324Sjhb * SUCH DAMAGE.
25126324Sjhb */
26126324Sjhb
27126324Sjhb/*
28126324Sjhb * Implementation of sleep queues used to hold queue of threads blocked on
 * a wait channel.  Sleep queues differ from turnstiles in that wait
30126324Sjhb * channels are not owned by anyone, so there is no priority propagation.
31126324Sjhb * Sleep queues can also provide a timeout and can also be interrupted by
32126324Sjhb * signals.  That said, there are several similarities between the turnstile
33126324Sjhb * and sleep queue implementations.  (Note: turnstiles were implemented
34126324Sjhb * first.)  For example, both use a hash table of the same size where each
35126324Sjhb * bucket is referred to as a "chain" that contains both a spin lock and
36126324Sjhb * a linked list of queues.  An individual queue is located by using a hash
37126324Sjhb * to pick a chain, locking the chain, and then walking the chain searching
38126324Sjhb * for the queue.  This means that a wait channel object does not need to
 * embed its queue head just as locks do not embed their turnstile queue
40126324Sjhb * head.  Threads also carry around a sleep queue that they lend to the
41126324Sjhb * wait channel when blocking.  Just as in turnstiles, the queue includes
42126324Sjhb * a free list of the sleep queues of other threads blocked on the same
43126324Sjhb * wait channel in the case of multiple waiters.
44126324Sjhb *
 * Some additional functionality provided by sleep queues includes the
46126324Sjhb * ability to set a timeout.  The timeout is managed using a per-thread
47126324Sjhb * callout that resumes a thread if it is asleep.  A thread may also
48126324Sjhb * catch signals while it is asleep (aka an interruptible sleep).  The
49126324Sjhb * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
50126324Sjhb * sleep queues also provide some extra assertions.  One is not allowed to
51126324Sjhb * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
52126324Sjhb * must consistently use the same lock to synchronize with a wait channel,
53126324Sjhb * though this check is currently only a warning for sleep/wakeup due to
54126324Sjhb * pre-existing abuse of that API.  The same lock must also be held when
55126324Sjhb * awakening threads, though that is currently only enforced for condition
56126324Sjhb * variables.
57126324Sjhb */
58126324Sjhb
59126324Sjhb#include <sys/cdefs.h>
60126324Sjhb__FBSDID("$FreeBSD: head/sys/kern/subr_sleepqueue.c 277213 2015-01-15 15:32:30Z hselasky $");
61126324Sjhb
62154936Sjhb#include "opt_sleepqueue_profiling.h"
63154936Sjhb#include "opt_ddb.h"
64170640Sjeff#include "opt_sched.h"
65154936Sjhb
66126324Sjhb#include <sys/param.h>
67126324Sjhb#include <sys/systm.h>
68126324Sjhb#include <sys/lock.h>
69126324Sjhb#include <sys/kernel.h>
70126324Sjhb#include <sys/ktr.h>
71126324Sjhb#include <sys/mutex.h>
72126324Sjhb#include <sys/proc.h>
73177372Sjeff#include <sys/sbuf.h>
74126324Sjhb#include <sys/sched.h>
75235459Srstone#include <sys/sdt.h>
76126324Sjhb#include <sys/signalvar.h>
77126324Sjhb#include <sys/sleepqueue.h>
78131259Sjhb#include <sys/sysctl.h>
79126324Sjhb
80169666Sjeff#include <vm/uma.h>
81169666Sjeff
82154936Sjhb#ifdef DDB
83154936Sjhb#include <ddb/ddb.h>
84154936Sjhb#endif
85154936Sjhb
86126324Sjhb/*
87248186Smav * Constants for the hash table of sleep queue chains.
88248186Smav * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
89126324Sjhb */
#define	SC_TABLESIZE	256			/* Must be power of 2. */
#define	SC_MASK		(SC_TABLESIZE - 1)
#define	SC_SHIFT	8
/*
 * Map a wait channel address to a chain index: fold the bits above
 * SC_SHIFT onto the low bits, then mask down to the table size.
 */
#define	SC_HASH(wc)	((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
			    SC_MASK)
/*
 * Pointer to the chain a wait channel hashes to.  The expansion is fully
 * parenthesized so that it can safely be followed by '->' or '[]'.
 */
#define	SC_LOOKUP(wc)	(&sleepq_chains[SC_HASH(wc)])
/* Number of separate blocked-thread queues kept in each sleep queue. */
#define	NR_SLEEPQS	2
97126324Sjhb/*
 * There are two different lists of sleep queues.  Both lists are connected
99126324Sjhb * via the sq_hash entries.  The first list is the sleep queue chain list
100126324Sjhb * that a sleep queue is on when it is attached to a wait channel.  The
101126324Sjhb * second list is the free list hung off of a sleep queue that is attached
102126324Sjhb * to a wait channel.
103126324Sjhb *
104126324Sjhb * Each sleep queue also contains the wait channel it is attached to, the
105126324Sjhb * list of threads blocked on that wait channel, flags specific to the
106126324Sjhb * wait channel, and the lock used to synchronize with a wait channel.
107126324Sjhb * The flags are used to catch mismatches between the various consumers
108126324Sjhb * of the sleep queue API (e.g. sleep/wakeup and condition variables).
109126324Sjhb * The lock pointer is only used when invariants are enabled for various
110126324Sjhb * debugging checks.
111126324Sjhb *
112126324Sjhb * Locking key:
113126324Sjhb *  c - sleep queue chain lock
114126324Sjhb */
struct sleepqueue {
	/* One list of blocked threads per sub-queue, linked via td_slpq. */
	TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];	/* (c) Blocked threads. */
	/* Kept in step with sq_blocked[] by sleepq_add()/sleepq_resume_thread(). */
	u_int sq_blockedcnt[NR_SLEEPQS];	/* (c) N. of blocked threads. */
	/* Links this queue on a chain's sc_queues or on another queue's sq_free. */
	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
	/* Spare queues lent by the other threads blocked on the same channel. */
	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
	void	*sq_wchan;			/* (c) Wait channel. */
	/* Set to (flags & SLEEPQ_TYPE) by the first sleeper in sleepq_add(). */
	int	sq_type;			/* (c) Queue type. */
#ifdef INVARIANTS
	/* Recorded by the first sleeper; later sleepers must pass the same lock. */
	struct lock_object *sq_lock;		/* (c) Associated lock. */
#endif
};
126126324Sjhb
/*
 * One bucket of the sleep queue hash table: the sleep queues of every wait
 * channel that hashes to this bucket, plus the spin lock protecting them.
 */
struct sleepqueue_chain {
	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
	struct mtx sc_lock;			/* Spin lock for this chain. */
#ifdef SLEEPQUEUE_PROFILING
	/* Bumped when a queue joins the chain, dropped when one leaves it. */
	u_int	sc_depth;			/* Length of sc_queues. */
	u_int	sc_max_depth;			/* Max length of sc_queues. */
#endif
};
135126324Sjhb
#ifdef SLEEPQUEUE_PROFILING
/* Largest sc_max_depth seen on any chain; updated in sleepq_add(). */
u_int sleepq_max_depth;
static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
    "sleepq chain stats");
SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
    0, "maximum depth achieved of a single chain");

static void	sleepq_profile(const char *wmesg);
/* Checked in sleepq_switch() before calling sleepq_profile(). */
static int	prof_enabled;
#endif
/* Hash table of sleep queue chains, indexed by SC_HASH(wchan). */
static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
/*
 * UMA zone that sleepq_alloc() and sleepq_free() draw from; created in
 * init_sleepqueues().
 */
static uma_zone_t sleepq_zone;
149126324Sjhb
150126324Sjhb/*
151126324Sjhb * Prototypes for non-exported routines.
152126324Sjhb */
static int	sleepq_catch_signals(void *wchan, int pri);
static int	sleepq_check_signals(void);
static int	sleepq_check_timeout(struct thread *);
static void	sleepq_stop_timeout(struct thread *);
/* sleepq_dtor() and sleepq_init() are handed to uma_zcreate(). */
#ifdef INVARIANTS
static void	sleepq_dtor(void *mem, int size, void *arg);
#endif
static int	sleepq_init(void *mem, int size, int flags);
static int	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
		    int pri);
static void	sleepq_switch(void *wchan, int pri);
/* Callout handler armed by sleepq_set_timeout_sbt(). */
static void	sleepq_timeout(void *arg);

/*
 * Probes of the "sched" provider that this file fires: "sleep" from
 * sleepq_switch() and "wakeup" from sleepq_resume_thread().
 */
SDT_PROBE_DECLARE(sched, , , sleep);
SDT_PROBE_DECLARE(sched, , , wakeup);
168235459Srstone
/*
 * Register the SLEEPQUEUE_PROFILING sysctl nodes: one child of
 * debug.sleepq.chains per hash chain, named after the chain index and
 * exposing that chain's "depth" and "max_depth" counters read-only.
 * This has to run after sleepinit() has completed, i.e. after the
 * SI_SUB_KMEM SYSINIT() subsystem has been set up.
 */
#ifdef SLEEPQUEUE_PROFILING
static void
init_sleepqueue_profiling(void)
{
	struct sleepqueue_chain *sc;
	struct sysctl_oid *node;
	char name[10];
	u_int idx;

	for (idx = 0; idx < SC_TABLESIZE; idx++) {
		sc = &sleepq_chains[idx];

		/* The node name is simply the decimal chain index. */
		snprintf(name, sizeof(name), "%u", idx);
		node = SYSCTL_ADD_NODE(NULL,
		    SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
		    name, CTLFLAG_RD, NULL, "sleepq chain stats");
		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
		    "depth", CTLFLAG_RD, &sc->sc_depth, 0, NULL);
		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
		    "max_depth", CTLFLAG_RD, &sc->sc_max_depth, 0, NULL);
	}
}

SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
    init_sleepqueue_profiling, NULL);
#endif
198267820Sattilio
199267820Sattilio/*
200267820Sattilio * Early initialization of sleep queues that is called from the sleepinit()
201267820Sattilio * SYSINIT.
202267820Sattilio */
203267820Sattiliovoid
204267820Sattilioinit_sleepqueues(void)
205267820Sattilio{
206267820Sattilio	int i;
207267820Sattilio
208267820Sattilio	for (i = 0; i < SC_TABLESIZE; i++) {
209267820Sattilio		LIST_INIT(&sleepq_chains[i].sc_queues);
210267820Sattilio		mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
211267820Sattilio		    MTX_SPIN | MTX_RECURSE);
212126324Sjhb	}
213169666Sjeff	sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
214169666Sjeff#ifdef INVARIANTS
215169666Sjeff	    NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
216169666Sjeff#else
217169666Sjeff	    NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
218169666Sjeff#endif
219169666Sjeff
220126324Sjhb	thread0.td_sleepqueue = sleepq_alloc();
221126324Sjhb}
222126324Sjhb
223126324Sjhb/*
224169666Sjeff * Get a sleep queue for a new thread.
225126324Sjhb */
226126324Sjhbstruct sleepqueue *
227126324Sjhbsleepq_alloc(void)
228126324Sjhb{
229126324Sjhb
230169666Sjeff	return (uma_zalloc(sleepq_zone, M_WAITOK));
231126324Sjhb}
232126324Sjhb
233126324Sjhb/*
234126324Sjhb * Free a sleep queue when a thread is destroyed.
235126324Sjhb */
236126324Sjhbvoid
237126324Sjhbsleepq_free(struct sleepqueue *sq)
238126324Sjhb{
239126324Sjhb
240169666Sjeff	uma_zfree(sleepq_zone, sq);
241126324Sjhb}
242126324Sjhb
243126324Sjhb/*
244136445Sjhb * Lock the sleep queue chain associated with the specified wait channel.
245136445Sjhb */
246136445Sjhbvoid
247136445Sjhbsleepq_lock(void *wchan)
248136445Sjhb{
249136445Sjhb	struct sleepqueue_chain *sc;
250136445Sjhb
251136445Sjhb	sc = SC_LOOKUP(wchan);
252136445Sjhb	mtx_lock_spin(&sc->sc_lock);
253136445Sjhb}
254136445Sjhb
255136445Sjhb/*
256126324Sjhb * Look up the sleep queue associated with a given wait channel in the hash
257136445Sjhb * table locking the associated sleep queue chain.  If no queue is found in
258136445Sjhb * the table, NULL is returned.
259126324Sjhb */
260126324Sjhbstruct sleepqueue *
261126324Sjhbsleepq_lookup(void *wchan)
262126324Sjhb{
263126324Sjhb	struct sleepqueue_chain *sc;
264126324Sjhb	struct sleepqueue *sq;
265126324Sjhb
266126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
267126324Sjhb	sc = SC_LOOKUP(wchan);
268136445Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
269126324Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
270126324Sjhb		if (sq->sq_wchan == wchan)
271126324Sjhb			return (sq);
272126324Sjhb	return (NULL);
273126324Sjhb}
274126324Sjhb
275126324Sjhb/*
276126324Sjhb * Unlock the sleep queue chain associated with a given wait channel.
277126324Sjhb */
278126324Sjhbvoid
279126324Sjhbsleepq_release(void *wchan)
280126324Sjhb{
281126324Sjhb	struct sleepqueue_chain *sc;
282126324Sjhb
283126324Sjhb	sc = SC_LOOKUP(wchan);
284126324Sjhb	mtx_unlock_spin(&sc->sc_lock);
285126324Sjhb}
286126324Sjhb
287126324Sjhb/*
288137277Sjhb * Places the current thread on the sleep queue for the specified wait
289126324Sjhb * channel.  If INVARIANTS is enabled, then it associates the passed in
290126324Sjhb * lock with the sleepq to make sure it is held when that sleep queue is
291126324Sjhb * woken up.
292126324Sjhb */
293126324Sjhbvoid
294165272Skmacysleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
295165272Skmacy    int queue)
296126324Sjhb{
297126324Sjhb	struct sleepqueue_chain *sc;
298136445Sjhb	struct sleepqueue *sq;
299137277Sjhb	struct thread *td;
300126324Sjhb
301126324Sjhb	td = curthread;
302126324Sjhb	sc = SC_LOOKUP(wchan);
303126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
304126324Sjhb	MPASS(td->td_sleepqueue != NULL);
305126324Sjhb	MPASS(wchan != NULL);
306165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
307126324Sjhb
308150177Sjhb	/* If this thread is not allowed to sleep, die a horrible death. */
309247588Sjhb	KASSERT(td->td_no_sleeping == 0,
310247588Sjhb	    ("%s: td %p to sleep on wchan %p with sleeping prohibited",
311240423Sattilio	    __func__, td, wchan));
312150177Sjhb
313136445Sjhb	/* Look up the sleep queue associated with the wait channel 'wchan'. */
314136445Sjhb	sq = sleepq_lookup(wchan);
315136445Sjhb
316136445Sjhb	/*
317136445Sjhb	 * If the wait channel does not already have a sleep queue, use
318136445Sjhb	 * this thread's sleep queue.  Otherwise, insert the current thread
319136445Sjhb	 * into the sleep queue already in use by this wait channel.
320136445Sjhb	 */
321126324Sjhb	if (sq == NULL) {
322165272Skmacy#ifdef INVARIANTS
323165292Skmacy		int i;
324165291Sache
325165292Skmacy		sq = td->td_sleepqueue;
326200447Sattilio		for (i = 0; i < NR_SLEEPQS; i++) {
327165292Skmacy			KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
328200447Sattilio			    ("thread's sleep queue %d is not empty", i));
329200447Sattilio			KASSERT(sq->sq_blockedcnt[i] == 0,
330200447Sattilio			    ("thread's sleep queue %d count mismatches", i));
331200447Sattilio		}
332165272Skmacy		KASSERT(LIST_EMPTY(&sq->sq_free),
333165272Skmacy		    ("thread's sleep queue has a non-empty free list"));
334165272Skmacy		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
335165292Skmacy		sq->sq_lock = lock;
336165272Skmacy#endif
337131259Sjhb#ifdef SLEEPQUEUE_PROFILING
338131259Sjhb		sc->sc_depth++;
339131259Sjhb		if (sc->sc_depth > sc->sc_max_depth) {
340131259Sjhb			sc->sc_max_depth = sc->sc_depth;
341131259Sjhb			if (sc->sc_max_depth > sleepq_max_depth)
342131259Sjhb				sleepq_max_depth = sc->sc_max_depth;
343131259Sjhb		}
344131259Sjhb#endif
345165292Skmacy		sq = td->td_sleepqueue;
346126324Sjhb		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
347126324Sjhb		sq->sq_wchan = wchan;
348201879Sattilio		sq->sq_type = flags & SLEEPQ_TYPE;
349126324Sjhb	} else {
350126324Sjhb		MPASS(wchan == sq->sq_wchan);
351126488Sjhb		MPASS(lock == sq->sq_lock);
352136445Sjhb		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
353126324Sjhb		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
354126324Sjhb	}
355172155Sattilio	thread_lock(td);
356165272Skmacy	TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
357200447Sattilio	sq->sq_blockedcnt[queue]++;
358126324Sjhb	td->td_sleepqueue = NULL;
359165272Skmacy	td->td_sqqueue = queue;
360126324Sjhb	td->td_wchan = wchan;
361126324Sjhb	td->td_wmesg = wmesg;
362155741Sdavidxu	if (flags & SLEEPQ_INTERRUPTIBLE) {
363134013Sjhb		td->td_flags |= TDF_SINTR;
364155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
365155741Sdavidxu	}
366172155Sattilio	thread_unlock(td);
367126324Sjhb}
368126324Sjhb
369126324Sjhb/*
370126324Sjhb * Sets a timeout that will remove the current thread from the specified
371126324Sjhb * sleep queue after timo ticks if the thread has not already been awakened.
372126324Sjhb */
373126324Sjhbvoid
374247783Sdavidesleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
375247783Sdavide    int flags)
376126324Sjhb{
377126324Sjhb	struct thread *td;
378126324Sjhb
379126324Sjhb	td = curthread;
380277213Shselasky
381277213Shselasky	mtx_lock_spin(&td->td_slpmutex);
382247783Sdavide	callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
383247783Sdavide	    sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
384277213Shselasky	mtx_unlock_spin(&td->td_slpmutex);
385126324Sjhb}
386126324Sjhb
387126324Sjhb/*
388200447Sattilio * Return the number of actual sleepers for the specified queue.
389200447Sattilio */
390200447Sattiliou_int
391200447Sattiliosleepq_sleepcnt(void *wchan, int queue)
392200447Sattilio{
393200447Sattilio	struct sleepqueue *sq;
394200447Sattilio
395200447Sattilio	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
396200447Sattilio	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
397200447Sattilio	sq = sleepq_lookup(wchan);
398200447Sattilio	if (sq == NULL)
399200447Sattilio		return (0);
400200447Sattilio	return (sq->sq_blockedcnt[queue]);
401200447Sattilio}
402200447Sattilio
/*
 * Marks the pending sleep of the current thread as interruptible and
 * makes an initial check for pending signals before putting a thread
 * to sleep.  Must be entered with the chain lock for 'wchan' held
 * (asserted below); the thread lock is acquired in here and is still
 * held on return, while the chain lock is released on the paths that
 * do not go through sleepq_switch().  Thread lock may have transitioned
 * from the sleepq lock to a run lock.
 *
 * Returns 0 if the thread went through sleepq_switch(), otherwise EINTR
 * or ERESTART when the sleep was abandoned because of a pending wakeup
 * request, signal or suspension.
 */
static int
sleepq_catch_signals(void *wchan, int pri)
{
	struct sleepqueue_chain *sc;
	struct sleepqueue *sq;
	struct thread *td;
	struct proc *p;
	struct sigacts *ps;
	int sig, ret;

	td = curthread;
	p = curproc;
	sc = SC_LOOKUP(wchan);
	mtx_assert(&sc->sc_lock, MA_OWNED);
	MPASS(wchan != NULL);
	/*
	 * A pending TDP_WAKEUP request is consumed here and turns the sleep
	 * into an immediate EINTR.  The thread lock is taken first because
	 * the common exit path below returns with it held.
	 */
	if ((td->td_pflags & TDP_WAKEUP) != 0) {
		td->td_pflags &= ~TDP_WAKEUP;
		ret = EINTR;
		thread_lock(td);
		goto out;
	}

	/*
	 * See if there are any pending signals for this thread.  If not
	 * we can switch immediately.  Otherwise do the signal processing
	 * directly.
	 */
	thread_lock(td);
	if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) {
		sleepq_switch(wchan, pri);
		return (0);
	}
	/*
	 * Both spin locks are dropped before the process lock and the
	 * sigacts mutex are taken for the signal check.
	 */
	thread_unlock(td);
	mtx_unlock_spin(&sc->sc_lock);
	CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
		(void *)td, (long)p->p_pid, td->td_name);
	PROC_LOCK(p);
	ps = p->p_sigacts;
	mtx_lock(&ps->ps_mtx);
	sig = cursig(td);
	if (sig == 0) {
		/* No signal to take; see whether the thread must suspend. */
		mtx_unlock(&ps->ps_mtx);
		ret = thread_suspend_check(1);
		MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
	} else {
		/* Signals listed in ps_sigintr interrupt; others restart. */
		if (SIGISMEMBER(ps->ps_sigintr, sig))
			ret = EINTR;
		else
			ret = ERESTART;
		mtx_unlock(&ps->ps_mtx);
	}
	/*
	 * Lock the per-process spinlock prior to dropping the PROC_LOCK
	 * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
	 * thread_lock() are currently held in tdsendsignal().
	 */
	PROC_SLOCK(p);
	mtx_lock_spin(&sc->sc_lock);
	PROC_UNLOCK(p);
	thread_lock(td);
	PROC_SUNLOCK(p);
	/* Nothing is pending after all, so go to sleep normally. */
	if (ret == 0) {
		sleepq_switch(wchan, pri);
		return (0);
	}
out:
	/*
	 * There were pending signals and this thread is still
	 * on the sleep queue, remove it from the sleep queue.
	 */
	if (TD_ON_SLEEPQ(td)) {
		sq = sleepq_lookup(wchan);
		if (sleepq_resume_thread(sq, td, 0)) {
#ifdef INVARIANTS
			/*
			 * This thread hasn't gone to sleep yet, so it
			 * should not be swapped out.
			 */
			panic("not waking up swapper");
#endif
		}
	}
	mtx_unlock_spin(&sc->sc_lock);
	/* The thread lock we return with must not be the chain lock. */
	MPASS(td->td_lock != &sc->sc_lock);
	return (ret);
}
495126324Sjhb
/*
 * Switches to another thread if we are still asleep on a sleep queue.
 * Must be called with both the chain lock for 'wchan' and the current
 * thread's lock held (asserted below); 'pri' is handed to sched_sleep().
 * On the two early-return paths the chain lock is released here.
 * Returns with thread lock.
 */
static void
sleepq_switch(void *wchan, int pri)
{
	struct sleepqueue_chain *sc;
	struct sleepqueue *sq;
	struct thread *td;

	td = curthread;
	sc = SC_LOOKUP(wchan);
	mtx_assert(&sc->sc_lock, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);

	/*
	 * If we have a sleep queue, then we've already been woken up, so
	 * just return.
	 */
	if (td->td_sleepqueue != NULL) {
		mtx_unlock_spin(&sc->sc_lock);
		return;
	}

	/*
	 * If TDF_TIMEOUT is set, then our sleep has been timed out
	 * already but we are still on the sleep queue, so dequeue the
	 * thread and return.
	 */
	if (td->td_flags & TDF_TIMEOUT) {
		MPASS(TD_ON_SLEEPQ(td));
		sq = sleepq_lookup(wchan);
		if (sleepq_resume_thread(sq, td, 0)) {
#ifdef INVARIANTS
			/*
			 * This thread hasn't gone to sleep yet, so it
			 * should not be swapped out.
			 */
			panic("not waking up swapper");
#endif
		}
		mtx_unlock_spin(&sc->sc_lock);
		return;
	}
#ifdef SLEEPQUEUE_PROFILING
	if (prof_enabled)
		sleepq_profile(td->td_wmesg);
#endif
	MPASS(td->td_sleepqueue == NULL);
	sched_sleep(td, pri);
	/*
	 * NOTE(review): thread_lock_set() presumably makes the chain lock
	 * the lock protecting this thread while it sleeps -- confirm
	 * against its definition.
	 */
	thread_lock_set(td, &sc->sc_lock);
	SDT_PROBE0(sched, , , sleep);
	TD_SET_SLEEPING(td);
	/* Voluntary switch; execution resumes here once we run again. */
	mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
	CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
}
555126324Sjhb
556126324Sjhb/*
557126324Sjhb * Check to see if we timed out.
558126324Sjhb */
559126324Sjhbstatic int
560277213Shselaskysleepq_check_timeout(struct thread *td)
561126324Sjhb{
562170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
563126324Sjhb
564126324Sjhb	/*
565126324Sjhb	 * If TDF_TIMEOUT is set, we timed out.
566126324Sjhb	 */
567126324Sjhb	if (td->td_flags & TDF_TIMEOUT) {
568126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
569126324Sjhb		return (EWOULDBLOCK);
570126324Sjhb	}
571126324Sjhb	return (0);
572126324Sjhb}
573126324Sjhb
574126324Sjhb/*
575277213Shselasky * Atomically stop the timeout by using a mutex.
576277213Shselasky */
577277213Shselaskystatic void
578277213Shselaskysleepq_stop_timeout(struct thread *td)
579277213Shselasky{
580277213Shselasky	mtx_lock_spin(&td->td_slpmutex);
581277213Shselasky	callout_stop(&td->td_slpcallout);
582277213Shselasky	mtx_unlock_spin(&td->td_slpmutex);
583277213Shselasky}
584277213Shselasky
585277213Shselasky/*
586126324Sjhb * Check to see if we were awoken by a signal.
587126324Sjhb */
588126324Sjhbstatic int
589126324Sjhbsleepq_check_signals(void)
590126324Sjhb{
591126324Sjhb	struct thread *td;
592126324Sjhb
593126324Sjhb	td = curthread;
594170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
595126324Sjhb
596126324Sjhb	/* We are no longer in an interruptible sleep. */
597155741Sdavidxu	if (td->td_flags & TDF_SINTR)
598246417Sjhb		td->td_flags &= ~TDF_SINTR;
599126324Sjhb
600155741Sdavidxu	if (td->td_flags & TDF_SLEEPABORT) {
601155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
602155741Sdavidxu		return (td->td_intrval);
603155741Sdavidxu	}
604155741Sdavidxu
605126324Sjhb	return (0);
606126324Sjhb}
607126324Sjhb
608126324Sjhb/*
609126324Sjhb * Block the current thread until it is awakened from its sleep queue.
610126324Sjhb */
611126324Sjhbvoid
612177085Sjeffsleepq_wait(void *wchan, int pri)
613126324Sjhb{
614170294Sjeff	struct thread *td;
615126324Sjhb
616170294Sjeff	td = curthread;
617170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
618170294Sjeff	thread_lock(td);
619177085Sjeff	sleepq_switch(wchan, pri);
620170294Sjeff	thread_unlock(td);
621126324Sjhb}
622126324Sjhb
623126324Sjhb/*
624126324Sjhb * Block the current thread until it is awakened from its sleep queue
625126324Sjhb * or it is interrupted by a signal.
626126324Sjhb */
627126324Sjhbint
628177085Sjeffsleepq_wait_sig(void *wchan, int pri)
629126324Sjhb{
630155741Sdavidxu	int rcatch;
631126324Sjhb	int rval;
632126324Sjhb
633177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
634126324Sjhb	rval = sleepq_check_signals();
635170294Sjeff	thread_unlock(curthread);
636155741Sdavidxu	if (rcatch)
637155741Sdavidxu		return (rcatch);
638126324Sjhb	return (rval);
639126324Sjhb}
640126324Sjhb
641126324Sjhb/*
642126324Sjhb * Block the current thread until it is awakened from its sleep queue
643126324Sjhb * or it times out while waiting.
644126324Sjhb */
645126324Sjhbint
646177085Sjeffsleepq_timedwait(void *wchan, int pri)
647126324Sjhb{
648170294Sjeff	struct thread *td;
649126324Sjhb	int rval;
650126324Sjhb
651170294Sjeff	td = curthread;
652170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
653170294Sjeff	thread_lock(td);
654177085Sjeff	sleepq_switch(wchan, pri);
655277213Shselasky	rval = sleepq_check_timeout(td);
656170294Sjeff	thread_unlock(td);
657170294Sjeff
658277213Shselasky	sleepq_stop_timeout(td);
659277213Shselasky
660131249Sjhb	return (rval);
661126324Sjhb}
662126324Sjhb
663126324Sjhb/*
664126324Sjhb * Block the current thread until it is awakened from its sleep queue,
665126324Sjhb * it is interrupted by a signal, or it times out waiting to be awakened.
666126324Sjhb */
667126324Sjhbint
668177085Sjeffsleepq_timedwait_sig(void *wchan, int pri)
669126324Sjhb{
670277213Shselasky	struct thread *td;
671155741Sdavidxu	int rcatch, rvalt, rvals;
672126324Sjhb
673277213Shselasky	td = curthread;
674277213Shselasky
675177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
676277213Shselasky	rvalt = sleepq_check_timeout(td);
677126324Sjhb	rvals = sleepq_check_signals();
678277213Shselasky	thread_unlock(td);
679277213Shselasky
680277213Shselasky	sleepq_stop_timeout(td);
681277213Shselasky
682155741Sdavidxu	if (rcatch)
683155741Sdavidxu		return (rcatch);
684155741Sdavidxu	if (rvals)
685126324Sjhb		return (rvals);
686155741Sdavidxu	return (rvalt);
687126324Sjhb}
688126324Sjhb
689126324Sjhb/*
690201879Sattilio * Returns the type of sleepqueue given a waitchannel.
691201879Sattilio */
692201879Sattilioint
693201879Sattiliosleepq_type(void *wchan)
694201879Sattilio{
695201879Sattilio	struct sleepqueue *sq;
696201879Sattilio	int type;
697201879Sattilio
698201879Sattilio	MPASS(wchan != NULL);
699201879Sattilio
700201879Sattilio	sleepq_lock(wchan);
701201879Sattilio	sq = sleepq_lookup(wchan);
702201879Sattilio	if (sq == NULL) {
703201879Sattilio		sleepq_release(wchan);
704201879Sattilio		return (-1);
705201879Sattilio	}
706201879Sattilio	type = sq->sq_type;
707201879Sattilio	sleepq_release(wchan);
708201879Sattilio	return (type);
709201879Sattilio}
710201879Sattilio
711201879Sattilio/*
712145056Sjhb * Removes a thread from a sleep queue and makes it
713145056Sjhb * runnable.
714126324Sjhb */
715181334Sjhbstatic int
716145056Sjhbsleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
717126324Sjhb{
718126324Sjhb	struct sleepqueue_chain *sc;
719126324Sjhb
720126324Sjhb	MPASS(td != NULL);
721126324Sjhb	MPASS(sq->sq_wchan != NULL);
722126324Sjhb	MPASS(td->td_wchan == sq->sq_wchan);
723165272Skmacy	MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
724170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
725126324Sjhb	sc = SC_LOOKUP(sq->sq_wchan);
726126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
727126324Sjhb
728235459Srstone	SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
729235459Srstone
730126324Sjhb	/* Remove the thread from the queue. */
731200447Sattilio	sq->sq_blockedcnt[td->td_sqqueue]--;
732165272Skmacy	TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
733126324Sjhb
734126324Sjhb	/*
735126324Sjhb	 * Get a sleep queue for this thread.  If this is the last waiter,
736126324Sjhb	 * use the queue itself and take it out of the chain, otherwise,
737126324Sjhb	 * remove a queue from the free list.
738126324Sjhb	 */
739126324Sjhb	if (LIST_EMPTY(&sq->sq_free)) {
740126324Sjhb		td->td_sleepqueue = sq;
741126324Sjhb#ifdef INVARIANTS
742126324Sjhb		sq->sq_wchan = NULL;
743126324Sjhb#endif
744131259Sjhb#ifdef SLEEPQUEUE_PROFILING
745131259Sjhb		sc->sc_depth--;
746131259Sjhb#endif
747126324Sjhb	} else
748126324Sjhb		td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
749126324Sjhb	LIST_REMOVE(td->td_sleepqueue, sq_hash);
750126324Sjhb
751129188Sjhb	td->td_wmesg = NULL;
752129188Sjhb	td->td_wchan = NULL;
753246417Sjhb	td->td_flags &= ~TDF_SINTR;
754129188Sjhb
755129241Sbde	CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
756173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, td->td_name);
757126324Sjhb
758126324Sjhb	/* Adjust priority if requested. */
759177085Sjeff	MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
760217410Sjhb	if (pri != 0 && td->td_priority > pri &&
761217410Sjhb	    PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
762136439Sups		sched_prio(td, pri);
763184653Sjhb
764184653Sjhb	/*
765184653Sjhb	 * Note that thread td might not be sleeping if it is running
766184653Sjhb	 * sleepq_catch_signals() on another CPU or is blocked on its
767184653Sjhb	 * proc lock to check signals.  There's no need to mark the
768184653Sjhb	 * thread runnable in that case.
769184653Sjhb	 */
770184653Sjhb	if (TD_IS_SLEEPING(td)) {
771184653Sjhb		TD_CLR_SLEEPING(td);
772184653Sjhb		return (setrunnable(td));
773184653Sjhb	}
774184653Sjhb	return (0);
775126324Sjhb}
776126324Sjhb
777169666Sjeff#ifdef INVARIANTS
778126324Sjhb/*
779169666Sjeff * UMA zone item deallocator.
780169666Sjeff */
781169666Sjeffstatic void
782169666Sjeffsleepq_dtor(void *mem, int size, void *arg)
783169666Sjeff{
784169666Sjeff	struct sleepqueue *sq;
785169666Sjeff	int i;
786169666Sjeff
787169666Sjeff	sq = mem;
788200447Sattilio	for (i = 0; i < NR_SLEEPQS; i++) {
789169666Sjeff		MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
790200447Sattilio		MPASS(sq->sq_blockedcnt[i] == 0);
791200447Sattilio	}
792169666Sjeff}
793169666Sjeff#endif
794169666Sjeff
795169666Sjeff/*
796169666Sjeff * UMA zone item initializer.
797169666Sjeff */
798169666Sjeffstatic int
799169666Sjeffsleepq_init(void *mem, int size, int flags)
800169666Sjeff{
801169666Sjeff	struct sleepqueue *sq;
802169666Sjeff	int i;
803169666Sjeff
804169666Sjeff	bzero(mem, size);
805169666Sjeff	sq = mem;
806200447Sattilio	for (i = 0; i < NR_SLEEPQS; i++) {
807169666Sjeff		TAILQ_INIT(&sq->sq_blocked[i]);
808200447Sattilio		sq->sq_blockedcnt[i] = 0;
809200447Sattilio	}
810169666Sjeff	LIST_INIT(&sq->sq_free);
811169666Sjeff	return (0);
812169666Sjeff}
813169666Sjeff
814169666Sjeff/*
815126324Sjhb * Find the highest priority thread sleeping on a wait channel and resume it.
816126324Sjhb */
817181334Sjhbint
818165272Skmacysleepq_signal(void *wchan, int flags, int pri, int queue)
819126324Sjhb{
820126324Sjhb	struct sleepqueue *sq;
821137277Sjhb	struct thread *td, *besttd;
822181334Sjhb	int wakeup_swapper;
823126324Sjhb
824126324Sjhb	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
825126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
826165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
827126324Sjhb	sq = sleepq_lookup(wchan);
828170294Sjeff	if (sq == NULL)
829181334Sjhb		return (0);
830134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
831126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
832129188Sjhb
833137277Sjhb	/*
834137277Sjhb	 * Find the highest priority thread on the queue.  If there is a
835137277Sjhb	 * tie, use the thread that first appears in the queue as it has
836137277Sjhb	 * been sleeping the longest since threads are always added to
837137277Sjhb	 * the tail of sleep queues.
838137277Sjhb	 */
839137277Sjhb	besttd = NULL;
840165272Skmacy	TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
841137277Sjhb		if (besttd == NULL || td->td_priority < besttd->td_priority)
842137277Sjhb			besttd = td;
843137277Sjhb	}
844137277Sjhb	MPASS(besttd != NULL);
845170294Sjeff	thread_lock(besttd);
846181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
847170294Sjeff	thread_unlock(besttd);
848181334Sjhb	return (wakeup_swapper);
849126324Sjhb}
850126324Sjhb
851126324Sjhb/*
852126324Sjhb * Resume all threads sleeping on a specified wait channel.
853126324Sjhb */
854181334Sjhbint
855165272Skmacysleepq_broadcast(void *wchan, int flags, int pri, int queue)
856126324Sjhb{
857126324Sjhb	struct sleepqueue *sq;
858182875Sjhb	struct thread *td, *tdn;
859181334Sjhb	int wakeup_swapper;
860126324Sjhb
861126324Sjhb	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
862126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
863165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
864126324Sjhb	sq = sleepq_lookup(wchan);
865177085Sjeff	if (sq == NULL)
866181334Sjhb		return (0);
867134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
868126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
869129188Sjhb
870145056Sjhb	/* Resume all blocked threads on the sleep queue. */
871181334Sjhb	wakeup_swapper = 0;
872182875Sjhb	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
873170294Sjeff		thread_lock(td);
874181334Sjhb		if (sleepq_resume_thread(sq, td, pri))
875181334Sjhb			wakeup_swapper = 1;
876170294Sjeff		thread_unlock(td);
877170294Sjeff	}
878181334Sjhb	return (wakeup_swapper);
879126324Sjhb}
880126324Sjhb
881126324Sjhb/*
882126324Sjhb * Time sleeping threads out.  When the timeout expires, the thread is
883126324Sjhb * removed from the sleep queue and made runnable if it is still asleep.
884126324Sjhb */
885126324Sjhbstatic void
886126324Sjhbsleepq_timeout(void *arg)
887126324Sjhb{
888277213Shselasky	struct thread *td = arg;
889277213Shselasky	int wakeup_swapper = 0;
890126324Sjhb
891129241Sbde	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
892173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
893126324Sjhb
894277213Shselasky	/* Handle the three cases which can happen */
895277213Shselasky
896170294Sjeff	thread_lock(td);
897277213Shselasky	if (TD_ON_SLEEPQ(td)) {
898277213Shselasky		if (TD_IS_SLEEPING(td)) {
899277213Shselasky			struct sleepqueue_chain *sc;
900277213Shselasky			struct sleepqueue *sq;
901277213Shselasky			void *wchan;
902175654Sjhb
903277213Shselasky			/*
904277213Shselasky			 * Case I - thread is asleep and needs to be
905277213Shselasky			 * awoken:
906277213Shselasky			 */
907277213Shselasky			wchan = td->td_wchan;
908277213Shselasky			sc = SC_LOOKUP(wchan);
909277213Shselasky			THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
910277213Shselasky			sq = sleepq_lookup(wchan);
911277213Shselasky			MPASS(sq != NULL);
912277213Shselasky			td->td_flags |= TDF_TIMEOUT;
913277213Shselasky			wakeup_swapper = sleepq_resume_thread(sq, td, 0);
914277213Shselasky		} else {
915277213Shselasky			/*
916277213Shselasky			 * Case II - cancel going to sleep by setting
917277213Shselasky			 * the timeout flag because the target thread
918277213Shselasky			 * is not asleep yet. It can be on another CPU
919277213Shselasky			 * in between sleepq_add() and one of the
920277213Shselasky			 * sleepq_*wait*() routines or it can be in
921277213Shselasky			 * sleepq_catch_signals().
922277213Shselasky			 */
923277213Shselasky			td->td_flags |= TDF_TIMEOUT;
924277213Shselasky		}
925277213Shselasky	} else {
926277213Shselasky		/*
927277213Shselasky		 * Case III - thread is already woken up by a wakeup
928277213Shselasky		 * call and should not timeout. Nothing to do!
929277213Shselasky		 */
930170294Sjeff	}
931170294Sjeff	thread_unlock(td);
932181334Sjhb	if (wakeup_swapper)
933181334Sjhb		kick_proc0();
934126324Sjhb}
935126324Sjhb
936126324Sjhb/*
937126324Sjhb * Resumes a specific thread from the sleep queue associated with a specific
938126324Sjhb * wait channel if it is on that queue.
939126324Sjhb */
940126324Sjhbvoid
941126324Sjhbsleepq_remove(struct thread *td, void *wchan)
942126324Sjhb{
943126324Sjhb	struct sleepqueue *sq;
944181334Sjhb	int wakeup_swapper;
945126324Sjhb
946126324Sjhb	/*
947126324Sjhb	 * Look up the sleep queue for this wait channel, then re-check
948126324Sjhb	 * that the thread is asleep on that channel, if it is not, then
949126324Sjhb	 * bail.
950126324Sjhb	 */
951126324Sjhb	MPASS(wchan != NULL);
952136445Sjhb	sleepq_lock(wchan);
953126324Sjhb	sq = sleepq_lookup(wchan);
954170294Sjeff	/*
955170294Sjeff	 * We can not lock the thread here as it may be sleeping on a
956170294Sjeff	 * different sleepq.  However, holding the sleepq lock for this
957170294Sjeff	 * wchan can guarantee that we do not miss a wakeup for this
958170294Sjeff	 * channel.  The asserts below will catch any false positives.
959170294Sjeff	 */
960126324Sjhb	if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
961126324Sjhb		sleepq_release(wchan);
962126324Sjhb		return;
963126324Sjhb	}
964170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
965170294Sjeff	thread_lock(td);
966126324Sjhb	MPASS(sq != NULL);
967170294Sjeff	MPASS(td->td_wchan == wchan);
968181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, td, 0);
969170294Sjeff	thread_unlock(td);
970126324Sjhb	sleepq_release(wchan);
971181334Sjhb	if (wakeup_swapper)
972181334Sjhb		kick_proc0();
973126324Sjhb}
974126324Sjhb
975126324Sjhb/*
976129241Sbde * Abort a thread as if an interrupt had occurred.  Only abort
977129241Sbde * interruptible waits (unfortunately it isn't safe to abort others).
978126324Sjhb */
979181334Sjhbint
980155741Sdavidxusleepq_abort(struct thread *td, int intrval)
981126324Sjhb{
982170294Sjeff	struct sleepqueue *sq;
983126324Sjhb	void *wchan;
984126324Sjhb
985170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
986126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
987126324Sjhb	MPASS(td->td_flags & TDF_SINTR);
988155741Sdavidxu	MPASS(intrval == EINTR || intrval == ERESTART);
989126324Sjhb
990126324Sjhb	/*
991126324Sjhb	 * If the TDF_TIMEOUT flag is set, just leave. A
992126324Sjhb	 * timeout is scheduled anyhow.
993126324Sjhb	 */
994126324Sjhb	if (td->td_flags & TDF_TIMEOUT)
995181334Sjhb		return (0);
996126324Sjhb
997129241Sbde	CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
998173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
999170294Sjeff	td->td_intrval = intrval;
1000170294Sjeff	td->td_flags |= TDF_SLEEPABORT;
1001170294Sjeff	/*
1002170294Sjeff	 * If the thread has not slept yet it will find the signal in
1003170294Sjeff	 * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
1004170294Sjeff	 * we have to do it here.
1005170294Sjeff	 */
1006170294Sjeff	if (!TD_IS_SLEEPING(td))
1007181334Sjhb		return (0);
1008126324Sjhb	wchan = td->td_wchan;
1009170294Sjeff	MPASS(wchan != NULL);
1010170294Sjeff	sq = sleepq_lookup(wchan);
1011170294Sjeff	MPASS(sq != NULL);
1012170294Sjeff
1013170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
1014181334Sjhb	return (sleepq_resume_thread(sq, td, 0));
1015126324Sjhb}
1016154936Sjhb
1017177372Sjeff#ifdef SLEEPQUEUE_PROFILING
/*
 * Sleep queue profiling state.
 *
 * SLEEPQ_PROF_LOCATIONS is the number of statically allocated profiling
 * records (the size of sleepq_profent[]).  SLEEPQ_SBUFSIZE is the length
 * passed to sbuf_new_for_sysctl() when the statistics are dumped.
 */
1018177372Sjeff#define	SLEEPQ_PROF_LOCATIONS	1024
1019212750Smdf#define	SLEEPQ_SBUFSIZE		512
/*
 * One profiling record: sp_link chains it on either the free list or a
 * hash bucket, sp_wmesg is the wait message pointer the record is keyed on
 * (sleepq_profile() compares the pointers, not the string contents), and
 * sp_count is the counter sleepq_profile() increments.
 */
1020177372Sjeffstruct sleepq_prof {
1021177372Sjeff	LIST_ENTRY(sleepq_prof) sp_link;
1022177372Sjeff	const char	*sp_wmesg;
1023177372Sjeff	long		sp_count;
1024177372Sjeff};
1025177372Sjeff
1026177372SjeffLIST_HEAD(sqphead, sleepq_prof);
1027177372Sjeff
/*
 * sleepq_prof_free holds the records not yet assigned to a wait message;
 * sleepq_hash[] holds the assigned ones, indexed by SC_HASH(wmesg).
 * sleepq_prof_reset() and sleepq_profile() modify these lists while
 * holding the sleepq_prof_lock spin mutex.
 */
1028177372Sjeffstruct sqphead sleepq_prof_free;
1029177372Sjeffstruct sqphead sleepq_hash[SC_TABLESIZE];
1030177372Sjeffstatic struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
1031177372Sjeffstatic struct mtx sleepq_prof_lock;
1032177372SjeffMTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
1033177372Sjeff
1034177372Sjeffstatic void
1035177372Sjeffsleepq_profile(const char *wmesg)
1036177372Sjeff{
1037177372Sjeff	struct sleepq_prof *sp;
1038177372Sjeff
1039177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1040177372Sjeff	if (prof_enabled == 0)
1041177372Sjeff		goto unlock;
1042177372Sjeff	LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
1043177372Sjeff		if (sp->sp_wmesg == wmesg)
1044177372Sjeff			goto done;
1045177372Sjeff	sp = LIST_FIRST(&sleepq_prof_free);
1046177372Sjeff	if (sp == NULL)
1047177372Sjeff		goto unlock;
1048177372Sjeff	sp->sp_wmesg = wmesg;
1049177372Sjeff	LIST_REMOVE(sp, sp_link);
1050177372Sjeff	LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
1051177372Sjeffdone:
1052177372Sjeff	sp->sp_count++;
1053177372Sjeffunlock:
1054177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1055177372Sjeff	return;
1056177372Sjeff}
1057177372Sjeff
1058177372Sjeffstatic void
1059177372Sjeffsleepq_prof_reset(void)
1060177372Sjeff{
1061177372Sjeff	struct sleepq_prof *sp;
1062177372Sjeff	int enabled;
1063177372Sjeff	int i;
1064177372Sjeff
1065177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1066177372Sjeff	enabled = prof_enabled;
1067177372Sjeff	prof_enabled = 0;
1068177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++)
1069177372Sjeff		LIST_INIT(&sleepq_hash[i]);
1070177372Sjeff	LIST_INIT(&sleepq_prof_free);
1071177372Sjeff	for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
1072177372Sjeff		sp = &sleepq_profent[i];
1073177372Sjeff		sp->sp_wmesg = NULL;
1074177372Sjeff		sp->sp_count = 0;
1075177372Sjeff		LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
1076177372Sjeff	}
1077177372Sjeff	prof_enabled = enabled;
1078177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1079177372Sjeff}
1080177372Sjeff
1081177372Sjeffstatic int
1082177372Sjeffenable_sleepq_prof(SYSCTL_HANDLER_ARGS)
1083177372Sjeff{
1084177372Sjeff	int error, v;
1085177372Sjeff
1086177372Sjeff	v = prof_enabled;
1087177372Sjeff	error = sysctl_handle_int(oidp, &v, v, req);
1088177372Sjeff	if (error)
1089177372Sjeff		return (error);
1090177372Sjeff	if (req->newptr == NULL)
1091177372Sjeff		return (error);
1092177372Sjeff	if (v == prof_enabled)
1093177372Sjeff		return (0);
1094177372Sjeff	if (v == 1)
1095177372Sjeff		sleepq_prof_reset();
1096177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1097177372Sjeff	prof_enabled = !!v;
1098177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1099177372Sjeff
1100177372Sjeff	return (0);
1101177372Sjeff}
1102177372Sjeff
1103177372Sjeffstatic int
1104177372Sjeffreset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1105177372Sjeff{
1106177372Sjeff	int error, v;
1107177372Sjeff
1108177372Sjeff	v = 0;
1109177372Sjeff	error = sysctl_handle_int(oidp, &v, 0, req);
1110177372Sjeff	if (error)
1111177372Sjeff		return (error);
1112177372Sjeff	if (req->newptr == NULL)
1113177372Sjeff		return (error);
1114177372Sjeff	if (v == 0)
1115177372Sjeff		return (0);
1116177372Sjeff	sleepq_prof_reset();
1117177372Sjeff
1118177372Sjeff	return (0);
1119177372Sjeff}
1120177372Sjeff
1121177372Sjeffstatic int
1122177372Sjeffdump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1123177372Sjeff{
1124177372Sjeff	struct sleepq_prof *sp;
1125177372Sjeff	struct sbuf *sb;
1126177372Sjeff	int enabled;
1127177372Sjeff	int error;
1128177372Sjeff	int i;
1129177372Sjeff
1130217916Smdf	error = sysctl_wire_old_buffer(req, 0);
1131217916Smdf	if (error != 0)
1132217916Smdf		return (error);
1133212750Smdf	sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
1134177372Sjeff	sbuf_printf(sb, "\nwmesg\tcount\n");
1135177372Sjeff	enabled = prof_enabled;
1136177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1137177372Sjeff	prof_enabled = 0;
1138177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1139177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++) {
1140177372Sjeff		LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
1141177372Sjeff			sbuf_printf(sb, "%s\t%ld\n",
1142177372Sjeff			    sp->sp_wmesg, sp->sp_count);
1143177372Sjeff		}
1144177372Sjeff	}
1145177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1146177372Sjeff	prof_enabled = enabled;
1147177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1148177372Sjeff
1149212750Smdf	error = sbuf_finish(sb);
1150177372Sjeff	sbuf_delete(sb);
1151177372Sjeff	return (error);
1152177372Sjeff}
1153177372Sjeff
1154177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
1155177372Sjeff    NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
1156177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
1157177372Sjeff    NULL, 0, reset_sleepq_prof_stats, "I",
1158177372Sjeff    "Reset sleepqueue profiling statistics");
1159177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
1160177372Sjeff    NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
1161177372Sjeff#endif
1162177372Sjeff
1163154936Sjhb#ifdef DDB
1164154936SjhbDB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
1165154936Sjhb{
1166154936Sjhb	struct sleepqueue_chain *sc;
1167154936Sjhb	struct sleepqueue *sq;
1168154944Simp#ifdef INVARIANTS
1169154936Sjhb	struct lock_object *lock;
1170154944Simp#endif
1171154936Sjhb	struct thread *td;
1172154936Sjhb	void *wchan;
1173154936Sjhb	int i;
1174154936Sjhb
1175154936Sjhb	if (!have_addr)
1176154936Sjhb		return;
1177154936Sjhb
1178154936Sjhb	/*
1179154936Sjhb	 * First, see if there is an active sleep queue for the wait channel
1180154936Sjhb	 * indicated by the address.
1181154936Sjhb	 */
1182154936Sjhb	wchan = (void *)addr;
1183154936Sjhb	sc = SC_LOOKUP(wchan);
1184154936Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
1185154936Sjhb		if (sq->sq_wchan == wchan)
1186154936Sjhb			goto found;
1187154936Sjhb
1188154936Sjhb	/*
1189154936Sjhb	 * Second, see if there is an active sleep queue at the address
1190154936Sjhb	 * indicated.
1191154936Sjhb	 */
1192154936Sjhb	for (i = 0; i < SC_TABLESIZE; i++)
1193154936Sjhb		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
1194154936Sjhb			if (sq == (struct sleepqueue *)addr)
1195154936Sjhb				goto found;
1196154936Sjhb		}
1197154936Sjhb
1198154936Sjhb	db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
1199154936Sjhb	return;
1200154936Sjhbfound:
1201154936Sjhb	db_printf("Wait channel: %p\n", sq->sq_wchan);
1202201879Sattilio	db_printf("Queue type: %d\n", sq->sq_type);
1203154936Sjhb#ifdef INVARIANTS
1204154936Sjhb	if (sq->sq_lock) {
1205164325Spjd		lock = sq->sq_lock;
1206154936Sjhb		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
1207154936Sjhb		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
1208154936Sjhb	}
1209154936Sjhb#endif
1210154936Sjhb	db_printf("Blocked threads:\n");
1211165272Skmacy	for (i = 0; i < NR_SLEEPQS; i++) {
1212165272Skmacy		db_printf("\nQueue[%d]:\n", i);
1213165272Skmacy		if (TAILQ_EMPTY(&sq->sq_blocked[i]))
1214165272Skmacy			db_printf("\tempty\n");
1215165272Skmacy		else
1216165272Skmacy			TAILQ_FOREACH(td, &sq->sq_blocked[0],
1217165272Skmacy				      td_slpq) {
1218165272Skmacy				db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
1219165272Skmacy					  td->td_tid, td->td_proc->p_pid,
1220180930Sjhb					  td->td_name);
1221165272Skmacy			}
1222200447Sattilio		db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
1223165272Skmacy	}
1224154936Sjhb}
1225157823Sjhb
1226157823Sjhb/* Alias 'show sleepqueue' to 'show sleepq'. */
1227183054SsamDB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
1228154936Sjhb#endif
1229