subr_sleepqueue.c revision 184653
1139804Simp/*-
2126324Sjhb * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
3126324Sjhb * All rights reserved.
4126324Sjhb *
5126324Sjhb * Redistribution and use in source and binary forms, with or without
6126324Sjhb * modification, are permitted provided that the following conditions
7126324Sjhb * are met:
8126324Sjhb * 1. Redistributions of source code must retain the above copyright
9126324Sjhb *    notice, this list of conditions and the following disclaimer.
10126324Sjhb * 2. Redistributions in binary form must reproduce the above copyright
11126324Sjhb *    notice, this list of conditions and the following disclaimer in the
12126324Sjhb *    documentation and/or other materials provided with the distribution.
13126324Sjhb * 3. Neither the name of the author nor the names of any co-contributors
14126324Sjhb *    may be used to endorse or promote products derived from this software
15126324Sjhb *    without specific prior written permission.
16126324Sjhb *
17126324Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18126324Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19126324Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20126324Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21126324Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22126324Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23126324Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24126324Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25126324Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26126324Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27126324Sjhb * SUCH DAMAGE.
28126324Sjhb */
29126324Sjhb
30126324Sjhb/*
31126324Sjhb * Implementation of sleep queues used to hold queue of threads blocked on
32126324Sjhb * a wait channel.  Sleep queues differ from turnstiles in that wait
33126324Sjhb * channels are not owned by anyone, so there is no priority propagation.
34126324Sjhb * Sleep queues can also provide a timeout and can also be interrupted by
35126324Sjhb * signals.  That said, there are several similarities between the turnstile
36126324Sjhb * and sleep queue implementations.  (Note: turnstiles were implemented
37126324Sjhb * first.)  For example, both use a hash table of the same size where each
38126324Sjhb * bucket is referred to as a "chain" that contains both a spin lock and
39126324Sjhb * a linked list of queues.  An individual queue is located by using a hash
40126324Sjhb * to pick a chain, locking the chain, and then walking the chain searching
41126324Sjhb * for the queue.  This means that a wait channel object does not need to
42126324Sjhb * embed its queue head just as locks do not embed their turnstile queue
43126324Sjhb * head.  Threads also carry around a sleep queue that they lend to the
44126324Sjhb * wait channel when blocking.  Just as in turnstiles, the queue includes
45126324Sjhb * a free list of the sleep queues of other threads blocked on the same
46126324Sjhb * wait channel in the case of multiple waiters.
47126324Sjhb *
48126324Sjhb * Some additional functionality provided by sleep queues includes the
49126324Sjhb * ability to set a timeout.  The timeout is managed using a per-thread
50126324Sjhb * callout that resumes a thread if it is asleep.  A thread may also
51126324Sjhb * catch signals while it is asleep (aka an interruptible sleep).  The
52126324Sjhb * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
53126324Sjhb * sleep queues also provide some extra assertions.  One is not allowed to
54126324Sjhb * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
55126324Sjhb * must consistently use the same lock to synchronize with a wait channel,
56126324Sjhb * though this check is currently only a warning for sleep/wakeup due to
57126324Sjhb * pre-existing abuse of that API.  The same lock must also be held when
58126324Sjhb * awakening threads, though that is currently only enforced for condition
59126324Sjhb * variables.
60126324Sjhb */
61126324Sjhb
62126324Sjhb#include <sys/cdefs.h>
63126324Sjhb__FBSDID("$FreeBSD: head/sys/kern/subr_sleepqueue.c 184653 2008-11-04 19:13:53Z jhb $");
64126324Sjhb
65154936Sjhb#include "opt_sleepqueue_profiling.h"
66154936Sjhb#include "opt_ddb.h"
67170640Sjeff#include "opt_sched.h"
68154936Sjhb
69126324Sjhb#include <sys/param.h>
70126324Sjhb#include <sys/systm.h>
71126324Sjhb#include <sys/lock.h>
72126324Sjhb#include <sys/kernel.h>
73126324Sjhb#include <sys/ktr.h>
74126324Sjhb#include <sys/mutex.h>
75126324Sjhb#include <sys/proc.h>
76177372Sjeff#include <sys/sbuf.h>
77126324Sjhb#include <sys/sched.h>
78126324Sjhb#include <sys/signalvar.h>
79126324Sjhb#include <sys/sleepqueue.h>
80131259Sjhb#include <sys/sysctl.h>
81126324Sjhb
82169666Sjeff#include <vm/uma.h>
83169666Sjeff
84154936Sjhb#ifdef DDB
85154936Sjhb#include <ddb/ddb.h>
86154936Sjhb#endif
87154936Sjhb
/*
 * Constants for the hash table of sleep queue chains.  These constants are
 * the same ones that 4BSD (and possibly earlier versions of BSD) used.
 * Basically, we ignore the lower 8 bits of the address since most wait
 * channel pointers are aligned and only look at the next 7 bits for the
 * hash.  SC_TABLESIZE must be a power of two for SC_MASK to work properly.
 *
 * SC_HASH() maps a wait channel address to a chain index and SC_LOOKUP()
 * yields a pointer to that chain.  The SC_LOOKUP() expansion is fully
 * parenthesized so that it can safely be used as an operand of a postfix
 * operator (e.g. SC_LOOKUP(wc)->sc_lock).
 *
 * NR_SLEEPQS is the number of blocked-thread queues kept per sleep queue.
 */
#define	SC_TABLESIZE	128			/* Must be power of 2. */
#define	SC_MASK		(SC_TABLESIZE - 1)
#define	SC_SHIFT	8
#define	SC_HASH(wc)	(((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
#define	SC_LOOKUP(wc)	(&sleepq_chains[SC_HASH(wc)])
#define	NR_SLEEPQS	2
101126324Sjhb/*
102126324Sjhb * There two different lists of sleep queues.  Both lists are connected
103126324Sjhb * via the sq_hash entries.  The first list is the sleep queue chain list
104126324Sjhb * that a sleep queue is on when it is attached to a wait channel.  The
105126324Sjhb * second list is the free list hung off of a sleep queue that is attached
106126324Sjhb * to a wait channel.
107126324Sjhb *
108126324Sjhb * Each sleep queue also contains the wait channel it is attached to, the
109126324Sjhb * list of threads blocked on that wait channel, flags specific to the
110126324Sjhb * wait channel, and the lock used to synchronize with a wait channel.
111126324Sjhb * The flags are used to catch mismatches between the various consumers
112126324Sjhb * of the sleep queue API (e.g. sleep/wakeup and condition variables).
113126324Sjhb * The lock pointer is only used when invariants are enabled for various
114126324Sjhb * debugging checks.
115126324Sjhb *
116126324Sjhb * Locking key:
117126324Sjhb *  c - sleep queue chain lock
118126324Sjhb */
119126324Sjhbstruct sleepqueue {
120165272Skmacy	TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];	/* (c) Blocked threads. */
121126324Sjhb	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
122126324Sjhb	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
123126324Sjhb	void	*sq_wchan;			/* (c) Wait channel. */
124136445Sjhb#ifdef INVARIANTS
125134013Sjhb	int	sq_type;			/* (c) Queue type. */
126164325Spjd	struct lock_object *sq_lock;		/* (c) Associated lock. */
127126324Sjhb#endif
128126324Sjhb};
129126324Sjhb
130126324Sjhbstruct sleepqueue_chain {
131126324Sjhb	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
132126324Sjhb	struct mtx sc_lock;			/* Spin lock for this chain. */
133131259Sjhb#ifdef SLEEPQUEUE_PROFILING
134131259Sjhb	u_int	sc_depth;			/* Length of sc_queues. */
135131259Sjhb	u_int	sc_max_depth;			/* Max length of sc_queues. */
136131259Sjhb#endif
137126324Sjhb};
138126324Sjhb
139131259Sjhb#ifdef SLEEPQUEUE_PROFILING
140131259Sjhbu_int sleepq_max_depth;
141131259SjhbSYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
142131259SjhbSYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
143131259Sjhb    "sleepq chain stats");
144131259SjhbSYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
145131259Sjhb    0, "maxmimum depth achieved of a single chain");
146177372Sjeff
147177372Sjeffstatic void	sleepq_profile(const char *wmesg);
148177372Sjeffstatic int	prof_enabled;
149131259Sjhb#endif
150126324Sjhbstatic struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
151169666Sjeffstatic uma_zone_t sleepq_zone;
152126324Sjhb
/*
 * Forward declarations of the file-local helpers.
 */
static int	sleepq_catch_signals(void *wchan, int pri);
static int	sleepq_check_signals(void);
static int	sleepq_check_timeout(void);
#ifdef INVARIANTS
static void	sleepq_dtor(void *mem, int size, void *arg);
#endif
static int	sleepq_init(void *mem, int size, int flags);
static int	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
		    int pri);
static void	sleepq_switch(void *wchan, int pri);
static void	sleepq_timeout(void *arg);
167126324Sjhb
168126324Sjhb/*
169126324Sjhb * Early initialization of sleep queues that is called from the sleepinit()
170126324Sjhb * SYSINIT.
171126324Sjhb */
172126324Sjhbvoid
173126324Sjhbinit_sleepqueues(void)
174126324Sjhb{
175131259Sjhb#ifdef SLEEPQUEUE_PROFILING
176131259Sjhb	struct sysctl_oid *chain_oid;
177131259Sjhb	char chain_name[10];
178131259Sjhb#endif
179126324Sjhb	int i;
180126324Sjhb
181126324Sjhb	for (i = 0; i < SC_TABLESIZE; i++) {
182126324Sjhb		LIST_INIT(&sleepq_chains[i].sc_queues);
183126324Sjhb		mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
184176258Sjhb		    MTX_SPIN | MTX_RECURSE);
185131259Sjhb#ifdef SLEEPQUEUE_PROFILING
186131259Sjhb		snprintf(chain_name, sizeof(chain_name), "%d", i);
187131259Sjhb		chain_oid = SYSCTL_ADD_NODE(NULL,
188131259Sjhb		    SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
189131259Sjhb		    chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
190131259Sjhb		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
191131259Sjhb		    "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
192131259Sjhb		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
193131259Sjhb		    "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
194131259Sjhb		    NULL);
195131259Sjhb#endif
196126324Sjhb	}
197169666Sjeff	sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
198169666Sjeff#ifdef INVARIANTS
199169666Sjeff	    NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
200169666Sjeff#else
201169666Sjeff	    NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
202169666Sjeff#endif
203169666Sjeff
204126324Sjhb	thread0.td_sleepqueue = sleepq_alloc();
205126324Sjhb}
206126324Sjhb
207126324Sjhb/*
208169666Sjeff * Get a sleep queue for a new thread.
209126324Sjhb */
210126324Sjhbstruct sleepqueue *
211126324Sjhbsleepq_alloc(void)
212126324Sjhb{
213126324Sjhb
214169666Sjeff	return (uma_zalloc(sleepq_zone, M_WAITOK));
215126324Sjhb}
216126324Sjhb
217126324Sjhb/*
218126324Sjhb * Free a sleep queue when a thread is destroyed.
219126324Sjhb */
220126324Sjhbvoid
221126324Sjhbsleepq_free(struct sleepqueue *sq)
222126324Sjhb{
223126324Sjhb
224169666Sjeff	uma_zfree(sleepq_zone, sq);
225126324Sjhb}
226126324Sjhb
227126324Sjhb/*
228136445Sjhb * Lock the sleep queue chain associated with the specified wait channel.
229136445Sjhb */
230136445Sjhbvoid
231136445Sjhbsleepq_lock(void *wchan)
232136445Sjhb{
233136445Sjhb	struct sleepqueue_chain *sc;
234136445Sjhb
235136445Sjhb	sc = SC_LOOKUP(wchan);
236136445Sjhb	mtx_lock_spin(&sc->sc_lock);
237136445Sjhb}
238136445Sjhb
239136445Sjhb/*
240126324Sjhb * Look up the sleep queue associated with a given wait channel in the hash
241136445Sjhb * table locking the associated sleep queue chain.  If no queue is found in
242136445Sjhb * the table, NULL is returned.
243126324Sjhb */
244126324Sjhbstruct sleepqueue *
245126324Sjhbsleepq_lookup(void *wchan)
246126324Sjhb{
247126324Sjhb	struct sleepqueue_chain *sc;
248126324Sjhb	struct sleepqueue *sq;
249126324Sjhb
250126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
251126324Sjhb	sc = SC_LOOKUP(wchan);
252136445Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
253126324Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
254126324Sjhb		if (sq->sq_wchan == wchan)
255126324Sjhb			return (sq);
256126324Sjhb	return (NULL);
257126324Sjhb}
258126324Sjhb
259126324Sjhb/*
260126324Sjhb * Unlock the sleep queue chain associated with a given wait channel.
261126324Sjhb */
262126324Sjhbvoid
263126324Sjhbsleepq_release(void *wchan)
264126324Sjhb{
265126324Sjhb	struct sleepqueue_chain *sc;
266126324Sjhb
267126324Sjhb	sc = SC_LOOKUP(wchan);
268126324Sjhb	mtx_unlock_spin(&sc->sc_lock);
269126324Sjhb}
270126324Sjhb
271126324Sjhb/*
272137277Sjhb * Places the current thread on the sleep queue for the specified wait
273126324Sjhb * channel.  If INVARIANTS is enabled, then it associates the passed in
274126324Sjhb * lock with the sleepq to make sure it is held when that sleep queue is
275126324Sjhb * woken up.
276126324Sjhb */
277126324Sjhbvoid
278165272Skmacysleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
279165272Skmacy    int queue)
280126324Sjhb{
281126324Sjhb	struct sleepqueue_chain *sc;
282136445Sjhb	struct sleepqueue *sq;
283137277Sjhb	struct thread *td;
284126324Sjhb
285126324Sjhb	td = curthread;
286126324Sjhb	sc = SC_LOOKUP(wchan);
287126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
288126324Sjhb	MPASS(td->td_sleepqueue != NULL);
289126324Sjhb	MPASS(wchan != NULL);
290165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
291126324Sjhb
292150177Sjhb	/* If this thread is not allowed to sleep, die a horrible death. */
293150177Sjhb	KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
294152221Simp	    ("Trying sleep, but thread marked as sleeping prohibited"));
295150177Sjhb
296136445Sjhb	/* Look up the sleep queue associated with the wait channel 'wchan'. */
297136445Sjhb	sq = sleepq_lookup(wchan);
298136445Sjhb
299136445Sjhb	/*
300136445Sjhb	 * If the wait channel does not already have a sleep queue, use
301136445Sjhb	 * this thread's sleep queue.  Otherwise, insert the current thread
302136445Sjhb	 * into the sleep queue already in use by this wait channel.
303136445Sjhb	 */
304126324Sjhb	if (sq == NULL) {
305165272Skmacy#ifdef INVARIANTS
306165292Skmacy		int i;
307165291Sache
308165292Skmacy		sq = td->td_sleepqueue;
309165292Skmacy		for (i = 0; i < NR_SLEEPQS; i++)
310165292Skmacy			KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
311165272Skmacy				("thread's sleep queue %d is not empty", i));
312165272Skmacy		KASSERT(LIST_EMPTY(&sq->sq_free),
313165272Skmacy		    ("thread's sleep queue has a non-empty free list"));
314165272Skmacy		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
315165292Skmacy		sq->sq_lock = lock;
316165292Skmacy		sq->sq_type = flags & SLEEPQ_TYPE;
317165272Skmacy#endif
318131259Sjhb#ifdef SLEEPQUEUE_PROFILING
319131259Sjhb		sc->sc_depth++;
320131259Sjhb		if (sc->sc_depth > sc->sc_max_depth) {
321131259Sjhb			sc->sc_max_depth = sc->sc_depth;
322131259Sjhb			if (sc->sc_max_depth > sleepq_max_depth)
323131259Sjhb				sleepq_max_depth = sc->sc_max_depth;
324131259Sjhb		}
325131259Sjhb#endif
326165292Skmacy		sq = td->td_sleepqueue;
327126324Sjhb		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
328126324Sjhb		sq->sq_wchan = wchan;
329126324Sjhb	} else {
330126324Sjhb		MPASS(wchan == sq->sq_wchan);
331126488Sjhb		MPASS(lock == sq->sq_lock);
332136445Sjhb		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
333126324Sjhb		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
334126324Sjhb	}
335172155Sattilio	thread_lock(td);
336165272Skmacy	TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
337126324Sjhb	td->td_sleepqueue = NULL;
338165272Skmacy	td->td_sqqueue = queue;
339126324Sjhb	td->td_wchan = wchan;
340126324Sjhb	td->td_wmesg = wmesg;
341155741Sdavidxu	if (flags & SLEEPQ_INTERRUPTIBLE) {
342134013Sjhb		td->td_flags |= TDF_SINTR;
343155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
344155741Sdavidxu	}
345172155Sattilio	thread_unlock(td);
346126324Sjhb}
347126324Sjhb
348126324Sjhb/*
349126324Sjhb * Sets a timeout that will remove the current thread from the specified
350126324Sjhb * sleep queue after timo ticks if the thread has not already been awakened.
351126324Sjhb */
352126324Sjhbvoid
353126885Sjhbsleepq_set_timeout(void *wchan, int timo)
354126324Sjhb{
355126324Sjhb	struct sleepqueue_chain *sc;
356126324Sjhb	struct thread *td;
357126324Sjhb
358126324Sjhb	td = curthread;
359126324Sjhb	sc = SC_LOOKUP(wchan);
360126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
361126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
362126324Sjhb	MPASS(td->td_sleepqueue == NULL);
363126324Sjhb	MPASS(wchan != NULL);
364177860Sjeff	callout_reset_curcpu(&td->td_slpcallout, timo, sleepq_timeout, td);
365126324Sjhb}
366126324Sjhb
367126324Sjhb/*
368126324Sjhb * Marks the pending sleep of the current thread as interruptible and
369126324Sjhb * makes an initial check for pending signals before putting a thread
370170294Sjeff * to sleep. Enters and exits with the thread lock held.  Thread lock
371170294Sjeff * may have transitioned from the sleepq lock to a run lock.
372126324Sjhb */
373155741Sdavidxustatic int
374177085Sjeffsleepq_catch_signals(void *wchan, int pri)
375126324Sjhb{
376126324Sjhb	struct sleepqueue_chain *sc;
377126324Sjhb	struct sleepqueue *sq;
378126324Sjhb	struct thread *td;
379126324Sjhb	struct proc *p;
380155741Sdavidxu	struct sigacts *ps;
381155741Sdavidxu	int sig, ret;
382126324Sjhb
383126324Sjhb	td = curthread;
384155741Sdavidxu	p = curproc;
385126324Sjhb	sc = SC_LOOKUP(wchan);
386126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
387126324Sjhb	MPASS(wchan != NULL);
388177375Sjeff	/*
389177375Sjeff	 * See if there are any pending signals for this thread.  If not
390177375Sjeff	 * we can switch immediately.  Otherwise do the signal processing
391177375Sjeff	 * directly.
392177375Sjeff	 */
393177375Sjeff	thread_lock(td);
394177471Sjeff	if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) {
395177375Sjeff		sleepq_switch(wchan, pri);
396177375Sjeff		return (0);
397177375Sjeff	}
398184199Sdavidxu
399177375Sjeff	thread_unlock(td);
400177375Sjeff	mtx_unlock_spin(&sc->sc_lock);
401129241Sbde	CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
402173601Sjulian		(void *)td, (long)p->p_pid, td->td_name);
403126324Sjhb	PROC_LOCK(p);
404155741Sdavidxu	ps = p->p_sigacts;
405155741Sdavidxu	mtx_lock(&ps->ps_mtx);
406126324Sjhb	sig = cursig(td);
407155741Sdavidxu	if (sig == 0) {
408155741Sdavidxu		mtx_unlock(&ps->ps_mtx);
409155741Sdavidxu		ret = thread_suspend_check(1);
410155741Sdavidxu		MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
411155741Sdavidxu	} else {
412155741Sdavidxu		if (SIGISMEMBER(ps->ps_sigintr, sig))
413155741Sdavidxu			ret = EINTR;
414155741Sdavidxu		else
415155741Sdavidxu			ret = ERESTART;
416155741Sdavidxu		mtx_unlock(&ps->ps_mtx);
417155741Sdavidxu	}
418184199Sdavidxu
419170294Sjeff	mtx_lock_spin(&sc->sc_lock);
420170294Sjeff	thread_lock(td);
421184216Sdavidxu	PROC_UNLOCK(p);
422184216Sdavidxu	if (ret == 0) {
423184216Sdavidxu		sleepq_switch(wchan, pri);
424184216Sdavidxu		return (0);
425184216Sdavidxu	}
426184199Sdavidxu
427155936Sdavidxu	/*
428155936Sdavidxu	 * There were pending signals and this thread is still
429155936Sdavidxu	 * on the sleep queue, remove it from the sleep queue.
430155936Sdavidxu	 */
431170294Sjeff	if (TD_ON_SLEEPQ(td)) {
432170294Sjeff		sq = sleepq_lookup(wchan);
433181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
434181334Sjhb#ifdef INVARIANTS
435181334Sjhb			/*
436181334Sjhb			 * This thread hasn't gone to sleep yet, so it
437181334Sjhb			 * should not be swapped out.
438181334Sjhb			 */
439181334Sjhb			panic("not waking up swapper");
440181334Sjhb#endif
441181334Sjhb		}
442170294Sjeff	}
443170294Sjeff	mtx_unlock_spin(&sc->sc_lock);
444170294Sjeff	MPASS(td->td_lock != &sc->sc_lock);
445155741Sdavidxu	return (ret);
446126324Sjhb}
447126324Sjhb
448126324Sjhb/*
449170294Sjeff * Switches to another thread if we are still asleep on a sleep queue.
450170294Sjeff * Returns with thread lock.
451126324Sjhb */
452126324Sjhbstatic void
453177085Sjeffsleepq_switch(void *wchan, int pri)
454126324Sjhb{
455126324Sjhb	struct sleepqueue_chain *sc;
456175654Sjhb	struct sleepqueue *sq;
457126324Sjhb	struct thread *td;
458126324Sjhb
459126324Sjhb	td = curthread;
460126324Sjhb	sc = SC_LOOKUP(wchan);
461126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
462170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
463175654Sjhb
464175654Sjhb	/*
465175654Sjhb	 * If we have a sleep queue, then we've already been woken up, so
466175654Sjhb	 * just return.
467175654Sjhb	 */
468126324Sjhb	if (td->td_sleepqueue != NULL) {
469126324Sjhb		mtx_unlock_spin(&sc->sc_lock);
470126324Sjhb		return;
471126324Sjhb	}
472175654Sjhb
473175654Sjhb	/*
474175654Sjhb	 * If TDF_TIMEOUT is set, then our sleep has been timed out
475175654Sjhb	 * already but we are still on the sleep queue, so dequeue the
476175654Sjhb	 * thread and return.
477175654Sjhb	 */
478175654Sjhb	if (td->td_flags & TDF_TIMEOUT) {
479175654Sjhb		MPASS(TD_ON_SLEEPQ(td));
480175654Sjhb		sq = sleepq_lookup(wchan);
481181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
482181334Sjhb#ifdef INVARIANTS
483181334Sjhb			/*
484181334Sjhb			 * This thread hasn't gone to sleep yet, so it
485181334Sjhb			 * should not be swapped out.
486181334Sjhb			 */
487181334Sjhb			panic("not waking up swapper");
488181334Sjhb#endif
489181334Sjhb		}
490175654Sjhb		mtx_unlock_spin(&sc->sc_lock);
491175654Sjhb		return;
492175654Sjhb	}
493177372Sjeff#ifdef SLEEPQUEUE_PROFILING
494177372Sjeff	if (prof_enabled)
495177372Sjeff		sleepq_profile(td->td_wmesg);
496177372Sjeff#endif
497177085Sjeff	MPASS(td->td_sleepqueue == NULL);
498177085Sjeff	sched_sleep(td, pri);
499170294Sjeff	thread_lock_set(td, &sc->sc_lock);
500126324Sjhb	TD_SET_SLEEPING(td);
501178272Sjeff	mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
502126324Sjhb	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
503129241Sbde	CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
504173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
505126324Sjhb}
506126324Sjhb
507126324Sjhb/*
508126324Sjhb * Check to see if we timed out.
509126324Sjhb */
510126324Sjhbstatic int
511126324Sjhbsleepq_check_timeout(void)
512126324Sjhb{
513126324Sjhb	struct thread *td;
514126324Sjhb
515126324Sjhb	td = curthread;
516170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
517126324Sjhb
518126324Sjhb	/*
519126324Sjhb	 * If TDF_TIMEOUT is set, we timed out.
520126324Sjhb	 */
521126324Sjhb	if (td->td_flags & TDF_TIMEOUT) {
522126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
523126324Sjhb		return (EWOULDBLOCK);
524126324Sjhb	}
525126324Sjhb
526126324Sjhb	/*
527126324Sjhb	 * If TDF_TIMOFAIL is set, the timeout ran after we had
528126324Sjhb	 * already been woken up.
529126324Sjhb	 */
530126324Sjhb	if (td->td_flags & TDF_TIMOFAIL)
531126324Sjhb		td->td_flags &= ~TDF_TIMOFAIL;
532126324Sjhb
533126324Sjhb	/*
534126324Sjhb	 * If callout_stop() fails, then the timeout is running on
535126324Sjhb	 * another CPU, so synchronize with it to avoid having it
536126324Sjhb	 * accidentally wake up a subsequent sleep.
537126324Sjhb	 */
538126324Sjhb	else if (callout_stop(&td->td_slpcallout) == 0) {
539126324Sjhb		td->td_flags |= TDF_TIMEOUT;
540126324Sjhb		TD_SET_SLEEPING(td);
541178272Sjeff		mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
542126324Sjhb	}
543126324Sjhb	return (0);
544126324Sjhb}
545126324Sjhb
546126324Sjhb/*
547126324Sjhb * Check to see if we were awoken by a signal.
548126324Sjhb */
549126324Sjhbstatic int
550126324Sjhbsleepq_check_signals(void)
551126324Sjhb{
552126324Sjhb	struct thread *td;
553126324Sjhb
554126324Sjhb	td = curthread;
555170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
556126324Sjhb
557126324Sjhb	/* We are no longer in an interruptible sleep. */
558155741Sdavidxu	if (td->td_flags & TDF_SINTR)
559155741Sdavidxu		td->td_flags &= ~TDF_SINTR;
560126324Sjhb
561155741Sdavidxu	if (td->td_flags & TDF_SLEEPABORT) {
562155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
563155741Sdavidxu		return (td->td_intrval);
564155741Sdavidxu	}
565155741Sdavidxu
566126324Sjhb	return (0);
567126324Sjhb}
568126324Sjhb
569126324Sjhb/*
570126324Sjhb * Block the current thread until it is awakened from its sleep queue.
571126324Sjhb */
572126324Sjhbvoid
573177085Sjeffsleepq_wait(void *wchan, int pri)
574126324Sjhb{
575170294Sjeff	struct thread *td;
576126324Sjhb
577170294Sjeff	td = curthread;
578170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
579170294Sjeff	thread_lock(td);
580177085Sjeff	sleepq_switch(wchan, pri);
581170294Sjeff	thread_unlock(td);
582126324Sjhb}
583126324Sjhb
584126324Sjhb/*
585126324Sjhb * Block the current thread until it is awakened from its sleep queue
586126324Sjhb * or it is interrupted by a signal.
587126324Sjhb */
588126324Sjhbint
589177085Sjeffsleepq_wait_sig(void *wchan, int pri)
590126324Sjhb{
591155741Sdavidxu	int rcatch;
592126324Sjhb	int rval;
593126324Sjhb
594177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
595126324Sjhb	rval = sleepq_check_signals();
596170294Sjeff	thread_unlock(curthread);
597155741Sdavidxu	if (rcatch)
598155741Sdavidxu		return (rcatch);
599126324Sjhb	return (rval);
600126324Sjhb}
601126324Sjhb
602126324Sjhb/*
603126324Sjhb * Block the current thread until it is awakened from its sleep queue
604126324Sjhb * or it times out while waiting.
605126324Sjhb */
606126324Sjhbint
607177085Sjeffsleepq_timedwait(void *wchan, int pri)
608126324Sjhb{
609170294Sjeff	struct thread *td;
610126324Sjhb	int rval;
611126324Sjhb
612170294Sjeff	td = curthread;
613170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
614170294Sjeff	thread_lock(td);
615177085Sjeff	sleepq_switch(wchan, pri);
616126324Sjhb	rval = sleepq_check_timeout();
617170294Sjeff	thread_unlock(td);
618170294Sjeff
619131249Sjhb	return (rval);
620126324Sjhb}
621126324Sjhb
622126324Sjhb/*
623126324Sjhb * Block the current thread until it is awakened from its sleep queue,
624126324Sjhb * it is interrupted by a signal, or it times out waiting to be awakened.
625126324Sjhb */
626126324Sjhbint
627177085Sjeffsleepq_timedwait_sig(void *wchan, int pri)
628126324Sjhb{
629155741Sdavidxu	int rcatch, rvalt, rvals;
630126324Sjhb
631177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
632126324Sjhb	rvalt = sleepq_check_timeout();
633126324Sjhb	rvals = sleepq_check_signals();
634170294Sjeff	thread_unlock(curthread);
635155741Sdavidxu	if (rcatch)
636155741Sdavidxu		return (rcatch);
637155741Sdavidxu	if (rvals)
638126324Sjhb		return (rvals);
639155741Sdavidxu	return (rvalt);
640126324Sjhb}
641126324Sjhb
642126324Sjhb/*
643145056Sjhb * Removes a thread from a sleep queue and makes it
644145056Sjhb * runnable.
645126324Sjhb */
646181334Sjhbstatic int
647145056Sjhbsleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
648126324Sjhb{
649126324Sjhb	struct sleepqueue_chain *sc;
650126324Sjhb
651126324Sjhb	MPASS(td != NULL);
652126324Sjhb	MPASS(sq->sq_wchan != NULL);
653126324Sjhb	MPASS(td->td_wchan == sq->sq_wchan);
654165272Skmacy	MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
655170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
656126324Sjhb	sc = SC_LOOKUP(sq->sq_wchan);
657126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
658126324Sjhb
659126324Sjhb	/* Remove the thread from the queue. */
660165272Skmacy	TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
661126324Sjhb
662126324Sjhb	/*
663126324Sjhb	 * Get a sleep queue for this thread.  If this is the last waiter,
664126324Sjhb	 * use the queue itself and take it out of the chain, otherwise,
665126324Sjhb	 * remove a queue from the free list.
666126324Sjhb	 */
667126324Sjhb	if (LIST_EMPTY(&sq->sq_free)) {
668126324Sjhb		td->td_sleepqueue = sq;
669126324Sjhb#ifdef INVARIANTS
670126324Sjhb		sq->sq_wchan = NULL;
671126324Sjhb#endif
672131259Sjhb#ifdef SLEEPQUEUE_PROFILING
673131259Sjhb		sc->sc_depth--;
674131259Sjhb#endif
675126324Sjhb	} else
676126324Sjhb		td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
677126324Sjhb	LIST_REMOVE(td->td_sleepqueue, sq_hash);
678126324Sjhb
679129188Sjhb	td->td_wmesg = NULL;
680129188Sjhb	td->td_wchan = NULL;
681157743Sdavidxu	td->td_flags &= ~TDF_SINTR;
682129188Sjhb
683129241Sbde	CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
684173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, td->td_name);
685126324Sjhb
686126324Sjhb	/* Adjust priority if requested. */
687177085Sjeff	MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
688177085Sjeff	if (pri != 0 && td->td_priority > pri)
689136439Sups		sched_prio(td, pri);
690184653Sjhb
691184653Sjhb	/*
692184653Sjhb	 * Note that thread td might not be sleeping if it is running
693184653Sjhb	 * sleepq_catch_signals() on another CPU or is blocked on its
694184653Sjhb	 * proc lock to check signals.  There's no need to mark the
695184653Sjhb	 * thread runnable in that case.
696184653Sjhb	 */
697184653Sjhb	if (TD_IS_SLEEPING(td)) {
698184653Sjhb		TD_CLR_SLEEPING(td);
699184653Sjhb		return (setrunnable(td));
700184653Sjhb	}
701184653Sjhb	return (0);
702126324Sjhb}
703126324Sjhb
#ifdef INVARIANTS
/*
 * UMA zone item destructor.  Only built with INVARIANTS: asserts that no
 * thread is still on any of the blocked lists of the sleep queue being
 * freed.
 */
static void
sleepq_dtor(void *mem, int size, void *arg)
{
	struct sleepqueue *sq;
	int qidx;

	sq = mem;
	for (qidx = 0; qidx < NR_SLEEPQS; qidx++) {
		MPASS(TAILQ_EMPTY(&sq->sq_blocked[qidx]));
	}
}
#endif
719169666Sjeff
720169666Sjeff/*
721169666Sjeff * UMA zone item initializer.
722169666Sjeff */
723169666Sjeffstatic int
724169666Sjeffsleepq_init(void *mem, int size, int flags)
725169666Sjeff{
726169666Sjeff	struct sleepqueue *sq;
727169666Sjeff	int i;
728169666Sjeff
729169666Sjeff	bzero(mem, size);
730169666Sjeff	sq = mem;
731169666Sjeff	for (i = 0; i < NR_SLEEPQS; i++)
732169666Sjeff		TAILQ_INIT(&sq->sq_blocked[i]);
733169666Sjeff	LIST_INIT(&sq->sq_free);
734169666Sjeff	return (0);
735169666Sjeff}
736169666Sjeff
737169666Sjeff/*
738126324Sjhb * Find the highest priority thread sleeping on a wait channel and resume it.
739126324Sjhb */
740181334Sjhbint
741165272Skmacysleepq_signal(void *wchan, int flags, int pri, int queue)
742126324Sjhb{
743126324Sjhb	struct sleepqueue *sq;
744137277Sjhb	struct thread *td, *besttd;
745181334Sjhb	int wakeup_swapper;
746126324Sjhb
747126324Sjhb	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
748126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
749165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
750126324Sjhb	sq = sleepq_lookup(wchan);
751170294Sjeff	if (sq == NULL)
752181334Sjhb		return (0);
753134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
754126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
755129188Sjhb
756137277Sjhb	/*
757137277Sjhb	 * Find the highest priority thread on the queue.  If there is a
758137277Sjhb	 * tie, use the thread that first appears in the queue as it has
759137277Sjhb	 * been sleeping the longest since threads are always added to
760137277Sjhb	 * the tail of sleep queues.
761137277Sjhb	 */
762137277Sjhb	besttd = NULL;
763165272Skmacy	TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
764137277Sjhb		if (besttd == NULL || td->td_priority < besttd->td_priority)
765137277Sjhb			besttd = td;
766137277Sjhb	}
767137277Sjhb	MPASS(besttd != NULL);
768170294Sjeff	thread_lock(besttd);
769181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
770170294Sjeff	thread_unlock(besttd);
771181334Sjhb	return (wakeup_swapper);
772126324Sjhb}
773126324Sjhb
774126324Sjhb/*
775126324Sjhb * Resume all threads sleeping on a specified wait channel.
776126324Sjhb */
777181334Sjhbint
778165272Skmacysleepq_broadcast(void *wchan, int flags, int pri, int queue)
779126324Sjhb{
780126324Sjhb	struct sleepqueue *sq;
781182875Sjhb	struct thread *td, *tdn;
782181334Sjhb	int wakeup_swapper;
783126324Sjhb
784126324Sjhb	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
785126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
786165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
787126324Sjhb	sq = sleepq_lookup(wchan);
788177085Sjeff	if (sq == NULL)
789181334Sjhb		return (0);
790134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
791126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
792129188Sjhb
793145056Sjhb	/* Resume all blocked threads on the sleep queue. */
794181334Sjhb	wakeup_swapper = 0;
795182875Sjhb	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
796170294Sjeff		thread_lock(td);
797181334Sjhb		if (sleepq_resume_thread(sq, td, pri))
798181334Sjhb			wakeup_swapper = 1;
799170294Sjeff		thread_unlock(td);
800170294Sjeff	}
801181334Sjhb	return (wakeup_swapper);
802126324Sjhb}
803126324Sjhb
804126324Sjhb/*
805126324Sjhb * Time sleeping threads out.  When the timeout expires, the thread is
806126324Sjhb * removed from the sleep queue and made runnable if it is still asleep.
807126324Sjhb */
808126324Sjhbstatic void
809126324Sjhbsleepq_timeout(void *arg)
810126324Sjhb{
811170294Sjeff	struct sleepqueue_chain *sc;
812126324Sjhb	struct sleepqueue *sq;
813126324Sjhb	struct thread *td;
814126324Sjhb	void *wchan;
815181334Sjhb	int wakeup_swapper;
816126324Sjhb
817129241Sbde	td = arg;
818181334Sjhb	wakeup_swapper = 0;
819129241Sbde	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
820173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
821126324Sjhb
822126324Sjhb	/*
823126324Sjhb	 * First, see if the thread is asleep and get the wait channel if
824126324Sjhb	 * it is.
825126324Sjhb	 */
826170294Sjeff	thread_lock(td);
827170294Sjeff	if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
828126324Sjhb		wchan = td->td_wchan;
829170294Sjeff		sc = SC_LOOKUP(wchan);
830176078Sjeff		THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
831126324Sjhb		sq = sleepq_lookup(wchan);
832170294Sjeff		MPASS(sq != NULL);
833170294Sjeff		td->td_flags |= TDF_TIMEOUT;
834181334Sjhb		wakeup_swapper = sleepq_resume_thread(sq, td, 0);
835170294Sjeff		thread_unlock(td);
836181334Sjhb		if (wakeup_swapper)
837181334Sjhb			kick_proc0();
838170294Sjeff		return;
839126324Sjhb	}
840175654Sjhb
841126324Sjhb	/*
842175654Sjhb	 * If the thread is on the SLEEPQ but isn't sleeping yet, it
843175654Sjhb	 * can either be on another CPU in between sleepq_add() and
844175654Sjhb	 * one of the sleepq_*wait*() routines or it can be in
845175654Sjhb	 * sleepq_catch_signals().
846126324Sjhb	 */
847126324Sjhb	if (TD_ON_SLEEPQ(td)) {
848175664Sjhb		td->td_flags |= TDF_TIMEOUT;
849170294Sjeff		thread_unlock(td);
850126324Sjhb		return;
851170294Sjeff	}
852126324Sjhb
853126324Sjhb	/*
854126324Sjhb	 * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
855126324Sjhb	 * then the other thread has already yielded to us, so clear
856126324Sjhb	 * the flag and resume it.  If TDF_TIMEOUT is not set, then the
857126324Sjhb	 * we know that the other thread is not on a sleep queue, but it
858126324Sjhb	 * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
859126324Sjhb	 * to let it know that the timeout has already run and doesn't
860126324Sjhb	 * need to be canceled.
861126324Sjhb	 */
862126324Sjhb	if (td->td_flags & TDF_TIMEOUT) {
863127085Sjhb		MPASS(TD_IS_SLEEPING(td));
864126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
865126324Sjhb		TD_CLR_SLEEPING(td);
866181334Sjhb		wakeup_swapper = setrunnable(td);
867126324Sjhb	} else
868126324Sjhb		td->td_flags |= TDF_TIMOFAIL;
869170294Sjeff	thread_unlock(td);
870181334Sjhb	if (wakeup_swapper)
871181334Sjhb		kick_proc0();
872126324Sjhb}
873126324Sjhb
874126324Sjhb/*
875126324Sjhb * Resumes a specific thread from the sleep queue associated with a specific
876126324Sjhb * wait channel if it is on that queue.
877126324Sjhb */
878126324Sjhbvoid
879126324Sjhbsleepq_remove(struct thread *td, void *wchan)
880126324Sjhb{
881126324Sjhb	struct sleepqueue *sq;
882181334Sjhb	int wakeup_swapper;
883126324Sjhb
884126324Sjhb	/*
885126324Sjhb	 * Look up the sleep queue for this wait channel, then re-check
886126324Sjhb	 * that the thread is asleep on that channel, if it is not, then
887126324Sjhb	 * bail.
888126324Sjhb	 */
889126324Sjhb	MPASS(wchan != NULL);
890136445Sjhb	sleepq_lock(wchan);
891126324Sjhb	sq = sleepq_lookup(wchan);
892170294Sjeff	/*
893170294Sjeff	 * We can not lock the thread here as it may be sleeping on a
894170294Sjeff	 * different sleepq.  However, holding the sleepq lock for this
895170294Sjeff	 * wchan can guarantee that we do not miss a wakeup for this
896170294Sjeff	 * channel.  The asserts below will catch any false positives.
897170294Sjeff	 */
898126324Sjhb	if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
899126324Sjhb		sleepq_release(wchan);
900126324Sjhb		return;
901126324Sjhb	}
902170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
903170294Sjeff	thread_lock(td);
904126324Sjhb	MPASS(sq != NULL);
905170294Sjeff	MPASS(td->td_wchan == wchan);
906181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, td, 0);
907170294Sjeff	thread_unlock(td);
908126324Sjhb	sleepq_release(wchan);
909181334Sjhb	if (wakeup_swapper)
910181334Sjhb		kick_proc0();
911126324Sjhb}
912126324Sjhb
913126324Sjhb/*
914129241Sbde * Abort a thread as if an interrupt had occurred.  Only abort
915129241Sbde * interruptible waits (unfortunately it isn't safe to abort others).
916126324Sjhb */
917181334Sjhbint
918155741Sdavidxusleepq_abort(struct thread *td, int intrval)
919126324Sjhb{
920170294Sjeff	struct sleepqueue *sq;
921126324Sjhb	void *wchan;
922126324Sjhb
923170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
924126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
925126324Sjhb	MPASS(td->td_flags & TDF_SINTR);
926155741Sdavidxu	MPASS(intrval == EINTR || intrval == ERESTART);
927126324Sjhb
928126324Sjhb	/*
929126324Sjhb	 * If the TDF_TIMEOUT flag is set, just leave. A
930126324Sjhb	 * timeout is scheduled anyhow.
931126324Sjhb	 */
932126324Sjhb	if (td->td_flags & TDF_TIMEOUT)
933181334Sjhb		return (0);
934126324Sjhb
935129241Sbde	CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
936173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
937170294Sjeff	td->td_intrval = intrval;
938170294Sjeff	td->td_flags |= TDF_SLEEPABORT;
939170294Sjeff	/*
940170294Sjeff	 * If the thread has not slept yet it will find the signal in
941170294Sjeff	 * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
942170294Sjeff	 * we have to do it here.
943170294Sjeff	 */
944170294Sjeff	if (!TD_IS_SLEEPING(td))
945181334Sjhb		return (0);
946126324Sjhb	wchan = td->td_wchan;
947170294Sjeff	MPASS(wchan != NULL);
948170294Sjeff	sq = sleepq_lookup(wchan);
949170294Sjeff	MPASS(sq != NULL);
950170294Sjeff
951170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
952181334Sjhb	return (sleepq_resume_thread(sq, td, 0));
953126324Sjhb}
954154936Sjhb
955177372Sjeff#ifdef SLEEPQUEUE_PROFILING
/* Number of entries in the static pool of profiling records. */
#define	SLEEPQ_PROF_LOCATIONS	1024
/* Base size of the sbuf used when dumping the statistics. */
#define	SLEEPQ_SBUFSIZE		(40 * 512)
/* One profiling record: a wait message and the number of times it was seen. */
struct sleepq_prof {
	LIST_ENTRY(sleepq_prof) sp_link;	/* Free list or hash bucket linkage. */
	const char	*sp_wmesg;		/* Wait message; matched by pointer. */
	long		sp_count;		/* Times this message was recorded. */
};

LIST_HEAD(sqphead, sleepq_prof);

/* Records that are not assigned to any wait message yet. */
struct sqphead sleepq_prof_free;
/* Buckets of records in use, indexed by SC_HASH() of the wait message. */
struct sqphead sleepq_hash[SC_TABLESIZE];
/* Backing storage for every profiling record. */
static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
/* Spin mutex taken around accesses to the lists above. */
static struct mtx sleepq_prof_lock;
MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
971177372Sjeff
972177372Sjeffstatic void
973177372Sjeffsleepq_profile(const char *wmesg)
974177372Sjeff{
975177372Sjeff	struct sleepq_prof *sp;
976177372Sjeff
977177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
978177372Sjeff	if (prof_enabled == 0)
979177372Sjeff		goto unlock;
980177372Sjeff	LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
981177372Sjeff		if (sp->sp_wmesg == wmesg)
982177372Sjeff			goto done;
983177372Sjeff	sp = LIST_FIRST(&sleepq_prof_free);
984177372Sjeff	if (sp == NULL)
985177372Sjeff		goto unlock;
986177372Sjeff	sp->sp_wmesg = wmesg;
987177372Sjeff	LIST_REMOVE(sp, sp_link);
988177372Sjeff	LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
989177372Sjeffdone:
990177372Sjeff	sp->sp_count++;
991177372Sjeffunlock:
992177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
993177372Sjeff	return;
994177372Sjeff}
995177372Sjeff
996177372Sjeffstatic void
997177372Sjeffsleepq_prof_reset(void)
998177372Sjeff{
999177372Sjeff	struct sleepq_prof *sp;
1000177372Sjeff	int enabled;
1001177372Sjeff	int i;
1002177372Sjeff
1003177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1004177372Sjeff	enabled = prof_enabled;
1005177372Sjeff	prof_enabled = 0;
1006177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++)
1007177372Sjeff		LIST_INIT(&sleepq_hash[i]);
1008177372Sjeff	LIST_INIT(&sleepq_prof_free);
1009177372Sjeff	for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
1010177372Sjeff		sp = &sleepq_profent[i];
1011177372Sjeff		sp->sp_wmesg = NULL;
1012177372Sjeff		sp->sp_count = 0;
1013177372Sjeff		LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
1014177372Sjeff	}
1015177372Sjeff	prof_enabled = enabled;
1016177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1017177372Sjeff}
1018177372Sjeff
1019177372Sjeffstatic int
1020177372Sjeffenable_sleepq_prof(SYSCTL_HANDLER_ARGS)
1021177372Sjeff{
1022177372Sjeff	int error, v;
1023177372Sjeff
1024177372Sjeff	v = prof_enabled;
1025177372Sjeff	error = sysctl_handle_int(oidp, &v, v, req);
1026177372Sjeff	if (error)
1027177372Sjeff		return (error);
1028177372Sjeff	if (req->newptr == NULL)
1029177372Sjeff		return (error);
1030177372Sjeff	if (v == prof_enabled)
1031177372Sjeff		return (0);
1032177372Sjeff	if (v == 1)
1033177372Sjeff		sleepq_prof_reset();
1034177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1035177372Sjeff	prof_enabled = !!v;
1036177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1037177372Sjeff
1038177372Sjeff	return (0);
1039177372Sjeff}
1040177372Sjeff
1041177372Sjeffstatic int
1042177372Sjeffreset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1043177372Sjeff{
1044177372Sjeff	int error, v;
1045177372Sjeff
1046177372Sjeff	v = 0;
1047177372Sjeff	error = sysctl_handle_int(oidp, &v, 0, req);
1048177372Sjeff	if (error)
1049177372Sjeff		return (error);
1050177372Sjeff	if (req->newptr == NULL)
1051177372Sjeff		return (error);
1052177372Sjeff	if (v == 0)
1053177372Sjeff		return (0);
1054177372Sjeff	sleepq_prof_reset();
1055177372Sjeff
1056177372Sjeff	return (0);
1057177372Sjeff}
1058177372Sjeff
1059177372Sjeffstatic int
1060177372Sjeffdump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1061177372Sjeff{
1062177372Sjeff	static int multiplier = 1;
1063177372Sjeff	struct sleepq_prof *sp;
1064177372Sjeff	struct sbuf *sb;
1065177372Sjeff	int enabled;
1066177372Sjeff	int error;
1067177372Sjeff	int i;
1068177372Sjeff
1069177372Sjeffretry_sbufops:
1070177372Sjeff	sb = sbuf_new(NULL, NULL, SLEEPQ_SBUFSIZE * multiplier, SBUF_FIXEDLEN);
1071177372Sjeff	sbuf_printf(sb, "\nwmesg\tcount\n");
1072177372Sjeff	enabled = prof_enabled;
1073177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1074177372Sjeff	prof_enabled = 0;
1075177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1076177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++) {
1077177372Sjeff		LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
1078177372Sjeff			sbuf_printf(sb, "%s\t%ld\n",
1079177372Sjeff			    sp->sp_wmesg, sp->sp_count);
1080177372Sjeff			if (sbuf_overflowed(sb)) {
1081177372Sjeff				sbuf_delete(sb);
1082177372Sjeff				multiplier++;
1083177372Sjeff				goto retry_sbufops;
1084177372Sjeff			}
1085177372Sjeff		}
1086177372Sjeff	}
1087177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1088177372Sjeff	prof_enabled = enabled;
1089177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1090177372Sjeff
1091177372Sjeff	sbuf_finish(sb);
1092177372Sjeff	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
1093177372Sjeff	sbuf_delete(sb);
1094177372Sjeff	return (error);
1095177372Sjeff}
1096177372Sjeff
/* Read-only string node "stats": served by dump_sleepq_prof_stats(). */
SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
/* Read/write integer node "reset": served by reset_sleepq_prof_stats(). */
SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
    NULL, 0, reset_sleepq_prof_stats, "I",
    "Reset sleepqueue profiling statistics");
/* Read/write integer node "enable": served by enable_sleepq_prof(). */
SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
    NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
1104177372Sjeff#endif
1105177372Sjeff
1106154936Sjhb#ifdef DDB
1107154936SjhbDB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
1108154936Sjhb{
1109154936Sjhb	struct sleepqueue_chain *sc;
1110154936Sjhb	struct sleepqueue *sq;
1111154944Simp#ifdef INVARIANTS
1112154936Sjhb	struct lock_object *lock;
1113154944Simp#endif
1114154936Sjhb	struct thread *td;
1115154936Sjhb	void *wchan;
1116154936Sjhb	int i;
1117154936Sjhb
1118154936Sjhb	if (!have_addr)
1119154936Sjhb		return;
1120154936Sjhb
1121154936Sjhb	/*
1122154936Sjhb	 * First, see if there is an active sleep queue for the wait channel
1123154936Sjhb	 * indicated by the address.
1124154936Sjhb	 */
1125154936Sjhb	wchan = (void *)addr;
1126154936Sjhb	sc = SC_LOOKUP(wchan);
1127154936Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
1128154936Sjhb		if (sq->sq_wchan == wchan)
1129154936Sjhb			goto found;
1130154936Sjhb
1131154936Sjhb	/*
1132154936Sjhb	 * Second, see if there is an active sleep queue at the address
1133154936Sjhb	 * indicated.
1134154936Sjhb	 */
1135154936Sjhb	for (i = 0; i < SC_TABLESIZE; i++)
1136154936Sjhb		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
1137154936Sjhb			if (sq == (struct sleepqueue *)addr)
1138154936Sjhb				goto found;
1139154936Sjhb		}
1140154936Sjhb
1141154936Sjhb	db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
1142154936Sjhb	return;
1143154936Sjhbfound:
1144154936Sjhb	db_printf("Wait channel: %p\n", sq->sq_wchan);
1145154936Sjhb#ifdef INVARIANTS
1146154936Sjhb	db_printf("Queue type: %d\n", sq->sq_type);
1147154936Sjhb	if (sq->sq_lock) {
1148164325Spjd		lock = sq->sq_lock;
1149154936Sjhb		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
1150154936Sjhb		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
1151154936Sjhb	}
1152154936Sjhb#endif
1153154936Sjhb	db_printf("Blocked threads:\n");
1154165272Skmacy	for (i = 0; i < NR_SLEEPQS; i++) {
1155165272Skmacy		db_printf("\nQueue[%d]:\n", i);
1156165272Skmacy		if (TAILQ_EMPTY(&sq->sq_blocked[i]))
1157165272Skmacy			db_printf("\tempty\n");
1158165272Skmacy		else
1159165272Skmacy			TAILQ_FOREACH(td, &sq->sq_blocked[0],
1160165272Skmacy				      td_slpq) {
1161165272Skmacy				db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
1162165272Skmacy					  td->td_tid, td->td_proc->p_pid,
1163180930Sjhb					  td->td_name);
1164165272Skmacy			}
1165165272Skmacy	}
1166154936Sjhb}
1167157823Sjhb
1168157823Sjhb/* Alias 'show sleepqueue' to 'show sleepq'. */
1169183054SsamDB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
1170154936Sjhb#endif
1171