subr_sleepqueue.c revision 217916
1139804Simp/*-
2126324Sjhb * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
3126324Sjhb * All rights reserved.
4126324Sjhb *
5126324Sjhb * Redistribution and use in source and binary forms, with or without
6126324Sjhb * modification, are permitted provided that the following conditions
7126324Sjhb * are met:
8126324Sjhb * 1. Redistributions of source code must retain the above copyright
9126324Sjhb *    notice, this list of conditions and the following disclaimer.
10126324Sjhb * 2. Redistributions in binary form must reproduce the above copyright
11126324Sjhb *    notice, this list of conditions and the following disclaimer in the
12126324Sjhb *    documentation and/or other materials provided with the distribution.
13126324Sjhb * 3. Neither the name of the author nor the names of any co-contributors
14126324Sjhb *    may be used to endorse or promote products derived from this software
15126324Sjhb *    without specific prior written permission.
16126324Sjhb *
17126324Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18126324Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19126324Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20126324Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21126324Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22126324Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23126324Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24126324Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25126324Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26126324Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27126324Sjhb * SUCH DAMAGE.
28126324Sjhb */
29126324Sjhb
30126324Sjhb/*
31126324Sjhb * Implementation of sleep queues used to hold queue of threads blocked on
32126324Sjhb * a wait channel.  Sleep queues differ from turnstiles in that wait
33126324Sjhb * channels are not owned by anyone, so there is no priority propagation.
34126324Sjhb * Sleep queues can also provide a timeout and can also be interrupted by
35126324Sjhb * signals.  That said, there are several similarities between the turnstile
36126324Sjhb * and sleep queue implementations.  (Note: turnstiles were implemented
37126324Sjhb * first.)  For example, both use a hash table of the same size where each
38126324Sjhb * bucket is referred to as a "chain" that contains both a spin lock and
39126324Sjhb * a linked list of queues.  An individual queue is located by using a hash
40126324Sjhb * to pick a chain, locking the chain, and then walking the chain searching
41126324Sjhb * for the queue.  This means that a wait channel object does not need to
42126324Sjhb * embed its queue head just as locks do not embed their turnstile queue
43126324Sjhb * head.  Threads also carry around a sleep queue that they lend to the
44126324Sjhb * wait channel when blocking.  Just as in turnstiles, the queue includes
45126324Sjhb * a free list of the sleep queues of other threads blocked on the same
46126324Sjhb * wait channel in the case of multiple waiters.
47126324Sjhb *
48126324Sjhb * Some additional functionality provided by sleep queues includes the
49126324Sjhb * ability to set a timeout.  The timeout is managed using a per-thread
50126324Sjhb * callout that resumes a thread if it is asleep.  A thread may also
51126324Sjhb * catch signals while it is asleep (aka an interruptible sleep).  The
52126324Sjhb * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
53126324Sjhb * sleep queues also provide some extra assertions.  One is not allowed to
54126324Sjhb * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
55126324Sjhb * must consistently use the same lock to synchronize with a wait channel,
56126324Sjhb * though this check is currently only a warning for sleep/wakeup due to
57126324Sjhb * pre-existing abuse of that API.  The same lock must also be held when
58126324Sjhb * awakening threads, though that is currently only enforced for condition
59126324Sjhb * variables.
60126324Sjhb */
61126324Sjhb
62126324Sjhb#include <sys/cdefs.h>
63126324Sjhb__FBSDID("$FreeBSD: head/sys/kern/subr_sleepqueue.c 217916 2011-01-27 00:34:12Z mdf $");
64126324Sjhb
65154936Sjhb#include "opt_sleepqueue_profiling.h"
66154936Sjhb#include "opt_ddb.h"
67170640Sjeff#include "opt_sched.h"
68154936Sjhb
69126324Sjhb#include <sys/param.h>
70126324Sjhb#include <sys/systm.h>
71126324Sjhb#include <sys/lock.h>
72126324Sjhb#include <sys/kernel.h>
73126324Sjhb#include <sys/ktr.h>
74126324Sjhb#include <sys/mutex.h>
75126324Sjhb#include <sys/proc.h>
76177372Sjeff#include <sys/sbuf.h>
77126324Sjhb#include <sys/sched.h>
78126324Sjhb#include <sys/signalvar.h>
79126324Sjhb#include <sys/sleepqueue.h>
80131259Sjhb#include <sys/sysctl.h>
81126324Sjhb
82169666Sjeff#include <vm/uma.h>
83169666Sjeff
84154936Sjhb#ifdef DDB
85154936Sjhb#include <ddb/ddb.h>
86154936Sjhb#endif
87154936Sjhb
88126324Sjhb/*
89126324Sjhb * Constants for the hash table of sleep queue chains.  These constants are
90126324Sjhb * the same ones that 4BSD (and possibly earlier versions of BSD) used.
91126324Sjhb * Basically, we ignore the lower 8 bits of the address since most wait
92126324Sjhb * channel pointers are aligned and only look at the next 7 bits for the
93126324Sjhb * hash.  SC_TABLESIZE must be a power of two for SC_MASK to work properly.
94126324Sjhb */
/*
 * SC_HASH() discards the low SC_SHIFT bits of the wait channel address and
 * masks the result with SC_MASK, selecting one of SC_TABLESIZE chains.
 * SC_LOOKUP() yields the address of that entry in sleepq_chains[].
 * NR_SLEEPQS is the number of blocked lists (and matching counters) that
 * each struct sleepqueue carries.
 *
 * NOTE(review): the SC_LOOKUP() expansion is not wrapped in parentheses, so
 * a postfix operator applied to the macro result would bind before the
 * leading '&'.  Every use visible in this part of the file is a plain
 * assignment, where that does not matter.
 */
95126324Sjhb#define	SC_TABLESIZE	128			/* Must be power of 2. */
96126324Sjhb#define	SC_MASK		(SC_TABLESIZE - 1)
97126324Sjhb#define	SC_SHIFT	8
98126324Sjhb#define	SC_HASH(wc)	(((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
99126324Sjhb#define	SC_LOOKUP(wc)	&sleepq_chains[SC_HASH(wc)]
100165272Skmacy#define NR_SLEEPQS      2
101126324Sjhb/*
102126324Sjhb * There are two different lists of sleep queues.  Both lists are connected
103126324Sjhb * via the sq_hash entries.  The first list is the sleep queue chain list
104126324Sjhb * that a sleep queue is on when it is attached to a wait channel.  The
105126324Sjhb * second list is the free list hung off of a sleep queue that is attached
106126324Sjhb * to a wait channel.
107126324Sjhb *
108126324Sjhb * Each sleep queue also contains the wait channel it is attached to, the
109126324Sjhb * list of threads blocked on that wait channel, flags specific to the
110126324Sjhb * wait channel, and the lock used to synchronize with a wait channel.
111126324Sjhb * The flags are used to catch mismatches between the various consumers
112126324Sjhb * of the sleep queue API (e.g. sleep/wakeup and condition variables).
113126324Sjhb * The lock pointer is only used when invariants are enabled for various
114126324Sjhb * debugging checks.
115126324Sjhb *
116126324Sjhb * Locking key:
117126324Sjhb *  c - sleep queue chain lock
118126324Sjhb */
/*
 * Per-wait-channel queue state.  Every field is marked (c), i.e. covered by
 * the sleep queue chain lock.  sq_blocked[] and sq_blockedcnt[] are indexed
 * by the 'queue' argument given to sleepq_add(), which keeps the list and
 * its counter in step.  sq_lock is present only when INVARIANTS is defined.
 */
119126324Sjhbstruct sleepqueue {
120165272Skmacy	TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];	/* (c) Blocked threads. */
121200447Sattilio	u_int sq_blockedcnt[NR_SLEEPQS];	/* (c) N. of blocked threads. */
122126324Sjhb	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
123126324Sjhb	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
124126324Sjhb	void	*sq_wchan;			/* (c) Wait channel. */
125201879Sattilio	int	sq_type;			/* (c) Queue type. */
126136445Sjhb#ifdef INVARIANTS
127164325Spjd	struct lock_object *sq_lock;		/* (c) Associated lock. */
128126324Sjhb#endif
129126324Sjhb};
130126324Sjhb
/*
 * One bucket of the sleepq_chains[] hash table: the list of sleep queues
 * that hash to this bucket and the spin mutex that sleepq_lock() and
 * sleepq_release() acquire and drop.  The two depth counters exist only
 * when SLEEPQUEUE_PROFILING is defined; sleepq_add() updates them when it
 * links a new queue into the chain.
 */
131126324Sjhbstruct sleepqueue_chain {
132126324Sjhb	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
133126324Sjhb	struct mtx sc_lock;			/* Spin lock for this chain. */
134131259Sjhb#ifdef SLEEPQUEUE_PROFILING
135131259Sjhb	u_int	sc_depth;			/* Length of sc_queues. */
136131259Sjhb	u_int	sc_max_depth;			/* Max length of sc_queues. */
137131259Sjhb#endif
138126324Sjhb};
139126324Sjhb
/*
 * File-scope state.  With SLEEPQUEUE_PROFILING this declares a "sleepq"
 * sysctl node under _debug, a "chains" node beneath it, and a read-only
 * "max_depth" value backed by sleepq_max_depth.
 *
 * NOTE(review): the max_depth description string spells "maxmimum".  It is
 * runtime-visible text, so it is deliberately left untouched here.
 */
140131259Sjhb#ifdef SLEEPQUEUE_PROFILING
141131259Sjhbu_int sleepq_max_depth;
142131259SjhbSYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
143131259SjhbSYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
144131259Sjhb    "sleepq chain stats");
145131259SjhbSYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
146131259Sjhb    0, "maxmimum depth achieved of a single chain");
147177372Sjeff
148177372Sjeffstatic void	sleepq_profile(const char *wmesg);
149177372Sjeffstatic int	prof_enabled;
150131259Sjhb#endif
/*
 * The hash table of chains indexed by SC_HASH(), and the UMA zone that
 * sleepq_alloc() and sleepq_free() operate on.
 */
151126324Sjhbstatic struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
152169666Sjeffstatic uma_zone_t sleepq_zone;
153126324Sjhb
154126324Sjhb/*
155126324Sjhb * Prototypes for non-exported routines.
156126324Sjhb */
/*
 * sleepq_dtor() is declared (and later defined) only under INVARIANTS; the
 * remaining helpers are always built.
 */
157177085Sjeffstatic int	sleepq_catch_signals(void *wchan, int pri);
158165272Skmacystatic int	sleepq_check_signals(void);
159126324Sjhbstatic int	sleepq_check_timeout(void);
160169666Sjeff#ifdef INVARIANTS
161169666Sjeffstatic void	sleepq_dtor(void *mem, int size, void *arg);
162169666Sjeff#endif
163169666Sjeffstatic int	sleepq_init(void *mem, int size, int flags);
164181334Sjhbstatic int	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
165169666Sjeff		    int pri);
166177085Sjeffstatic void	sleepq_switch(void *wchan, int pri);
167126324Sjhbstatic void	sleepq_timeout(void *arg);
168126324Sjhb
169126324Sjhb/*
170126324Sjhb * Early initialization of sleep queues that is called from the sleepinit()
171126324Sjhb * SYSINIT.
172126324Sjhb */
/*
 * init_sleepqueues() takes no arguments and returns nothing.
 *
 * For each of the SC_TABLESIZE chains it initializes the queue list and a
 * spin mutex created with MTX_SPIN | MTX_RECURSE.  With
 * SLEEPQUEUE_PROFILING it also adds a sysctl node named after the chain
 * index, holding read-only "depth" and "max_depth" values.  It then creates
 * the "SLEEPQUEUE" UMA zone and assigns thread0 a sleep queue from it.
 */
173126324Sjhbvoid
174126324Sjhbinit_sleepqueues(void)
175126324Sjhb{
176131259Sjhb#ifdef SLEEPQUEUE_PROFILING
177131259Sjhb	struct sysctl_oid *chain_oid;
178131259Sjhb	char chain_name[10];
179131259Sjhb#endif
180126324Sjhb	int i;
181126324Sjhb
182126324Sjhb	for (i = 0; i < SC_TABLESIZE; i++) {
183126324Sjhb		LIST_INIT(&sleepq_chains[i].sc_queues);
184126324Sjhb		mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
185176258Sjhb		    MTX_SPIN | MTX_RECURSE);
186131259Sjhb#ifdef SLEEPQUEUE_PROFILING
		/*
		 * The node name is the decimal chain index; i is below
		 * SC_TABLESIZE (128), so it fits in chain_name[10].
		 */
187131259Sjhb		snprintf(chain_name, sizeof(chain_name), "%d", i);
188131259Sjhb		chain_oid = SYSCTL_ADD_NODE(NULL,
189131259Sjhb		    SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
190131259Sjhb		    chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
191131259Sjhb		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
192131259Sjhb		    "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
193131259Sjhb		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
194131259Sjhb		    "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
195131259Sjhb		    NULL);
196131259Sjhb#endif
197126324Sjhb	}
	/*
	 * sleepq_init is passed in both configurations; sleepq_dtor is
	 * passed only under INVARIANTS (NULL otherwise).
	 */
198169666Sjeff	sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
199169666Sjeff#ifdef INVARIANTS
200169666Sjeff	    NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
201169666Sjeff#else
202169666Sjeff	    NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
203169666Sjeff#endif
204169666Sjeff
205126324Sjhb	thread0.td_sleepqueue = sleepq_alloc();
206126324Sjhb}
207126324Sjhb
208126324Sjhb/*
209169666Sjeff * Get a sleep queue for a new thread.
210126324Sjhb */
/*
 * Returns an item allocated from sleepq_zone.  The request is made with
 * M_WAITOK -- presumably this means the call may sleep rather than return
 * NULL; confirm against the UMA documentation.
 */
211126324Sjhbstruct sleepqueue *
212126324Sjhbsleepq_alloc(void)
213126324Sjhb{
214126324Sjhb
215169666Sjeff	return (uma_zalloc(sleepq_zone, M_WAITOK));
216126324Sjhb}
217126324Sjhb
218126324Sjhb/*
219126324Sjhb * Free a sleep queue when a thread is destroyed.
220126324Sjhb */
/*
 * Hands sq back to sleepq_zone.  No checks are made here; under INVARIANTS
 * the zone is created with sleepq_dtor, which asserts that every blocked
 * list of the queue is empty and every blocked count is zero.
 */
221126324Sjhbvoid
222126324Sjhbsleepq_free(struct sleepqueue *sq)
223126324Sjhb{
224126324Sjhb
225169666Sjeff	uma_zfree(sleepq_zone, sq);
226126324Sjhb}
227126324Sjhb
228126324Sjhb/*
229136445Sjhb * Lock the sleep queue chain associated with the specified wait channel.
230136445Sjhb */
/*
 * Hashes wchan to its chain with SC_LOOKUP() and acquires that chain's spin
 * mutex.  sleepq_release() on the same wchan drops it again.
 */
231136445Sjhbvoid
232136445Sjhbsleepq_lock(void *wchan)
233136445Sjhb{
234136445Sjhb	struct sleepqueue_chain *sc;
235136445Sjhb
236136445Sjhb	sc = SC_LOOKUP(wchan);
237136445Sjhb	mtx_lock_spin(&sc->sc_lock);
238136445Sjhb}
239136445Sjhb
240136445Sjhb/*
241126324Sjhb * Look up the sleep queue associated with a given wait channel in the hash
242136445Sjhb * table.  The associated sleep queue chain must already be locked by the
243136445Sjhb * caller.  If no queue is found in the table, NULL is returned.
244126324Sjhb */
/*
 * wchan must be non-NULL (KASSERT).  The chain lock is not acquired here:
 * it must already be owned, which mtx_assert() checks.  Returns the queue
 * on the chain whose sq_wchan equals wchan, or NULL when none matches.
 */
245126324Sjhbstruct sleepqueue *
246126324Sjhbsleepq_lookup(void *wchan)
247126324Sjhb{
248126324Sjhb	struct sleepqueue_chain *sc;
249126324Sjhb	struct sleepqueue *sq;
250126324Sjhb
251126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
252126324Sjhb	sc = SC_LOOKUP(wchan);
253136445Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
	/* Several wait channels can hash to one chain; match on the pointer. */
254126324Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
255126324Sjhb		if (sq->sq_wchan == wchan)
256126324Sjhb			return (sq);
257126324Sjhb	return (NULL);
258126324Sjhb}
259126324Sjhb
260126324Sjhb/*
261126324Sjhb * Unlock the sleep queue chain associated with a given wait channel.
262126324Sjhb */
/*
 * Hashes wchan to its chain with SC_LOOKUP() and releases that chain's spin
 * mutex; the counterpart of sleepq_lock().
 */
263126324Sjhbvoid
264126324Sjhbsleepq_release(void *wchan)
265126324Sjhb{
266126324Sjhb	struct sleepqueue_chain *sc;
267126324Sjhb
268126324Sjhb	sc = SC_LOOKUP(wchan);
269126324Sjhb	mtx_unlock_spin(&sc->sc_lock);
270126324Sjhb}
271126324Sjhb
272126324Sjhb/*
273137277Sjhb * Places the current thread on the sleep queue for the specified wait
274126324Sjhb * channel.  If INVARIANTS is enabled, then it associates the passed in
275126324Sjhb * lock with the sleepq to make sure it is held when that sleep queue is
276126324Sjhb * woken up.
277126324Sjhb */
/*
 * sleepq_add(wchan, lock, wmesg, flags, queue)
 *
 *   wchan  - wait channel; must be non-NULL and its chain lock must be held
 *            (both asserted).
 *   lock   - recorded in sq_lock (INVARIANTS only) for a new queue, or
 *            compared against the recorded value for an existing one.
 *   wmesg  - stored in td_wmesg.
 *   flags  - (flags & SLEEPQ_TYPE) becomes or must match sq_type;
 *            SLEEPQ_INTERRUPTIBLE sets TDF_SINTR and clears TDF_SLEEPABORT,
 *            and together with SLEEPQ_STOP_ON_BDRY also sets TDF_SBDRY.
 *   queue  - index into sq_blocked[] / sq_blockedcnt[]; asserted to be in
 *            [0, NR_SLEEPQS).
 *
 * Returns nothing.  On return curthread is at the tail of the selected
 * blocked list and td_sleepqueue is NULL: the thread's own queue has either
 * become the queue for wchan or been parked on that queue's free list.
 */
278126324Sjhbvoid
279165272Skmacysleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
280165272Skmacy    int queue)
281126324Sjhb{
282126324Sjhb	struct sleepqueue_chain *sc;
283136445Sjhb	struct sleepqueue *sq;
284137277Sjhb	struct thread *td;
285126324Sjhb
286126324Sjhb	td = curthread;
287126324Sjhb	sc = SC_LOOKUP(wchan);
288126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
289126324Sjhb	MPASS(td->td_sleepqueue != NULL);
290126324Sjhb	MPASS(wchan != NULL);
291165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
292126324Sjhb
293150177Sjhb	/* If this thread is not allowed to sleep, die a horrible death. */
294150177Sjhb	KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
295152221Simp	    ("Trying sleep, but thread marked as sleeping prohibited"));
296150177Sjhb
297136445Sjhb	/* Look up the sleep queue associated with the wait channel 'wchan'. */
298136445Sjhb	sq = sleepq_lookup(wchan);
299136445Sjhb
300136445Sjhb	/*
301136445Sjhb	 * If the wait channel does not already have a sleep queue, use
302136445Sjhb	 * this thread's sleep queue.  Otherwise, insert the current thread
303136445Sjhb	 * into the sleep queue already in use by this wait channel.
304136445Sjhb	 */
305126324Sjhb	if (sq == NULL) {
306165272Skmacy#ifdef INVARIANTS
307165292Skmacy		int i;
308165291Sache
		/* The donated queue must be completely idle before reuse. */
309165292Skmacy		sq = td->td_sleepqueue;
310200447Sattilio		for (i = 0; i < NR_SLEEPQS; i++) {
311165292Skmacy			KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
312200447Sattilio			    ("thread's sleep queue %d is not empty", i));
313200447Sattilio			KASSERT(sq->sq_blockedcnt[i] == 0,
314200447Sattilio			    ("thread's sleep queue %d count mismatches", i));
315200447Sattilio		}
316165272Skmacy		KASSERT(LIST_EMPTY(&sq->sq_free),
317165272Skmacy		    ("thread's sleep queue has a non-empty free list"));
318165272Skmacy		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
319165292Skmacy		sq->sq_lock = lock;
320165272Skmacy#endif
321131259Sjhb#ifdef SLEEPQUEUE_PROFILING
		/* A new queue lengthens the chain; track the high-water marks. */
322131259Sjhb		sc->sc_depth++;
323131259Sjhb		if (sc->sc_depth > sc->sc_max_depth) {
324131259Sjhb			sc->sc_max_depth = sc->sc_depth;
325131259Sjhb			if (sc->sc_max_depth > sleepq_max_depth)
326131259Sjhb				sleepq_max_depth = sc->sc_max_depth;
327131259Sjhb		}
328131259Sjhb#endif
		/*
		 * This assignment is the one that matters when INVARIANTS
		 * is off; with INVARIANTS it repeats the one made above.
		 */
329165292Skmacy		sq = td->td_sleepqueue;
330126324Sjhb		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
331126324Sjhb		sq->sq_wchan = wchan;
332201879Sattilio		sq->sq_type = flags & SLEEPQ_TYPE;
333126324Sjhb	} else {
334126324Sjhb		MPASS(wchan == sq->sq_wchan);
335126488Sjhb		MPASS(lock == sq->sq_lock);
336136445Sjhb		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
		/* Park this thread's spare queue on the active queue's free list. */
337126324Sjhb		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
338126324Sjhb	}
	/* The per-thread fields below are updated under the thread lock. */
339172155Sattilio	thread_lock(td);
340165272Skmacy	TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
341200447Sattilio	sq->sq_blockedcnt[queue]++;
342126324Sjhb	td->td_sleepqueue = NULL;
343165272Skmacy	td->td_sqqueue = queue;
344126324Sjhb	td->td_wchan = wchan;
345126324Sjhb	td->td_wmesg = wmesg;
346155741Sdavidxu	if (flags & SLEEPQ_INTERRUPTIBLE) {
347134013Sjhb		td->td_flags |= TDF_SINTR;
348155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
349195702Skib		if (flags & SLEEPQ_STOP_ON_BDRY)
350195702Skib			td->td_flags |= TDF_SBDRY;
351155741Sdavidxu	}
352172155Sattilio	thread_unlock(td);
353126324Sjhb}
354126324Sjhb
355126324Sjhb/*
356126324Sjhb * Sets a timeout that will remove the current thread from the specified
357126324Sjhb * sleep queue after timo ticks if the thread has not already been awakened.
358126324Sjhb */
/*
 * sleepq_set_timeout(wchan, timo)
 *
 * Must be called with the chain lock for wchan held and after curthread has
 * been placed on a sleep queue (TD_ON_SLEEPQ and td_sleepqueue == NULL are
 * asserted).  Arms the thread's td_slpcallout so that sleepq_timeout(td)
 * is invoked after timo.  Returns nothing.
 */
359126324Sjhbvoid
360126885Sjhbsleepq_set_timeout(void *wchan, int timo)
361126324Sjhb{
362126324Sjhb	struct sleepqueue_chain *sc;
363126324Sjhb	struct thread *td;
364126324Sjhb
365126324Sjhb	td = curthread;
366126324Sjhb	sc = SC_LOOKUP(wchan);
367126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
368126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
369126324Sjhb	MPASS(td->td_sleepqueue == NULL);
370126324Sjhb	MPASS(wchan != NULL);
371177860Sjeff	callout_reset_curcpu(&td->td_slpcallout, timo, sleepq_timeout, td);
372126324Sjhb}
373126324Sjhb
374126324Sjhb/*
375200447Sattilio * Return the number of actual sleepers for the specified queue.
376200447Sattilio */
/*
 * sleepq_sleepcnt(wchan, queue)
 *
 * Returns sq_blockedcnt[queue] of the sleep queue attached to wchan, or 0
 * when no queue is attached.  wchan must be non-NULL and queue must lie in
 * [0, NR_SLEEPQS) (both asserted).  The chain lock is not taken here;
 * sleepq_lookup() asserts that it is already owned.
 */
377200447Sattiliou_int
378200447Sattiliosleepq_sleepcnt(void *wchan, int queue)
379200447Sattilio{
380200447Sattilio	struct sleepqueue *sq;
381200447Sattilio
382200447Sattilio	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
383200447Sattilio	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
384200447Sattilio	sq = sleepq_lookup(wchan);
385200447Sattilio	if (sq == NULL)
386200447Sattilio		return (0);
387200447Sattilio	return (sq->sq_blockedcnt[queue]);
388200447Sattilio}
389200447Sattilio
390200447Sattilio/*
391126324Sjhb * Marks the pending sleep of the current thread as interruptible and
392126324Sjhb * makes an initial check for pending signals before putting a thread
393170294Sjeff * to sleep.  Enters with the chain lock held; exits with the thread lock
394170294Sjeff * held.  Thread lock may have transitioned from the sleepq lock to a run lock.
395126324Sjhb */
/*
 * sleepq_catch_signals(wchan, pri)
 *
 * Called with the chain lock for wchan held (asserted).  Outcomes visible
 * in the code:
 *  - TDP_WAKEUP is set in td_pflags: the flag is cleared and EINTR is
 *    returned without sleeping.
 *  - Neither TDF_NEEDSIGCHK nor TDF_NEEDSUSPCHK is set: sleepq_switch() is
 *    called and 0 is returned.
 *  - Otherwise the thread lock and the chain lock are dropped and cursig()
 *    is consulted under the proc lock and ps_mtx.  A pending signal yields
 *    EINTR when it is a member of ps_sigintr and ERESTART otherwise; with
 *    no signal the result is that of thread_suspend_check(1), asserted to
 *    be 0, EINTR or ERESTART.  A zero result leads to sleepq_switch() and
 *    a return of 0.
 * On a non-zero result the thread is removed from the sleep queue if it is
 * still on one, and the chain lock is released before returning.
 *
 * Every path acquires the thread lock and does not release it here; the
 * callers in this file call thread_unlock(curthread) afterwards.
 */
396155741Sdavidxustatic int
397177085Sjeffsleepq_catch_signals(void *wchan, int pri)
398126324Sjhb{
399126324Sjhb	struct sleepqueue_chain *sc;
400126324Sjhb	struct sleepqueue *sq;
401126324Sjhb	struct thread *td;
402126324Sjhb	struct proc *p;
403155741Sdavidxu	struct sigacts *ps;
404195702Skib	int sig, ret, stop_allowed;
405126324Sjhb
406126324Sjhb	td = curthread;
407155741Sdavidxu	p = curproc;
408126324Sjhb	sc = SC_LOOKUP(wchan);
409126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
410126324Sjhb	MPASS(wchan != NULL);
	/*
	 * A wakeup was already posted: consume it and take the dequeue
	 * path.  The thread lock is taken first because the code at 'out'
	 * and the callers expect it to be held.
	 */
411211523Sdavidxu	if ((td->td_pflags & TDP_WAKEUP) != 0) {
412211523Sdavidxu		td->td_pflags &= ~TDP_WAKEUP;
413211523Sdavidxu		ret = EINTR;
414211534Sdavidxu		thread_lock(td);
415211523Sdavidxu		goto out;
416211523Sdavidxu	}
417211523Sdavidxu
418177375Sjeff	/*
419177375Sjeff	 * See if there are any pending signals for this thread.  If not
420177375Sjeff	 * we can switch immediately.  Otherwise do the signal processing
421177375Sjeff	 * directly.
422177375Sjeff	 */
423177375Sjeff	thread_lock(td);
424177471Sjeff	if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) {
425177375Sjeff		sleepq_switch(wchan, pri);
426177375Sjeff		return (0);
427177375Sjeff	}
	/* TDF_SBDRY is sampled while the thread lock is still held. */
428195702Skib	stop_allowed = (td->td_flags & TDF_SBDRY) ? SIG_STOP_NOT_ALLOWED :
429195702Skib	    SIG_STOP_ALLOWED;
	/* Both spin locks are dropped before the proc lock is taken. */
430177375Sjeff	thread_unlock(td);
431177375Sjeff	mtx_unlock_spin(&sc->sc_lock);
432129241Sbde	CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
433173601Sjulian		(void *)td, (long)p->p_pid, td->td_name);
434126324Sjhb	PROC_LOCK(p);
435155741Sdavidxu	ps = p->p_sigacts;
436155741Sdavidxu	mtx_lock(&ps->ps_mtx);
437195702Skib	sig = cursig(td, stop_allowed);
438155741Sdavidxu	if (sig == 0) {
439155741Sdavidxu		mtx_unlock(&ps->ps_mtx);
440155741Sdavidxu		ret = thread_suspend_check(1);
441155741Sdavidxu		MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
442155741Sdavidxu	} else {
443155741Sdavidxu		if (SIGISMEMBER(ps->ps_sigintr, sig))
444155741Sdavidxu			ret = EINTR;
445155741Sdavidxu		else
446155741Sdavidxu			ret = ERESTART;
447155741Sdavidxu		mtx_unlock(&ps->ps_mtx);
448155741Sdavidxu	}
449184667Sdavidxu	/*
450184667Sdavidxu	 * Lock the per-process spinlock prior to dropping the PROC_LOCK
451184667Sdavidxu	 * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
452209612Sjhb	 * thread_lock() are currently held in tdsendsignal().
453184667Sdavidxu	 */
454184667Sdavidxu	PROC_SLOCK(p);
455170294Sjeff	mtx_lock_spin(&sc->sc_lock);
456184667Sdavidxu	PROC_UNLOCK(p);
457170294Sjeff	thread_lock(td);
458184667Sdavidxu	PROC_SUNLOCK(p);
	/* Nothing pending after all: go to sleep as in the fast path. */
459185502Sdavidxu	if (ret == 0) {
460185502Sdavidxu		sleepq_switch(wchan, pri);
461185502Sdavidxu		return (0);
462185502Sdavidxu	}
463211523Sdavidxuout:
464155936Sdavidxu	/*
465155936Sdavidxu	 * There were pending signals and this thread is still
466155936Sdavidxu	 * on the sleep queue, remove it from the sleep queue.
467155936Sdavidxu	 */
468170294Sjeff	if (TD_ON_SLEEPQ(td)) {
469170294Sjeff		sq = sleepq_lookup(wchan);
470181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
471181334Sjhb#ifdef INVARIANTS
472181334Sjhb			/*
473181334Sjhb			 * This thread hasn't gone to sleep yet, so it
474181334Sjhb			 * should not be swapped out.
475181334Sjhb			 */
476181334Sjhb			panic("not waking up swapper");
477181334Sjhb#endif
478181334Sjhb		}
479170294Sjeff	}
480170294Sjeff	mtx_unlock_spin(&sc->sc_lock);
	/* The thread lock still held must not be the lock just released. */
481170294Sjeff	MPASS(td->td_lock != &sc->sc_lock);
482155741Sdavidxu	return (ret);
483126324Sjhb}
484126324Sjhb
485126324Sjhb/*
486170294Sjeff * Switches to another thread if we are still asleep on a sleep queue.
487170294Sjeff * Returns with thread lock.
488126324Sjhb */
/*
 * sleepq_switch(wchan, pri)
 *
 * Requires both the chain lock for wchan and the thread lock (asserted).
 * Returns early, after dropping the chain lock, when the thread already has
 * its sleep queue back (td_sleepqueue != NULL) or when TDF_TIMEOUT is
 * already set; in the latter case the thread is first taken off the queue.
 * Otherwise it calls sched_sleep(td, pri), marks the thread sleeping and
 * calls mi_switch().  The code after mi_switch() runs once the thread has
 * been resumed.  Returns nothing.
 */
489126324Sjhbstatic void
490177085Sjeffsleepq_switch(void *wchan, int pri)
491126324Sjhb{
492126324Sjhb	struct sleepqueue_chain *sc;
493175654Sjhb	struct sleepqueue *sq;
494126324Sjhb	struct thread *td;
495126324Sjhb
496126324Sjhb	td = curthread;
497126324Sjhb	sc = SC_LOOKUP(wchan);
498126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
499170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
500175654Sjhb
501175654Sjhb	/*
502175654Sjhb	 * If we have a sleep queue, then we've already been woken up, so
503175654Sjhb	 * just return.
504175654Sjhb	 */
505126324Sjhb	if (td->td_sleepqueue != NULL) {
506126324Sjhb		mtx_unlock_spin(&sc->sc_lock);
507126324Sjhb		return;
508126324Sjhb	}
509175654Sjhb
510175654Sjhb	/*
511175654Sjhb	 * If TDF_TIMEOUT is set, then our sleep has been timed out
512175654Sjhb	 * already but we are still on the sleep queue, so dequeue the
513175654Sjhb	 * thread and return.
514175654Sjhb	 */
515175654Sjhb	if (td->td_flags & TDF_TIMEOUT) {
516175654Sjhb		MPASS(TD_ON_SLEEPQ(td));
517175654Sjhb		sq = sleepq_lookup(wchan);
518181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
519181334Sjhb#ifdef INVARIANTS
520181334Sjhb			/*
521181334Sjhb			 * This thread hasn't gone to sleep yet, so it
522181334Sjhb			 * should not be swapped out.
523181334Sjhb			 */
524181334Sjhb			panic("not waking up swapper");
525181334Sjhb#endif
526181334Sjhb		}
527175654Sjhb		mtx_unlock_spin(&sc->sc_lock);
528175654Sjhb		return;
529175654Sjhb	}
530177372Sjeff#ifdef SLEEPQUEUE_PROFILING
531177372Sjeff	if (prof_enabled)
532177372Sjeff		sleepq_profile(td->td_wmesg);
533177372Sjeff#endif
534177085Sjeff	MPASS(td->td_sleepqueue == NULL);
535177085Sjeff	sched_sleep(td, pri);
	/*
	 * NOTE(review): unlike the early returns above, this path never
	 * calls mtx_unlock_spin() on the chain lock; presumably handing the
	 * chain lock to the thread via thread_lock_set() is what lets the
	 * switch code release it -- confirm against thread_lock_set().
	 */
536170294Sjeff	thread_lock_set(td, &sc->sc_lock);
537126324Sjhb	TD_SET_SLEEPING(td);
538178272Sjeff	mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
539126324Sjhb	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
	/*
	 * NOTE(review): td_name is cast to (void *) here although the
	 * format uses %s; the other CTR3 calls in this file pass td_name
	 * uncast.
	 */
540129241Sbde	CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
541173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
542126324Sjhb}
543126324Sjhb
544126324Sjhb/*
545126324Sjhb * Check to see if we timed out.
546126324Sjhb */
/*
 * sleepq_check_timeout()
 *
 * Requires the thread lock on curthread (asserted).  Returns EWOULDBLOCK,
 * after clearing the flag, when TDF_TIMEOUT is set; returns 0 otherwise.
 * In the 0 case it either clears TDF_TIMOFAIL or, when that flag is not
 * set, tries to stop td_slpcallout.  If callout_stop() returns 0 it sets
 * TDF_TIMEOUT, marks the thread sleeping and switches away.
 */
547126324Sjhbstatic int
548126324Sjhbsleepq_check_timeout(void)
549126324Sjhb{
550126324Sjhb	struct thread *td;
551126324Sjhb
552126324Sjhb	td = curthread;
553170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
554126324Sjhb
555126324Sjhb	/*
556126324Sjhb	 * If TDF_TIMEOUT is set, we timed out.
557126324Sjhb	 */
558126324Sjhb	if (td->td_flags & TDF_TIMEOUT) {
559126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
560126324Sjhb		return (EWOULDBLOCK);
561126324Sjhb	}
562126324Sjhb
563126324Sjhb	/*
564126324Sjhb	 * If TDF_TIMOFAIL is set, the timeout ran after we had
565126324Sjhb	 * already been woken up.
566126324Sjhb	 */
567126324Sjhb	if (td->td_flags & TDF_TIMOFAIL)
568126324Sjhb		td->td_flags &= ~TDF_TIMOFAIL;
569126324Sjhb
570126324Sjhb	/*
571126324Sjhb	 * If callout_stop() fails, then the timeout is running on
572126324Sjhb	 * another CPU, so synchronize with it to avoid having it
573126324Sjhb	 * accidentally wake up a subsequent sleep.
574126324Sjhb	 */
575126324Sjhb	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Setting TDF_TIMEOUT here makes a later call take the
		 * EWOULDBLOCK branch above unless something clears the
		 * flag first -- presumably sleepq_timeout() does; confirm.
		 */
576126324Sjhb		td->td_flags |= TDF_TIMEOUT;
577126324Sjhb		TD_SET_SLEEPING(td);
578178272Sjeff		mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
579126324Sjhb	}
580126324Sjhb	return (0);
581126324Sjhb}
582126324Sjhb
583126324Sjhb/*
584126324Sjhb * Check to see if we were awoken by a signal.
585126324Sjhb */
/*
 * sleepq_check_signals()
 *
 * Requires the thread lock on curthread (asserted).  Clears TDF_SINTR and
 * TDF_SBDRY when TDF_SINTR is set.  If TDF_SLEEPABORT is set, clears it and
 * returns td_intrval; otherwise returns 0.
 */
586126324Sjhbstatic int
587126324Sjhbsleepq_check_signals(void)
588126324Sjhb{
589126324Sjhb	struct thread *td;
590126324Sjhb
591126324Sjhb	td = curthread;
592170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
593126324Sjhb
594126324Sjhb	/* We are no longer in an interruptible sleep. */
595155741Sdavidxu	if (td->td_flags & TDF_SINTR)
596195702Skib		td->td_flags &= ~(TDF_SINTR | TDF_SBDRY);
597126324Sjhb
	/* An aborted sleep reports the value stored in td_intrval. */
598155741Sdavidxu	if (td->td_flags & TDF_SLEEPABORT) {
599155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
600155741Sdavidxu		return (td->td_intrval);
601155741Sdavidxu	}
602155741Sdavidxu
603126324Sjhb	return (0);
604126324Sjhb}
605126324Sjhb
606126324Sjhb/*
607126324Sjhb * Block the current thread until it is awakened from its sleep queue.
608126324Sjhb */
/*
 * sleepq_wait(wchan, pri)
 *
 * Uninterruptible wait: asserts TDF_SINTR is clear, takes the thread lock,
 * calls sleepq_switch() and drops the thread lock afterwards.  The caller
 * must already hold the chain lock for wchan, which sleepq_switch()
 * asserts.  Returns nothing.
 */
609126324Sjhbvoid
610177085Sjeffsleepq_wait(void *wchan, int pri)
611126324Sjhb{
612170294Sjeff	struct thread *td;
613126324Sjhb
614170294Sjeff	td = curthread;
615170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
616170294Sjeff	thread_lock(td);
617177085Sjeff	sleepq_switch(wchan, pri);
618170294Sjeff	thread_unlock(td);
619126324Sjhb}
620126324Sjhb
621126324Sjhb/*
622126324Sjhb * Block the current thread until it is awakened from its sleep queue
623126324Sjhb * or it is interrupted by a signal.
624126324Sjhb */
/*
 * sleepq_wait_sig(wchan, pri)
 *
 * Returns the result of sleepq_catch_signals() when it is non-zero,
 * otherwise the result of sleepq_check_signals().  The latter is always
 * called, so its flag clearing happens on both paths.  The thread lock
 * left held by sleepq_catch_signals() is dropped here.
 */
625126324Sjhbint
626177085Sjeffsleepq_wait_sig(void *wchan, int pri)
627126324Sjhb{
628155741Sdavidxu	int rcatch;
629126324Sjhb	int rval;
630126324Sjhb
631177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
632126324Sjhb	rval = sleepq_check_signals();
633170294Sjeff	thread_unlock(curthread);
634155741Sdavidxu	if (rcatch)
635155741Sdavidxu		return (rcatch);
636126324Sjhb	return (rval);
637126324Sjhb}
638126324Sjhb
639126324Sjhb/*
640126324Sjhb * Block the current thread until it is awakened from its sleep queue
641126324Sjhb * or it times out while waiting.
642126324Sjhb */
/*
 * sleepq_timedwait(wchan, pri)
 *
 * Uninterruptible wait with a timeout: asserts TDF_SINTR is clear, takes
 * the thread lock, calls sleepq_switch(), and returns whatever
 * sleepq_check_timeout() reports (EWOULDBLOCK or 0) after dropping the
 * thread lock.
 */
643126324Sjhbint
644177085Sjeffsleepq_timedwait(void *wchan, int pri)
645126324Sjhb{
646170294Sjeff	struct thread *td;
647126324Sjhb	int rval;
648126324Sjhb
649170294Sjeff	td = curthread;
650170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
651170294Sjeff	thread_lock(td);
652177085Sjeff	sleepq_switch(wchan, pri);
653126324Sjhb	rval = sleepq_check_timeout();
654170294Sjeff	thread_unlock(td);
655170294Sjeff
656131249Sjhb	return (rval);
657126324Sjhb}
658126324Sjhb
659126324Sjhb/*
660126324Sjhb * Block the current thread until it is awakened from its sleep queue,
661126324Sjhb * it is interrupted by a signal, or it times out waiting to be awakened.
662126324Sjhb */
/*
 * sleepq_timedwait_sig(wchan, pri)
 *
 * Result precedence: a non-zero sleepq_catch_signals() result first, then a
 * non-zero sleepq_check_signals() result, then the sleepq_check_timeout()
 * result.  Both check functions are always called, so their flag updates
 * happen regardless of which value is returned.  The thread lock left held
 * by sleepq_catch_signals() is dropped here.
 */
663126324Sjhbint
664177085Sjeffsleepq_timedwait_sig(void *wchan, int pri)
665126324Sjhb{
666155741Sdavidxu	int rcatch, rvalt, rvals;
667126324Sjhb
668177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
669126324Sjhb	rvalt = sleepq_check_timeout();
670126324Sjhb	rvals = sleepq_check_signals();
671170294Sjeff	thread_unlock(curthread);
672155741Sdavidxu	if (rcatch)
673155741Sdavidxu		return (rcatch);
674155741Sdavidxu	if (rvals)
675126324Sjhb		return (rvals);
676155741Sdavidxu	return (rvalt);
677126324Sjhb}
678126324Sjhb
679126324Sjhb/*
680201879Sattilio * Returns the type of sleepqueue given a waitchannel.
681201879Sattilio */
/*
 * sleepq_type(wchan)
 *
 * Takes and releases the chain lock itself.  Returns sq_type of the sleep
 * queue attached to wchan, or -1 when no queue is attached.  wchan must be
 * non-NULL (asserted).
 */
682201879Sattilioint
683201879Sattiliosleepq_type(void *wchan)
684201879Sattilio{
685201879Sattilio	struct sleepqueue *sq;
686201879Sattilio	int type;
687201879Sattilio
688201879Sattilio	MPASS(wchan != NULL);
689201879Sattilio
690201879Sattilio	sleepq_lock(wchan);
691201879Sattilio	sq = sleepq_lookup(wchan);
692201879Sattilio	if (sq == NULL) {
693201879Sattilio		sleepq_release(wchan);
694201879Sattilio		return (-1);
695201879Sattilio	}
	/* Copy the type out before unlocking; sq is not touched afterwards. */
696201879Sattilio	type = sq->sq_type;
697201879Sattilio	sleepq_release(wchan);
698201879Sattilio	return (type);
699201879Sattilio}
700201879Sattilio
701201879Sattilio/*
702145056Sjhb * Removes a thread from a sleep queue and makes it
703145056Sjhb * runnable.
704126324Sjhb */
/*
 * sleepq_resume_thread(sq, td, pri)
 *
 *   sq  - sleep queue td is blocked on; td_wchan must equal sq_wchan.
 *   td  - thread to remove; its thread lock and the chain lock for
 *         sq_wchan must be held (all asserted).
 *   pri - 0 for "leave the priority alone", otherwise a value in
 *         [PRI_MIN, PRI_MAX] (asserted).  sched_prio(td, pri) is called
 *         only when td_priority > pri and the thread's base class is
 *         PRI_TIMESHARE.
 *
 * Removes td from its blocked list, gives it a sleep queue back, and clears
 * td_wmesg, td_wchan, TDF_SINTR and TDF_SBDRY.  Returns the value of
 * setrunnable(td) when the thread was sleeping, and 0 otherwise.  Callers
 * in this file treat a non-zero return as "the swapper needs waking".
 */
705181334Sjhbstatic int
706145056Sjhbsleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
707126324Sjhb{
708126324Sjhb	struct sleepqueue_chain *sc;
709126324Sjhb
710126324Sjhb	MPASS(td != NULL);
711126324Sjhb	MPASS(sq->sq_wchan != NULL);
712126324Sjhb	MPASS(td->td_wchan == sq->sq_wchan);
713165272Skmacy	MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
714170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
715126324Sjhb	sc = SC_LOOKUP(sq->sq_wchan);
716126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
717126324Sjhb
718126324Sjhb	/* Remove the thread from the queue. */
719200447Sattilio	sq->sq_blockedcnt[td->td_sqqueue]--;
720165272Skmacy	TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
721126324Sjhb
722126324Sjhb	/*
723126324Sjhb	 * Get a sleep queue for this thread.  If this is the last waiter,
724126324Sjhb	 * use the queue itself and take it out of the chain, otherwise,
725126324Sjhb	 * remove a queue from the free list.
726126324Sjhb	 */
727126324Sjhb	if (LIST_EMPTY(&sq->sq_free)) {
728126324Sjhb		td->td_sleepqueue = sq;
729126324Sjhb#ifdef INVARIANTS
		/* Lets sleepq_add() detect a stale wait channel on reuse. */
730126324Sjhb		sq->sq_wchan = NULL;
731126324Sjhb#endif
732131259Sjhb#ifdef SLEEPQUEUE_PROFILING
733131259Sjhb		sc->sc_depth--;
734131259Sjhb#endif
735126324Sjhb	} else
736126324Sjhb		td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
	/*
	 * sq_hash links a queue into either the chain or a free list, so
	 * this one removal serves both branches above.
	 */
737126324Sjhb	LIST_REMOVE(td->td_sleepqueue, sq_hash);
738126324Sjhb
739129188Sjhb	td->td_wmesg = NULL;
740129188Sjhb	td->td_wchan = NULL;
741195702Skib	td->td_flags &= ~(TDF_SINTR | TDF_SBDRY);
742129188Sjhb
743129241Sbde	CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
744173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, td->td_name);
745126324Sjhb
746126324Sjhb	/* Adjust priority if requested. */
747177085Sjeff	MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
748217410Sjhb	if (pri != 0 && td->td_priority > pri &&
749217410Sjhb	    PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
750136439Sups		sched_prio(td, pri);
751184653Sjhb
752184653Sjhb	/*
753184653Sjhb	 * Note that thread td might not be sleeping if it is running
754184653Sjhb	 * sleepq_catch_signals() on another CPU or is blocked on its
755184653Sjhb	 * proc lock to check signals.  There's no need to mark the
756184653Sjhb	 * thread runnable in that case.
757184653Sjhb	 */
758184653Sjhb	if (TD_IS_SLEEPING(td)) {
759184653Sjhb		TD_CLR_SLEEPING(td);
760184653Sjhb		return (setrunnable(td));
761184653Sjhb	}
762184653Sjhb	return (0);
763126324Sjhb}
764126324Sjhb
#ifdef INVARIANTS
/*
 * UMA zone item destructor, compiled only with INVARIANTS.  It performs no
 * cleanup; it only asserts that every blocked-thread list of the sleep
 * queue is empty and that the matching blocked counter is zero.
 */
static void
sleepq_dtor(void *mem, int size, void *arg)
{
	struct sleepqueue *sq;
	int i;

	sq = mem;
	i = 0;
	while (i < NR_SLEEPQS) {
		MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
		MPASS(sq->sq_blockedcnt[i] == 0);
		i++;
	}
}
#endif
782169666Sjeff
783169666Sjeff/*
784169666Sjeff * UMA zone item initializer.
785169666Sjeff */
786169666Sjeffstatic int
787169666Sjeffsleepq_init(void *mem, int size, int flags)
788169666Sjeff{
789169666Sjeff	struct sleepqueue *sq;
790169666Sjeff	int i;
791169666Sjeff
792169666Sjeff	bzero(mem, size);
793169666Sjeff	sq = mem;
794200447Sattilio	for (i = 0; i < NR_SLEEPQS; i++) {
795169666Sjeff		TAILQ_INIT(&sq->sq_blocked[i]);
796200447Sattilio		sq->sq_blockedcnt[i] = 0;
797200447Sattilio	}
798169666Sjeff	LIST_INIT(&sq->sq_free);
799169666Sjeff	return (0);
800169666Sjeff}
801169666Sjeff
802169666Sjeff/*
803126324Sjhb * Find the highest priority thread sleeping on a wait channel and resume it.
804126324Sjhb */
805181334Sjhbint
806165272Skmacysleepq_signal(void *wchan, int flags, int pri, int queue)
807126324Sjhb{
808126324Sjhb	struct sleepqueue *sq;
809137277Sjhb	struct thread *td, *besttd;
810181334Sjhb	int wakeup_swapper;
811126324Sjhb
812126324Sjhb	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
813126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
814165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
815126324Sjhb	sq = sleepq_lookup(wchan);
816170294Sjeff	if (sq == NULL)
817181334Sjhb		return (0);
818134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
819126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
820129188Sjhb
821137277Sjhb	/*
822137277Sjhb	 * Find the highest priority thread on the queue.  If there is a
823137277Sjhb	 * tie, use the thread that first appears in the queue as it has
824137277Sjhb	 * been sleeping the longest since threads are always added to
825137277Sjhb	 * the tail of sleep queues.
826137277Sjhb	 */
827137277Sjhb	besttd = NULL;
828165272Skmacy	TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
829137277Sjhb		if (besttd == NULL || td->td_priority < besttd->td_priority)
830137277Sjhb			besttd = td;
831137277Sjhb	}
832137277Sjhb	MPASS(besttd != NULL);
833170294Sjeff	thread_lock(besttd);
834181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
835170294Sjeff	thread_unlock(besttd);
836181334Sjhb	return (wakeup_swapper);
837126324Sjhb}
838126324Sjhb
839126324Sjhb/*
840126324Sjhb * Resume all threads sleeping on a specified wait channel.
841126324Sjhb */
842181334Sjhbint
843165272Skmacysleepq_broadcast(void *wchan, int flags, int pri, int queue)
844126324Sjhb{
845126324Sjhb	struct sleepqueue *sq;
846182875Sjhb	struct thread *td, *tdn;
847181334Sjhb	int wakeup_swapper;
848126324Sjhb
849126324Sjhb	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
850126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
851165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
852126324Sjhb	sq = sleepq_lookup(wchan);
853177085Sjeff	if (sq == NULL)
854181334Sjhb		return (0);
855134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
856126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
857129188Sjhb
858145056Sjhb	/* Resume all blocked threads on the sleep queue. */
859181334Sjhb	wakeup_swapper = 0;
860182875Sjhb	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
861170294Sjeff		thread_lock(td);
862181334Sjhb		if (sleepq_resume_thread(sq, td, pri))
863181334Sjhb			wakeup_swapper = 1;
864170294Sjeff		thread_unlock(td);
865170294Sjeff	}
866181334Sjhb	return (wakeup_swapper);
867126324Sjhb}
868126324Sjhb
869126324Sjhb/*
870126324Sjhb * Time sleeping threads out.  When the timeout expires, the thread is
871126324Sjhb * removed from the sleep queue and made runnable if it is still asleep.
872126324Sjhb */
873126324Sjhbstatic void
874126324Sjhbsleepq_timeout(void *arg)
875126324Sjhb{
876170294Sjeff	struct sleepqueue_chain *sc;
877126324Sjhb	struct sleepqueue *sq;
878126324Sjhb	struct thread *td;
879126324Sjhb	void *wchan;
880181334Sjhb	int wakeup_swapper;
881126324Sjhb
882129241Sbde	td = arg;
883181334Sjhb	wakeup_swapper = 0;
884129241Sbde	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
885173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
886126324Sjhb
887126324Sjhb	/*
888126324Sjhb	 * First, see if the thread is asleep and get the wait channel if
889126324Sjhb	 * it is.
890126324Sjhb	 */
891170294Sjeff	thread_lock(td);
892170294Sjeff	if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
893126324Sjhb		wchan = td->td_wchan;
894170294Sjeff		sc = SC_LOOKUP(wchan);
895176078Sjeff		THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
896126324Sjhb		sq = sleepq_lookup(wchan);
897170294Sjeff		MPASS(sq != NULL);
898170294Sjeff		td->td_flags |= TDF_TIMEOUT;
899181334Sjhb		wakeup_swapper = sleepq_resume_thread(sq, td, 0);
900170294Sjeff		thread_unlock(td);
901181334Sjhb		if (wakeup_swapper)
902181334Sjhb			kick_proc0();
903170294Sjeff		return;
904126324Sjhb	}
905175654Sjhb
906126324Sjhb	/*
907175654Sjhb	 * If the thread is on the SLEEPQ but isn't sleeping yet, it
908175654Sjhb	 * can either be on another CPU in between sleepq_add() and
909175654Sjhb	 * one of the sleepq_*wait*() routines or it can be in
910175654Sjhb	 * sleepq_catch_signals().
911126324Sjhb	 */
912126324Sjhb	if (TD_ON_SLEEPQ(td)) {
913175664Sjhb		td->td_flags |= TDF_TIMEOUT;
914170294Sjeff		thread_unlock(td);
915126324Sjhb		return;
916170294Sjeff	}
917126324Sjhb
918126324Sjhb	/*
919126324Sjhb	 * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
920126324Sjhb	 * then the other thread has already yielded to us, so clear
921126324Sjhb	 * the flag and resume it.  If TDF_TIMEOUT is not set, then the
922126324Sjhb	 * we know that the other thread is not on a sleep queue, but it
923126324Sjhb	 * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
924126324Sjhb	 * to let it know that the timeout has already run and doesn't
925126324Sjhb	 * need to be canceled.
926126324Sjhb	 */
927126324Sjhb	if (td->td_flags & TDF_TIMEOUT) {
928127085Sjhb		MPASS(TD_IS_SLEEPING(td));
929126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
930126324Sjhb		TD_CLR_SLEEPING(td);
931181334Sjhb		wakeup_swapper = setrunnable(td);
932126324Sjhb	} else
933126324Sjhb		td->td_flags |= TDF_TIMOFAIL;
934170294Sjeff	thread_unlock(td);
935181334Sjhb	if (wakeup_swapper)
936181334Sjhb		kick_proc0();
937126324Sjhb}
938126324Sjhb
939126324Sjhb/*
940126324Sjhb * Resumes a specific thread from the sleep queue associated with a specific
941126324Sjhb * wait channel if it is on that queue.
942126324Sjhb */
943126324Sjhbvoid
944126324Sjhbsleepq_remove(struct thread *td, void *wchan)
945126324Sjhb{
946126324Sjhb	struct sleepqueue *sq;
947181334Sjhb	int wakeup_swapper;
948126324Sjhb
949126324Sjhb	/*
950126324Sjhb	 * Look up the sleep queue for this wait channel, then re-check
951126324Sjhb	 * that the thread is asleep on that channel, if it is not, then
952126324Sjhb	 * bail.
953126324Sjhb	 */
954126324Sjhb	MPASS(wchan != NULL);
955136445Sjhb	sleepq_lock(wchan);
956126324Sjhb	sq = sleepq_lookup(wchan);
957170294Sjeff	/*
958170294Sjeff	 * We can not lock the thread here as it may be sleeping on a
959170294Sjeff	 * different sleepq.  However, holding the sleepq lock for this
960170294Sjeff	 * wchan can guarantee that we do not miss a wakeup for this
961170294Sjeff	 * channel.  The asserts below will catch any false positives.
962170294Sjeff	 */
963126324Sjhb	if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
964126324Sjhb		sleepq_release(wchan);
965126324Sjhb		return;
966126324Sjhb	}
967170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
968170294Sjeff	thread_lock(td);
969126324Sjhb	MPASS(sq != NULL);
970170294Sjeff	MPASS(td->td_wchan == wchan);
971181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, td, 0);
972170294Sjeff	thread_unlock(td);
973126324Sjhb	sleepq_release(wchan);
974181334Sjhb	if (wakeup_swapper)
975181334Sjhb		kick_proc0();
976126324Sjhb}
977126324Sjhb
978126324Sjhb/*
979129241Sbde * Abort a thread as if an interrupt had occurred.  Only abort
980129241Sbde * interruptible waits (unfortunately it isn't safe to abort others).
981126324Sjhb */
982181334Sjhbint
983155741Sdavidxusleepq_abort(struct thread *td, int intrval)
984126324Sjhb{
985170294Sjeff	struct sleepqueue *sq;
986126324Sjhb	void *wchan;
987126324Sjhb
988170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
989126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
990126324Sjhb	MPASS(td->td_flags & TDF_SINTR);
991155741Sdavidxu	MPASS(intrval == EINTR || intrval == ERESTART);
992126324Sjhb
993126324Sjhb	/*
994126324Sjhb	 * If the TDF_TIMEOUT flag is set, just leave. A
995126324Sjhb	 * timeout is scheduled anyhow.
996126324Sjhb	 */
997126324Sjhb	if (td->td_flags & TDF_TIMEOUT)
998181334Sjhb		return (0);
999126324Sjhb
1000129241Sbde	CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
1001173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
1002170294Sjeff	td->td_intrval = intrval;
1003170294Sjeff	td->td_flags |= TDF_SLEEPABORT;
1004170294Sjeff	/*
1005170294Sjeff	 * If the thread has not slept yet it will find the signal in
1006170294Sjeff	 * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
1007170294Sjeff	 * we have to do it here.
1008170294Sjeff	 */
1009170294Sjeff	if (!TD_IS_SLEEPING(td))
1010181334Sjhb		return (0);
1011126324Sjhb	wchan = td->td_wchan;
1012170294Sjeff	MPASS(wchan != NULL);
1013170294Sjeff	sq = sleepq_lookup(wchan);
1014170294Sjeff	MPASS(sq != NULL);
1015170294Sjeff
1016170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
1017181334Sjhb	return (sleepq_resume_thread(sq, td, 0));
1018126324Sjhb}
1019154936Sjhb
1020177372Sjeff#ifdef SLEEPQUEUE_PROFILING
1021177372Sjeff#define	SLEEPQ_PROF_LOCATIONS	1024
1022212750Smdf#define	SLEEPQ_SBUFSIZE		512
1023177372Sjeffstruct sleepq_prof {
1024177372Sjeff	LIST_ENTRY(sleepq_prof) sp_link;
1025177372Sjeff	const char	*sp_wmesg;
1026177372Sjeff	long		sp_count;
1027177372Sjeff};
1028177372Sjeff
1029177372SjeffLIST_HEAD(sqphead, sleepq_prof);
1030177372Sjeff
1031177372Sjeffstruct sqphead sleepq_prof_free;
1032177372Sjeffstruct sqphead sleepq_hash[SC_TABLESIZE];
1033177372Sjeffstatic struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
1034177372Sjeffstatic struct mtx sleepq_prof_lock;
1035177372SjeffMTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
1036177372Sjeff
1037177372Sjeffstatic void
1038177372Sjeffsleepq_profile(const char *wmesg)
1039177372Sjeff{
1040177372Sjeff	struct sleepq_prof *sp;
1041177372Sjeff
1042177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1043177372Sjeff	if (prof_enabled == 0)
1044177372Sjeff		goto unlock;
1045177372Sjeff	LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
1046177372Sjeff		if (sp->sp_wmesg == wmesg)
1047177372Sjeff			goto done;
1048177372Sjeff	sp = LIST_FIRST(&sleepq_prof_free);
1049177372Sjeff	if (sp == NULL)
1050177372Sjeff		goto unlock;
1051177372Sjeff	sp->sp_wmesg = wmesg;
1052177372Sjeff	LIST_REMOVE(sp, sp_link);
1053177372Sjeff	LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
1054177372Sjeffdone:
1055177372Sjeff	sp->sp_count++;
1056177372Sjeffunlock:
1057177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1058177372Sjeff	return;
1059177372Sjeff}
1060177372Sjeff
1061177372Sjeffstatic void
1062177372Sjeffsleepq_prof_reset(void)
1063177372Sjeff{
1064177372Sjeff	struct sleepq_prof *sp;
1065177372Sjeff	int enabled;
1066177372Sjeff	int i;
1067177372Sjeff
1068177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1069177372Sjeff	enabled = prof_enabled;
1070177372Sjeff	prof_enabled = 0;
1071177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++)
1072177372Sjeff		LIST_INIT(&sleepq_hash[i]);
1073177372Sjeff	LIST_INIT(&sleepq_prof_free);
1074177372Sjeff	for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
1075177372Sjeff		sp = &sleepq_profent[i];
1076177372Sjeff		sp->sp_wmesg = NULL;
1077177372Sjeff		sp->sp_count = 0;
1078177372Sjeff		LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
1079177372Sjeff	}
1080177372Sjeff	prof_enabled = enabled;
1081177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1082177372Sjeff}
1083177372Sjeff
1084177372Sjeffstatic int
1085177372Sjeffenable_sleepq_prof(SYSCTL_HANDLER_ARGS)
1086177372Sjeff{
1087177372Sjeff	int error, v;
1088177372Sjeff
1089177372Sjeff	v = prof_enabled;
1090177372Sjeff	error = sysctl_handle_int(oidp, &v, v, req);
1091177372Sjeff	if (error)
1092177372Sjeff		return (error);
1093177372Sjeff	if (req->newptr == NULL)
1094177372Sjeff		return (error);
1095177372Sjeff	if (v == prof_enabled)
1096177372Sjeff		return (0);
1097177372Sjeff	if (v == 1)
1098177372Sjeff		sleepq_prof_reset();
1099177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1100177372Sjeff	prof_enabled = !!v;
1101177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1102177372Sjeff
1103177372Sjeff	return (0);
1104177372Sjeff}
1105177372Sjeff
1106177372Sjeffstatic int
1107177372Sjeffreset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1108177372Sjeff{
1109177372Sjeff	int error, v;
1110177372Sjeff
1111177372Sjeff	v = 0;
1112177372Sjeff	error = sysctl_handle_int(oidp, &v, 0, req);
1113177372Sjeff	if (error)
1114177372Sjeff		return (error);
1115177372Sjeff	if (req->newptr == NULL)
1116177372Sjeff		return (error);
1117177372Sjeff	if (v == 0)
1118177372Sjeff		return (0);
1119177372Sjeff	sleepq_prof_reset();
1120177372Sjeff
1121177372Sjeff	return (0);
1122177372Sjeff}
1123177372Sjeff
1124177372Sjeffstatic int
1125177372Sjeffdump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1126177372Sjeff{
1127177372Sjeff	struct sleepq_prof *sp;
1128177372Sjeff	struct sbuf *sb;
1129177372Sjeff	int enabled;
1130177372Sjeff	int error;
1131177372Sjeff	int i;
1132177372Sjeff
1133217916Smdf	error = sysctl_wire_old_buffer(req, 0);
1134217916Smdf	if (error != 0)
1135217916Smdf		return (error);
1136212750Smdf	sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
1137177372Sjeff	sbuf_printf(sb, "\nwmesg\tcount\n");
1138177372Sjeff	enabled = prof_enabled;
1139177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1140177372Sjeff	prof_enabled = 0;
1141177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1142177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++) {
1143177372Sjeff		LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
1144177372Sjeff			sbuf_printf(sb, "%s\t%ld\n",
1145177372Sjeff			    sp->sp_wmesg, sp->sp_count);
1146177372Sjeff		}
1147177372Sjeff	}
1148177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1149177372Sjeff	prof_enabled = enabled;
1150177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1151177372Sjeff
1152212750Smdf	error = sbuf_finish(sb);
1153177372Sjeff	sbuf_delete(sb);
1154177372Sjeff	return (error);
1155177372Sjeff}
1156177372Sjeff
1157177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
1158177372Sjeff    NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
1159177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
1160177372Sjeff    NULL, 0, reset_sleepq_prof_stats, "I",
1161177372Sjeff    "Reset sleepqueue profiling statistics");
1162177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
1163177372Sjeff    NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
1164177372Sjeff#endif
1165177372Sjeff
1166154936Sjhb#ifdef DDB
1167154936SjhbDB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
1168154936Sjhb{
1169154936Sjhb	struct sleepqueue_chain *sc;
1170154936Sjhb	struct sleepqueue *sq;
1171154944Simp#ifdef INVARIANTS
1172154936Sjhb	struct lock_object *lock;
1173154944Simp#endif
1174154936Sjhb	struct thread *td;
1175154936Sjhb	void *wchan;
1176154936Sjhb	int i;
1177154936Sjhb
1178154936Sjhb	if (!have_addr)
1179154936Sjhb		return;
1180154936Sjhb
1181154936Sjhb	/*
1182154936Sjhb	 * First, see if there is an active sleep queue for the wait channel
1183154936Sjhb	 * indicated by the address.
1184154936Sjhb	 */
1185154936Sjhb	wchan = (void *)addr;
1186154936Sjhb	sc = SC_LOOKUP(wchan);
1187154936Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
1188154936Sjhb		if (sq->sq_wchan == wchan)
1189154936Sjhb			goto found;
1190154936Sjhb
1191154936Sjhb	/*
1192154936Sjhb	 * Second, see if there is an active sleep queue at the address
1193154936Sjhb	 * indicated.
1194154936Sjhb	 */
1195154936Sjhb	for (i = 0; i < SC_TABLESIZE; i++)
1196154936Sjhb		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
1197154936Sjhb			if (sq == (struct sleepqueue *)addr)
1198154936Sjhb				goto found;
1199154936Sjhb		}
1200154936Sjhb
1201154936Sjhb	db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
1202154936Sjhb	return;
1203154936Sjhbfound:
1204154936Sjhb	db_printf("Wait channel: %p\n", sq->sq_wchan);
1205201879Sattilio	db_printf("Queue type: %d\n", sq->sq_type);
1206154936Sjhb#ifdef INVARIANTS
1207154936Sjhb	if (sq->sq_lock) {
1208164325Spjd		lock = sq->sq_lock;
1209154936Sjhb		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
1210154936Sjhb		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
1211154936Sjhb	}
1212154936Sjhb#endif
1213154936Sjhb	db_printf("Blocked threads:\n");
1214165272Skmacy	for (i = 0; i < NR_SLEEPQS; i++) {
1215165272Skmacy		db_printf("\nQueue[%d]:\n", i);
1216165272Skmacy		if (TAILQ_EMPTY(&sq->sq_blocked[i]))
1217165272Skmacy			db_printf("\tempty\n");
1218165272Skmacy		else
1219165272Skmacy			TAILQ_FOREACH(td, &sq->sq_blocked[0],
1220165272Skmacy				      td_slpq) {
1221165272Skmacy				db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
1222165272Skmacy					  td->td_tid, td->td_proc->p_pid,
1223180930Sjhb					  td->td_name);
1224165272Skmacy			}
1225200447Sattilio		db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
1226165272Skmacy	}
1227154936Sjhb}
1228157823Sjhb
1229157823Sjhb/* Alias 'show sleepqueue' to 'show sleepq'. */
1230183054SsamDB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
1231154936Sjhb#endif
1232