subr_sleepqueue.c revision 247783
1139804Simp/*-
2126324Sjhb * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
3126324Sjhb * All rights reserved.
4126324Sjhb *
5126324Sjhb * Redistribution and use in source and binary forms, with or without
6126324Sjhb * modification, are permitted provided that the following conditions
7126324Sjhb * are met:
8126324Sjhb * 1. Redistributions of source code must retain the above copyright
9126324Sjhb *    notice, this list of conditions and the following disclaimer.
10126324Sjhb * 2. Redistributions in binary form must reproduce the above copyright
11126324Sjhb *    notice, this list of conditions and the following disclaimer in the
12126324Sjhb *    documentation and/or other materials provided with the distribution.
13126324Sjhb * 3. Neither the name of the author nor the names of any co-contributors
14126324Sjhb *    may be used to endorse or promote products derived from this software
15126324Sjhb *    without specific prior written permission.
16126324Sjhb *
17126324Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18126324Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19126324Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20126324Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21126324Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22126324Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23126324Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24126324Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25126324Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26126324Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27126324Sjhb * SUCH DAMAGE.
28126324Sjhb */
29126324Sjhb
/*
 * Implementation of sleep queues used to hold queue of threads blocked on
 * a wait channel.  Sleep queues differ from turnstiles in that wait
 * channels are not owned by anyone, so there is no priority propagation.
 * Sleep queues can also provide a timeout and can also be interrupted by
 * signals.  That said, there are several similarities between the turnstile
 * and sleep queue implementations.  (Note: turnstiles were implemented
 * first.)  For example, both use a hash table of the same size where each
 * bucket is referred to as a "chain" that contains both a spin lock and
 * a linked list of queues.  An individual queue is located by using a hash
 * to pick a chain, locking the chain, and then walking the chain searching
 * for the queue.  This means that a wait channel object does not need to
 * embed its queue head just as locks do not embed their turnstile queue
 * head.  Threads also carry around a sleep queue that they lend to the
 * wait channel when blocking.  Just as in turnstiles, the queue includes
 * a free list of the sleep queues of other threads blocked on the same
 * wait channel in the case of multiple waiters.
 *
 * Some additional functionality provided by sleep queues includes the
 * ability to set a timeout.  The timeout is managed using a per-thread
 * callout that resumes a thread if it is asleep.  A thread may also
 * catch signals while it is asleep (aka an interruptible sleep).  The
 * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
 * sleep queues also provide some extra assertions.  One is not allowed to
 * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
 * must consistently use the same lock to synchronize with a wait channel,
 * though this check is currently only a warning for sleep/wakeup due to
 * pre-existing abuse of that API.  The same lock must also be held when
 * awakening threads, though that is currently only enforced for condition
 * variables.
 */
61126324Sjhb
62126324Sjhb#include <sys/cdefs.h>
63126324Sjhb__FBSDID("$FreeBSD: head/sys/kern/subr_sleepqueue.c 247783 2013-03-04 11:51:46Z davide $");
64126324Sjhb
65154936Sjhb#include "opt_sleepqueue_profiling.h"
66154936Sjhb#include "opt_ddb.h"
67235459Srstone#include "opt_kdtrace.h"
68170640Sjeff#include "opt_sched.h"
69154936Sjhb
70126324Sjhb#include <sys/param.h>
71126324Sjhb#include <sys/systm.h>
72126324Sjhb#include <sys/lock.h>
73126324Sjhb#include <sys/kernel.h>
74126324Sjhb#include <sys/ktr.h>
75126324Sjhb#include <sys/mutex.h>
76126324Sjhb#include <sys/proc.h>
77177372Sjeff#include <sys/sbuf.h>
78126324Sjhb#include <sys/sched.h>
79235459Srstone#include <sys/sdt.h>
80126324Sjhb#include <sys/signalvar.h>
81126324Sjhb#include <sys/sleepqueue.h>
82131259Sjhb#include <sys/sysctl.h>
83126324Sjhb
84169666Sjeff#include <vm/uma.h>
85169666Sjeff
86154936Sjhb#ifdef DDB
87154936Sjhb#include <ddb/ddb.h>
88154936Sjhb#endif
89154936Sjhb
/*
 * Constants for the hash table of sleep queue chains.  These constants are
 * the same ones that 4BSD (and possibly earlier versions of BSD) used.
 * Basically, we ignore the lower 8 bits of the address since most wait
 * channel pointers are aligned and only look at the next 7 bits for the
 * hash.  SC_TABLESIZE must be a power of two for SC_MASK to work properly.
 *
 * SC_HASH(wc) maps a wait channel address to a bucket index in the range
 * [0, SC_TABLESIZE) and SC_LOOKUP(wc) yields a pointer to that bucket in
 * sleepq_chains[].  The expansion of SC_LOOKUP() is fully parenthesized so
 * that it can safely be combined with postfix operators such as '->'.
 *
 * NR_SLEEPQS is the number of blocked-thread queues kept per sleep queue.
 */
#define	SC_TABLESIZE	128			/* Must be power of 2. */
#define	SC_MASK		(SC_TABLESIZE - 1)
#define	SC_SHIFT	8
#define	SC_HASH(wc)	(((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
#define	SC_LOOKUP(wc)	(&sleepq_chains[SC_HASH(wc)])
#define	NR_SLEEPQS	2
103126324Sjhb/*
104126324Sjhb * There two different lists of sleep queues.  Both lists are connected
105126324Sjhb * via the sq_hash entries.  The first list is the sleep queue chain list
106126324Sjhb * that a sleep queue is on when it is attached to a wait channel.  The
107126324Sjhb * second list is the free list hung off of a sleep queue that is attached
108126324Sjhb * to a wait channel.
109126324Sjhb *
110126324Sjhb * Each sleep queue also contains the wait channel it is attached to, the
111126324Sjhb * list of threads blocked on that wait channel, flags specific to the
112126324Sjhb * wait channel, and the lock used to synchronize with a wait channel.
113126324Sjhb * The flags are used to catch mismatches between the various consumers
114126324Sjhb * of the sleep queue API (e.g. sleep/wakeup and condition variables).
115126324Sjhb * The lock pointer is only used when invariants are enabled for various
116126324Sjhb * debugging checks.
117126324Sjhb *
118126324Sjhb * Locking key:
119126324Sjhb *  c - sleep queue chain lock
120126324Sjhb */
121126324Sjhbstruct sleepqueue {
122165272Skmacy	TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];	/* (c) Blocked threads. */
123200447Sattilio	u_int sq_blockedcnt[NR_SLEEPQS];	/* (c) N. of blocked threads. */
124126324Sjhb	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
125126324Sjhb	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
126126324Sjhb	void	*sq_wchan;			/* (c) Wait channel. */
127201879Sattilio	int	sq_type;			/* (c) Queue type. */
128136445Sjhb#ifdef INVARIANTS
129164325Spjd	struct lock_object *sq_lock;		/* (c) Associated lock. */
130126324Sjhb#endif
131126324Sjhb};
132126324Sjhb
133126324Sjhbstruct sleepqueue_chain {
134126324Sjhb	LIST_HEAD(, sleepqueue) sc_queues;	/* List of sleep queues. */
135126324Sjhb	struct mtx sc_lock;			/* Spin lock for this chain. */
136131259Sjhb#ifdef SLEEPQUEUE_PROFILING
137131259Sjhb	u_int	sc_depth;			/* Length of sc_queues. */
138131259Sjhb	u_int	sc_max_depth;			/* Max length of sc_queues. */
139131259Sjhb#endif
140126324Sjhb};
141126324Sjhb
142131259Sjhb#ifdef SLEEPQUEUE_PROFILING
143131259Sjhbu_int sleepq_max_depth;
144227309Sedstatic SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
145227309Sedstatic SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
146131259Sjhb    "sleepq chain stats");
147131259SjhbSYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
148131259Sjhb    0, "maxmimum depth achieved of a single chain");
149177372Sjeff
150177372Sjeffstatic void	sleepq_profile(const char *wmesg);
151177372Sjeffstatic int	prof_enabled;
152131259Sjhb#endif
153126324Sjhbstatic struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
154169666Sjeffstatic uma_zone_t sleepq_zone;
155126324Sjhb
156126324Sjhb/*
157126324Sjhb * Prototypes for non-exported routines.
158126324Sjhb */
159177085Sjeffstatic int	sleepq_catch_signals(void *wchan, int pri);
160165272Skmacystatic int	sleepq_check_signals(void);
161126324Sjhbstatic int	sleepq_check_timeout(void);
162169666Sjeff#ifdef INVARIANTS
163169666Sjeffstatic void	sleepq_dtor(void *mem, int size, void *arg);
164169666Sjeff#endif
165169666Sjeffstatic int	sleepq_init(void *mem, int size, int flags);
166181334Sjhbstatic int	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
167169666Sjeff		    int pri);
168177085Sjeffstatic void	sleepq_switch(void *wchan, int pri);
169126324Sjhbstatic void	sleepq_timeout(void *arg);
170126324Sjhb
171235459SrstoneSDT_PROBE_DECLARE(sched, , , sleep);
172235459SrstoneSDT_PROBE_DECLARE(sched, , , wakeup);
173235459Srstone
174126324Sjhb/*
175126324Sjhb * Early initialization of sleep queues that is called from the sleepinit()
176126324Sjhb * SYSINIT.
177126324Sjhb */
178126324Sjhbvoid
179126324Sjhbinit_sleepqueues(void)
180126324Sjhb{
181131259Sjhb#ifdef SLEEPQUEUE_PROFILING
182131259Sjhb	struct sysctl_oid *chain_oid;
183131259Sjhb	char chain_name[10];
184131259Sjhb#endif
185126324Sjhb	int i;
186126324Sjhb
187126324Sjhb	for (i = 0; i < SC_TABLESIZE; i++) {
188126324Sjhb		LIST_INIT(&sleepq_chains[i].sc_queues);
189126324Sjhb		mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
190176258Sjhb		    MTX_SPIN | MTX_RECURSE);
191131259Sjhb#ifdef SLEEPQUEUE_PROFILING
192131259Sjhb		snprintf(chain_name, sizeof(chain_name), "%d", i);
193131259Sjhb		chain_oid = SYSCTL_ADD_NODE(NULL,
194131259Sjhb		    SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
195131259Sjhb		    chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
196131259Sjhb		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
197131259Sjhb		    "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
198131259Sjhb		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
199131259Sjhb		    "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
200131259Sjhb		    NULL);
201131259Sjhb#endif
202126324Sjhb	}
203169666Sjeff	sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
204169666Sjeff#ifdef INVARIANTS
205169666Sjeff	    NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
206169666Sjeff#else
207169666Sjeff	    NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
208169666Sjeff#endif
209169666Sjeff
210126324Sjhb	thread0.td_sleepqueue = sleepq_alloc();
211126324Sjhb}
212126324Sjhb
213126324Sjhb/*
214169666Sjeff * Get a sleep queue for a new thread.
215126324Sjhb */
216126324Sjhbstruct sleepqueue *
217126324Sjhbsleepq_alloc(void)
218126324Sjhb{
219126324Sjhb
220169666Sjeff	return (uma_zalloc(sleepq_zone, M_WAITOK));
221126324Sjhb}
222126324Sjhb
223126324Sjhb/*
224126324Sjhb * Free a sleep queue when a thread is destroyed.
225126324Sjhb */
226126324Sjhbvoid
227126324Sjhbsleepq_free(struct sleepqueue *sq)
228126324Sjhb{
229126324Sjhb
230169666Sjeff	uma_zfree(sleepq_zone, sq);
231126324Sjhb}
232126324Sjhb
233126324Sjhb/*
234136445Sjhb * Lock the sleep queue chain associated with the specified wait channel.
235136445Sjhb */
236136445Sjhbvoid
237136445Sjhbsleepq_lock(void *wchan)
238136445Sjhb{
239136445Sjhb	struct sleepqueue_chain *sc;
240136445Sjhb
241136445Sjhb	sc = SC_LOOKUP(wchan);
242136445Sjhb	mtx_lock_spin(&sc->sc_lock);
243136445Sjhb}
244136445Sjhb
245136445Sjhb/*
246126324Sjhb * Look up the sleep queue associated with a given wait channel in the hash
247136445Sjhb * table locking the associated sleep queue chain.  If no queue is found in
248136445Sjhb * the table, NULL is returned.
249126324Sjhb */
250126324Sjhbstruct sleepqueue *
251126324Sjhbsleepq_lookup(void *wchan)
252126324Sjhb{
253126324Sjhb	struct sleepqueue_chain *sc;
254126324Sjhb	struct sleepqueue *sq;
255126324Sjhb
256126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
257126324Sjhb	sc = SC_LOOKUP(wchan);
258136445Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
259126324Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
260126324Sjhb		if (sq->sq_wchan == wchan)
261126324Sjhb			return (sq);
262126324Sjhb	return (NULL);
263126324Sjhb}
264126324Sjhb
265126324Sjhb/*
266126324Sjhb * Unlock the sleep queue chain associated with a given wait channel.
267126324Sjhb */
268126324Sjhbvoid
269126324Sjhbsleepq_release(void *wchan)
270126324Sjhb{
271126324Sjhb	struct sleepqueue_chain *sc;
272126324Sjhb
273126324Sjhb	sc = SC_LOOKUP(wchan);
274126324Sjhb	mtx_unlock_spin(&sc->sc_lock);
275126324Sjhb}
276126324Sjhb
277126324Sjhb/*
278137277Sjhb * Places the current thread on the sleep queue for the specified wait
279126324Sjhb * channel.  If INVARIANTS is enabled, then it associates the passed in
280126324Sjhb * lock with the sleepq to make sure it is held when that sleep queue is
281126324Sjhb * woken up.
282126324Sjhb */
283126324Sjhbvoid
284165272Skmacysleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
285165272Skmacy    int queue)
286126324Sjhb{
287126324Sjhb	struct sleepqueue_chain *sc;
288136445Sjhb	struct sleepqueue *sq;
289137277Sjhb	struct thread *td;
290126324Sjhb
291126324Sjhb	td = curthread;
292126324Sjhb	sc = SC_LOOKUP(wchan);
293126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
294126324Sjhb	MPASS(td->td_sleepqueue != NULL);
295126324Sjhb	MPASS(wchan != NULL);
296165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
297126324Sjhb
298150177Sjhb	/* If this thread is not allowed to sleep, die a horrible death. */
299247588Sjhb	KASSERT(td->td_no_sleeping == 0,
300247588Sjhb	    ("%s: td %p to sleep on wchan %p with sleeping prohibited",
301240423Sattilio	    __func__, td, wchan));
302150177Sjhb
303136445Sjhb	/* Look up the sleep queue associated with the wait channel 'wchan'. */
304136445Sjhb	sq = sleepq_lookup(wchan);
305136445Sjhb
306136445Sjhb	/*
307136445Sjhb	 * If the wait channel does not already have a sleep queue, use
308136445Sjhb	 * this thread's sleep queue.  Otherwise, insert the current thread
309136445Sjhb	 * into the sleep queue already in use by this wait channel.
310136445Sjhb	 */
311126324Sjhb	if (sq == NULL) {
312165272Skmacy#ifdef INVARIANTS
313165292Skmacy		int i;
314165291Sache
315165292Skmacy		sq = td->td_sleepqueue;
316200447Sattilio		for (i = 0; i < NR_SLEEPQS; i++) {
317165292Skmacy			KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
318200447Sattilio			    ("thread's sleep queue %d is not empty", i));
319200447Sattilio			KASSERT(sq->sq_blockedcnt[i] == 0,
320200447Sattilio			    ("thread's sleep queue %d count mismatches", i));
321200447Sattilio		}
322165272Skmacy		KASSERT(LIST_EMPTY(&sq->sq_free),
323165272Skmacy		    ("thread's sleep queue has a non-empty free list"));
324165272Skmacy		KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
325165292Skmacy		sq->sq_lock = lock;
326165272Skmacy#endif
327131259Sjhb#ifdef SLEEPQUEUE_PROFILING
328131259Sjhb		sc->sc_depth++;
329131259Sjhb		if (sc->sc_depth > sc->sc_max_depth) {
330131259Sjhb			sc->sc_max_depth = sc->sc_depth;
331131259Sjhb			if (sc->sc_max_depth > sleepq_max_depth)
332131259Sjhb				sleepq_max_depth = sc->sc_max_depth;
333131259Sjhb		}
334131259Sjhb#endif
335165292Skmacy		sq = td->td_sleepqueue;
336126324Sjhb		LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
337126324Sjhb		sq->sq_wchan = wchan;
338201879Sattilio		sq->sq_type = flags & SLEEPQ_TYPE;
339126324Sjhb	} else {
340126324Sjhb		MPASS(wchan == sq->sq_wchan);
341126488Sjhb		MPASS(lock == sq->sq_lock);
342136445Sjhb		MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
343126324Sjhb		LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
344126324Sjhb	}
345172155Sattilio	thread_lock(td);
346165272Skmacy	TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
347200447Sattilio	sq->sq_blockedcnt[queue]++;
348126324Sjhb	td->td_sleepqueue = NULL;
349165272Skmacy	td->td_sqqueue = queue;
350126324Sjhb	td->td_wchan = wchan;
351126324Sjhb	td->td_wmesg = wmesg;
352155741Sdavidxu	if (flags & SLEEPQ_INTERRUPTIBLE) {
353134013Sjhb		td->td_flags |= TDF_SINTR;
354155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
355155741Sdavidxu	}
356172155Sattilio	thread_unlock(td);
357126324Sjhb}
358126324Sjhb
359126324Sjhb/*
360126324Sjhb * Sets a timeout that will remove the current thread from the specified
361126324Sjhb * sleep queue after timo ticks if the thread has not already been awakened.
362126324Sjhb */
363126324Sjhbvoid
364247783Sdavidesleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
365247783Sdavide    int flags)
366126324Sjhb{
367126324Sjhb	struct sleepqueue_chain *sc;
368126324Sjhb	struct thread *td;
369126324Sjhb
370126324Sjhb	td = curthread;
371126324Sjhb	sc = SC_LOOKUP(wchan);
372126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
373126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
374126324Sjhb	MPASS(td->td_sleepqueue == NULL);
375126324Sjhb	MPASS(wchan != NULL);
376247783Sdavide	callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
377247783Sdavide	    sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
378126324Sjhb}
379126324Sjhb
380126324Sjhb/*
381200447Sattilio * Return the number of actual sleepers for the specified queue.
382200447Sattilio */
383200447Sattiliou_int
384200447Sattiliosleepq_sleepcnt(void *wchan, int queue)
385200447Sattilio{
386200447Sattilio	struct sleepqueue *sq;
387200447Sattilio
388200447Sattilio	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
389200447Sattilio	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
390200447Sattilio	sq = sleepq_lookup(wchan);
391200447Sattilio	if (sq == NULL)
392200447Sattilio		return (0);
393200447Sattilio	return (sq->sq_blockedcnt[queue]);
394200447Sattilio}
395200447Sattilio
396200447Sattilio/*
397126324Sjhb * Marks the pending sleep of the current thread as interruptible and
398126324Sjhb * makes an initial check for pending signals before putting a thread
399170294Sjeff * to sleep. Enters and exits with the thread lock held.  Thread lock
400170294Sjeff * may have transitioned from the sleepq lock to a run lock.
401126324Sjhb */
402155741Sdavidxustatic int
403177085Sjeffsleepq_catch_signals(void *wchan, int pri)
404126324Sjhb{
405126324Sjhb	struct sleepqueue_chain *sc;
406126324Sjhb	struct sleepqueue *sq;
407126324Sjhb	struct thread *td;
408126324Sjhb	struct proc *p;
409155741Sdavidxu	struct sigacts *ps;
410195702Skib	int sig, ret, stop_allowed;
411126324Sjhb
412126324Sjhb	td = curthread;
413155741Sdavidxu	p = curproc;
414126324Sjhb	sc = SC_LOOKUP(wchan);
415126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
416126324Sjhb	MPASS(wchan != NULL);
417211523Sdavidxu	if ((td->td_pflags & TDP_WAKEUP) != 0) {
418211523Sdavidxu		td->td_pflags &= ~TDP_WAKEUP;
419211523Sdavidxu		ret = EINTR;
420211534Sdavidxu		thread_lock(td);
421211523Sdavidxu		goto out;
422211523Sdavidxu	}
423211523Sdavidxu
424177375Sjeff	/*
425177375Sjeff	 * See if there are any pending signals for this thread.  If not
426177375Sjeff	 * we can switch immediately.  Otherwise do the signal processing
427177375Sjeff	 * directly.
428177375Sjeff	 */
429177375Sjeff	thread_lock(td);
430177471Sjeff	if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) {
431177375Sjeff		sleepq_switch(wchan, pri);
432177375Sjeff		return (0);
433177375Sjeff	}
434195702Skib	stop_allowed = (td->td_flags & TDF_SBDRY) ? SIG_STOP_NOT_ALLOWED :
435195702Skib	    SIG_STOP_ALLOWED;
436177375Sjeff	thread_unlock(td);
437177375Sjeff	mtx_unlock_spin(&sc->sc_lock);
438129241Sbde	CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
439173601Sjulian		(void *)td, (long)p->p_pid, td->td_name);
440126324Sjhb	PROC_LOCK(p);
441155741Sdavidxu	ps = p->p_sigacts;
442155741Sdavidxu	mtx_lock(&ps->ps_mtx);
443195702Skib	sig = cursig(td, stop_allowed);
444155741Sdavidxu	if (sig == 0) {
445155741Sdavidxu		mtx_unlock(&ps->ps_mtx);
446155741Sdavidxu		ret = thread_suspend_check(1);
447155741Sdavidxu		MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
448155741Sdavidxu	} else {
449155741Sdavidxu		if (SIGISMEMBER(ps->ps_sigintr, sig))
450155741Sdavidxu			ret = EINTR;
451155741Sdavidxu		else
452155741Sdavidxu			ret = ERESTART;
453155741Sdavidxu		mtx_unlock(&ps->ps_mtx);
454155741Sdavidxu	}
455184667Sdavidxu	/*
456184667Sdavidxu	 * Lock the per-process spinlock prior to dropping the PROC_LOCK
457184667Sdavidxu	 * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
458209612Sjhb	 * thread_lock() are currently held in tdsendsignal().
459184667Sdavidxu	 */
460184667Sdavidxu	PROC_SLOCK(p);
461170294Sjeff	mtx_lock_spin(&sc->sc_lock);
462184667Sdavidxu	PROC_UNLOCK(p);
463170294Sjeff	thread_lock(td);
464184667Sdavidxu	PROC_SUNLOCK(p);
465185502Sdavidxu	if (ret == 0) {
466185502Sdavidxu		sleepq_switch(wchan, pri);
467185502Sdavidxu		return (0);
468185502Sdavidxu	}
469211523Sdavidxuout:
470155936Sdavidxu	/*
471155936Sdavidxu	 * There were pending signals and this thread is still
472155936Sdavidxu	 * on the sleep queue, remove it from the sleep queue.
473155936Sdavidxu	 */
474170294Sjeff	if (TD_ON_SLEEPQ(td)) {
475170294Sjeff		sq = sleepq_lookup(wchan);
476181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
477181334Sjhb#ifdef INVARIANTS
478181334Sjhb			/*
479181334Sjhb			 * This thread hasn't gone to sleep yet, so it
480181334Sjhb			 * should not be swapped out.
481181334Sjhb			 */
482181334Sjhb			panic("not waking up swapper");
483181334Sjhb#endif
484181334Sjhb		}
485170294Sjeff	}
486170294Sjeff	mtx_unlock_spin(&sc->sc_lock);
487170294Sjeff	MPASS(td->td_lock != &sc->sc_lock);
488155741Sdavidxu	return (ret);
489126324Sjhb}
490126324Sjhb
491126324Sjhb/*
492170294Sjeff * Switches to another thread if we are still asleep on a sleep queue.
493170294Sjeff * Returns with thread lock.
494126324Sjhb */
495126324Sjhbstatic void
496177085Sjeffsleepq_switch(void *wchan, int pri)
497126324Sjhb{
498126324Sjhb	struct sleepqueue_chain *sc;
499175654Sjhb	struct sleepqueue *sq;
500126324Sjhb	struct thread *td;
501126324Sjhb
502126324Sjhb	td = curthread;
503126324Sjhb	sc = SC_LOOKUP(wchan);
504126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
505170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
506175654Sjhb
507175654Sjhb	/*
508175654Sjhb	 * If we have a sleep queue, then we've already been woken up, so
509175654Sjhb	 * just return.
510175654Sjhb	 */
511126324Sjhb	if (td->td_sleepqueue != NULL) {
512126324Sjhb		mtx_unlock_spin(&sc->sc_lock);
513126324Sjhb		return;
514126324Sjhb	}
515175654Sjhb
516175654Sjhb	/*
517175654Sjhb	 * If TDF_TIMEOUT is set, then our sleep has been timed out
518175654Sjhb	 * already but we are still on the sleep queue, so dequeue the
519175654Sjhb	 * thread and return.
520175654Sjhb	 */
521175654Sjhb	if (td->td_flags & TDF_TIMEOUT) {
522175654Sjhb		MPASS(TD_ON_SLEEPQ(td));
523175654Sjhb		sq = sleepq_lookup(wchan);
524181334Sjhb		if (sleepq_resume_thread(sq, td, 0)) {
525181334Sjhb#ifdef INVARIANTS
526181334Sjhb			/*
527181334Sjhb			 * This thread hasn't gone to sleep yet, so it
528181334Sjhb			 * should not be swapped out.
529181334Sjhb			 */
530181334Sjhb			panic("not waking up swapper");
531181334Sjhb#endif
532181334Sjhb		}
533175654Sjhb		mtx_unlock_spin(&sc->sc_lock);
534175654Sjhb		return;
535175654Sjhb	}
536177372Sjeff#ifdef SLEEPQUEUE_PROFILING
537177372Sjeff	if (prof_enabled)
538177372Sjeff		sleepq_profile(td->td_wmesg);
539177372Sjeff#endif
540177085Sjeff	MPASS(td->td_sleepqueue == NULL);
541177085Sjeff	sched_sleep(td, pri);
542170294Sjeff	thread_lock_set(td, &sc->sc_lock);
543235459Srstone	SDT_PROBE0(sched, , , sleep);
544126324Sjhb	TD_SET_SLEEPING(td);
545178272Sjeff	mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
546126324Sjhb	KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
547129241Sbde	CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
548173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
549126324Sjhb}
550126324Sjhb
551126324Sjhb/*
552126324Sjhb * Check to see if we timed out.
553126324Sjhb */
554126324Sjhbstatic int
555126324Sjhbsleepq_check_timeout(void)
556126324Sjhb{
557126324Sjhb	struct thread *td;
558126324Sjhb
559126324Sjhb	td = curthread;
560170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
561126324Sjhb
562126324Sjhb	/*
563126324Sjhb	 * If TDF_TIMEOUT is set, we timed out.
564126324Sjhb	 */
565126324Sjhb	if (td->td_flags & TDF_TIMEOUT) {
566126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
567126324Sjhb		return (EWOULDBLOCK);
568126324Sjhb	}
569126324Sjhb
570126324Sjhb	/*
571126324Sjhb	 * If TDF_TIMOFAIL is set, the timeout ran after we had
572126324Sjhb	 * already been woken up.
573126324Sjhb	 */
574126324Sjhb	if (td->td_flags & TDF_TIMOFAIL)
575126324Sjhb		td->td_flags &= ~TDF_TIMOFAIL;
576126324Sjhb
577126324Sjhb	/*
578126324Sjhb	 * If callout_stop() fails, then the timeout is running on
579126324Sjhb	 * another CPU, so synchronize with it to avoid having it
580126324Sjhb	 * accidentally wake up a subsequent sleep.
581126324Sjhb	 */
582126324Sjhb	else if (callout_stop(&td->td_slpcallout) == 0) {
583126324Sjhb		td->td_flags |= TDF_TIMEOUT;
584126324Sjhb		TD_SET_SLEEPING(td);
585178272Sjeff		mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
586126324Sjhb	}
587126324Sjhb	return (0);
588126324Sjhb}
589126324Sjhb
590126324Sjhb/*
591126324Sjhb * Check to see if we were awoken by a signal.
592126324Sjhb */
593126324Sjhbstatic int
594126324Sjhbsleepq_check_signals(void)
595126324Sjhb{
596126324Sjhb	struct thread *td;
597126324Sjhb
598126324Sjhb	td = curthread;
599170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
600126324Sjhb
601126324Sjhb	/* We are no longer in an interruptible sleep. */
602155741Sdavidxu	if (td->td_flags & TDF_SINTR)
603246417Sjhb		td->td_flags &= ~TDF_SINTR;
604126324Sjhb
605155741Sdavidxu	if (td->td_flags & TDF_SLEEPABORT) {
606155741Sdavidxu		td->td_flags &= ~TDF_SLEEPABORT;
607155741Sdavidxu		return (td->td_intrval);
608155741Sdavidxu	}
609155741Sdavidxu
610126324Sjhb	return (0);
611126324Sjhb}
612126324Sjhb
613126324Sjhb/*
614126324Sjhb * Block the current thread until it is awakened from its sleep queue.
615126324Sjhb */
616126324Sjhbvoid
617177085Sjeffsleepq_wait(void *wchan, int pri)
618126324Sjhb{
619170294Sjeff	struct thread *td;
620126324Sjhb
621170294Sjeff	td = curthread;
622170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
623170294Sjeff	thread_lock(td);
624177085Sjeff	sleepq_switch(wchan, pri);
625170294Sjeff	thread_unlock(td);
626126324Sjhb}
627126324Sjhb
628126324Sjhb/*
629126324Sjhb * Block the current thread until it is awakened from its sleep queue
630126324Sjhb * or it is interrupted by a signal.
631126324Sjhb */
632126324Sjhbint
633177085Sjeffsleepq_wait_sig(void *wchan, int pri)
634126324Sjhb{
635155741Sdavidxu	int rcatch;
636126324Sjhb	int rval;
637126324Sjhb
638177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
639126324Sjhb	rval = sleepq_check_signals();
640170294Sjeff	thread_unlock(curthread);
641155741Sdavidxu	if (rcatch)
642155741Sdavidxu		return (rcatch);
643126324Sjhb	return (rval);
644126324Sjhb}
645126324Sjhb
646126324Sjhb/*
647126324Sjhb * Block the current thread until it is awakened from its sleep queue
648126324Sjhb * or it times out while waiting.
649126324Sjhb */
650126324Sjhbint
651177085Sjeffsleepq_timedwait(void *wchan, int pri)
652126324Sjhb{
653170294Sjeff	struct thread *td;
654126324Sjhb	int rval;
655126324Sjhb
656170294Sjeff	td = curthread;
657170294Sjeff	MPASS(!(td->td_flags & TDF_SINTR));
658170294Sjeff	thread_lock(td);
659177085Sjeff	sleepq_switch(wchan, pri);
660126324Sjhb	rval = sleepq_check_timeout();
661170294Sjeff	thread_unlock(td);
662170294Sjeff
663131249Sjhb	return (rval);
664126324Sjhb}
665126324Sjhb
666126324Sjhb/*
667126324Sjhb * Block the current thread until it is awakened from its sleep queue,
668126324Sjhb * it is interrupted by a signal, or it times out waiting to be awakened.
669126324Sjhb */
670126324Sjhbint
671177085Sjeffsleepq_timedwait_sig(void *wchan, int pri)
672126324Sjhb{
673155741Sdavidxu	int rcatch, rvalt, rvals;
674126324Sjhb
675177085Sjeff	rcatch = sleepq_catch_signals(wchan, pri);
676126324Sjhb	rvalt = sleepq_check_timeout();
677126324Sjhb	rvals = sleepq_check_signals();
678170294Sjeff	thread_unlock(curthread);
679155741Sdavidxu	if (rcatch)
680155741Sdavidxu		return (rcatch);
681155741Sdavidxu	if (rvals)
682126324Sjhb		return (rvals);
683155741Sdavidxu	return (rvalt);
684126324Sjhb}
685126324Sjhb
686126324Sjhb/*
687201879Sattilio * Returns the type of sleepqueue given a waitchannel.
688201879Sattilio */
689201879Sattilioint
690201879Sattiliosleepq_type(void *wchan)
691201879Sattilio{
692201879Sattilio	struct sleepqueue *sq;
693201879Sattilio	int type;
694201879Sattilio
695201879Sattilio	MPASS(wchan != NULL);
696201879Sattilio
697201879Sattilio	sleepq_lock(wchan);
698201879Sattilio	sq = sleepq_lookup(wchan);
699201879Sattilio	if (sq == NULL) {
700201879Sattilio		sleepq_release(wchan);
701201879Sattilio		return (-1);
702201879Sattilio	}
703201879Sattilio	type = sq->sq_type;
704201879Sattilio	sleepq_release(wchan);
705201879Sattilio	return (type);
706201879Sattilio}
707201879Sattilio
708201879Sattilio/*
709145056Sjhb * Removes a thread from a sleep queue and makes it
710145056Sjhb * runnable.
711126324Sjhb */
712181334Sjhbstatic int
713145056Sjhbsleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
714126324Sjhb{
715126324Sjhb	struct sleepqueue_chain *sc;
716126324Sjhb
717126324Sjhb	MPASS(td != NULL);
718126324Sjhb	MPASS(sq->sq_wchan != NULL);
719126324Sjhb	MPASS(td->td_wchan == sq->sq_wchan);
720165272Skmacy	MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
721170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
722126324Sjhb	sc = SC_LOOKUP(sq->sq_wchan);
723126324Sjhb	mtx_assert(&sc->sc_lock, MA_OWNED);
724126324Sjhb
725235459Srstone	SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
726235459Srstone
727126324Sjhb	/* Remove the thread from the queue. */
728200447Sattilio	sq->sq_blockedcnt[td->td_sqqueue]--;
729165272Skmacy	TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
730126324Sjhb
731126324Sjhb	/*
732126324Sjhb	 * Get a sleep queue for this thread.  If this is the last waiter,
733126324Sjhb	 * use the queue itself and take it out of the chain, otherwise,
734126324Sjhb	 * remove a queue from the free list.
735126324Sjhb	 */
736126324Sjhb	if (LIST_EMPTY(&sq->sq_free)) {
737126324Sjhb		td->td_sleepqueue = sq;
738126324Sjhb#ifdef INVARIANTS
739126324Sjhb		sq->sq_wchan = NULL;
740126324Sjhb#endif
741131259Sjhb#ifdef SLEEPQUEUE_PROFILING
742131259Sjhb		sc->sc_depth--;
743131259Sjhb#endif
744126324Sjhb	} else
745126324Sjhb		td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
746126324Sjhb	LIST_REMOVE(td->td_sleepqueue, sq_hash);
747126324Sjhb
748129188Sjhb	td->td_wmesg = NULL;
749129188Sjhb	td->td_wchan = NULL;
750246417Sjhb	td->td_flags &= ~TDF_SINTR;
751129188Sjhb
752129241Sbde	CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
753173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, td->td_name);
754126324Sjhb
755126324Sjhb	/* Adjust priority if requested. */
756177085Sjeff	MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
757217410Sjhb	if (pri != 0 && td->td_priority > pri &&
758217410Sjhb	    PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
759136439Sups		sched_prio(td, pri);
760184653Sjhb
761184653Sjhb	/*
762184653Sjhb	 * Note that thread td might not be sleeping if it is running
763184653Sjhb	 * sleepq_catch_signals() on another CPU or is blocked on its
764184653Sjhb	 * proc lock to check signals.  There's no need to mark the
765184653Sjhb	 * thread runnable in that case.
766184653Sjhb	 */
767184653Sjhb	if (TD_IS_SLEEPING(td)) {
768184653Sjhb		TD_CLR_SLEEPING(td);
769184653Sjhb		return (setrunnable(td));
770184653Sjhb	}
771184653Sjhb	return (0);
772126324Sjhb}
773126324Sjhb
#ifdef INVARIANTS
/*
 * UMA zone item destructor for sleep queues.
 *
 * Only performs sanity checks: for each of the NR_SLEEPQS queues it
 * asserts that the list of blocked threads is empty and that the
 * matching blocked-thread counter is zero.  The size and arg parameters
 * are not used.
 */
static void
sleepq_dtor(void *mem, int size, void *arg)
{
	struct sleepqueue *sq = mem;
	int qidx;

	for (qidx = 0; qidx < NR_SLEEPQS; qidx++) {
		MPASS(TAILQ_EMPTY(&sq->sq_blocked[qidx]));
		MPASS(sq->sq_blockedcnt[qidx] == 0);
	}
}
#endif
791169666Sjeff
792169666Sjeff/*
793169666Sjeff * UMA zone item initializer.
794169666Sjeff */
795169666Sjeffstatic int
796169666Sjeffsleepq_init(void *mem, int size, int flags)
797169666Sjeff{
798169666Sjeff	struct sleepqueue *sq;
799169666Sjeff	int i;
800169666Sjeff
801169666Sjeff	bzero(mem, size);
802169666Sjeff	sq = mem;
803200447Sattilio	for (i = 0; i < NR_SLEEPQS; i++) {
804169666Sjeff		TAILQ_INIT(&sq->sq_blocked[i]);
805200447Sattilio		sq->sq_blockedcnt[i] = 0;
806200447Sattilio	}
807169666Sjeff	LIST_INIT(&sq->sq_free);
808169666Sjeff	return (0);
809169666Sjeff}
810169666Sjeff
811169666Sjeff/*
812126324Sjhb * Find the highest priority thread sleeping on a wait channel and resume it.
813126324Sjhb */
814181334Sjhbint
815165272Skmacysleepq_signal(void *wchan, int flags, int pri, int queue)
816126324Sjhb{
817126324Sjhb	struct sleepqueue *sq;
818137277Sjhb	struct thread *td, *besttd;
819181334Sjhb	int wakeup_swapper;
820126324Sjhb
821126324Sjhb	CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
822126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
823165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
824126324Sjhb	sq = sleepq_lookup(wchan);
825170294Sjeff	if (sq == NULL)
826181334Sjhb		return (0);
827134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
828126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
829129188Sjhb
830137277Sjhb	/*
831137277Sjhb	 * Find the highest priority thread on the queue.  If there is a
832137277Sjhb	 * tie, use the thread that first appears in the queue as it has
833137277Sjhb	 * been sleeping the longest since threads are always added to
834137277Sjhb	 * the tail of sleep queues.
835137277Sjhb	 */
836137277Sjhb	besttd = NULL;
837165272Skmacy	TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
838137277Sjhb		if (besttd == NULL || td->td_priority < besttd->td_priority)
839137277Sjhb			besttd = td;
840137277Sjhb	}
841137277Sjhb	MPASS(besttd != NULL);
842170294Sjeff	thread_lock(besttd);
843181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
844170294Sjeff	thread_unlock(besttd);
845181334Sjhb	return (wakeup_swapper);
846126324Sjhb}
847126324Sjhb
848126324Sjhb/*
849126324Sjhb * Resume all threads sleeping on a specified wait channel.
850126324Sjhb */
851181334Sjhbint
852165272Skmacysleepq_broadcast(void *wchan, int flags, int pri, int queue)
853126324Sjhb{
854126324Sjhb	struct sleepqueue *sq;
855182875Sjhb	struct thread *td, *tdn;
856181334Sjhb	int wakeup_swapper;
857126324Sjhb
858126324Sjhb	CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
859126324Sjhb	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
860165272Skmacy	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
861126324Sjhb	sq = sleepq_lookup(wchan);
862177085Sjeff	if (sq == NULL)
863181334Sjhb		return (0);
864134013Sjhb	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
865126324Sjhb	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
866129188Sjhb
867145056Sjhb	/* Resume all blocked threads on the sleep queue. */
868181334Sjhb	wakeup_swapper = 0;
869182875Sjhb	TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
870170294Sjeff		thread_lock(td);
871181334Sjhb		if (sleepq_resume_thread(sq, td, pri))
872181334Sjhb			wakeup_swapper = 1;
873170294Sjeff		thread_unlock(td);
874170294Sjeff	}
875181334Sjhb	return (wakeup_swapper);
876126324Sjhb}
877126324Sjhb
878126324Sjhb/*
879126324Sjhb * Time sleeping threads out.  When the timeout expires, the thread is
880126324Sjhb * removed from the sleep queue and made runnable if it is still asleep.
881126324Sjhb */
882126324Sjhbstatic void
883126324Sjhbsleepq_timeout(void *arg)
884126324Sjhb{
885170294Sjeff	struct sleepqueue_chain *sc;
886126324Sjhb	struct sleepqueue *sq;
887126324Sjhb	struct thread *td;
888126324Sjhb	void *wchan;
889181334Sjhb	int wakeup_swapper;
890126324Sjhb
891129241Sbde	td = arg;
892181334Sjhb	wakeup_swapper = 0;
893129241Sbde	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
894173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
895126324Sjhb
896126324Sjhb	/*
897126324Sjhb	 * First, see if the thread is asleep and get the wait channel if
898126324Sjhb	 * it is.
899126324Sjhb	 */
900170294Sjeff	thread_lock(td);
901170294Sjeff	if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
902126324Sjhb		wchan = td->td_wchan;
903170294Sjeff		sc = SC_LOOKUP(wchan);
904176078Sjeff		THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
905126324Sjhb		sq = sleepq_lookup(wchan);
906170294Sjeff		MPASS(sq != NULL);
907170294Sjeff		td->td_flags |= TDF_TIMEOUT;
908181334Sjhb		wakeup_swapper = sleepq_resume_thread(sq, td, 0);
909170294Sjeff		thread_unlock(td);
910181334Sjhb		if (wakeup_swapper)
911181334Sjhb			kick_proc0();
912170294Sjeff		return;
913126324Sjhb	}
914175654Sjhb
915126324Sjhb	/*
916175654Sjhb	 * If the thread is on the SLEEPQ but isn't sleeping yet, it
917175654Sjhb	 * can either be on another CPU in between sleepq_add() and
918175654Sjhb	 * one of the sleepq_*wait*() routines or it can be in
919175654Sjhb	 * sleepq_catch_signals().
920126324Sjhb	 */
921126324Sjhb	if (TD_ON_SLEEPQ(td)) {
922175664Sjhb		td->td_flags |= TDF_TIMEOUT;
923170294Sjeff		thread_unlock(td);
924126324Sjhb		return;
925170294Sjeff	}
926126324Sjhb
927126324Sjhb	/*
928126324Sjhb	 * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
929126324Sjhb	 * then the other thread has already yielded to us, so clear
930126324Sjhb	 * the flag and resume it.  If TDF_TIMEOUT is not set, then the
931126324Sjhb	 * we know that the other thread is not on a sleep queue, but it
932126324Sjhb	 * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
933126324Sjhb	 * to let it know that the timeout has already run and doesn't
934126324Sjhb	 * need to be canceled.
935126324Sjhb	 */
936126324Sjhb	if (td->td_flags & TDF_TIMEOUT) {
937127085Sjhb		MPASS(TD_IS_SLEEPING(td));
938126324Sjhb		td->td_flags &= ~TDF_TIMEOUT;
939126324Sjhb		TD_CLR_SLEEPING(td);
940181334Sjhb		wakeup_swapper = setrunnable(td);
941126324Sjhb	} else
942126324Sjhb		td->td_flags |= TDF_TIMOFAIL;
943170294Sjeff	thread_unlock(td);
944181334Sjhb	if (wakeup_swapper)
945181334Sjhb		kick_proc0();
946126324Sjhb}
947126324Sjhb
948126324Sjhb/*
949126324Sjhb * Resumes a specific thread from the sleep queue associated with a specific
950126324Sjhb * wait channel if it is on that queue.
951126324Sjhb */
952126324Sjhbvoid
953126324Sjhbsleepq_remove(struct thread *td, void *wchan)
954126324Sjhb{
955126324Sjhb	struct sleepqueue *sq;
956181334Sjhb	int wakeup_swapper;
957126324Sjhb
958126324Sjhb	/*
959126324Sjhb	 * Look up the sleep queue for this wait channel, then re-check
960126324Sjhb	 * that the thread is asleep on that channel, if it is not, then
961126324Sjhb	 * bail.
962126324Sjhb	 */
963126324Sjhb	MPASS(wchan != NULL);
964136445Sjhb	sleepq_lock(wchan);
965126324Sjhb	sq = sleepq_lookup(wchan);
966170294Sjeff	/*
967170294Sjeff	 * We can not lock the thread here as it may be sleeping on a
968170294Sjeff	 * different sleepq.  However, holding the sleepq lock for this
969170294Sjeff	 * wchan can guarantee that we do not miss a wakeup for this
970170294Sjeff	 * channel.  The asserts below will catch any false positives.
971170294Sjeff	 */
972126324Sjhb	if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
973126324Sjhb		sleepq_release(wchan);
974126324Sjhb		return;
975126324Sjhb	}
976170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
977170294Sjeff	thread_lock(td);
978126324Sjhb	MPASS(sq != NULL);
979170294Sjeff	MPASS(td->td_wchan == wchan);
980181334Sjhb	wakeup_swapper = sleepq_resume_thread(sq, td, 0);
981170294Sjeff	thread_unlock(td);
982126324Sjhb	sleepq_release(wchan);
983181334Sjhb	if (wakeup_swapper)
984181334Sjhb		kick_proc0();
985126324Sjhb}
986126324Sjhb
987126324Sjhb/*
988129241Sbde * Abort a thread as if an interrupt had occurred.  Only abort
989129241Sbde * interruptible waits (unfortunately it isn't safe to abort others).
990126324Sjhb */
991181334Sjhbint
992155741Sdavidxusleepq_abort(struct thread *td, int intrval)
993126324Sjhb{
994170294Sjeff	struct sleepqueue *sq;
995126324Sjhb	void *wchan;
996126324Sjhb
997170294Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
998126324Sjhb	MPASS(TD_ON_SLEEPQ(td));
999126324Sjhb	MPASS(td->td_flags & TDF_SINTR);
1000155741Sdavidxu	MPASS(intrval == EINTR || intrval == ERESTART);
1001126324Sjhb
1002126324Sjhb	/*
1003126324Sjhb	 * If the TDF_TIMEOUT flag is set, just leave. A
1004126324Sjhb	 * timeout is scheduled anyhow.
1005126324Sjhb	 */
1006126324Sjhb	if (td->td_flags & TDF_TIMEOUT)
1007181334Sjhb		return (0);
1008126324Sjhb
1009129241Sbde	CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
1010173600Sjulian	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
1011170294Sjeff	td->td_intrval = intrval;
1012170294Sjeff	td->td_flags |= TDF_SLEEPABORT;
1013170294Sjeff	/*
1014170294Sjeff	 * If the thread has not slept yet it will find the signal in
1015170294Sjeff	 * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
1016170294Sjeff	 * we have to do it here.
1017170294Sjeff	 */
1018170294Sjeff	if (!TD_IS_SLEEPING(td))
1019181334Sjhb		return (0);
1020126324Sjhb	wchan = td->td_wchan;
1021170294Sjeff	MPASS(wchan != NULL);
1022170294Sjeff	sq = sleepq_lookup(wchan);
1023170294Sjeff	MPASS(sq != NULL);
1024170294Sjeff
1025170294Sjeff	/* Thread is asleep on sleep queue sq, so wake it up. */
1026181334Sjhb	return (sleepq_resume_thread(sq, td, 0));
1027126324Sjhb}
1028154936Sjhb
1029177372Sjeff#ifdef SLEEPQUEUE_PROFILING
/* Number of entries in the static sleepq_profent[] pool below. */
1030177372Sjeff#define	SLEEPQ_PROF_LOCATIONS	1024
/* Length handed to sbuf_new_for_sysctl() by dump_sleepq_prof_stats(). */
1031212750Smdf#define	SLEEPQ_SBUFSIZE		512
/*
 * One profiling record: a wait message pointer and the number of times
 * sleepq_profile() has counted it.  A record is linked either on
 * sleepq_prof_free or on one bucket of sleepq_hash[] through sp_link.
 */
1032177372Sjeffstruct sleepq_prof {
1033177372Sjeff	LIST_ENTRY(sleepq_prof) sp_link;
1034177372Sjeff	const char	*sp_wmesg;
1035177372Sjeff	long		sp_count;
1036177372Sjeff};
1037177372Sjeff
1038177372SjeffLIST_HEAD(sqphead, sleepq_prof);
1039177372Sjeff
/* Records that are not currently assigned to any wait message. */
1040177372Sjeffstruct sqphead sleepq_prof_free;
/* Buckets of in-use records, indexed by SC_HASH() of the wmesg pointer. */
1041177372Sjeffstruct sqphead sleepq_hash[SC_TABLESIZE];
/* Backing storage for every record; handed out by sleepq_prof_reset(). */
1042177372Sjeffstatic struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
/* Spin mutex taken around updates to the lists above and prof_enabled. */
1043177372Sjeffstatic struct mtx sleepq_prof_lock;
1044177372SjeffMTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
1045177372Sjeff
1046177372Sjeffstatic void
1047177372Sjeffsleepq_profile(const char *wmesg)
1048177372Sjeff{
1049177372Sjeff	struct sleepq_prof *sp;
1050177372Sjeff
1051177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1052177372Sjeff	if (prof_enabled == 0)
1053177372Sjeff		goto unlock;
1054177372Sjeff	LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
1055177372Sjeff		if (sp->sp_wmesg == wmesg)
1056177372Sjeff			goto done;
1057177372Sjeff	sp = LIST_FIRST(&sleepq_prof_free);
1058177372Sjeff	if (sp == NULL)
1059177372Sjeff		goto unlock;
1060177372Sjeff	sp->sp_wmesg = wmesg;
1061177372Sjeff	LIST_REMOVE(sp, sp_link);
1062177372Sjeff	LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
1063177372Sjeffdone:
1064177372Sjeff	sp->sp_count++;
1065177372Sjeffunlock:
1066177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1067177372Sjeff	return;
1068177372Sjeff}
1069177372Sjeff
1070177372Sjeffstatic void
1071177372Sjeffsleepq_prof_reset(void)
1072177372Sjeff{
1073177372Sjeff	struct sleepq_prof *sp;
1074177372Sjeff	int enabled;
1075177372Sjeff	int i;
1076177372Sjeff
1077177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1078177372Sjeff	enabled = prof_enabled;
1079177372Sjeff	prof_enabled = 0;
1080177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++)
1081177372Sjeff		LIST_INIT(&sleepq_hash[i]);
1082177372Sjeff	LIST_INIT(&sleepq_prof_free);
1083177372Sjeff	for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
1084177372Sjeff		sp = &sleepq_profent[i];
1085177372Sjeff		sp->sp_wmesg = NULL;
1086177372Sjeff		sp->sp_count = 0;
1087177372Sjeff		LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
1088177372Sjeff	}
1089177372Sjeff	prof_enabled = enabled;
1090177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1091177372Sjeff}
1092177372Sjeff
1093177372Sjeffstatic int
1094177372Sjeffenable_sleepq_prof(SYSCTL_HANDLER_ARGS)
1095177372Sjeff{
1096177372Sjeff	int error, v;
1097177372Sjeff
1098177372Sjeff	v = prof_enabled;
1099177372Sjeff	error = sysctl_handle_int(oidp, &v, v, req);
1100177372Sjeff	if (error)
1101177372Sjeff		return (error);
1102177372Sjeff	if (req->newptr == NULL)
1103177372Sjeff		return (error);
1104177372Sjeff	if (v == prof_enabled)
1105177372Sjeff		return (0);
1106177372Sjeff	if (v == 1)
1107177372Sjeff		sleepq_prof_reset();
1108177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1109177372Sjeff	prof_enabled = !!v;
1110177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1111177372Sjeff
1112177372Sjeff	return (0);
1113177372Sjeff}
1114177372Sjeff
1115177372Sjeffstatic int
1116177372Sjeffreset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1117177372Sjeff{
1118177372Sjeff	int error, v;
1119177372Sjeff
1120177372Sjeff	v = 0;
1121177372Sjeff	error = sysctl_handle_int(oidp, &v, 0, req);
1122177372Sjeff	if (error)
1123177372Sjeff		return (error);
1124177372Sjeff	if (req->newptr == NULL)
1125177372Sjeff		return (error);
1126177372Sjeff	if (v == 0)
1127177372Sjeff		return (0);
1128177372Sjeff	sleepq_prof_reset();
1129177372Sjeff
1130177372Sjeff	return (0);
1131177372Sjeff}
1132177372Sjeff
1133177372Sjeffstatic int
1134177372Sjeffdump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
1135177372Sjeff{
1136177372Sjeff	struct sleepq_prof *sp;
1137177372Sjeff	struct sbuf *sb;
1138177372Sjeff	int enabled;
1139177372Sjeff	int error;
1140177372Sjeff	int i;
1141177372Sjeff
1142217916Smdf	error = sysctl_wire_old_buffer(req, 0);
1143217916Smdf	if (error != 0)
1144217916Smdf		return (error);
1145212750Smdf	sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
1146177372Sjeff	sbuf_printf(sb, "\nwmesg\tcount\n");
1147177372Sjeff	enabled = prof_enabled;
1148177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1149177372Sjeff	prof_enabled = 0;
1150177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1151177372Sjeff	for (i = 0; i < SC_TABLESIZE; i++) {
1152177372Sjeff		LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
1153177372Sjeff			sbuf_printf(sb, "%s\t%ld\n",
1154177372Sjeff			    sp->sp_wmesg, sp->sp_count);
1155177372Sjeff		}
1156177372Sjeff	}
1157177372Sjeff	mtx_lock_spin(&sleepq_prof_lock);
1158177372Sjeff	prof_enabled = enabled;
1159177372Sjeff	mtx_unlock_spin(&sleepq_prof_lock);
1160177372Sjeff
1161212750Smdf	error = sbuf_finish(sb);
1162177372Sjeff	sbuf_delete(sb);
1163177372Sjeff	return (error);
1164177372Sjeff}
1165177372Sjeff
/*
 * Sysctl entries for sleep queue profiling, all registered under the
 * _debug_sleepq parent:
 *   stats  - read-only string produced by dump_sleepq_prof_stats()
 *   reset  - read/write int handled by reset_sleepq_prof_stats()
 *   enable - read/write int handled by enable_sleepq_prof()
 */
1166177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
1167177372Sjeff    NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
1168177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
1169177372Sjeff    NULL, 0, reset_sleepq_prof_stats, "I",
1170177372Sjeff    "Reset sleepqueue profiling statistics");
1171177372SjeffSYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
1172177372Sjeff    NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
1173177372Sjeff#endif
1174177372Sjeff
1175154936Sjhb#ifdef DDB
1176154936SjhbDB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
1177154936Sjhb{
1178154936Sjhb	struct sleepqueue_chain *sc;
1179154936Sjhb	struct sleepqueue *sq;
1180154944Simp#ifdef INVARIANTS
1181154936Sjhb	struct lock_object *lock;
1182154944Simp#endif
1183154936Sjhb	struct thread *td;
1184154936Sjhb	void *wchan;
1185154936Sjhb	int i;
1186154936Sjhb
1187154936Sjhb	if (!have_addr)
1188154936Sjhb		return;
1189154936Sjhb
1190154936Sjhb	/*
1191154936Sjhb	 * First, see if there is an active sleep queue for the wait channel
1192154936Sjhb	 * indicated by the address.
1193154936Sjhb	 */
1194154936Sjhb	wchan = (void *)addr;
1195154936Sjhb	sc = SC_LOOKUP(wchan);
1196154936Sjhb	LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
1197154936Sjhb		if (sq->sq_wchan == wchan)
1198154936Sjhb			goto found;
1199154936Sjhb
1200154936Sjhb	/*
1201154936Sjhb	 * Second, see if there is an active sleep queue at the address
1202154936Sjhb	 * indicated.
1203154936Sjhb	 */
1204154936Sjhb	for (i = 0; i < SC_TABLESIZE; i++)
1205154936Sjhb		LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
1206154936Sjhb			if (sq == (struct sleepqueue *)addr)
1207154936Sjhb				goto found;
1208154936Sjhb		}
1209154936Sjhb
1210154936Sjhb	db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
1211154936Sjhb	return;
1212154936Sjhbfound:
1213154936Sjhb	db_printf("Wait channel: %p\n", sq->sq_wchan);
1214201879Sattilio	db_printf("Queue type: %d\n", sq->sq_type);
1215154936Sjhb#ifdef INVARIANTS
1216154936Sjhb	if (sq->sq_lock) {
1217164325Spjd		lock = sq->sq_lock;
1218154936Sjhb		db_printf("Associated Interlock: %p - (%s) %s\n", lock,
1219154936Sjhb		    LOCK_CLASS(lock)->lc_name, lock->lo_name);
1220154936Sjhb	}
1221154936Sjhb#endif
1222154936Sjhb	db_printf("Blocked threads:\n");
1223165272Skmacy	for (i = 0; i < NR_SLEEPQS; i++) {
1224165272Skmacy		db_printf("\nQueue[%d]:\n", i);
1225165272Skmacy		if (TAILQ_EMPTY(&sq->sq_blocked[i]))
1226165272Skmacy			db_printf("\tempty\n");
1227165272Skmacy		else
1228165272Skmacy			TAILQ_FOREACH(td, &sq->sq_blocked[0],
1229165272Skmacy				      td_slpq) {
1230165272Skmacy				db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
1231165272Skmacy					  td->td_tid, td->td_proc->p_pid,
1232180930Sjhb					  td->td_name);
1233165272Skmacy			}
1234200447Sattilio		db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
1235165272Skmacy	}
1236154936Sjhb}
1237157823Sjhb
1238157823Sjhb/* Alias 'show sleepqueue' to 'show sleepq'. */
1239183054SsamDB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
1240154936Sjhb#endif
1241