/* subr_sleepqueue.c revision 152221 */
1139804Simp/*- 2126324Sjhb * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org> 3126324Sjhb * All rights reserved. 4126324Sjhb * 5126324Sjhb * Redistribution and use in source and binary forms, with or without 6126324Sjhb * modification, are permitted provided that the following conditions 7126324Sjhb * are met: 8126324Sjhb * 1. Redistributions of source code must retain the above copyright 9126324Sjhb * notice, this list of conditions and the following disclaimer. 10126324Sjhb * 2. Redistributions in binary form must reproduce the above copyright 11126324Sjhb * notice, this list of conditions and the following disclaimer in the 12126324Sjhb * documentation and/or other materials provided with the distribution. 13126324Sjhb * 3. Neither the name of the author nor the names of any co-contributors 14126324Sjhb * may be used to endorse or promote products derived from this software 15126324Sjhb * without specific prior written permission. 16126324Sjhb * 17126324Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18126324Sjhb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19126324Sjhb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20126324Sjhb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21126324Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22126324Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23126324Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24126324Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25126324Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26126324Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27126324Sjhb * SUCH DAMAGE. 
28126324Sjhb */ 29126324Sjhb 30126324Sjhb/* 31126324Sjhb * Implementation of sleep queues used to hold queue of threads blocked on 32126324Sjhb * a wait channel. Sleep queues different from turnstiles in that wait 33126324Sjhb * channels are not owned by anyone, so there is no priority propagation. 34126324Sjhb * Sleep queues can also provide a timeout and can also be interrupted by 35126324Sjhb * signals. That said, there are several similarities between the turnstile 36126324Sjhb * and sleep queue implementations. (Note: turnstiles were implemented 37126324Sjhb * first.) For example, both use a hash table of the same size where each 38126324Sjhb * bucket is referred to as a "chain" that contains both a spin lock and 39126324Sjhb * a linked list of queues. An individual queue is located by using a hash 40126324Sjhb * to pick a chain, locking the chain, and then walking the chain searching 41126324Sjhb * for the queue. This means that a wait channel object does not need to 42126324Sjhb * embed it's queue head just as locks do not embed their turnstile queue 43126324Sjhb * head. Threads also carry around a sleep queue that they lend to the 44126324Sjhb * wait channel when blocking. Just as in turnstiles, the queue includes 45126324Sjhb * a free list of the sleep queues of other threads blocked on the same 46126324Sjhb * wait channel in the case of multiple waiters. 47126324Sjhb * 48126324Sjhb * Some additional functionality provided by sleep queues include the 49126324Sjhb * ability to set a timeout. The timeout is managed using a per-thread 50126324Sjhb * callout that resumes a thread if it is asleep. A thread may also 51126324Sjhb * catch signals while it is asleep (aka an interruptible sleep). The 52126324Sjhb * signal code uses sleepq_abort() to interrupt a sleeping thread. Finally, 53126324Sjhb * sleep queues also provide some extra assertions. One is not allowed to 54126324Sjhb * mix the sleep/wakeup and cv APIs for a given wait channel. 
Also, one 55126324Sjhb * must consistently use the same lock to synchronize with a wait channel, 56126324Sjhb * though this check is currently only a warning for sleep/wakeup due to 57126324Sjhb * pre-existing abuse of that API. The same lock must also be held when 58126324Sjhb * awakening threads, though that is currently only enforced for condition 59126324Sjhb * variables. 60126324Sjhb */ 61126324Sjhb 62131259Sjhb#include "opt_sleepqueue_profiling.h" 63131259Sjhb 64126324Sjhb#include <sys/cdefs.h> 65126324Sjhb__FBSDID("$FreeBSD: head/sys/kern/subr_sleepqueue.c 152221 2005-11-09 07:28:52Z imp $"); 66126324Sjhb 67126324Sjhb#include <sys/param.h> 68126324Sjhb#include <sys/systm.h> 69126324Sjhb#include <sys/lock.h> 70126324Sjhb#include <sys/kernel.h> 71126324Sjhb#include <sys/ktr.h> 72126324Sjhb#include <sys/malloc.h> 73126324Sjhb#include <sys/mutex.h> 74126324Sjhb#include <sys/proc.h> 75126324Sjhb#include <sys/sched.h> 76126324Sjhb#include <sys/signalvar.h> 77126324Sjhb#include <sys/sleepqueue.h> 78131259Sjhb#include <sys/sysctl.h> 79126324Sjhb 80126324Sjhb/* 81126324Sjhb * Constants for the hash table of sleep queue chains. These constants are 82126324Sjhb * the same ones that 4BSD (and possibly earlier versions of BSD) used. 83126324Sjhb * Basically, we ignore the lower 8 bits of the address since most wait 84126324Sjhb * channel pointers are aligned and only look at the next 7 bits for the 85126324Sjhb * hash. SC_TABLESIZE must be a power of two for SC_MASK to work properly. 86126324Sjhb */ 87126324Sjhb#define SC_TABLESIZE 128 /* Must be power of 2. */ 88126324Sjhb#define SC_MASK (SC_TABLESIZE - 1) 89126324Sjhb#define SC_SHIFT 8 90126324Sjhb#define SC_HASH(wc) (((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK) 91126324Sjhb#define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)] 92126324Sjhb 93126324Sjhb/* 94126324Sjhb * There two different lists of sleep queues. Both lists are connected 95126324Sjhb * via the sq_hash entries. 
The first list is the sleep queue chain list 96126324Sjhb * that a sleep queue is on when it is attached to a wait channel. The 97126324Sjhb * second list is the free list hung off of a sleep queue that is attached 98126324Sjhb * to a wait channel. 99126324Sjhb * 100126324Sjhb * Each sleep queue also contains the wait channel it is attached to, the 101126324Sjhb * list of threads blocked on that wait channel, flags specific to the 102126324Sjhb * wait channel, and the lock used to synchronize with a wait channel. 103126324Sjhb * The flags are used to catch mismatches between the various consumers 104126324Sjhb * of the sleep queue API (e.g. sleep/wakeup and condition variables). 105126324Sjhb * The lock pointer is only used when invariants are enabled for various 106126324Sjhb * debugging checks. 107126324Sjhb * 108126324Sjhb * Locking key: 109126324Sjhb * c - sleep queue chain lock 110126324Sjhb */ 111126324Sjhbstruct sleepqueue { 112126324Sjhb TAILQ_HEAD(, thread) sq_blocked; /* (c) Blocked threads. */ 113126324Sjhb LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ 114126324Sjhb LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ 115126324Sjhb void *sq_wchan; /* (c) Wait channel. */ 116136445Sjhb#ifdef INVARIANTS 117134013Sjhb int sq_type; /* (c) Queue type. */ 118126324Sjhb struct mtx *sq_lock; /* (c) Associated lock. */ 119126324Sjhb#endif 120126324Sjhb}; 121126324Sjhb 122126324Sjhbstruct sleepqueue_chain { 123126324Sjhb LIST_HEAD(, sleepqueue) sc_queues; /* List of sleep queues. */ 124126324Sjhb struct mtx sc_lock; /* Spin lock for this chain. */ 125131259Sjhb#ifdef SLEEPQUEUE_PROFILING 126131259Sjhb u_int sc_depth; /* Length of sc_queues. */ 127131259Sjhb u_int sc_max_depth; /* Max length of sc_queues. 
*/ 128131259Sjhb#endif 129126324Sjhb}; 130126324Sjhb 131131259Sjhb#ifdef SLEEPQUEUE_PROFILING 132131259Sjhbu_int sleepq_max_depth; 133131259SjhbSYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling"); 134131259SjhbSYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0, 135131259Sjhb "sleepq chain stats"); 136131259SjhbSYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth, 137131259Sjhb 0, "maxmimum depth achieved of a single chain"); 138131259Sjhb#endif 139126324Sjhbstatic struct sleepqueue_chain sleepq_chains[SC_TABLESIZE]; 140126324Sjhb 141151897Srwatsonstatic MALLOC_DEFINE(M_SLEEPQUEUE, "sleepqueue", "sleep queues"); 142126324Sjhb 143126324Sjhb/* 144126324Sjhb * Prototypes for non-exported routines. 145126324Sjhb */ 146126324Sjhbstatic int sleepq_check_timeout(void); 147126324Sjhbstatic void sleepq_switch(void *wchan); 148126324Sjhbstatic void sleepq_timeout(void *arg); 149145056Sjhbstatic void sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri); 150126324Sjhb 151126324Sjhb/* 152126324Sjhb * Early initialization of sleep queues that is called from the sleepinit() 153126324Sjhb * SYSINIT. 
154126324Sjhb */ 155126324Sjhbvoid 156126324Sjhbinit_sleepqueues(void) 157126324Sjhb{ 158131259Sjhb#ifdef SLEEPQUEUE_PROFILING 159131259Sjhb struct sysctl_oid *chain_oid; 160131259Sjhb char chain_name[10]; 161131259Sjhb#endif 162126324Sjhb int i; 163126324Sjhb 164126324Sjhb for (i = 0; i < SC_TABLESIZE; i++) { 165126324Sjhb LIST_INIT(&sleepq_chains[i].sc_queues); 166126324Sjhb mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL, 167126324Sjhb MTX_SPIN); 168131259Sjhb#ifdef SLEEPQUEUE_PROFILING 169131259Sjhb snprintf(chain_name, sizeof(chain_name), "%d", i); 170131259Sjhb chain_oid = SYSCTL_ADD_NODE(NULL, 171131259Sjhb SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO, 172131259Sjhb chain_name, CTLFLAG_RD, NULL, "sleepq chain stats"); 173131259Sjhb SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 174131259Sjhb "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL); 175131259Sjhb SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, 176131259Sjhb "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0, 177131259Sjhb NULL); 178131259Sjhb#endif 179126324Sjhb } 180126324Sjhb thread0.td_sleepqueue = sleepq_alloc(); 181126324Sjhb} 182126324Sjhb 183126324Sjhb/* 184126324Sjhb * Malloc and initialize a new sleep queue for a new thread. 185126324Sjhb */ 186126324Sjhbstruct sleepqueue * 187126324Sjhbsleepq_alloc(void) 188126324Sjhb{ 189126324Sjhb struct sleepqueue *sq; 190126324Sjhb 191126324Sjhb sq = malloc(sizeof(struct sleepqueue), M_SLEEPQUEUE, M_WAITOK | M_ZERO); 192126324Sjhb TAILQ_INIT(&sq->sq_blocked); 193126324Sjhb LIST_INIT(&sq->sq_free); 194126324Sjhb return (sq); 195126324Sjhb} 196126324Sjhb 197126324Sjhb/* 198126324Sjhb * Free a sleep queue when a thread is destroyed. 
199126324Sjhb */ 200126324Sjhbvoid 201126324Sjhbsleepq_free(struct sleepqueue *sq) 202126324Sjhb{ 203126324Sjhb 204126324Sjhb MPASS(sq != NULL); 205126324Sjhb MPASS(TAILQ_EMPTY(&sq->sq_blocked)); 206126324Sjhb free(sq, M_SLEEPQUEUE); 207126324Sjhb} 208126324Sjhb 209126324Sjhb/* 210136445Sjhb * Lock the sleep queue chain associated with the specified wait channel. 211136445Sjhb */ 212136445Sjhbvoid 213136445Sjhbsleepq_lock(void *wchan) 214136445Sjhb{ 215136445Sjhb struct sleepqueue_chain *sc; 216136445Sjhb 217136445Sjhb sc = SC_LOOKUP(wchan); 218136445Sjhb mtx_lock_spin(&sc->sc_lock); 219136445Sjhb} 220136445Sjhb 221136445Sjhb/* 222126324Sjhb * Look up the sleep queue associated with a given wait channel in the hash 223136445Sjhb * table locking the associated sleep queue chain. If no queue is found in 224136445Sjhb * the table, NULL is returned. 225126324Sjhb */ 226126324Sjhbstruct sleepqueue * 227126324Sjhbsleepq_lookup(void *wchan) 228126324Sjhb{ 229126324Sjhb struct sleepqueue_chain *sc; 230126324Sjhb struct sleepqueue *sq; 231126324Sjhb 232126324Sjhb KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); 233126324Sjhb sc = SC_LOOKUP(wchan); 234136445Sjhb mtx_assert(&sc->sc_lock, MA_OWNED); 235126324Sjhb LIST_FOREACH(sq, &sc->sc_queues, sq_hash) 236126324Sjhb if (sq->sq_wchan == wchan) 237126324Sjhb return (sq); 238126324Sjhb return (NULL); 239126324Sjhb} 240126324Sjhb 241126324Sjhb/* 242126324Sjhb * Unlock the sleep queue chain associated with a given wait channel. 243126324Sjhb */ 244126324Sjhbvoid 245126324Sjhbsleepq_release(void *wchan) 246126324Sjhb{ 247126324Sjhb struct sleepqueue_chain *sc; 248126324Sjhb 249126324Sjhb sc = SC_LOOKUP(wchan); 250126324Sjhb mtx_unlock_spin(&sc->sc_lock); 251126324Sjhb} 252126324Sjhb 253126324Sjhb/* 254137277Sjhb * Places the current thread on the sleep queue for the specified wait 255126324Sjhb * channel. 
If INVARIANTS is enabled, then it associates the passed in 256126324Sjhb * lock with the sleepq to make sure it is held when that sleep queue is 257126324Sjhb * woken up. 258126324Sjhb */ 259126324Sjhbvoid 260136445Sjhbsleepq_add(void *wchan, struct mtx *lock, const char *wmesg, int flags) 261126324Sjhb{ 262126324Sjhb struct sleepqueue_chain *sc; 263136445Sjhb struct sleepqueue *sq; 264137277Sjhb struct thread *td; 265126324Sjhb 266126324Sjhb td = curthread; 267126324Sjhb sc = SC_LOOKUP(wchan); 268126324Sjhb mtx_assert(&sc->sc_lock, MA_OWNED); 269126324Sjhb MPASS(td->td_sleepqueue != NULL); 270126324Sjhb MPASS(wchan != NULL); 271126324Sjhb 272150177Sjhb /* If this thread is not allowed to sleep, die a horrible death. */ 273150177Sjhb KASSERT(!(td->td_pflags & TDP_NOSLEEPING), 274152221Simp ("Trying sleep, but thread marked as sleeping prohibited")); 275150177Sjhb 276136445Sjhb /* Look up the sleep queue associated with the wait channel 'wchan'. */ 277136445Sjhb sq = sleepq_lookup(wchan); 278136445Sjhb 279136445Sjhb /* 280136445Sjhb * If the wait channel does not already have a sleep queue, use 281136445Sjhb * this thread's sleep queue. Otherwise, insert the current thread 282136445Sjhb * into the sleep queue already in use by this wait channel. 
283136445Sjhb */ 284126324Sjhb if (sq == NULL) { 285131259Sjhb#ifdef SLEEPQUEUE_PROFILING 286131259Sjhb sc->sc_depth++; 287131259Sjhb if (sc->sc_depth > sc->sc_max_depth) { 288131259Sjhb sc->sc_max_depth = sc->sc_depth; 289131259Sjhb if (sc->sc_max_depth > sleepq_max_depth) 290131259Sjhb sleepq_max_depth = sc->sc_max_depth; 291131259Sjhb } 292131259Sjhb#endif 293126324Sjhb sq = td->td_sleepqueue; 294126324Sjhb LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); 295126324Sjhb KASSERT(TAILQ_EMPTY(&sq->sq_blocked), 296126324Sjhb ("thread's sleep queue has a non-empty queue")); 297126324Sjhb KASSERT(LIST_EMPTY(&sq->sq_free), 298126324Sjhb ("thread's sleep queue has a non-empty free list")); 299126324Sjhb KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); 300126324Sjhb sq->sq_wchan = wchan; 301126324Sjhb#ifdef INVARIANTS 302126324Sjhb sq->sq_lock = lock; 303136445Sjhb sq->sq_type = flags & SLEEPQ_TYPE; 304126324Sjhb#endif 305126324Sjhb } else { 306126324Sjhb MPASS(wchan == sq->sq_wchan); 307126488Sjhb MPASS(lock == sq->sq_lock); 308136445Sjhb MPASS((flags & SLEEPQ_TYPE) == sq->sq_type); 309126324Sjhb LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash); 310126324Sjhb } 311137277Sjhb TAILQ_INSERT_TAIL(&sq->sq_blocked, td, td_slpq); 312126324Sjhb td->td_sleepqueue = NULL; 313126324Sjhb mtx_lock_spin(&sched_lock); 314126324Sjhb td->td_wchan = wchan; 315126324Sjhb td->td_wmesg = wmesg; 316134013Sjhb if (flags & SLEEPQ_INTERRUPTIBLE) 317134013Sjhb td->td_flags |= TDF_SINTR; 318126324Sjhb mtx_unlock_spin(&sched_lock); 319126324Sjhb} 320126324Sjhb 321126324Sjhb/* 322126324Sjhb * Sets a timeout that will remove the current thread from the specified 323126324Sjhb * sleep queue after timo ticks if the thread has not already been awakened. 
324126324Sjhb */ 325126324Sjhbvoid 326126885Sjhbsleepq_set_timeout(void *wchan, int timo) 327126324Sjhb{ 328126324Sjhb struct sleepqueue_chain *sc; 329126324Sjhb struct thread *td; 330126324Sjhb 331126324Sjhb td = curthread; 332126324Sjhb sc = SC_LOOKUP(wchan); 333126324Sjhb mtx_assert(&sc->sc_lock, MA_OWNED); 334126324Sjhb MPASS(TD_ON_SLEEPQ(td)); 335126324Sjhb MPASS(td->td_sleepqueue == NULL); 336126324Sjhb MPASS(wchan != NULL); 337126324Sjhb callout_reset(&td->td_slpcallout, timo, sleepq_timeout, td); 338126324Sjhb} 339126324Sjhb 340126324Sjhb/* 341126324Sjhb * Marks the pending sleep of the current thread as interruptible and 342126324Sjhb * makes an initial check for pending signals before putting a thread 343126324Sjhb * to sleep. 344126324Sjhb */ 345126324Sjhbint 346126324Sjhbsleepq_catch_signals(void *wchan) 347126324Sjhb{ 348126324Sjhb struct sleepqueue_chain *sc; 349126324Sjhb struct sleepqueue *sq; 350126324Sjhb struct thread *td; 351126324Sjhb struct proc *p; 352126324Sjhb int sig; 353126324Sjhb 354126324Sjhb td = curthread; 355126324Sjhb p = td->td_proc; 356126324Sjhb sc = SC_LOOKUP(wchan); 357126324Sjhb mtx_assert(&sc->sc_lock, MA_OWNED); 358126324Sjhb MPASS(td->td_sleepqueue == NULL); 359126324Sjhb MPASS(wchan != NULL); 360129241Sbde CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", 361129241Sbde (void *)td, (long)p->p_pid, p->p_comm); 362126324Sjhb 363126324Sjhb /* Mark thread as being in an interruptible sleep. */ 364134013Sjhb MPASS(td->td_flags & TDF_SINTR); 365126324Sjhb MPASS(TD_ON_SLEEPQ(td)); 366126324Sjhb sleepq_release(wchan); 367126324Sjhb 368126324Sjhb /* See if there are any pending signals for this thread. 
*/ 369126324Sjhb PROC_LOCK(p); 370126324Sjhb mtx_lock(&p->p_sigacts->ps_mtx); 371126324Sjhb sig = cursig(td); 372126324Sjhb mtx_unlock(&p->p_sigacts->ps_mtx); 373126324Sjhb if (sig == 0 && thread_suspend_check(1)) 374126324Sjhb sig = SIGSTOP; 375126324Sjhb PROC_UNLOCK(p); 376126324Sjhb 377126324Sjhb /* 378126324Sjhb * If there were pending signals and this thread is still on 379134013Sjhb * the sleep queue, remove it from the sleep queue. If the 380134013Sjhb * thread was removed from the sleep queue while we were blocked 381134013Sjhb * above, then clear TDF_SINTR before returning. 382126324Sjhb */ 383136445Sjhb sleepq_lock(wchan); 384126324Sjhb sq = sleepq_lookup(wchan); 385126324Sjhb mtx_lock_spin(&sched_lock); 386146687Sdavidxu if (TD_ON_SLEEPQ(td) && sig != 0) 387145056Sjhb sleepq_resume_thread(sq, td, -1); 388145056Sjhb else if (!TD_ON_SLEEPQ(td) && sig == 0) 389145056Sjhb td->td_flags &= ~TDF_SINTR; 390145056Sjhb mtx_unlock_spin(&sched_lock); 391126324Sjhb return (sig); 392126324Sjhb} 393126324Sjhb 394126324Sjhb/* 395126324Sjhb * Switches to another thread if we are still asleep on a sleep queue and 396137277Sjhb * drop the lock on the sleep queue chain. Returns with sched_lock held. 397126324Sjhb */ 398126324Sjhbstatic void 399126324Sjhbsleepq_switch(void *wchan) 400126324Sjhb{ 401126324Sjhb struct sleepqueue_chain *sc; 402126324Sjhb struct thread *td; 403126324Sjhb 404126324Sjhb td = curthread; 405126324Sjhb sc = SC_LOOKUP(wchan); 406126324Sjhb mtx_assert(&sc->sc_lock, MA_OWNED); 407126324Sjhb 408126324Sjhb /* 409126324Sjhb * If we have a sleep queue, then we've already been woken up, so 410126324Sjhb * just return. 411126324Sjhb */ 412126324Sjhb if (td->td_sleepqueue != NULL) { 413126324Sjhb MPASS(!TD_ON_SLEEPQ(td)); 414126324Sjhb mtx_unlock_spin(&sc->sc_lock); 415126324Sjhb mtx_lock_spin(&sched_lock); 416126324Sjhb return; 417126324Sjhb } 418126324Sjhb 419126324Sjhb /* 420126324Sjhb * Otherwise, actually go to sleep. 
421126324Sjhb */ 422126324Sjhb mtx_lock_spin(&sched_lock); 423126324Sjhb mtx_unlock_spin(&sc->sc_lock); 424126324Sjhb 425126324Sjhb sched_sleep(td); 426126324Sjhb TD_SET_SLEEPING(td); 427131473Sjhb mi_switch(SW_VOL, NULL); 428126324Sjhb KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING")); 429129241Sbde CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)", 430129241Sbde (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm); 431126324Sjhb} 432126324Sjhb 433126324Sjhb/* 434126324Sjhb * Check to see if we timed out. 435126324Sjhb */ 436126324Sjhbstatic int 437126324Sjhbsleepq_check_timeout(void) 438126324Sjhb{ 439126324Sjhb struct thread *td; 440126324Sjhb 441126324Sjhb mtx_assert(&sched_lock, MA_OWNED); 442126324Sjhb td = curthread; 443126324Sjhb 444126324Sjhb /* 445126324Sjhb * If TDF_TIMEOUT is set, we timed out. 446126324Sjhb */ 447126324Sjhb if (td->td_flags & TDF_TIMEOUT) { 448126324Sjhb td->td_flags &= ~TDF_TIMEOUT; 449126324Sjhb return (EWOULDBLOCK); 450126324Sjhb } 451126324Sjhb 452126324Sjhb /* 453126324Sjhb * If TDF_TIMOFAIL is set, the timeout ran after we had 454126324Sjhb * already been woken up. 455126324Sjhb */ 456126324Sjhb if (td->td_flags & TDF_TIMOFAIL) 457126324Sjhb td->td_flags &= ~TDF_TIMOFAIL; 458126324Sjhb 459126324Sjhb /* 460126324Sjhb * If callout_stop() fails, then the timeout is running on 461126324Sjhb * another CPU, so synchronize with it to avoid having it 462126324Sjhb * accidentally wake up a subsequent sleep. 463126324Sjhb */ 464126324Sjhb else if (callout_stop(&td->td_slpcallout) == 0) { 465126324Sjhb td->td_flags |= TDF_TIMEOUT; 466126324Sjhb TD_SET_SLEEPING(td); 467131473Sjhb mi_switch(SW_INVOL, NULL); 468126324Sjhb } 469126324Sjhb return (0); 470126324Sjhb} 471126324Sjhb 472126324Sjhb/* 473126324Sjhb * Check to see if we were awoken by a signal. 
474126324Sjhb */ 475126324Sjhbstatic int 476126324Sjhbsleepq_check_signals(void) 477126324Sjhb{ 478126324Sjhb struct thread *td; 479126324Sjhb 480126324Sjhb mtx_assert(&sched_lock, MA_OWNED); 481126324Sjhb td = curthread; 482126324Sjhb 483134013Sjhb /* 484134013Sjhb * If TDF_SINTR is clear, then we were awakened while executing 485134013Sjhb * sleepq_catch_signals(). 486134013Sjhb */ 487134013Sjhb if (!(td->td_flags & TDF_SINTR)) 488134013Sjhb return (0); 489134013Sjhb 490126324Sjhb /* We are no longer in an interruptible sleep. */ 491126324Sjhb td->td_flags &= ~TDF_SINTR; 492126324Sjhb 493126324Sjhb if (td->td_flags & TDF_INTERRUPT) 494126324Sjhb return (td->td_intrval); 495126324Sjhb return (0); 496126324Sjhb} 497126324Sjhb 498126324Sjhb/* 499126324Sjhb * If we were in an interruptible sleep and we weren't interrupted and 500126324Sjhb * didn't timeout, check to see if there are any pending signals and 501126324Sjhb * which return value we should use if so. The return value from an 502126324Sjhb * earlier call to sleepq_catch_signals() should be passed in as the 503126324Sjhb * argument. 504126324Sjhb */ 505126324Sjhbint 506126324Sjhbsleepq_calc_signal_retval(int sig) 507126324Sjhb{ 508126324Sjhb struct thread *td; 509126324Sjhb struct proc *p; 510126324Sjhb int rval; 511126324Sjhb 512126324Sjhb td = curthread; 513126324Sjhb p = td->td_proc; 514126324Sjhb PROC_LOCK(p); 515126324Sjhb mtx_lock(&p->p_sigacts->ps_mtx); 516126324Sjhb /* XXX: Should we always be calling cursig()? 
*/ 517126324Sjhb if (sig == 0) 518126324Sjhb sig = cursig(td); 519126324Sjhb if (sig != 0) { 520126324Sjhb if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 521126324Sjhb rval = EINTR; 522126324Sjhb else 523126324Sjhb rval = ERESTART; 524126324Sjhb } else 525126324Sjhb rval = 0; 526126324Sjhb mtx_unlock(&p->p_sigacts->ps_mtx); 527126324Sjhb PROC_UNLOCK(p); 528126324Sjhb return (rval); 529126324Sjhb} 530126324Sjhb 531126324Sjhb/* 532126324Sjhb * Block the current thread until it is awakened from its sleep queue. 533126324Sjhb */ 534126324Sjhbvoid 535126324Sjhbsleepq_wait(void *wchan) 536126324Sjhb{ 537126324Sjhb 538134013Sjhb MPASS(!(curthread->td_flags & TDF_SINTR)); 539126324Sjhb sleepq_switch(wchan); 540126324Sjhb mtx_unlock_spin(&sched_lock); 541126324Sjhb} 542126324Sjhb 543126324Sjhb/* 544126324Sjhb * Block the current thread until it is awakened from its sleep queue 545126324Sjhb * or it is interrupted by a signal. 546126324Sjhb */ 547126324Sjhbint 548126324Sjhbsleepq_wait_sig(void *wchan) 549126324Sjhb{ 550126324Sjhb int rval; 551126324Sjhb 552126324Sjhb sleepq_switch(wchan); 553126324Sjhb rval = sleepq_check_signals(); 554126324Sjhb mtx_unlock_spin(&sched_lock); 555126324Sjhb return (rval); 556126324Sjhb} 557126324Sjhb 558126324Sjhb/* 559126324Sjhb * Block the current thread until it is awakened from its sleep queue 560126324Sjhb * or it times out while waiting. 561126324Sjhb */ 562126324Sjhbint 563131249Sjhbsleepq_timedwait(void *wchan) 564126324Sjhb{ 565126324Sjhb int rval; 566126324Sjhb 567134013Sjhb MPASS(!(curthread->td_flags & TDF_SINTR)); 568126324Sjhb sleepq_switch(wchan); 569126324Sjhb rval = sleepq_check_timeout(); 570126324Sjhb mtx_unlock_spin(&sched_lock); 571131249Sjhb return (rval); 572126324Sjhb} 573126324Sjhb 574126324Sjhb/* 575126324Sjhb * Block the current thread until it is awakened from its sleep queue, 576126324Sjhb * it is interrupted by a signal, or it times out waiting to be awakened. 
577126324Sjhb */ 578126324Sjhbint 579126324Sjhbsleepq_timedwait_sig(void *wchan, int signal_caught) 580126324Sjhb{ 581126324Sjhb int rvalt, rvals; 582126324Sjhb 583126324Sjhb sleepq_switch(wchan); 584126324Sjhb rvalt = sleepq_check_timeout(); 585126324Sjhb rvals = sleepq_check_signals(); 586126324Sjhb mtx_unlock_spin(&sched_lock); 587126324Sjhb if (signal_caught || rvalt == 0) 588126324Sjhb return (rvals); 589126324Sjhb else 590126324Sjhb return (rvalt); 591126324Sjhb} 592126324Sjhb 593126324Sjhb/* 594145056Sjhb * Removes a thread from a sleep queue and makes it 595145056Sjhb * runnable. 596126324Sjhb */ 597126324Sjhbstatic void 598145056Sjhbsleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri) 599126324Sjhb{ 600126324Sjhb struct sleepqueue_chain *sc; 601126324Sjhb 602126324Sjhb MPASS(td != NULL); 603126324Sjhb MPASS(sq->sq_wchan != NULL); 604126324Sjhb MPASS(td->td_wchan == sq->sq_wchan); 605126324Sjhb sc = SC_LOOKUP(sq->sq_wchan); 606126324Sjhb mtx_assert(&sc->sc_lock, MA_OWNED); 607145056Sjhb mtx_assert(&sched_lock, MA_OWNED); 608126324Sjhb 609126324Sjhb /* Remove the thread from the queue. */ 610126324Sjhb TAILQ_REMOVE(&sq->sq_blocked, td, td_slpq); 611126324Sjhb 612126324Sjhb /* 613126324Sjhb * Get a sleep queue for this thread. If this is the last waiter, 614126324Sjhb * use the queue itself and take it out of the chain, otherwise, 615126324Sjhb * remove a queue from the free list. 
616126324Sjhb */ 617126324Sjhb if (LIST_EMPTY(&sq->sq_free)) { 618126324Sjhb td->td_sleepqueue = sq; 619126324Sjhb#ifdef INVARIANTS 620126324Sjhb sq->sq_wchan = NULL; 621126324Sjhb#endif 622131259Sjhb#ifdef SLEEPQUEUE_PROFILING 623131259Sjhb sc->sc_depth--; 624131259Sjhb#endif 625126324Sjhb } else 626126324Sjhb td->td_sleepqueue = LIST_FIRST(&sq->sq_free); 627126324Sjhb LIST_REMOVE(td->td_sleepqueue, sq_hash); 628126324Sjhb 629129188Sjhb td->td_wmesg = NULL; 630129188Sjhb td->td_wchan = NULL; 631129188Sjhb 632126324Sjhb /* 633129188Sjhb * Note that thread td might not be sleeping if it is running 634129188Sjhb * sleepq_catch_signals() on another CPU or is blocked on 635129188Sjhb * its proc lock to check signals. It doesn't hurt to clear 636129188Sjhb * the sleeping flag if it isn't set though, so we just always 637129188Sjhb * do it. However, we can't assert that it is set. 638126324Sjhb */ 639129241Sbde CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)", 640129241Sbde (void *)td, (long)td->td_proc->p_pid, td->td_proc->p_comm); 641126324Sjhb TD_CLR_SLEEPING(td); 642126324Sjhb 643126324Sjhb /* Adjust priority if requested. */ 644126324Sjhb MPASS(pri == -1 || (pri >= PRI_MIN && pri <= PRI_MAX)); 645126324Sjhb if (pri != -1 && td->td_priority > pri) 646136439Sups sched_prio(td, pri); 647126324Sjhb setrunnable(td); 648126324Sjhb} 649126324Sjhb 650126324Sjhb/* 651126324Sjhb * Find the highest priority thread sleeping on a wait channel and resume it. 
652126324Sjhb */ 653126324Sjhbvoid 654126324Sjhbsleepq_signal(void *wchan, int flags, int pri) 655126324Sjhb{ 656126324Sjhb struct sleepqueue *sq; 657137277Sjhb struct thread *td, *besttd; 658126324Sjhb 659126324Sjhb CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags); 660126324Sjhb KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); 661126324Sjhb sq = sleepq_lookup(wchan); 662126324Sjhb if (sq == NULL) { 663126324Sjhb sleepq_release(wchan); 664126324Sjhb return; 665126324Sjhb } 666134013Sjhb KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), 667126324Sjhb ("%s: mismatch between sleep/wakeup and cv_*", __func__)); 668129188Sjhb 669137277Sjhb /* 670137277Sjhb * Find the highest priority thread on the queue. If there is a 671137277Sjhb * tie, use the thread that first appears in the queue as it has 672137277Sjhb * been sleeping the longest since threads are always added to 673137277Sjhb * the tail of sleep queues. 674137277Sjhb */ 675137277Sjhb besttd = NULL; 676137277Sjhb TAILQ_FOREACH(td, &sq->sq_blocked, td_slpq) { 677137277Sjhb if (besttd == NULL || td->td_priority < besttd->td_priority) 678137277Sjhb besttd = td; 679137277Sjhb } 680137277Sjhb MPASS(besttd != NULL); 681145056Sjhb mtx_lock_spin(&sched_lock); 682145056Sjhb sleepq_resume_thread(sq, besttd, pri); 683145056Sjhb mtx_unlock_spin(&sched_lock); 684126324Sjhb sleepq_release(wchan); 685126324Sjhb} 686126324Sjhb 687126324Sjhb/* 688126324Sjhb * Resume all threads sleeping on a specified wait channel. 
689126324Sjhb */ 690126324Sjhbvoid 691126324Sjhbsleepq_broadcast(void *wchan, int flags, int pri) 692126324Sjhb{ 693126324Sjhb struct sleepqueue *sq; 694126324Sjhb 695126324Sjhb CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); 696126324Sjhb KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); 697126324Sjhb sq = sleepq_lookup(wchan); 698126324Sjhb if (sq == NULL) { 699126324Sjhb sleepq_release(wchan); 700126324Sjhb return; 701126324Sjhb } 702134013Sjhb KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), 703126324Sjhb ("%s: mismatch between sleep/wakeup and cv_*", __func__)); 704129188Sjhb 705145056Sjhb /* Resume all blocked threads on the sleep queue. */ 706145056Sjhb mtx_lock_spin(&sched_lock); 707145056Sjhb while (!TAILQ_EMPTY(&sq->sq_blocked)) 708145056Sjhb sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked), pri); 709145056Sjhb mtx_unlock_spin(&sched_lock); 710126324Sjhb sleepq_release(wchan); 711126324Sjhb} 712126324Sjhb 713126324Sjhb/* 714126324Sjhb * Time sleeping threads out. When the timeout expires, the thread is 715126324Sjhb * removed from the sleep queue and made runnable if it is still asleep. 716126324Sjhb */ 717126324Sjhbstatic void 718126324Sjhbsleepq_timeout(void *arg) 719126324Sjhb{ 720126324Sjhb struct sleepqueue *sq; 721126324Sjhb struct thread *td; 722126324Sjhb void *wchan; 723126324Sjhb 724129241Sbde td = arg; 725129241Sbde CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", 726129241Sbde (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm); 727126324Sjhb 728126324Sjhb /* 729126324Sjhb * First, see if the thread is asleep and get the wait channel if 730126324Sjhb * it is. 
731126324Sjhb */ 732126324Sjhb mtx_lock_spin(&sched_lock); 733126324Sjhb if (TD_ON_SLEEPQ(td)) { 734126324Sjhb wchan = td->td_wchan; 735126324Sjhb mtx_unlock_spin(&sched_lock); 736136445Sjhb sleepq_lock(wchan); 737126324Sjhb sq = sleepq_lookup(wchan); 738126324Sjhb mtx_lock_spin(&sched_lock); 739126324Sjhb } else { 740126324Sjhb wchan = NULL; 741126324Sjhb sq = NULL; 742126324Sjhb } 743126324Sjhb 744126324Sjhb /* 745126324Sjhb * At this point, if the thread is still on the sleep queue, 746126324Sjhb * we have that sleep queue locked as it cannot migrate sleep 747126324Sjhb * queues while we dropped sched_lock. If it had resumed and 748126324Sjhb * was on another CPU while the lock was dropped, it would have 749126324Sjhb * seen that TDF_TIMEOUT and TDF_TIMOFAIL are clear and the 750126324Sjhb * call to callout_stop() to stop this routine would have failed 751126324Sjhb * meaning that it would have already set TDF_TIMEOUT to 752126324Sjhb * synchronize with this function. 753126324Sjhb */ 754126324Sjhb if (TD_ON_SLEEPQ(td)) { 755126324Sjhb MPASS(td->td_wchan == wchan); 756126324Sjhb MPASS(sq != NULL); 757126324Sjhb td->td_flags |= TDF_TIMEOUT; 758145056Sjhb sleepq_resume_thread(sq, td, -1); 759126324Sjhb mtx_unlock_spin(&sched_lock); 760126324Sjhb sleepq_release(wchan); 761126324Sjhb return; 762126324Sjhb } else if (wchan != NULL) 763126324Sjhb sleepq_release(wchan); 764126324Sjhb 765126324Sjhb /* 766126324Sjhb * Now check for the edge cases. First, if TDF_TIMEOUT is set, 767126324Sjhb * then the other thread has already yielded to us, so clear 768126324Sjhb * the flag and resume it. If TDF_TIMEOUT is not set, then the 769126324Sjhb * we know that the other thread is not on a sleep queue, but it 770126324Sjhb * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL 771126324Sjhb * to let it know that the timeout has already run and doesn't 772126324Sjhb * need to be canceled. 
773126324Sjhb */ 774126324Sjhb if (td->td_flags & TDF_TIMEOUT) { 775127085Sjhb MPASS(TD_IS_SLEEPING(td)); 776126324Sjhb td->td_flags &= ~TDF_TIMEOUT; 777126324Sjhb TD_CLR_SLEEPING(td); 778126324Sjhb setrunnable(td); 779126324Sjhb } else 780126324Sjhb td->td_flags |= TDF_TIMOFAIL; 781126324Sjhb mtx_unlock_spin(&sched_lock); 782126324Sjhb} 783126324Sjhb 784126324Sjhb/* 785126324Sjhb * Resumes a specific thread from the sleep queue associated with a specific 786126324Sjhb * wait channel if it is on that queue. 787126324Sjhb */ 788126324Sjhbvoid 789126324Sjhbsleepq_remove(struct thread *td, void *wchan) 790126324Sjhb{ 791126324Sjhb struct sleepqueue *sq; 792126324Sjhb 793126324Sjhb /* 794126324Sjhb * Look up the sleep queue for this wait channel, then re-check 795126324Sjhb * that the thread is asleep on that channel, if it is not, then 796126324Sjhb * bail. 797126324Sjhb */ 798126324Sjhb MPASS(wchan != NULL); 799136445Sjhb sleepq_lock(wchan); 800126324Sjhb sq = sleepq_lookup(wchan); 801126324Sjhb mtx_lock_spin(&sched_lock); 802126324Sjhb if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) { 803126324Sjhb mtx_unlock_spin(&sched_lock); 804126324Sjhb sleepq_release(wchan); 805126324Sjhb return; 806126324Sjhb } 807126324Sjhb MPASS(sq != NULL); 808126324Sjhb 809126324Sjhb /* Thread is asleep on sleep queue sq, so wake it up. */ 810145056Sjhb sleepq_resume_thread(sq, td, -1); 811126324Sjhb sleepq_release(wchan); 812145056Sjhb mtx_unlock_spin(&sched_lock); 813126324Sjhb} 814126324Sjhb 815126324Sjhb/* 816129241Sbde * Abort a thread as if an interrupt had occurred. Only abort 817129241Sbde * interruptible waits (unfortunately it isn't safe to abort others). 818126324Sjhb * 819126324Sjhb * XXX: What in the world does the comment below mean? 820126324Sjhb * Also, whatever the signal code does... 
821126324Sjhb */ 822126324Sjhbvoid 823126324Sjhbsleepq_abort(struct thread *td) 824126324Sjhb{ 825126324Sjhb void *wchan; 826126324Sjhb 827126324Sjhb mtx_assert(&sched_lock, MA_OWNED); 828126324Sjhb MPASS(TD_ON_SLEEPQ(td)); 829126324Sjhb MPASS(td->td_flags & TDF_SINTR); 830126324Sjhb 831126324Sjhb /* 832126324Sjhb * If the TDF_TIMEOUT flag is set, just leave. A 833126324Sjhb * timeout is scheduled anyhow. 834126324Sjhb */ 835126324Sjhb if (td->td_flags & TDF_TIMEOUT) 836126324Sjhb return; 837126324Sjhb 838129241Sbde CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)", 839129241Sbde (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm); 840126324Sjhb wchan = td->td_wchan; 841126324Sjhb mtx_unlock_spin(&sched_lock); 842126324Sjhb sleepq_remove(td, wchan); 843126324Sjhb mtx_lock_spin(&sched_lock); 844126324Sjhb} 845