/* kern_condvar.c — FreeBSD revision 122352 */
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25243830Sdim */ 26243830Sdim 27243830Sdim#include <sys/cdefs.h> 28243830Sdim__FBSDID("$FreeBSD: head/sys/kern/kern_condvar.c 122352 2003-11-09 09:17:26Z tanimura $"); 29243830Sdim 30243830Sdim#include "opt_ktrace.h" 31243830Sdim 32193323Sed#include <sys/param.h> 33243830Sdim#include <sys/systm.h> 34243830Sdim#include <sys/lock.h> 35243830Sdim#include <sys/mutex.h> 36243830Sdim#include <sys/proc.h> 37193323Sed#include <sys/kernel.h> 38263508Sdim#include <sys/ktr.h> 39193323Sed#include <sys/condvar.h> 40193323Sed#include <sys/sched.h> 41263508Sdim#include <sys/signalvar.h> 42263508Sdim#include <sys/resourcevar.h> 43263508Sdim#ifdef KTRACE 44193323Sed#include <sys/uio.h> 45193323Sed#include <sys/ktrace.h> 46263508Sdim#endif 47193323Sed 48193323Sed/* 49193323Sed * Common sanity checks for cv_wait* functions. 50193323Sed */ 51243830Sdim#define CV_ASSERT(cvp, mp, td) do { \ 52243830Sdim KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \ 53243830Sdim KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__)); \ 54243830Sdim KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ 55263508Sdim KASSERT((mp) != NULL, ("%s: mp NULL", __func__)); \ 56193323Sed mtx_assert((mp), MA_OWNED | MA_NOTRECURSED); \ 57193323Sed} while (0) 58193323Sed 59193323Sed#ifdef INVARIANTS 60193323Sed#define CV_WAIT_VALIDATE(cvp, mp) do { \ 61193323Sed if (TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 62193323Sed /* Only waiter. */ \ 63193323Sed (cvp)->cv_mtx = (mp); \ 64198090Srdivacky } else { \ 65193323Sed /* \ 66193323Sed * Other waiter; assert that we're using the \ 67198090Srdivacky * same mutex. 
\ 68243830Sdim */ \ 69243830Sdim KASSERT((cvp)->cv_mtx == (mp), \ 70243830Sdim ("%s: Multiple mutexes", __func__)); \ 71243830Sdim } \ 72263508Sdim} while (0) 73193323Sed 74193323Sed#define CV_SIGNAL_VALIDATE(cvp) do { \ 75198090Srdivacky if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 76193323Sed KASSERT(mtx_owned((cvp)->cv_mtx), \ 77193323Sed ("%s: Mutex not owned", __func__)); \ 78193323Sed } \ 79193323Sed} while (0) 80193323Sed 81193323Sed#else 82263508Sdim#define CV_WAIT_VALIDATE(cvp, mp) 83193323Sed#define CV_SIGNAL_VALIDATE(cvp) 84193323Sed#endif 85193323Sed 86193323Sedstatic void cv_timedwait_end(void *arg); 87193323Sed 88193323Sed/* 89198090Srdivacky * Initialize a condition variable. Must be called before use. 90193323Sed */ 91193323Sedvoid 92193323Sedcv_init(struct cv *cvp, const char *desc) 93193323Sed{ 94193323Sed 95198090Srdivacky TAILQ_INIT(&cvp->cv_waitq); 96193323Sed cvp->cv_mtx = NULL; 97243830Sdim cvp->cv_description = desc; 98193323Sed} 99193323Sed 100198090Srdivacky/* 101193323Sed * Destroy a condition variable. The condition variable must be re-initialized 102243830Sdim * in order to be re-used. 103193323Sed */ 104193323Sedvoid 105198090Srdivackycv_destroy(struct cv *cvp) 106193323Sed{ 107193323Sed 108193323Sed KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__)); 109193323Sed} 110198090Srdivacky 111193323Sed/* 112193323Sed * Common code for cv_wait* functions. All require sched_lock. 113193323Sed */ 114193323Sed 115198090Srdivacky/* 116193323Sed * Switch context. 117193323Sed */ 118193323Sedstatic __inline void 119193323Sedcv_switch(struct thread *td) 120198090Srdivacky{ 121193323Sed TD_SET_SLEEPING(td); 122193323Sed td->td_proc->p_stats->p_ru.ru_nvcsw++; 123193323Sed mi_switch(); 124193323Sed CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td, 125198090Srdivacky td->td_proc->p_pid, td->td_proc->p_comm); 126193323Sed} 127193323Sed 128193323Sed/* 129 * Switch context, catching signals. 
130 */ 131static __inline int 132cv_switch_catch(struct thread *td) 133{ 134 struct proc *p; 135 int sig; 136 137 /* 138 * We put ourselves on the sleep queue and start our timeout before 139 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or 140 * both) could occur while we were stopped. A SIGCONT would cause us to 141 * be marked as TDS_SLP without resuming us, thus we must be ready for 142 * sleep when cursig is called. If the wakeup happens while we're 143 * stopped, td->td_wchan will be 0 upon return from cursig, 144 * and TD_ON_SLEEPQ() will return false. 145 */ 146 td->td_flags |= TDF_SINTR; 147 mtx_unlock_spin(&sched_lock); 148 p = td->td_proc; 149 PROC_LOCK(p); 150 mtx_lock(&p->p_sigacts->ps_mtx); 151 sig = cursig(td); 152 mtx_unlock(&p->p_sigacts->ps_mtx); 153 if (thread_suspend_check(1)) 154 sig = SIGSTOP; 155 mtx_lock_spin(&sched_lock); 156 PROC_UNLOCK(p); 157 if (sig != 0) { 158 if (TD_ON_SLEEPQ(td)) 159 cv_waitq_remove(td); 160 TD_SET_RUNNING(td); 161 } else if (TD_ON_SLEEPQ(td)) { 162 cv_switch(td); 163 } 164 td->td_flags &= ~TDF_SINTR; 165 166 return sig; 167} 168 169/* 170 * Add a thread to the wait queue of a condition variable. 171 */ 172static __inline void 173cv_waitq_add(struct cv *cvp, struct thread *td) 174{ 175 176 td->td_flags |= TDF_CVWAITQ; 177 TD_SET_ON_SLEEPQ(td); 178 td->td_wchan = cvp; 179 td->td_wmesg = cvp->cv_description; 180 CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td, 181 td->td_proc->p_pid, td->td_proc->p_comm); 182 TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq); 183 sched_sleep(td, td->td_priority); 184} 185 186/* 187 * Wait on a condition variable. The current thread is placed on the condition 188 * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same 189 * condition variable will resume the thread. The mutex is released before 190 * sleeping and will be held on return. It is recommended that the mutex be 191 * held when cv_signal or cv_broadcast are called. 
192 */ 193void 194cv_wait(struct cv *cvp, struct mtx *mp) 195{ 196 struct thread *td; 197 WITNESS_SAVE_DECL(mp); 198 199 td = curthread; 200#ifdef KTRACE 201 if (KTRPOINT(td, KTR_CSW)) 202 ktrcsw(1, 0); 203#endif 204 CV_ASSERT(cvp, mp, td); 205 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object, 206 "Waiting on \"%s\"", cvp->cv_description); 207 WITNESS_SAVE(&mp->mtx_object, mp); 208 209 if (cold ) { 210 /* 211 * During autoconfiguration, just give interrupts 212 * a chance, then just return. Don't run any other 213 * thread or panic below, in case this is the idle 214 * process and already asleep. 215 */ 216 return; 217 } 218 219 mtx_lock_spin(&sched_lock); 220 221 CV_WAIT_VALIDATE(cvp, mp); 222 223 DROP_GIANT(); 224 mtx_unlock(mp); 225 226 cv_waitq_add(cvp, td); 227 cv_switch(td); 228 229 mtx_unlock_spin(&sched_lock); 230#ifdef KTRACE 231 if (KTRPOINT(td, KTR_CSW)) 232 ktrcsw(0, 0); 233#endif 234 PICKUP_GIANT(); 235 mtx_lock(mp); 236 WITNESS_RESTORE(&mp->mtx_object, mp); 237} 238 239/* 240 * Wait on a condition variable, allowing interruption by signals. Return 0 if 241 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if 242 * a signal was caught. If ERESTART is returned the system call should be 243 * restarted if possible. 
244 */ 245int 246cv_wait_sig(struct cv *cvp, struct mtx *mp) 247{ 248 struct thread *td; 249 struct proc *p; 250 int rval; 251 int sig; 252 WITNESS_SAVE_DECL(mp); 253 254 td = curthread; 255 p = td->td_proc; 256 rval = 0; 257#ifdef KTRACE 258 if (KTRPOINT(td, KTR_CSW)) 259 ktrcsw(1, 0); 260#endif 261 CV_ASSERT(cvp, mp, td); 262 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object, 263 "Waiting on \"%s\"", cvp->cv_description); 264 WITNESS_SAVE(&mp->mtx_object, mp); 265 266 if (cold || panicstr) { 267 /* 268 * After a panic, or during autoconfiguration, just give 269 * interrupts a chance, then just return; don't run any other 270 * procs or panic below, in case this is the idle process and 271 * already asleep. 272 */ 273 return 0; 274 } 275 276 mtx_lock_spin(&sched_lock); 277 278 CV_WAIT_VALIDATE(cvp, mp); 279 280 DROP_GIANT(); 281 mtx_unlock(mp); 282 283 cv_waitq_add(cvp, td); 284 sig = cv_switch_catch(td); 285 286 mtx_unlock_spin(&sched_lock); 287 288 PROC_LOCK(p); 289 mtx_lock(&p->p_sigacts->ps_mtx); 290 if (sig == 0) { 291 sig = cursig(td); /* XXXKSE */ 292 if (sig == 0 && td->td_flags & TDF_INTERRUPT) 293 rval = td->td_intrval; 294 } 295 if (sig != 0) { 296 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 297 rval = EINTR; 298 else 299 rval = ERESTART; 300 } 301 mtx_unlock(&p->p_sigacts->ps_mtx); 302 if (p->p_flag & P_WEXIT) 303 rval = EINTR; 304 PROC_UNLOCK(p); 305 306#ifdef KTRACE 307 if (KTRPOINT(td, KTR_CSW)) 308 ktrcsw(0, 0); 309#endif 310 PICKUP_GIANT(); 311 mtx_lock(mp); 312 WITNESS_RESTORE(&mp->mtx_object, mp); 313 314 return (rval); 315} 316 317/* 318 * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the 319 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout 320 * expires. 
321 */ 322int 323cv_timedwait(struct cv *cvp, struct mtx *mp, int timo) 324{ 325 struct thread *td; 326 int rval; 327 WITNESS_SAVE_DECL(mp); 328 329 td = curthread; 330 rval = 0; 331#ifdef KTRACE 332 if (KTRPOINT(td, KTR_CSW)) 333 ktrcsw(1, 0); 334#endif 335 CV_ASSERT(cvp, mp, td); 336 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object, 337 "Waiting on \"%s\"", cvp->cv_description); 338 WITNESS_SAVE(&mp->mtx_object, mp); 339 340 if (cold || panicstr) { 341 /* 342 * After a panic, or during autoconfiguration, just give 343 * interrupts a chance, then just return; don't run any other 344 * thread or panic below, in case this is the idle process and 345 * already asleep. 346 */ 347 return 0; 348 } 349 350 mtx_lock_spin(&sched_lock); 351 352 CV_WAIT_VALIDATE(cvp, mp); 353 354 DROP_GIANT(); 355 mtx_unlock(mp); 356 357 cv_waitq_add(cvp, td); 358 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 359 cv_switch(td); 360 361 if (td->td_flags & TDF_TIMEOUT) { 362 td->td_flags &= ~TDF_TIMEOUT; 363 rval = EWOULDBLOCK; 364 } else if (td->td_flags & TDF_TIMOFAIL) 365 td->td_flags &= ~TDF_TIMOFAIL; 366 else if (callout_stop(&td->td_slpcallout) == 0) { 367 /* 368 * Work around race with cv_timedwait_end similar to that 369 * between msleep and endtsleep. 370 * Go back to sleep. 371 */ 372 TD_SET_SLEEPING(td); 373 td->td_proc->p_stats->p_ru.ru_nivcsw++; 374 mi_switch(); 375 td->td_flags &= ~TDF_TIMOFAIL; 376 } 377 378 mtx_unlock_spin(&sched_lock); 379#ifdef KTRACE 380 if (KTRPOINT(td, KTR_CSW)) 381 ktrcsw(0, 0); 382#endif 383 PICKUP_GIANT(); 384 mtx_lock(mp); 385 WITNESS_RESTORE(&mp->mtx_object, mp); 386 387 return (rval); 388} 389 390/* 391 * Wait on a condition variable for at most timo/hz seconds, allowing 392 * interruption by signals. Returns 0 if the thread was resumed by cv_signal 393 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if 394 * a signal was caught. 
395 */ 396int 397cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo) 398{ 399 struct thread *td; 400 struct proc *p; 401 int rval; 402 int sig; 403 WITNESS_SAVE_DECL(mp); 404 405 td = curthread; 406 p = td->td_proc; 407 rval = 0; 408#ifdef KTRACE 409 if (KTRPOINT(td, KTR_CSW)) 410 ktrcsw(1, 0); 411#endif 412 CV_ASSERT(cvp, mp, td); 413 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object, 414 "Waiting on \"%s\"", cvp->cv_description); 415 WITNESS_SAVE(&mp->mtx_object, mp); 416 417 if (cold || panicstr) { 418 /* 419 * After a panic, or during autoconfiguration, just give 420 * interrupts a chance, then just return; don't run any other 421 * thread or panic below, in case this is the idle process and 422 * already asleep. 423 */ 424 return 0; 425 } 426 427 mtx_lock_spin(&sched_lock); 428 429 CV_WAIT_VALIDATE(cvp, mp); 430 431 DROP_GIANT(); 432 mtx_unlock(mp); 433 434 cv_waitq_add(cvp, td); 435 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 436 sig = cv_switch_catch(td); 437 438 if (td->td_flags & TDF_TIMEOUT) { 439 td->td_flags &= ~TDF_TIMEOUT; 440 rval = EWOULDBLOCK; 441 } else if (td->td_flags & TDF_TIMOFAIL) 442 td->td_flags &= ~TDF_TIMOFAIL; 443 else if (callout_stop(&td->td_slpcallout) == 0) { 444 /* 445 * Work around race with cv_timedwait_end similar to that 446 * between msleep and endtsleep. 447 * Go back to sleep. 
448 */ 449 TD_SET_SLEEPING(td); 450 td->td_proc->p_stats->p_ru.ru_nivcsw++; 451 mi_switch(); 452 td->td_flags &= ~TDF_TIMOFAIL; 453 } 454 mtx_unlock_spin(&sched_lock); 455 456 PROC_LOCK(p); 457 mtx_lock(&p->p_sigacts->ps_mtx); 458 if (sig == 0) { 459 sig = cursig(td); 460 if (sig == 0 && td->td_flags & TDF_INTERRUPT) 461 rval = td->td_intrval; 462 } 463 if (sig != 0) { 464 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 465 rval = EINTR; 466 else 467 rval = ERESTART; 468 } 469 mtx_unlock(&p->p_sigacts->ps_mtx); 470 if (p->p_flag & P_WEXIT) 471 rval = EINTR; 472 PROC_UNLOCK(p); 473 474#ifdef KTRACE 475 if (KTRPOINT(td, KTR_CSW)) 476 ktrcsw(0, 0); 477#endif 478 PICKUP_GIANT(); 479 mtx_lock(mp); 480 WITNESS_RESTORE(&mp->mtx_object, mp); 481 482 return (rval); 483} 484 485/* 486 * Common code for signal and broadcast. Assumes waitq is not empty. Must be 487 * called with sched_lock held. 488 */ 489static __inline void 490cv_wakeup(struct cv *cvp) 491{ 492 struct thread *td; 493 494 mtx_assert(&sched_lock, MA_OWNED); 495 td = TAILQ_FIRST(&cvp->cv_waitq); 496 KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__)); 497 KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__)); 498 cv_waitq_remove(td); 499 TD_CLR_SLEEPING(td); 500 setrunnable(td); 501} 502 503/* 504 * Signal a condition variable, wakes up one waiting thread. Will also wakeup 505 * the swapper if the process is not in memory, so that it can bring the 506 * sleeping process in. Note that this may also result in additional threads 507 * being made runnable. Should be called with the same mutex as was passed to 508 * cv_wait held. 509 */ 510void 511cv_signal(struct cv *cvp) 512{ 513 514 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 515 mtx_lock_spin(&sched_lock); 516 if (!TAILQ_EMPTY(&cvp->cv_waitq)) { 517 CV_SIGNAL_VALIDATE(cvp); 518 cv_wakeup(cvp); 519 } 520 mtx_unlock_spin(&sched_lock); 521} 522 523/* 524 * Broadcast a signal to a condition variable. Wakes up all waiting threads. 
525 * Should be called with the same mutex as was passed to cv_wait held. 526 */ 527void 528cv_broadcastpri(struct cv *cvp, int pri) 529{ 530 struct thread *td; 531 532 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 533 mtx_lock_spin(&sched_lock); 534 CV_SIGNAL_VALIDATE(cvp); 535 while (!TAILQ_EMPTY(&cvp->cv_waitq)) { 536 if (pri >= PRI_MIN && pri <= PRI_MAX) { 537 td = TAILQ_FIRST(&cvp->cv_waitq); 538 if (td->td_priority > pri) 539 td->td_priority = pri; 540 } 541 cv_wakeup(cvp); 542 } 543 mtx_unlock_spin(&sched_lock); 544} 545 546/* 547 * Remove a thread from the wait queue of its condition variable. This may be 548 * called externally. 549 */ 550void 551cv_waitq_remove(struct thread *td) 552{ 553 struct cv *cvp; 554 555 mtx_assert(&sched_lock, MA_OWNED); 556 if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) { 557 TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); 558 td->td_flags &= ~TDF_CVWAITQ; 559 td->td_wmesg = NULL; 560 TD_CLR_ON_SLEEPQ(td); 561 } 562} 563 564/* 565 * Timeout function for cv_timedwait. Put the thread on the runqueue and set 566 * its timeout flag. 567 */ 568static void 569cv_timedwait_end(void *arg) 570{ 571 struct thread *td; 572 573 td = arg; 574 CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", 575 td, td->td_proc->p_pid, td->td_proc->p_comm); 576 mtx_lock_spin(&sched_lock); 577 if (TD_ON_SLEEPQ(td)) { 578 cv_waitq_remove(td); 579 td->td_flags |= TDF_TIMEOUT; 580 } else { 581 td->td_flags |= TDF_TIMOFAIL; 582 } 583 TD_CLR_SLEEPING(td); 584 setrunnable(td); 585 mtx_unlock_spin(&sched_lock); 586} 587 588/* 589 * For now only abort interruptable waits. 590 * The others will have to either complete on their own or have a timeout. 
591 */ 592void 593cv_abort(struct thread *td) 594{ 595 596 CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td, 597 td->td_proc->p_pid, td->td_proc->p_comm); 598 mtx_lock_spin(&sched_lock); 599 if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) { 600 if (TD_ON_SLEEPQ(td)) { 601 cv_waitq_remove(td); 602 } 603 TD_CLR_SLEEPING(td); 604 setrunnable(td); 605 } 606 mtx_unlock_spin(&sched_lock); 607} 608 609