/* kern_condvar.c revision 102544 */
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_condvar.c 102544 2002-08-28 23:45:15Z peter $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
48 */ 49#define CV_ASSERT(cvp, mp, td) do { \ 50 KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \ 51 KASSERT((td)->td_state == TDS_RUNNING, ("%s: not TDS_RUNNING", __func__)); \ 52 KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ 53 KASSERT((mp) != NULL, ("%s: mp NULL", __func__)); \ 54 mtx_assert((mp), MA_OWNED | MA_NOTRECURSED); \ 55} while (0) 56 57#ifdef INVARIANTS 58#define CV_WAIT_VALIDATE(cvp, mp) do { \ 59 if (TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 60 /* Only waiter. */ \ 61 (cvp)->cv_mtx = (mp); \ 62 } else { \ 63 /* \ 64 * Other waiter; assert that we're using the \ 65 * same mutex. \ 66 */ \ 67 KASSERT((cvp)->cv_mtx == (mp), \ 68 ("%s: Multiple mutexes", __func__)); \ 69 } \ 70} while (0) 71#define CV_SIGNAL_VALIDATE(cvp) do { \ 72 if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 73 KASSERT(mtx_owned((cvp)->cv_mtx), \ 74 ("%s: Mutex not owned", __func__)); \ 75 } \ 76} while (0) 77#else 78#define CV_WAIT_VALIDATE(cvp, mp) 79#define CV_SIGNAL_VALIDATE(cvp) 80#endif 81 82static void cv_timedwait_end(void *arg); 83static void cv_check_upcall(struct thread *td); 84 85/* 86 * Initialize a condition variable. Must be called before use. 87 */ 88void 89cv_init(struct cv *cvp, const char *desc) 90{ 91 92 TAILQ_INIT(&cvp->cv_waitq); 93 cvp->cv_mtx = NULL; 94 cvp->cv_description = desc; 95} 96 97/* 98 * Destroy a condition variable. The condition variable must be re-initialized 99 * in order to be re-used. 100 */ 101void 102cv_destroy(struct cv *cvp) 103{ 104 105 KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__)); 106} 107 108/* 109 * Common code for cv_wait* functions. All require sched_lock. 110 */ 111 112/* 113 * Decide if we need to queue an upcall. 114 * This is copied from msleep(), perhaps this should be a common function. 
115 */ 116static void 117cv_check_upcall(struct thread *td) 118{ 119 120 /* 121 * If we are capable of async syscalls and there isn't already 122 * another one ready to return, start a new thread 123 * and queue it as ready to run. Note that there is danger here 124 * because we need to make sure that we don't sleep allocating 125 * the thread (recursion here might be bad). 126 * Hence the TDF_INMSLEEP flag. 127 */ 128 if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox && 129 (td->td_flags & TDF_INMSLEEP) == 0) { 130 /* 131 * If we have no queued work to do, 132 * upcall to the UTS to see if it has more work. 133 * We don't need to upcall now, just queue it. 134 */ 135 if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) { 136 /* Don't recurse here! */ 137 td->td_flags |= TDF_INMSLEEP; 138 thread_schedule_upcall(td, td->td_kse); 139 td->td_flags &= ~TDF_INMSLEEP; 140 } 141 } 142} 143 144/* 145 * Switch context. 146 */ 147static __inline void 148cv_switch(struct thread *td) 149{ 150 151 td->td_state = TDS_SLP; 152 td->td_proc->p_stats->p_ru.ru_nvcsw++; 153 cv_check_upcall(td); 154 mi_switch(); 155 CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td, 156 td->td_proc->p_pid, td->td_proc->p_comm); 157} 158 159/* 160 * Switch context, catching signals. 161 */ 162static __inline int 163cv_switch_catch(struct thread *td) 164{ 165 struct proc *p; 166 int sig; 167 168 /* 169 * We put ourselves on the sleep queue and start our timeout before 170 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or 171 * both) could occur while we were stopped. A SIGCONT would cause us to 172 * be marked as TDS_SLP without resuming us, thus we must be ready for 173 * sleep when cursig is called. If the wakeup happens while we're 174 * stopped, td->td_wchan will be 0 upon return from cursig. 
175 */ 176 td->td_flags |= TDF_SINTR; 177 mtx_unlock_spin(&sched_lock); 178 p = td->td_proc; 179 PROC_LOCK(p); 180 sig = cursig(td); /* XXXKSE */ 181 if (thread_suspend_check(1)) 182 sig = SIGSTOP; 183 mtx_lock_spin(&sched_lock); 184 PROC_UNLOCK(p); 185 if (sig != 0) { 186 if (td->td_wchan != NULL) 187 cv_waitq_remove(td); 188 td->td_state = TDS_RUNNING; /* XXXKSE */ 189 } else if (td->td_wchan != NULL) { 190 cv_switch(td); 191 } 192 td->td_flags &= ~TDF_SINTR; 193 194 return sig; 195} 196 197/* 198 * Add a thread to the wait queue of a condition variable. 199 */ 200static __inline void 201cv_waitq_add(struct cv *cvp, struct thread *td) 202{ 203 204 td->td_flags |= TDF_CVWAITQ; 205 td->td_wchan = cvp; 206 td->td_wmesg = cvp->cv_description; 207 td->td_ksegrp->kg_slptime = 0; /* XXXKSE */ 208 td->td_base_pri = td->td_priority; 209 CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td, 210 td->td_proc->p_pid, td->td_proc->p_comm); 211 TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq); 212} 213 214/* 215 * Wait on a condition variable. The current thread is placed on the condition 216 * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same 217 * condition variable will resume the thread. The mutex is released before 218 * sleeping and will be held on return. It is recommended that the mutex be 219 * held when cv_signal or cv_broadcast are called. 220 */ 221void 222cv_wait(struct cv *cvp, struct mtx *mp) 223{ 224 struct thread *td; 225 WITNESS_SAVE_DECL(mp); 226 227 td = curthread; 228#ifdef KTRACE 229 if (KTRPOINT(td, KTR_CSW)) 230 ktrcsw(1, 0); 231#endif 232 CV_ASSERT(cvp, mp, td); 233 WITNESS_SLEEP(0, &mp->mtx_object); 234 WITNESS_SAVE(&mp->mtx_object, mp); 235 236 if (cold ) { 237 /* 238 * During autoconfiguration, just give interrupts 239 * a chance, then just return. Don't run any other 240 * thread or panic below, in case this is the idle 241 * process and already asleep. 
242 */ 243 return; 244 } 245 246 mtx_lock_spin(&sched_lock); 247 248 CV_WAIT_VALIDATE(cvp, mp); 249 250 DROP_GIANT(); 251 mtx_unlock(mp); 252 253 cv_waitq_add(cvp, td); 254 cv_switch(td); 255 256 mtx_unlock_spin(&sched_lock); 257#ifdef KTRACE 258 if (KTRPOINT(td, KTR_CSW)) 259 ktrcsw(0, 0); 260#endif 261 PICKUP_GIANT(); 262 mtx_lock(mp); 263 WITNESS_RESTORE(&mp->mtx_object, mp); 264} 265 266/* 267 * Wait on a condition variable, allowing interruption by signals. Return 0 if 268 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if 269 * a signal was caught. If ERESTART is returned the system call should be 270 * restarted if possible. 271 */ 272int 273cv_wait_sig(struct cv *cvp, struct mtx *mp) 274{ 275 struct thread *td; 276 struct proc *p; 277 int rval; 278 int sig; 279 WITNESS_SAVE_DECL(mp); 280 281 td = curthread; 282 p = td->td_proc; 283 rval = 0; 284#ifdef KTRACE 285 if (KTRPOINT(td, KTR_CSW)) 286 ktrcsw(1, 0); 287#endif 288 CV_ASSERT(cvp, mp, td); 289 WITNESS_SLEEP(0, &mp->mtx_object); 290 WITNESS_SAVE(&mp->mtx_object, mp); 291 292 if (cold || panicstr) { 293 /* 294 * After a panic, or during autoconfiguration, just give 295 * interrupts a chance, then just return; don't run any other 296 * procs or panic below, in case this is the idle process and 297 * already asleep. 
298 */ 299 return 0; 300 } 301 302 mtx_lock_spin(&sched_lock); 303 304 CV_WAIT_VALIDATE(cvp, mp); 305 306 DROP_GIANT(); 307 mtx_unlock(mp); 308 309 cv_waitq_add(cvp, td); 310 sig = cv_switch_catch(td); 311 312 mtx_unlock_spin(&sched_lock); 313 314 PROC_LOCK(p); 315 if (sig == 0) 316 sig = cursig(td); /* XXXKSE */ 317 if (sig != 0) { 318 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 319 rval = EINTR; 320 else 321 rval = ERESTART; 322 } 323 PROC_UNLOCK(p); 324 if (p->p_flag & P_WEXIT) 325 rval = EINTR; 326 327#ifdef KTRACE 328 if (KTRPOINT(td, KTR_CSW)) 329 ktrcsw(0, 0); 330#endif 331 PICKUP_GIANT(); 332 mtx_lock(mp); 333 WITNESS_RESTORE(&mp->mtx_object, mp); 334 335 return (rval); 336} 337 338/* 339 * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the 340 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout 341 * expires. 342 */ 343int 344cv_timedwait(struct cv *cvp, struct mtx *mp, int timo) 345{ 346 struct thread *td; 347 int rval; 348 WITNESS_SAVE_DECL(mp); 349 350 td = curthread; 351 rval = 0; 352#ifdef KTRACE 353 if (KTRPOINT(td, KTR_CSW)) 354 ktrcsw(1, 0); 355#endif 356 CV_ASSERT(cvp, mp, td); 357 WITNESS_SLEEP(0, &mp->mtx_object); 358 WITNESS_SAVE(&mp->mtx_object, mp); 359 360 if (cold || panicstr) { 361 /* 362 * After a panic, or during autoconfiguration, just give 363 * interrupts a chance, then just return; don't run any other 364 * thread or panic below, in case this is the idle process and 365 * already asleep. 
366 */ 367 return 0; 368 } 369 370 mtx_lock_spin(&sched_lock); 371 372 CV_WAIT_VALIDATE(cvp, mp); 373 374 DROP_GIANT(); 375 mtx_unlock(mp); 376 377 cv_waitq_add(cvp, td); 378 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 379 cv_switch(td); 380 381 if (td->td_flags & TDF_TIMEOUT) { 382 td->td_flags &= ~TDF_TIMEOUT; 383 rval = EWOULDBLOCK; 384 } else if (td->td_flags & TDF_TIMOFAIL) 385 td->td_flags &= ~TDF_TIMOFAIL; 386 else if (callout_stop(&td->td_slpcallout) == 0) { 387 /* 388 * Work around race with cv_timedwait_end similar to that 389 * between msleep and endtsleep. 390 * Go back to sleep. 391 */ 392 td->td_flags |= TDF_TIMEOUT; 393 td->td_state = TDS_SLP; 394 td->td_proc->p_stats->p_ru.ru_nivcsw++; 395 mi_switch(); 396 } 397 398 if (td->td_proc->p_flag & P_WEXIT) 399 rval = EWOULDBLOCK; 400 mtx_unlock_spin(&sched_lock); 401#ifdef KTRACE 402 if (KTRPOINT(td, KTR_CSW)) 403 ktrcsw(0, 0); 404#endif 405 PICKUP_GIANT(); 406 mtx_lock(mp); 407 WITNESS_RESTORE(&mp->mtx_object, mp); 408 409 return (rval); 410} 411 412/* 413 * Wait on a condition variable for at most timo/hz seconds, allowing 414 * interruption by signals. Returns 0 if the thread was resumed by cv_signal 415 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if 416 * a signal was caught. 417 */ 418int 419cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo) 420{ 421 struct thread *td; 422 struct proc *p; 423 int rval; 424 int sig; 425 WITNESS_SAVE_DECL(mp); 426 427 td = curthread; 428 p = td->td_proc; 429 rval = 0; 430#ifdef KTRACE 431 if (KTRPOINT(td, KTR_CSW)) 432 ktrcsw(1, 0); 433#endif 434 CV_ASSERT(cvp, mp, td); 435 WITNESS_SLEEP(0, &mp->mtx_object); 436 WITNESS_SAVE(&mp->mtx_object, mp); 437 438 if (cold || panicstr) { 439 /* 440 * After a panic, or during autoconfiguration, just give 441 * interrupts a chance, then just return; don't run any other 442 * thread or panic below, in case this is the idle process and 443 * already asleep. 
444 */ 445 return 0; 446 } 447 448 mtx_lock_spin(&sched_lock); 449 450 CV_WAIT_VALIDATE(cvp, mp); 451 452 DROP_GIANT(); 453 mtx_unlock(mp); 454 455 cv_waitq_add(cvp, td); 456 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 457 sig = cv_switch_catch(td); 458 459 if (td->td_flags & TDF_TIMEOUT) { 460 td->td_flags &= ~TDF_TIMEOUT; 461 rval = EWOULDBLOCK; 462 } else if (td->td_flags & TDF_TIMOFAIL) 463 td->td_flags &= ~TDF_TIMOFAIL; 464 else if (callout_stop(&td->td_slpcallout) == 0) { 465 /* 466 * Work around race with cv_timedwait_end similar to that 467 * between msleep and endtsleep. 468 * Go back to sleep. 469 */ 470 td->td_flags |= TDF_TIMEOUT; 471 td->td_state = TDS_SLP; 472 td->td_proc->p_stats->p_ru.ru_nivcsw++; 473 mi_switch(); 474 } 475 mtx_unlock_spin(&sched_lock); 476 477 PROC_LOCK(p); 478 if (sig == 0) 479 sig = cursig(td); 480 if (sig != 0) { 481 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 482 rval = EINTR; 483 else 484 rval = ERESTART; 485 } 486 PROC_UNLOCK(p); 487 488 if (p->p_flag & P_WEXIT) 489 rval = EINTR; 490 491#ifdef KTRACE 492 if (KTRPOINT(td, KTR_CSW)) 493 ktrcsw(0, 0); 494#endif 495 PICKUP_GIANT(); 496 mtx_lock(mp); 497 WITNESS_RESTORE(&mp->mtx_object, mp); 498 499 return (rval); 500} 501 502/* 503 * Common code for signal and broadcast. Assumes waitq is not empty. Must be 504 * called with sched_lock held. 
505 */ 506static __inline void 507cv_wakeup(struct cv *cvp) 508{ 509 struct thread *td; 510 struct ksegrp *kg; 511 512 mtx_assert(&sched_lock, MA_OWNED); 513 td = TAILQ_FIRST(&cvp->cv_waitq); 514 KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__)); 515 KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__)); 516 TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); 517 td->td_flags &= ~TDF_CVWAITQ; 518 td->td_wchan = 0; 519 if (td->td_state == TDS_SLP) { 520 /* OPTIMIZED EXPANSION OF setrunnable(td); */ 521 CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)", 522 td, td->td_proc->p_pid, td->td_proc->p_comm); 523 kg = td->td_ksegrp; 524 if (kg->kg_slptime > 1) /* XXXKSE */ 525 updatepri(kg); 526 kg->kg_slptime = 0; 527 if (td->td_proc->p_sflag & PS_INMEM) { 528 setrunqueue(td); 529 maybe_resched(td); 530 } else { 531 td->td_state = TDS_SWAPPED; 532 if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) { 533 td->td_proc->p_sflag |= PS_SWAPINREQ; 534 wakeup(&proc0); 535 } 536 } 537 /* END INLINE EXPANSION */ 538 } 539} 540 541/* 542 * Signal a condition variable, wakes up one waiting thread. Will also wakeup 543 * the swapper if the process is not in memory, so that it can bring the 544 * sleeping process in. Note that this may also result in additional threads 545 * being made runnable. Should be called with the same mutex as was passed to 546 * cv_wait held. 547 */ 548void 549cv_signal(struct cv *cvp) 550{ 551 552 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 553 mtx_lock_spin(&sched_lock); 554 if (!TAILQ_EMPTY(&cvp->cv_waitq)) { 555 CV_SIGNAL_VALIDATE(cvp); 556 cv_wakeup(cvp); 557 } 558 mtx_unlock_spin(&sched_lock); 559} 560 561/* 562 * Broadcast a signal to a condition variable. Wakes up all waiting threads. 563 * Should be called with the same mutex as was passed to cv_wait held. 
564 */ 565void 566cv_broadcast(struct cv *cvp) 567{ 568 569 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 570 mtx_lock_spin(&sched_lock); 571 CV_SIGNAL_VALIDATE(cvp); 572 while (!TAILQ_EMPTY(&cvp->cv_waitq)) 573 cv_wakeup(cvp); 574 mtx_unlock_spin(&sched_lock); 575} 576 577/* 578 * Remove a thread from the wait queue of its condition variable. This may be 579 * called externally. 580 */ 581void 582cv_waitq_remove(struct thread *td) 583{ 584 struct cv *cvp; 585 586 mtx_lock_spin(&sched_lock); 587 if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) { 588 TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); 589 td->td_flags &= ~TDF_CVWAITQ; 590 td->td_wchan = NULL; 591 } 592 mtx_unlock_spin(&sched_lock); 593} 594 595/* 596 * Timeout function for cv_timedwait. Put the thread on the runqueue and set 597 * its timeout flag. 598 */ 599static void 600cv_timedwait_end(void *arg) 601{ 602 struct thread *td; 603 604 td = arg; 605 CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td, td->td_proc->p_pid, 606 td->td_proc->p_comm); 607 mtx_lock_spin(&sched_lock); 608 if (td->td_flags & TDF_TIMEOUT) { 609 td->td_flags &= ~TDF_TIMEOUT; 610 if (td->td_proc->p_sflag & PS_INMEM) { 611 setrunqueue(td); 612 maybe_resched(td); 613 } else { 614 td->td_state = TDS_SWAPPED; 615 if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) { 616 td->td_proc->p_sflag |= PS_SWAPINREQ; 617 wakeup(&proc0); 618 } 619 } 620 } else if (td->td_wchan != NULL) { 621 if (td->td_state == TDS_SLP) /* XXXKSE */ 622 setrunnable(td); 623 else 624 cv_waitq_remove(td); 625 td->td_flags |= TDF_TIMEOUT; 626 } else 627 td->td_flags |= TDF_TIMOFAIL; 628 mtx_unlock_spin(&sched_lock); 629} 630 631/* 632 * For now only abort interruptable waits. 633 * The others will have to either complete on their own or have a timeout. 
634 */ 635void 636cv_abort(struct thread *td) 637{ 638 639 CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td, 640 td->td_proc->p_pid, 641 td->td_proc->p_comm); 642 mtx_lock_spin(&sched_lock); 643 if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) { 644 if (td->td_wchan != NULL) { 645 if (td->td_state == TDS_SLP) 646 setrunnable(td); 647 else 648 cv_waitq_remove(td); 649 } 650 } 651 mtx_unlock_spin(&sched_lock); 652} 653 654