kern_condvar.c revision 100913
1/*- 2 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD: head/sys/kern/kern_condvar.c 100913 2002-07-30 06:54:05Z tanimura $ 27 */ 28 29#include "opt_ktrace.h" 30 31#include <sys/param.h> 32#include <sys/systm.h> 33#include <sys/lock.h> 34#include <sys/mutex.h> 35#include <sys/proc.h> 36#include <sys/kernel.h> 37#include <sys/ktr.h> 38#include <sys/condvar.h> 39#include <sys/signalvar.h> 40#include <sys/resourcevar.h> 41#ifdef KTRACE 42#include <sys/uio.h> 43#include <sys/ktrace.h> 44#endif 45 46/* 47 * Common sanity checks for cv_wait* functions. 48 */ 49#define CV_ASSERT(cvp, mp, td) do { \ 50 KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \ 51 KASSERT((td)->td_state == TDS_RUNNING, ("%s: not TDS_RUNNING", __func__)); \ 52 KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ 53 KASSERT((mp) != NULL, ("%s: mp NULL", __func__)); \ 54 mtx_assert((mp), MA_OWNED | MA_NOTRECURSED); \ 55} while (0) 56 57#ifdef INVARIANTS 58#define CV_WAIT_VALIDATE(cvp, mp) do { \ 59 if (TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 60 /* Only waiter. */ \ 61 (cvp)->cv_mtx = (mp); \ 62 } else { \ 63 /* \ 64 * Other waiter; assert that we're using the \ 65 * same mutex. \ 66 */ \ 67 KASSERT((cvp)->cv_mtx == (mp), \ 68 ("%s: Multiple mutexes", __func__)); \ 69 } \ 70} while (0) 71#define CV_SIGNAL_VALIDATE(cvp) do { \ 72 if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 73 KASSERT(mtx_owned((cvp)->cv_mtx), \ 74 ("%s: Mutex not owned", __func__)); \ 75 } \ 76} while (0) 77#else 78#define CV_WAIT_VALIDATE(cvp, mp) 79#define CV_SIGNAL_VALIDATE(cvp) 80#endif 81 82static void cv_timedwait_end(void *arg); 83static void cv_check_upcall(struct thread *td); 84 85/* 86 * Initialize a condition variable. Must be called before use. 87 */ 88void 89cv_init(struct cv *cvp, const char *desc) 90{ 91 92 TAILQ_INIT(&cvp->cv_waitq); 93 cvp->cv_mtx = NULL; 94 cvp->cv_description = desc; 95} 96 97/* 98 * Destroy a condition variable. The condition variable must be re-initialized 99 * in order to be re-used. 100 */ 101void 102cv_destroy(struct cv *cvp) 103{ 104 105 KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__)); 106} 107 108/* 109 * Common code for cv_wait* functions. All require sched_lock. 110 */ 111 112/* 113 * Decide if we need to queue an upcall. 114 * This is copied from msleep(), perhaps this should be a common function. 115 */ 116static void 117cv_check_upcall(struct thread *td) 118{ 119 120 /* 121 * If we are capable of async syscalls and there isn't already 122 * another one ready to return, start a new thread 123 * and queue it as ready to run. Note that there is danger here 124 * because we need to make sure that we don't sleep allocating 125 * the thread (recursion here might be bad). 126 * Hence the TDF_INMSLEEP flag. 127 */ 128 if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox && 129 (td->td_flags & TDF_INMSLEEP) == 0) { 130 /* 131 * If we have no queued work to do, 132 * upcall to the UTS to see if it has more work. 133 * We don't need to upcall now, just queue it. 134 */ 135 if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) { 136 /* Don't recurse here! */ 137 td->td_flags |= TDF_INMSLEEP; 138 thread_schedule_upcall(td, td->td_kse); 139 td->td_flags &= ~TDF_INMSLEEP; 140 } 141 } 142} 143 144/* 145 * Switch context. 146 */ 147static __inline void 148cv_switch(struct thread *td) 149{ 150 151 td->td_state = TDS_SLP; 152 td->td_proc->p_stats->p_ru.ru_nvcsw++; 153 cv_check_upcall(td); 154 mi_switch(); 155 CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td, 156 td->td_proc->p_pid, td->td_proc->p_comm); 157} 158 159/* 160 * Switch context, catching signals. 161 */ 162static __inline int 163cv_switch_catch(struct thread *td) 164{ 165 struct proc *p; 166 int sig; 167 168 /* 169 * We put ourselves on the sleep queue and start our timeout before 170 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or 171 * both) could occur while we were stopped. A SIGCONT would cause us to 172 * be marked as TDS_SLP without resuming us, thus we must be ready for 173 * sleep when cursig is called. If the wakeup happens while we're 174 * stopped, td->td_wchan will be 0 upon return from cursig. 175 */ 176 td->td_flags |= TDF_SINTR; 177 mtx_unlock_spin(&sched_lock); 178 p = td->td_proc; 179 PROC_LOCK(p); 180 sig = cursig(td); /* XXXKSE */ 181 if (thread_suspend_check(1)) 182 sig = SIGSTOP; 183 mtx_lock_spin(&sched_lock); 184 PROC_UNLOCK(p); 185 if (sig != 0) { 186 if (td->td_wchan != NULL) 187 cv_waitq_remove(td); 188 td->td_state = TDS_RUNNING; /* XXXKSE */ 189 } else if (td->td_wchan != NULL) { 190 cv_switch(td); 191 } 192 td->td_flags &= ~TDF_SINTR; 193 194 return sig; 195} 196 197/* 198 * Add a thread to the wait queue of a condition variable. 199 */ 200static __inline void 201cv_waitq_add(struct cv *cvp, struct thread *td) 202{ 203 204 /* 205 * Process may be sitting on a slpque if asleep() was called, remove it 206 * before re-adding. 207 */ 208 if (td->td_wchan != NULL) 209 unsleep(td); 210 211 td->td_flags |= TDF_CVWAITQ; 212 td->td_wchan = cvp; 213 td->td_wmesg = cvp->cv_description; 214 td->td_ksegrp->kg_slptime = 0; /* XXXKSE */ 215 td->td_base_pri = td->td_priority; 216 CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td, 217 td->td_proc->p_pid, td->td_proc->p_comm); 218 TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq); 219} 220 221/* 222 * Wait on a condition variable. The current thread is placed on the condition 223 * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same 224 * condition variable will resume the thread. The mutex is released before 225 * sleeping and will be held on return. It is recommended that the mutex be 226 * held when cv_signal or cv_broadcast are called. 227 */ 228void 229cv_wait(struct cv *cvp, struct mtx *mp) 230{ 231 struct thread *td; 232 WITNESS_SAVE_DECL(mp); 233 234 td = curthread; 235#ifdef KTRACE 236 if (KTRPOINT(td, KTR_CSW)) 237 ktrcsw(1, 0); 238#endif 239 CV_ASSERT(cvp, mp, td); 240 WITNESS_SLEEP(0, &mp->mtx_object); 241 WITNESS_SAVE(&mp->mtx_object, mp); 242 243 if (cold ) { 244 /* 245 * During autoconfiguration, just give interrupts 246 * a chance, then just return. Don't run any other 247 * thread or panic below, in case this is the idle 248 * process and already asleep. 249 */ 250 return; 251 } 252 253 mtx_lock_spin(&sched_lock); 254 255 CV_WAIT_VALIDATE(cvp, mp); 256 257 DROP_GIANT(); 258 mtx_unlock(mp); 259 260 cv_waitq_add(cvp, td); 261 cv_switch(td); 262 263 mtx_unlock_spin(&sched_lock); 264#ifdef KTRACE 265 if (KTRPOINT(td, KTR_CSW)) 266 ktrcsw(0, 0); 267#endif 268 PICKUP_GIANT(); 269 mtx_lock(mp); 270 WITNESS_RESTORE(&mp->mtx_object, mp); 271} 272 273/* 274 * Wait on a condition variable, allowing interruption by signals. Return 0 if 275 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if 276 * a signal was caught. If ERESTART is returned the system call should be 277 * restarted if possible. 278 */ 279int 280cv_wait_sig(struct cv *cvp, struct mtx *mp) 281{ 282 struct thread *td; 283 struct proc *p; 284 int rval; 285 int sig; 286 WITNESS_SAVE_DECL(mp); 287 288 td = curthread; 289 p = td->td_proc; 290 rval = 0; 291#ifdef KTRACE 292 if (KTRPOINT(td, KTR_CSW)) 293 ktrcsw(1, 0); 294#endif 295 CV_ASSERT(cvp, mp, td); 296 WITNESS_SLEEP(0, &mp->mtx_object); 297 WITNESS_SAVE(&mp->mtx_object, mp); 298 299 if (cold || panicstr) { 300 /* 301 * After a panic, or during autoconfiguration, just give 302 * interrupts a chance, then just return; don't run any other 303 * procs or panic below, in case this is the idle process and 304 * already asleep. 305 */ 306 return 0; 307 } 308 309 mtx_lock_spin(&sched_lock); 310 311 CV_WAIT_VALIDATE(cvp, mp); 312 313 DROP_GIANT(); 314 mtx_unlock(mp); 315 316 cv_waitq_add(cvp, td); 317 sig = cv_switch_catch(td); 318 319 mtx_unlock_spin(&sched_lock); 320 321 PROC_LOCK(p); 322 if (sig == 0) 323 sig = cursig(td); /* XXXKSE */ 324 if (sig != 0) { 325 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 326 rval = EINTR; 327 else 328 rval = ERESTART; 329 } 330 PROC_UNLOCK(p); 331 if (p->p_flag & P_WEXIT) 332 rval = EINTR; 333 334#ifdef KTRACE 335 if (KTRPOINT(td, KTR_CSW)) 336 ktrcsw(0, 0); 337#endif 338 PICKUP_GIANT(); 339 mtx_lock(mp); 340 WITNESS_RESTORE(&mp->mtx_object, mp); 341 342 return (rval); 343} 344 345/* 346 * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the 347 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout 348 * expires. 349 */ 350int 351cv_timedwait(struct cv *cvp, struct mtx *mp, int timo) 352{ 353 struct thread *td; 354 int rval; 355 WITNESS_SAVE_DECL(mp); 356 357 td = curthread; 358 rval = 0; 359#ifdef KTRACE 360 if (KTRPOINT(td, KTR_CSW)) 361 ktrcsw(1, 0); 362#endif 363 CV_ASSERT(cvp, mp, td); 364 WITNESS_SLEEP(0, &mp->mtx_object); 365 WITNESS_SAVE(&mp->mtx_object, mp); 366 367 if (cold || panicstr) { 368 /* 369 * After a panic, or during autoconfiguration, just give 370 * interrupts a chance, then just return; don't run any other 371 * thread or panic below, in case this is the idle process and 372 * already asleep. 373 */ 374 return 0; 375 } 376 377 mtx_lock_spin(&sched_lock); 378 379 CV_WAIT_VALIDATE(cvp, mp); 380 381 DROP_GIANT(); 382 mtx_unlock(mp); 383 384 cv_waitq_add(cvp, td); 385 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 386 cv_switch(td); 387 388 if (td->td_flags & TDF_TIMEOUT) { 389 td->td_flags &= ~TDF_TIMEOUT; 390 rval = EWOULDBLOCK; 391 } else if (td->td_flags & TDF_TIMOFAIL) 392 td->td_flags &= ~TDF_TIMOFAIL; 393 else if (callout_stop(&td->td_slpcallout) == 0) { 394 /* 395 * Work around race with cv_timedwait_end similar to that 396 * between msleep and endtsleep. 397 * Go back to sleep. 398 */ 399 td->td_flags |= TDF_TIMEOUT; 400 td->td_state = TDS_SLP; 401 td->td_proc->p_stats->p_ru.ru_nivcsw++; 402 mi_switch(); 403 } 404 405 if (td->td_proc->p_flag & P_WEXIT) 406 rval = EWOULDBLOCK; 407 mtx_unlock_spin(&sched_lock); 408#ifdef KTRACE 409 if (KTRPOINT(td, KTR_CSW)) 410 ktrcsw(0, 0); 411#endif 412 PICKUP_GIANT(); 413 mtx_lock(mp); 414 WITNESS_RESTORE(&mp->mtx_object, mp); 415 416 return (rval); 417} 418 419/* 420 * Wait on a condition variable for at most timo/hz seconds, allowing 421 * interruption by signals. Returns 0 if the thread was resumed by cv_signal 422 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if 423 * a signal was caught. 424 */ 425int 426cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo) 427{ 428 struct thread *td; 429 struct proc *p; 430 int rval; 431 int sig; 432 WITNESS_SAVE_DECL(mp); 433 434 td = curthread; 435 p = td->td_proc; 436 rval = 0; 437#ifdef KTRACE 438 if (KTRPOINT(td, KTR_CSW)) 439 ktrcsw(1, 0); 440#endif 441 CV_ASSERT(cvp, mp, td); 442 WITNESS_SLEEP(0, &mp->mtx_object); 443 WITNESS_SAVE(&mp->mtx_object, mp); 444 445 if (cold || panicstr) { 446 /* 447 * After a panic, or during autoconfiguration, just give 448 * interrupts a chance, then just return; don't run any other 449 * thread or panic below, in case this is the idle process and 450 * already asleep. 451 */ 452 return 0; 453 } 454 455 mtx_lock_spin(&sched_lock); 456 457 CV_WAIT_VALIDATE(cvp, mp); 458 459 DROP_GIANT(); 460 mtx_unlock(mp); 461 462 cv_waitq_add(cvp, td); 463 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 464 sig = cv_switch_catch(td); 465 466 if (td->td_flags & TDF_TIMEOUT) { 467 td->td_flags &= ~TDF_TIMEOUT; 468 rval = EWOULDBLOCK; 469 } else if (td->td_flags & TDF_TIMOFAIL) 470 td->td_flags &= ~TDF_TIMOFAIL; 471 else if (callout_stop(&td->td_slpcallout) == 0) { 472 /* 473 * Work around race with cv_timedwait_end similar to that 474 * between msleep and endtsleep. 475 * Go back to sleep. 476 */ 477 td->td_flags |= TDF_TIMEOUT; 478 td->td_state = TDS_SLP; 479 td->td_proc->p_stats->p_ru.ru_nivcsw++; 480 mi_switch(); 481 } 482 mtx_unlock_spin(&sched_lock); 483 484 PROC_LOCK(p); 485 if (sig == 0) 486 sig = cursig(td); 487 if (sig != 0) { 488 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 489 rval = EINTR; 490 else 491 rval = ERESTART; 492 } 493 PROC_UNLOCK(p); 494 495 if (p->p_flag & P_WEXIT) 496 rval = EINTR; 497 498#ifdef KTRACE 499 if (KTRPOINT(td, KTR_CSW)) 500 ktrcsw(0, 0); 501#endif 502 PICKUP_GIANT(); 503 mtx_lock(mp); 504 WITNESS_RESTORE(&mp->mtx_object, mp); 505 506 return (rval); 507} 508 509/* 510 * Common code for signal and broadcast. Assumes waitq is not empty. Must be 511 * called with sched_lock held. 512 */ 513static __inline void 514cv_wakeup(struct cv *cvp) 515{ 516 struct thread *td; 517 518 mtx_assert(&sched_lock, MA_OWNED); 519 td = TAILQ_FIRST(&cvp->cv_waitq); 520 KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__)); 521 KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__)); 522 TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); 523 td->td_flags &= ~TDF_CVWAITQ; 524 td->td_wchan = 0; 525 if (td->td_state == TDS_SLP) { 526 /* OPTIMIZED EXPANSION OF setrunnable(td); */ 527 CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)", 528 td, td->td_proc->p_pid, td->td_proc->p_comm); 529 if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */ 530 updatepri(td); 531 td->td_ksegrp->kg_slptime = 0; 532 if (td->td_proc->p_sflag & PS_INMEM) { 533 setrunqueue(td); 534 maybe_resched(td); 535 } else { 536 td->td_state = TDS_SWAPPED; 537 if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) { 538 td->td_proc->p_sflag |= PS_SWAPINREQ; 539 wakeup(&proc0); 540 } 541 } 542 /* END INLINE EXPANSION */ 543 } 544} 545 546/* 547 * Signal a condition variable, wakes up one waiting thread. Will also wakeup 548 * the swapper if the process is not in memory, so that it can bring the 549 * sleeping process in. Note that this may also result in additional threads 550 * being made runnable. Should be called with the same mutex as was passed to 551 * cv_wait held. 552 */ 553void 554cv_signal(struct cv *cvp) 555{ 556 557 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 558 mtx_lock_spin(&sched_lock); 559 if (!TAILQ_EMPTY(&cvp->cv_waitq)) { 560 CV_SIGNAL_VALIDATE(cvp); 561 cv_wakeup(cvp); 562 } 563 mtx_unlock_spin(&sched_lock); 564} 565 566/* 567 * Broadcast a signal to a condition variable. Wakes up all waiting threads. 568 * Should be called with the same mutex as was passed to cv_wait held. 569 */ 570void 571cv_broadcast(struct cv *cvp) 572{ 573 574 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 575 mtx_lock_spin(&sched_lock); 576 CV_SIGNAL_VALIDATE(cvp); 577 while (!TAILQ_EMPTY(&cvp->cv_waitq)) 578 cv_wakeup(cvp); 579 mtx_unlock_spin(&sched_lock); 580} 581 582/* 583 * Remove a thread from the wait queue of its condition variable. This may be 584 * called externally. 585 */ 586void 587cv_waitq_remove(struct thread *td) 588{ 589 struct cv *cvp; 590 591 mtx_lock_spin(&sched_lock); 592 if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) { 593 TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); 594 td->td_flags &= ~TDF_CVWAITQ; 595 td->td_wchan = NULL; 596 } 597 mtx_unlock_spin(&sched_lock); 598} 599 600/* 601 * Timeout function for cv_timedwait. Put the thread on the runqueue and set 602 * its timeout flag. 603 */ 604static void 605cv_timedwait_end(void *arg) 606{ 607 struct thread *td; 608 609 td = arg; 610 CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td, td->td_proc->p_pid, 611 td->td_proc->p_comm); 612 mtx_lock_spin(&sched_lock); 613 if (td->td_flags & TDF_TIMEOUT) { 614 td->td_flags &= ~TDF_TIMEOUT; 615 setrunqueue(td); 616 } else if (td->td_wchan != NULL) { 617 if (td->td_state == TDS_SLP) /* XXXKSE */ 618 setrunnable(td); 619 else 620 cv_waitq_remove(td); 621 td->td_flags |= TDF_TIMEOUT; 622 } else 623 td->td_flags |= TDF_TIMOFAIL; 624 mtx_unlock_spin(&sched_lock); 625} 626 627/* 628 * For now only abort interruptable waits. 629 * The others will have to either complete on their own or have a timeout. 630 */ 631void 632cv_abort(struct thread *td) 633{ 634 635 CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td, 636 td->td_proc->p_pid, 637 td->td_proc->p_comm); 638 mtx_lock_spin(&sched_lock); 639 if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) { 640 if (td->td_wchan != NULL) { 641 if (td->td_state == TDS_SLP) 642 setrunnable(td); 643 else 644 cv_waitq_remove(td); 645 } 646 } 647 mtx_unlock_spin(&sched_lock); 648} 649 650