/*	$OpenBSD: kern_synch.c,v 1.148 2019/04/23 13:35:12 visa Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/refcnt.h>
#include <sys/atomic.h>
#include <sys/witness.h>
#include <ddb/db_output.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);
int	thrsleep_unlock(void *);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}
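
/*
 * Illustrative sketch of the hash (hypothetical addresses, not from
 * this file): any two wait channels whose addresses agree in bits
 * 8-14 land on the same queue and are told apart by comparing
 * p_wchan at wakeup time.
 *
 *	LOOKUP(0xffff800000001234) == (0x1234 >> 8) & 127 == 0x12
 *	LOOKUP(0xffff800000001298) == 0x12 as well; both sleepers
 *	    share slpque[0x12].
 */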

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || _kernel_lock_held());
#endif

#ifdef DDB
	if (cold == 2)
		db_stack_dump();
#endif
	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
#ifdef MULTIPROCESSOR
		if (_kernel_lock_held()) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	return sleep_finish_all(&sls, 1);
}
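
/*
 * A minimal usage sketch (hypothetical driver code; sc_flags, SC_DONE
 * and "hypwait" are made up for illustration): one context sleeps
 * until another announces a state change on an agreed-upon channel,
 * here the address of sc->sc_flags.
 *
 *	while ((sc->sc_flags & SC_DONE) == 0) {
 *		error = tsleep(&sc->sc_flags, PRIBIO | PCATCH,
 *		    "hypwait", 0);
 *		if (error)
 *			return (error);
 *	}
 *
 * and the waker, after setting SC_DONE under the proper lock:
 *
 *	wakeup(&sc->sc_flags);
 *
 * The condition is rechecked in a loop because a sleeper on a
 * colliding channel can be woken spuriously.
 */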
190 */ 191 spl = MUTEX_OLDIPL(mtx); 192 MUTEX_OLDIPL(mtx) = safepri; 193 mtx_leave(mtx); 194#ifdef MULTIPROCESSOR 195 if (_kernel_lock_held()) { 196 hold_count = __mp_release_all(&kernel_lock); 197 __mp_acquire_count(&kernel_lock, hold_count); 198 } 199#endif 200 if ((priority & PNORELOCK) == 0) { 201 mtx_enter(mtx); 202 MUTEX_OLDIPL(mtx) = spl; 203 } else 204 splx(spl); 205 return (0); 206 } 207 208 sleep_setup(&sls, ident, priority, wmesg); 209 sleep_setup_timeout(&sls, timo); 210 sleep_setup_signal(&sls, priority); 211 212 /* XXX - We need to make sure that the mutex doesn't 213 * unblock splsched. This can be made a bit more 214 * correct when the sched_lock is a mutex. 215 */ 216 spl = MUTEX_OLDIPL(mtx); 217 MUTEX_OLDIPL(mtx) = splsched(); 218 mtx_leave(mtx); 219 220 error = sleep_finish_all(&sls, 1); 221 222 if ((priority & PNORELOCK) == 0) { 223 mtx_enter(mtx); 224 MUTEX_OLDIPL(mtx) = spl; /* put the ipl back */ 225 } else 226 splx(spl); 227 228 return error; 229} 230 231/* 232 * Same as tsleep, but if we have a rwlock provided, then once we've 233 * entered the sleep queue we drop the it. After sleeping we re-lock. 234 */ 235int 236rwsleep(const volatile void *ident, struct rwlock *rwl, int priority, 237 const char *wmesg, int timo) 238{ 239 struct sleep_state sls; 240 int error, status; 241 242 KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); 243 rw_assert_anylock(rwl); 244 status = rw_status(rwl); 245 246 sleep_setup(&sls, ident, priority, wmesg); 247 sleep_setup_timeout(&sls, timo); 248 sleep_setup_signal(&sls, priority); 249 250 rw_exit(rwl); 251 252 error = sleep_finish_all(&sls, 1); 253 254 if ((priority & PNORELOCK) == 0) 255 rw_enter(rwl, status); 256 257 return error; 258} 259 260void 261sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio, 262 const char *wmesg) 263{ 264 struct proc *p = curproc; 265 266#ifdef DIAGNOSTIC 267 if (p->p_flag & P_CANTSLEEP) 268 panic("sleep: %s failed insomnia", p->p_p->ps_comm); 269 if (ident == NULL) 270 panic("tsleep: no ident"); 271 if (p->p_stat != SONPROC) 272 panic("tsleep: not SONPROC"); 273#endif 274 275 sls->sls_catch = 0; 276 sls->sls_do_sleep = 1; 277 sls->sls_sig = 1; 278 279 SCHED_LOCK(sls->sls_s); 280 281 p->p_wchan = ident; 282 p->p_wmesg = wmesg; 283 p->p_slptime = 0; 284 p->p_priority = prio & PRIMASK; 285 TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq); 286} 287 288void 289sleep_finish(struct sleep_state *sls, int do_sleep) 290{ 291 struct proc *p = curproc; 292 293 if (sls->sls_do_sleep && do_sleep) { 294 p->p_stat = SSLEEP; 295 p->p_ru.ru_nvcsw++; 296 SCHED_ASSERT_LOCKED(); 297 mi_switch(); 298 } else if (!do_sleep) { 299 unsleep(p); 300 } 301 302#ifdef DIAGNOSTIC 303 if (p->p_stat != SONPROC) 304 panic("sleep_finish !SONPROC"); 305#endif 306 307 p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri; 308 SCHED_UNLOCK(sls->sls_s); 309 310 /* 311 * Even though this belongs to the signal handling part of sleep, 312 * we need to clear it before the ktrace. 
313 */ 314 atomic_clearbits_int(&p->p_flag, P_SINTR); 315} 316 317void 318sleep_setup_timeout(struct sleep_state *sls, int timo) 319{ 320 if (timo) 321 timeout_add(&curproc->p_sleep_to, timo); 322} 323 324int 325sleep_finish_timeout(struct sleep_state *sls) 326{ 327 struct proc *p = curproc; 328 329 if (p->p_flag & P_TIMEOUT) { 330 atomic_clearbits_int(&p->p_flag, P_TIMEOUT); 331 return (EWOULDBLOCK); 332 } else 333 timeout_del(&p->p_sleep_to); 334 335 return (0); 336} 337 338void 339sleep_setup_signal(struct sleep_state *sls, int prio) 340{ 341 struct proc *p = curproc; 342 343 if ((sls->sls_catch = (prio & PCATCH)) == 0) 344 return; 345 346 /* 347 * We put ourselves on the sleep queue and start our timeout 348 * before calling CURSIG, as we could stop there, and a wakeup 349 * or a SIGCONT (or both) could occur while we were stopped. 350 * A SIGCONT would cause us to be marked as SSLEEP 351 * without resuming us, thus we must be ready for sleep 352 * when CURSIG is called. If the wakeup happens while we're 353 * stopped, p->p_wchan will be 0 upon return from CURSIG. 354 */ 355 atomic_setbits_int(&p->p_flag, P_SINTR); 356 if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) { 357 if (p->p_wchan) 358 unsleep(p); 359 p->p_stat = SONPROC; 360 sls->sls_do_sleep = 0; 361 } else if (p->p_wchan == 0) { 362 sls->sls_catch = 0; 363 sls->sls_do_sleep = 0; 364 } 365} 366 367int 368sleep_finish_signal(struct sleep_state *sls) 369{ 370 struct proc *p = curproc; 371 int error; 372 373 if (sls->sls_catch != 0) { 374 if ((error = single_thread_check(p, 1))) 375 return (error); 376 if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) { 377 if (p->p_p->ps_sigacts->ps_sigintr & 378 sigmask(sls->sls_sig)) 379 return (EINTR); 380 return (ERESTART); 381 } 382 } 383 384 return (0); 385} 386 387/* 388 * Implement timeout for tsleep. 389 * If process hasn't been awakened (wchan non-zero), 390 * set timeout flag and undo the sleep. If proc 391 * is stopped, just unsleep so it will remain stopped. 392 */ 393void 394endtsleep(void *arg) 395{ 396 struct proc *p = arg; 397 int s; 398 399 SCHED_LOCK(s); 400 if (p->p_wchan) { 401 if (p->p_stat == SSLEEP) 402 setrunnable(p); 403 else 404 unsleep(p); 405 atomic_setbits_int(&p->p_flag, P_TIMEOUT); 406 } 407 SCHED_UNLOCK(s); 408} 409 410/* 411 * Remove a process from its wait queue 412 */ 413void 414unsleep(struct proc *p) 415{ 416 SCHED_ASSERT_LOCKED(); 417 418 if (p->p_wchan) { 419 TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq); 420 p->p_wchan = NULL; 421 } 422} 423 424/* 425 * Make a number of processes sleeping on the specified identifier runnable. 426 */ 427void 428wakeup_n(const volatile void *ident, int n) 429{ 430 struct slpque *qp; 431 struct proc *p; 432 struct proc *pnext; 433 int s; 434 435 SCHED_LOCK(s); 436 qp = &slpque[LOOKUP(ident)]; 437 for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) { 438 pnext = TAILQ_NEXT(p, p_runq); 439#ifdef DIAGNOSTIC 440 /* 441 * If the rwlock passed to rwsleep() is contended, the 442 * CPU will end up calling wakeup() between sleep_setup() 443 * and sleep_finish(). 
444 */ 445 if (p == curproc) { 446 KASSERT(p->p_stat == SONPROC); 447 continue; 448 } 449 if (p->p_stat != SSLEEP && p->p_stat != SSTOP) 450 panic("wakeup: p_stat is %d", (int)p->p_stat); 451#endif 452 if (p->p_wchan == ident) { 453 --n; 454 p->p_wchan = 0; 455 TAILQ_REMOVE(qp, p, p_runq); 456 if (p->p_stat == SSLEEP) 457 setrunnable(p); 458 } 459 } 460 SCHED_UNLOCK(s); 461} 462 463/* 464 * Make all processes sleeping on the specified identifier runnable. 465 */ 466void 467wakeup(const volatile void *chan) 468{ 469 wakeup_n(chan, -1); 470} 471 472int 473sys_sched_yield(struct proc *p, void *v, register_t *retval) 474{ 475 struct proc *q; 476 int s; 477 478 SCHED_LOCK(s); 479 /* 480 * If one of the threads of a multi-threaded process called 481 * sched_yield(2), drop its priority to ensure its siblings 482 * can make some progress. 483 */ 484 p->p_priority = p->p_usrpri; 485 TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) 486 p->p_priority = max(p->p_priority, q->p_priority); 487 p->p_stat = SRUN; 488 setrunqueue(p); 489 p->p_ru.ru_nvcsw++; 490 mi_switch(); 491 SCHED_UNLOCK(s); 492 493 return (0); 494} 495 496int 497thrsleep_unlock(void *lock) 498{ 499 static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED; 500 _atomic_lock_t *atomiclock = lock; 501 502 if (!lock) 503 return 0; 504 505 return copyout(&unlocked, atomiclock, sizeof(unlocked)); 506} 507 508static int globalsleepaddr; 509 510int 511thrsleep(struct proc *p, struct sys___thrsleep_args *v) 512{ 513 struct sys___thrsleep_args /* { 514 syscallarg(const volatile void *) ident; 515 syscallarg(clockid_t) clock_id; 516 syscallarg(const struct timespec *) tp; 517 syscallarg(void *) lock; 518 syscallarg(const int *) abort; 519 } */ *uap = v; 520 long ident = (long)SCARG(uap, ident); 521 struct timespec *tsp = (struct timespec *)SCARG(uap, tp); 522 void *lock = SCARG(uap, lock); 523 uint64_t to_ticks = 0; 524 int abort, error; 525 clockid_t clock_id = SCARG(uap, clock_id); 526 527 if (ident == 0) 528 return (EINVAL); 529 if (tsp != NULL) { 530 struct timespec now; 531 532 if ((error = clock_gettime(p, clock_id, &now))) 533 return (error); 534#ifdef KTRACE 535 if (KTRPOINT(p, KTR_STRUCT)) 536 ktrabstimespec(p, tsp); 537#endif 538 539 if (timespeccmp(tsp, &now, <)) { 540 /* already passed: still do the unlock */ 541 if ((error = thrsleep_unlock(lock))) 542 return (error); 543 return (EWOULDBLOCK); 544 } 545 546 timespecsub(tsp, &now, tsp); 547 to_ticks = (uint64_t)hz * tsp->tv_sec + 548 (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1; 549 if (to_ticks > INT_MAX) 550 to_ticks = INT_MAX; 551 } 552 553 p->p_thrslpid = ident; 554 555 if ((error = thrsleep_unlock(lock))) 556 goto out; 557 558 if (SCARG(uap, abort) != NULL) { 559 if ((error = copyin(SCARG(uap, abort), &abort, 560 sizeof(abort))) != 0) 561 goto out; 562 if (abort) { 563 error = EINTR; 564 goto out; 565 } 566 } 567 568 if (p->p_thrslpid == 0) 569 error = 0; 570 else { 571 void *sleepaddr = &p->p_thrslpid; 572 if (ident == -1) 573 sleepaddr = &globalsleepaddr; 574 error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep", 575 (int)to_ticks); 576 } 577 578out: 579 p->p_thrslpid = 0; 580 581 if (error == ERESTART) 582 error = ECANCELED; 583 584 return (error); 585 586} 587 588int 589sys___thrsleep(struct proc *p, void *v, register_t *retval) 590{ 591 struct sys___thrsleep_args /* { 592 syscallarg(const volatile void *) ident; 593 syscallarg(clockid_t) clock_id; 594 syscallarg(struct timespec *) tp; 595 syscallarg(void *) lock; 596 syscallarg(const int *) abort; 597 } */ *uap = v; 598 

int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return 0;
		}
		if (!timespecisvalid(&ts)) {
			*retval = EINVAL;
			return 0;
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return 0;
}

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}

void
refcnt_init(struct refcnt *r)
{
	r->refs = 1;
}

void
refcnt_take(struct refcnt *r)
{
#ifdef DIAGNOSTIC
	u_int refcnt;

	refcnt = atomic_inc_int_nv(&r->refs);
	KASSERT(refcnt != 0);
#else
	atomic_inc_int(&r->refs);
#endif
}

int
refcnt_rele(struct refcnt *r)
{
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	KASSERT(refcnt != ~0);

	return (refcnt == 0);
}

void
refcnt_rele_wake(struct refcnt *r)
{
	if (refcnt_rele(r))
		wakeup_one(r);
}

void
refcnt_finalize(struct refcnt *r, const char *wmesg)
{
	struct sleep_state sls;
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	while (refcnt) {
		sleep_setup(&sls, r, PWAIT, wmesg);
		refcnt = r->refs;
		sleep_finish(&sls, refcnt);
	}
}

void
cond_init(struct cond *c)
{
	c->c_wait = 1;
}

void
cond_signal(struct cond *c)
{
	c->c_wait = 0;

	wakeup_one(c);
}

void
cond_wait(struct cond *c, const char *wmesg)
{
	struct sleep_state sls;
	int wait;

	wait = c->c_wait;
	while (wait) {
		sleep_setup(&sls, c, PWAIT, wmesg);
		wait = c->c_wait;
		sleep_finish(&sls, wait);
	}
}
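
/*
 * A small refcnt sketch (hypothetical object and wmesg): the initial
 * reference belongs to the constructor, and refcnt_finalize() both
 * drops it and sleeps until every refcnt_rele_wake() from the other
 * contexts has run.
 *
 *	struct whatsit {
 *		struct refcnt	w_refs;
 *	};
 *
 *	refcnt_init(&w->w_refs);		constructor, refs = 1
 *	refcnt_take(&w->w_refs);		per additional user
 *	refcnt_rele_wake(&w->w_refs);		per finished user
 *	refcnt_finalize(&w->w_refs, "whfinal");	then free the object
 *
 * cond_wait()/cond_signal() use the same sleep_setup()/sleep_finish()
 * pattern for a one-shot, single-waiter event.
 */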