kern_synch.c revision 1.146
1/* $OpenBSD: kern_synch.c,v 1.146 2018/05/31 02:16:22 guenther Exp $ */ 2/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ 3 4/* 5 * Copyright (c) 1982, 1986, 1990, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/refcnt.h>
#include <sys/atomic.h>
#include <sys/witness.h>
#include <ddb/db_output.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);
int	thrsleep_unlock(void *);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];

/*
 * Initialize the global hash table of sleep queues.  Called once
 * before any tsleep()/wakeup() traffic is possible.
 */
void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}


/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	/* An untimed sleep with no kernel lock held could hang forever. */
	KASSERT(timo || _kernel_lock_held());
#endif

#ifdef DDB
	if (cold == 2)
		db_stack_dump();
#endif
	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
#ifdef MULTIPROCESSOR
		if (_kernel_lock_held()) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	return sleep_finish_all(&sls, 1);
}

/*
 * Run the back half of a sleep: block (if do_sleep), then collect the
 * timeout and signal results.  Signal errors (EINTR/ERESTART) take
 * precedence over a timeout's EWOULDBLOCK.
 */
int
sleep_finish_all(struct sleep_state *sls, int do_sleep)
{
	int error, error1;

	sleep_finish(sls, do_sleep);
	error1 = sleep_finish_timeout(sls);
	error = sleep_finish_signal(sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return error;
}

/*
 * Same as tsleep, but if we have a mutex provided, then once we've
 * entered the sleep queue we drop the mutex.  After sleeping we re-lock.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, spl;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif
	WITNESS_SAVE_DECL(lock_fl);

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		spl = MUTEX_OLDIPL(mtx);
		MUTEX_OLDIPL(mtx) = safepri;
		mtx_leave(mtx);
#ifdef MULTIPROCESSOR
		if (_kernel_lock_held()) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		if ((priority & PNORELOCK) == 0) {
			mtx_enter(mtx);
			MUTEX_OLDIPL(mtx) = spl;
		} else
			splx(spl);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	WITNESS_SAVE(MUTEX_LOCK_OBJECT(mtx), lock_fl);

	/* XXX - We need to make sure that the mutex doesn't
	 * unblock splsched. This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	error = sleep_finish_all(&sls, 1);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl;	/* put the ipl back */
		WITNESS_RESTORE(MUTEX_LOCK_OBJECT(mtx), lock_fl);
	} else
		splx(spl);

	return error;
}

/*
 * Same as tsleep, but if we have a rwlock provided, then once we've
 * entered the sleep queue we drop it.  After sleeping we re-lock.
 */
int
rwsleep(const volatile void *ident, struct rwlock *rwl, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, status;
	WITNESS_SAVE_DECL(lock_fl);

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	rw_assert_anylock(rwl);
	/* Remember read/write mode so we can re-acquire the same way. */
	status = rw_status(rwl);

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	WITNESS_SAVE(&rwl->rwl_lock_obj, lock_fl);

	rw_exit(rwl);

	error = sleep_finish_all(&sls, 1);

	if ((priority & PNORELOCK) == 0) {
		rw_enter(rwl, status);
		WITNESS_RESTORE(&rwl->rwl_lock_obj, lock_fl);
	}

	return error;
}

/*
 * Front half of a sleep: take the scheduler lock and put curproc on
 * the sleep queue for `ident'.  The lock is held (via sls->sls_s) until
 * sleep_finish() releases it.
 */
void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (p->p_flag & P_CANTSLEEP)
		panic("sleep: %s failed insomnia", p->p_p->ps_comm);
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

/*
 * Actually block (when both the setup and the caller still want to
 * sleep), or back out of the sleep queue, then drop the scheduler lock
 * taken in sleep_setup().
 */
void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		/* Caller changed its mind; remove us from the queue. */
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);
}

/* Arm curproc's sleep timeout for `timo' ticks (0 means no timeout). */
void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

/*
 * Collect the timeout result: EWOULDBLOCK if the timeout fired
 * (P_TIMEOUT set by endtsleep()), otherwise cancel the pending timeout
 * and return 0.
 */
int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

/*
 * If PCATCH was requested, mark the sleep interruptible (P_SINTR) and
 * check for already-pending signals or single-threading requests; if
 * either is found, cancel the sleep before it starts.
 */
void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		/* Already woken up; no need to sleep at all. */
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

/*
 * Collect the signal result of an interruptible sleep: EINTR or
 * ERESTART if a signal (or single-thread request) cut the sleep short,
 * otherwise 0.
 */
int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.
 * If proc is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct proc *p)
{
	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make a number of processes sleeping on the specified identifier runnable.
 * A negative `n' (as passed by wakeup()) wakes every sleeper on the ident.
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		/*
		 * If the rwlock passed to rwsleep() is contended, the
		 * CPU will end up calling wakeup() between sleep_setup()
		 * and sleep_finish().
		 */
		if (p == curproc) {
			KASSERT(p->p_stat == SONPROC);
			continue;
		}
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		/* LOOKUP() buckets can collide; match the exact ident. */
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP)
				setrunnable(p);
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	struct proc *q;
	int s;

	SCHED_LOCK(s);
	/*
	 * If one of the threads of a multi-threaded process called
	 * sched_yield(2), drop its priority to ensure its siblings
	 * can make some progress.
	 */
	p->p_priority = p->p_usrpri;
	TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
		p->p_priority = max(p->p_priority, q->p_priority);
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);

	return (0);
}

/*
 * Release a userland spinlock for __thrsleep(2) by copying out an
 * unlocked value.  A NULL lock is a no-op; returns a copyout(9) error
 * otherwise.
 */
int
thrsleep_unlock(void *lock)
{
	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
	_atomic_lock_t *atomiclock = lock;

	if (!lock)
		return 0;

	return copyout(&unlocked, atomiclock, sizeof(unlocked));
}

/* Shared wait channel for __thrsleep(2)/__thrwakeup(2) with ident -1. */
static int globalsleepaddr;

/*
 * Guts of __thrsleep(2): optionally convert the absolute timespec to a
 * tick count, release the userland lock, honor the abort flag, then
 * sleep on this thread's p_thrslpid (or globalsleepaddr for ident -1).
 */
int
thrsleep(struct proc *p, struct sys___thrsleep_args *v)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
	void *lock = SCARG(uap, lock);
	uint64_t to_ticks = 0;
	int abort, error;
	clockid_t clock_id = SCARG(uap, clock_id);

	if (ident == 0)
		return (EINVAL);
	if (tsp != NULL) {
		struct timespec now;

		if ((error = clock_gettime(p, clock_id, &now)))
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, tsp);
#endif

		if (timespeccmp(tsp, &now, <)) {
			/* already passed: still do the unlock */
			if ((error = thrsleep_unlock(lock)))
				return (error);
			return (EWOULDBLOCK);
		}

		/* Convert the remaining time to ticks, rounding up. */
		timespecsub(tsp, &now, tsp);
		to_ticks = (uint64_t)hz * tsp->tv_sec +
		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	p->p_thrslpid = ident;

	if ((error = thrsleep_unlock(lock)))
		goto out;

	if (SCARG(uap, abort) != NULL) {
		if ((error = copyin(SCARG(uap, abort), &abort,
		    sizeof(abort))) != 0)
			goto out;
		if (abort) {
			error = EINTR;
			goto out;
		}
	}

	/* A wakeup may already have cleared p_thrslpid; don't sleep then. */
	if (p->p_thrslpid == 0)
		error = 0;
	else {
		void *sleepaddr = &p->p_thrslpid;
		if (ident == -1)
			sleepaddr = &globalsleepaddr;
		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
		    (int)to_ticks);
	}

out:
	p->p_thrslpid = 0;

	if (error == ERESTART)
		error = ECANCELED;

	return (error);

}

/*
 * __thrsleep(2) entry point: validate and copy in the timespec, then
 * hand off to thrsleep().  The result is returned via *retval.
 */
int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return 0;
		}
		if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) {
			*retval = EINVAL;
			return 0;
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return 0;
}

/*
 * __thrwakeup(2): wake up to `n' sibling threads sleeping on `ident'
 * (all of them for ident -1 via globalsleepaddr).  *retval is 0 on
 * success, ESRCH if no sleeper matched, EINVAL for ident 0.
 */
int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}

/* Initialize a reference count to a single held reference. */
void
refcnt_init(struct refcnt *r)
{
	r->refs = 1;
}

/* Atomically take an additional reference. */
void
refcnt_take(struct refcnt *r)
{
#ifdef DIAGNOSTIC
	u_int refcnt;

	refcnt = atomic_inc_int_nv(&r->refs);
	/* An increment from 0 would mean we took a dead reference. */
	KASSERT(refcnt != 0);
#else
	atomic_inc_int(&r->refs);
#endif
}

/*
 * Atomically release a reference; returns non-zero when the last
 * reference was dropped.  Asserts against underflow (wrap to ~0).
 */
int
refcnt_rele(struct refcnt *r)
{
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	KASSERT(refcnt != ~0);

	return (refcnt == 0);
}

/* Release a reference and wake a waiter if it was the last one. */
void
refcnt_rele_wake(struct refcnt *r)
{
	if (refcnt_rele(r))
		wakeup_one(r);
}

/*
 * Drop the caller's reference and sleep on `r' until all other
 * references have been released.
 */
void
refcnt_finalize(struct refcnt *r, const char *wmesg)
{
	struct sleep_state sls;
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	while (refcnt) {
		/* Re-check refs after queueing so we can't miss a wakeup. */
		sleep_setup(&sls, r, PWAIT, wmesg);
		refcnt = r->refs;
		sleep_finish(&sls, refcnt);
	}
}

/* Initialize a condition to the not-yet-signalled state. */
void
cond_init(struct cond *c)
{
	c->c_wait = 1;
}

/* Signal a condition and wake one waiter. */
void
cond_signal(struct cond *c)
{
	c->c_wait = 0;

	wakeup_one(c);
}

/* Sleep on `c' until cond_signal() clears c_wait. */
void
cond_wait(struct cond *c, const char *wmesg)
{
	struct sleep_state sls;
	int wait;

	wait = c->c_wait;
	while (wait) {
		/* Re-check c_wait after queueing so we can't miss a wakeup. */
		sleep_setup(&sls, c, PWAIT, wmesg);
		wait = c->c_wait;
		sleep_finish(&sls, wait);
	}
}