/*	$OpenBSD: kern_synch.c,v 1.129 2016/03/09 13:38:50 mpi Exp $	*/
/*	$NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.6 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/timeout.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/pool.h>
#include <sys/refcnt.h>
#include <sys/atomic.h>
#include <ddb/db_output.h>

#include <machine/spinlock.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

int	thrsleep(struct proc *, struct sys___thrsleep_args *);
int	thrsleep_unlock(void *, int);

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define TABLESIZE	128
#define LOOKUP(x)	(((long)(x) >> 8) & (TABLESIZE - 1))
TAILQ_HEAD(slpque,proc) slpque[TABLESIZE];
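
/*
 * Worked example (illustrative, not in the original source): with
 * TABLESIZE 128, LOOKUP() keeps bits 8..14 of the identifier, so an
 * ident of 0x80123456 hashes to (0x80123456 >> 8) & 127 = 0x34 = 52
 * and the sleeping proc is queued on slpque[52].  Unrelated channels
 * may share a bucket; wakeup_n() below tolerates that by re-checking
 * p_wchan before waking each proc.
 */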

void
sleep_queue_init(void)
{
	int i;

	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}


/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
extern int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 */
int
tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);

#ifdef MULTIPROCESSOR
	KASSERT(timo || __mp_lock_held(&kernel_lock));
#endif

#ifdef DDB
	if (cold == 2)
		db_stack_dump();
#endif
	if (cold || panicstr) {
		int s;
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		splx(s);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
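
#if 0
/*
 * Usage sketch (hypothetical, not part of this file): wait up to one
 * second for a completion flag, letting signals interrupt the sleep.
 * example_done, example_biowait and the "exbio" wmesg are illustrative
 * names.  The condition is re-tested in a loop because a shared hash
 * bucket or a stray wakeup can end the sleep before the flag is set.
 */
int example_done;

int
example_biowait(void)
{
	int error;

	while (!example_done) {
		error = tsleep(&example_done, PRIBIO | PCATCH, "exbio", hz);
		if (error)	/* EWOULDBLOCK, EINTR or ERESTART */
			return (error);
	}
	return (0);
}
#endif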

/*
 * Same as tsleep, but if we have a mutex provided, then once we've
 * entered the sleep queue we drop the mutex.  After sleeping we re-lock.
 */
int
msleep(const volatile void *ident, struct mutex *mtx, int priority,
    const char *wmesg, int timo)
{
	struct sleep_state sls;
	int error, error1, spl;
#ifdef MULTIPROCESSOR
	int hold_count;
#endif

	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
	KASSERT(mtx != NULL);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		spl = MUTEX_OLDIPL(mtx);
		MUTEX_OLDIPL(mtx) = safepri;
		mtx_leave(mtx);
#ifdef MULTIPROCESSOR
		if (__mp_lock_held(&kernel_lock)) {
			hold_count = __mp_release_all(&kernel_lock);
			__mp_acquire_count(&kernel_lock, hold_count);
		}
#endif
		if ((priority & PNORELOCK) == 0) {
			mtx_enter(mtx);
			MUTEX_OLDIPL(mtx) = spl;
		} else
			splx(spl);
		return (0);
	}

	sleep_setup(&sls, ident, priority, wmesg);
	sleep_setup_timeout(&sls, timo);
	sleep_setup_signal(&sls, priority);

	/*
	 * XXX - We need to make sure that the mutex doesn't
	 * unblock splsched.  This can be made a bit more
	 * correct when the sched_lock is a mutex.
	 */
	spl = MUTEX_OLDIPL(mtx);
	MUTEX_OLDIPL(mtx) = splsched();
	mtx_leave(mtx);

	sleep_finish(&sls, 1);
	error1 = sleep_finish_timeout(&sls);
	error = sleep_finish_signal(&sls);

	if ((priority & PNORELOCK) == 0) {
		mtx_enter(mtx);
		MUTEX_OLDIPL(mtx) = spl;	/* put the ipl back */
	} else
		splx(spl);

	/* Signal errors are higher priority than timeouts. */
	if (error == 0 && error1 != 0)
		error = error1;

	return (error);
}
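
#if 0
/*
 * Sketch (hypothetical, not part of this file): the canonical msleep()
 * pattern.  The mutex protecting the condition is only dropped after
 * sleep_setup() has put the proc on the sleep queue, so a wakeup
 * issued between the test and the sleep cannot be lost.  struct
 * example_softc and its members are illustrative names.
 */
struct example_softc {
	struct mutex	sc_mtx;
	int		sc_ready;
};

int
example_wait(struct example_softc *sc)
{
	int error = 0;

	mtx_enter(&sc->sc_mtx);
	while (sc->sc_ready == 0 && error == 0)
		error = msleep(&sc->sc_ready, &sc->sc_mtx, PWAIT | PCATCH,
		    "exwait", 0);
	mtx_leave(&sc->sc_mtx);
	return (error);
}
#endif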

void
sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
    const char *wmesg)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (p->p_flag & P_CANTSLEEP)
		panic("sleep: %s failed insomnia", p->p_comm);
	if (ident == NULL)
		panic("tsleep: no ident");
	if (p->p_stat != SONPROC)
		panic("tsleep: not SONPROC");
#endif

	sls->sls_catch = 0;
	sls->sls_do_sleep = 1;
	sls->sls_sig = 1;

	SCHED_LOCK(sls->sls_s);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = prio & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
}

void
sleep_finish(struct sleep_state *sls, int do_sleep)
{
	struct proc *p = curproc;

	if (sls->sls_do_sleep && do_sleep) {
		p->p_stat = SSLEEP;
		p->p_ru.ru_nvcsw++;
		SCHED_ASSERT_LOCKED();
		mi_switch();
	} else if (!do_sleep) {
		unsleep(p);
	}

#ifdef DIAGNOSTIC
	if (p->p_stat != SONPROC)
		panic("sleep_finish !SONPROC");
#endif

	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
	SCHED_UNLOCK(sls->sls_s);

	/*
	 * Even though this belongs to the signal handling part of sleep,
	 * we need to clear it before the ktrace.
	 */
	atomic_clearbits_int(&p->p_flag, P_SINTR);
}

void
sleep_setup_timeout(struct sleep_state *sls, int timo)
{
	if (timo)
		timeout_add(&curproc->p_sleep_to, timo);
}

int
sleep_finish_timeout(struct sleep_state *sls)
{
	struct proc *p = curproc;

	if (p->p_flag & P_TIMEOUT) {
		atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
		return (EWOULDBLOCK);
	} else
		timeout_del(&p->p_sleep_to);

	return (0);
}

void
sleep_setup_signal(struct sleep_state *sls, int prio)
{
	struct proc *p = curproc;

	if ((sls->sls_catch = (prio & PCATCH)) == 0)
		return;

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	atomic_setbits_int(&p->p_flag, P_SINTR);
	if (p->p_p->ps_single != NULL || (sls->sls_sig = CURSIG(p)) != 0) {
		if (p->p_wchan)
			unsleep(p);
		p->p_stat = SONPROC;
		sls->sls_do_sleep = 0;
	} else if (p->p_wchan == 0) {
		sls->sls_catch = 0;
		sls->sls_do_sleep = 0;
	}
}

int
sleep_finish_signal(struct sleep_state *sls)
{
	struct proc *p = curproc;
	int error;

	if (sls->sls_catch != 0) {
		if ((error = single_thread_check(p, 1)))
			return (error);
		if (sls->sls_sig != 0 || (sls->sls_sig = CURSIG(p)) != 0) {
			if (p->p_p->ps_sigacts->ps_sigintr &
			    sigmask(sls->sls_sig))
				return (EINTR);
			return (ERESTART);
		}
	}

	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p = arg;
	int s;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		atomic_setbits_int(&p->p_flag, P_TIMEOUT);
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue.
 */
void
unsleep(struct proc *p)
{
	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
		p->p_wchan = NULL;
	}
}

/*
 * Make a number of processes sleeping on the specified identifier runnable.
 */
void
wakeup_n(const volatile void *ident, int n)
{
	struct slpque *qp;
	struct proc *p;
	struct proc *pnext;
	int s;

	SCHED_LOCK(s);
	qp = &slpque[LOOKUP(ident)];
	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
		pnext = TAILQ_NEXT(p, p_runq);
#ifdef DIAGNOSTIC
		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup: p_stat is %d", (int)p->p_stat);
#endif
		if (p->p_wchan == ident) {
			--n;
			p->p_wchan = 0;
			TAILQ_REMOVE(qp, p, p_runq);
			if (p->p_stat == SSLEEP)
				setrunnable(p);
		}
	}
	SCHED_UNLOCK(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(const volatile void *chan)
{
	wakeup_n(chan, -1);
}
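
#if 0
/*
 * Sketch (hypothetical): the producer side of the example_biowait()
 * sketch above.  Setting the flag before calling wakeup() matters:
 * wakeup() only makes procs runnable that are already on the sleep
 * queue, so the waiter must see the flag when it re-tests its
 * condition after waking.
 */
void
example_biodone(void)
{
	example_done = 1;
	wakeup(&example_done);
}
#endif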

int
sys_sched_yield(struct proc *p, void *v, register_t *retval)
{
	struct proc *q;
	int s;

	SCHED_LOCK(s);
	/*
	 * If one of the threads of a multi-threaded process called
	 * sched_yield(2), drop its priority to ensure its siblings
	 * can make some progress.
	 */
	p->p_priority = p->p_usrpri;
	TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
		p->p_priority = max(p->p_priority, q->p_priority);
	p->p_stat = SRUN;
	setrunqueue(p);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);

	return (0);
}

int
thrsleep_unlock(void *lock, int lockflags)
{
	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
	_atomic_lock_t *atomiclock = lock;
	uint32_t *ticket = lock;
	uint32_t ticketvalue;
	int error;

	if (!lock)
		return (0);

	if (lockflags) {
		if ((error = copyin(ticket, &ticketvalue, sizeof(ticketvalue))))
			return (error);
		ticketvalue++;
		error = copyout(&ticketvalue, ticket, sizeof(ticketvalue));
	} else {
		error = copyout(&unlocked, atomiclock, sizeof(unlocked));
	}
	return (error);
}

static int globalsleepaddr;

int
thrsleep(struct proc *p, struct sys___thrsleep_args *v)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(const struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
	void *lock = SCARG(uap, lock);
	long long to_ticks = 0;
	int abort, error;
	clockid_t clock_id = SCARG(uap, clock_id) & 0x7;
	int lockflags = SCARG(uap, clock_id) & 0x8;

	if (ident == 0)
		return (EINVAL);
	if (tsp != NULL) {
		struct timespec now;

		if ((error = clock_gettime(p, clock_id, &now)))
			return (error);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrabstimespec(p, tsp);
#endif

		if (timespeccmp(tsp, &now, <)) {
			/* already passed: still do the unlock */
			if ((error = thrsleep_unlock(lock, lockflags)))
				return (error);
			return (EWOULDBLOCK);
		}

		timespecsub(tsp, &now, tsp);
		to_ticks = (long long)hz * tsp->tv_sec +
		    (tsp->tv_nsec + tick * 1000 - 1) / (tick * 1000) + 1;
		if (to_ticks > INT_MAX)
			to_ticks = INT_MAX;
	}

	p->p_thrslpid = ident;

	if ((error = thrsleep_unlock(lock, lockflags)))
		goto out;

	if (SCARG(uap, abort) != NULL) {
		if ((error = copyin(SCARG(uap, abort), &abort,
		    sizeof(abort))) != 0)
			goto out;
		if (abort) {
			error = EINTR;
			goto out;
		}
	}

	if (p->p_thrslpid == 0)
		error = 0;
	else {
		void *sleepaddr = &p->p_thrslpid;
		if (ident == -1)
			sleepaddr = &globalsleepaddr;
		error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
		    (int)to_ticks);
	}

out:
	p->p_thrslpid = 0;

	if (error == ERESTART)
		error = EINTR;

	return (error);
}

int
sys___thrsleep(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrsleep_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(clockid_t) clock_id;
		syscallarg(struct timespec *) tp;
		syscallarg(void *) lock;
		syscallarg(const int *) abort;
	} */ *uap = v;
	struct timespec ts;
	int error;

	if (SCARG(uap, tp) != NULL) {
		if ((error = copyin(SCARG(uap, tp), &ts, sizeof(ts)))) {
			*retval = error;
			return (0);
		}
		SCARG(uap, tp) = &ts;
	}

	*retval = thrsleep(p, uap);
	return (0);
}
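
/*
 * Worked example (illustrative, not in the original source) of the
 * to_ticks conversion in thrsleep() above: with hz = 100, tick is
 * 10000 microseconds, so tick * 1000 = 10000000 ns per tick.  A
 * remaining timeout of 1.5 ms then converts as
 *
 *	to_ticks = 100 * 0 + (1500000 + 9999999) / 10000000 + 1 = 2
 *
 * The division rounds the nanosecond part up to a whole tick and one
 * extra tick is added, so the sleep never expires before the deadline.
 */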

int
sys___thrwakeup(struct proc *p, void *v, register_t *retval)
{
	struct sys___thrwakeup_args /* {
		syscallarg(const volatile void *) ident;
		syscallarg(int) n;
	} */ *uap = v;
	long ident = (long)SCARG(uap, ident);
	int n = SCARG(uap, n);
	struct proc *q;
	int found = 0;

	if (ident == 0)
		*retval = EINVAL;
	else if (ident == -1)
		wakeup(&globalsleepaddr);
	else {
		TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
			if (q->p_thrslpid == ident) {
				wakeup_one(&q->p_thrslpid);
				q->p_thrslpid = 0;
				if (++found == n)
					break;
			}
		}
		*retval = found ? 0 : ESRCH;
	}

	return (0);
}

void
refcnt_init(struct refcnt *r)
{
	r->refs = 1;
}

void
refcnt_take(struct refcnt *r)
{
#ifdef DIAGNOSTIC
	u_int refcnt;

	refcnt = atomic_inc_int_nv(&r->refs);
	KASSERT(refcnt != 0);
#else
	atomic_inc_int(&r->refs);
#endif
}

int
refcnt_rele(struct refcnt *r)
{
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	KASSERT(refcnt != ~0);

	return (refcnt == 0);
}

void
refcnt_rele_wake(struct refcnt *r)
{
	if (refcnt_rele(r))
		wakeup_one(r);
}

void
refcnt_finalize(struct refcnt *r, const char *wmesg)
{
	struct sleep_state sls;
	u_int refcnt;

	refcnt = atomic_dec_int_nv(&r->refs);
	while (refcnt) {
		sleep_setup(&sls, r, PWAIT, wmesg);
		refcnt = r->refs;
		sleep_finish(&sls, refcnt);
	}
}
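
#if 0
/*
 * Sketch (hypothetical, not part of this file): the intended refcnt
 * life cycle.  refcnt_init() at allocation accounts for the creating
 * reference; users bracket access with refcnt_take()/refcnt_rele_wake();
 * the destructor calls refcnt_finalize(), which drops the initial
 * reference and sleeps until every other holder is gone.  struct
 * example_obj and the "exdtor" wmesg are illustrative names.
 */
struct example_obj {
	struct refcnt	ex_refs;	/* set up with refcnt_init() */
};

void
example_use(struct example_obj *ex)
{
	refcnt_take(&ex->ex_refs);
	/* ... ex is guaranteed to stay alive here ... */
	refcnt_rele_wake(&ex->ex_refs);
}

void
example_destroy(struct example_obj *ex)
{
	refcnt_finalize(&ex->ex_refs, "exdtor");
	/* no other thread holds a reference now; safe to free ex */
}
#endif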