kern_synch.c revision 44218
1/*- 2 * Copyright (c) 1982, 1986, 1990, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 39 * $Id: kern_synch.c,v 1.72 1999/01/10 01:58:24 eivind Exp $ 40 */ 41 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/proc.h> 47#include <sys/kernel.h> 48#include <sys/signalvar.h> 49#include <sys/resourcevar.h> 50#include <sys/vmmeter.h> 51#include <sys/sysctl.h> 52#include <vm/vm.h> 53#include <vm/vm_extern.h> 54#ifdef KTRACE 55#include <sys/uio.h> 56#include <sys/ktrace.h> 57#endif 58 59#include <machine/cpu.h> 60#ifdef SMP 61#include <machine/smp.h> 62#endif 63#include <machine/limits.h> /* for UCHAR_MAX = typeof(p_priority)_MAX */ 64 65static void rqinit __P((void *)); 66SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL) 67 68u_char curpriority; /* usrpri of curproc */ 69int hogticks; 70int lbolt; /* once a second sleep address */ 71 72static void endtsleep __P((void *)); 73static void roundrobin __P((void *arg)); 74static void schedcpu __P((void *arg)); 75static void updatepri __P((struct proc *p)); 76 77#define MAXIMUM_SCHEDULE_QUANTUM (1000000) /* arbitrary limit */ 78#ifndef DEFAULT_SCHEDULE_QUANTUM 79#define DEFAULT_SCHEDULE_QUANTUM 10 80#endif 81static int quantum = DEFAULT_SCHEDULE_QUANTUM; /* default value */ 82 83static int 84sysctl_kern_quantum SYSCTL_HANDLER_ARGS 85{ 86 int error; 87 int new_val = quantum; 88 89 new_val = quantum; 90 error = sysctl_handle_int(oidp, &new_val, 0, req); 91 if (error == 0) { 92 
if ((new_val > 0) && (new_val < MAXIMUM_SCHEDULE_QUANTUM)) { 93 quantum = new_val; 94 } else { 95 error = EINVAL; 96 } 97 } 98 hogticks = 2 * (hz / quantum); 99 return (error); 100} 101 102SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 103 0, sizeof quantum, sysctl_kern_quantum, "I", ""); 104 105/* maybe_resched: Decide if you need to reschedule or not 106 * taking the priorities and schedulers into account. 107 */ 108static void maybe_resched(struct proc *chk) 109{ 110 struct proc *p = curproc; /* XXX */ 111 112 /* 113 * Compare priorities if the new process is on the same scheduler, 114 * otherwise the one on the more realtimeish scheduler wins. 115 * 116 * XXX idle scheduler still broken because proccess stays on idle 117 * scheduler during waits (such as when getting FS locks). If a 118 * standard process becomes runaway cpu-bound, the system can lockup 119 * due to idle-scheduler processes in wakeup never getting any cpu. 120 */ 121 if (p == 0 || 122 (chk->p_priority < curpriority && RTP_PRIO_BASE(p->p_rtprio.type) == RTP_PRIO_BASE(chk->p_rtprio.type)) || 123 RTP_PRIO_BASE(chk->p_rtprio.type) < RTP_PRIO_BASE(p->p_rtprio.type) 124 ) { 125 need_resched(); 126 } 127} 128 129#define ROUNDROBIN_INTERVAL (hz / quantum) 130int roundrobin_interval(void) 131{ 132 return ROUNDROBIN_INTERVAL; 133} 134 135/* 136 * Force switch among equal priority processes every 100ms. 
 */
/* ARGSUSED */
static void
roundrobin(arg)
	void *arg;
{
#ifndef SMP
	struct proc *p = curproc; /* XXX */
#endif

#ifdef SMP
	/* On SMP, force a reschedule on every cpu. */
	need_resched();
	forward_roundrobin();
#else
	/* Only round-robin processes whose scheduler class asks for it. */
	if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type))
		need_resched();
#endif

	/* Re-arm ourselves; roundrobin() is a self-perpetuating timeout. */
	timeout(roundrobin, NULL, ROUNDROBIN_INTERVAL);
}

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that statclock() updates p_estcpu and p_cpticks asynchronously.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *         For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *         For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	   ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *      ln(factor) =~ (-2.30/5*loadav), or
 *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *      power*ln(b/(b+1)) =~ -2.30, or
 *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
static int	fscale __unused = FSCALE;
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
250 */ 251/* ARGSUSED */ 252static void 253schedcpu(arg) 254 void *arg; 255{ 256 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 257 register struct proc *p; 258 register int realstathz, s; 259 register unsigned int newcpu; 260 261 realstathz = stathz ? stathz : hz; 262 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { 263 /* 264 * Increment time in/out of memory and sleep time 265 * (if sleeping). We ignore overflow; with 16-bit int's 266 * (remember them?) overflow takes 45 days. 267 */ 268 p->p_swtime++; 269 if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 270 p->p_slptime++; 271 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 272 /* 273 * If the process has slept the entire second, 274 * stop recalculating its priority until it wakes up. 275 */ 276 if (p->p_slptime > 1) 277 continue; 278 s = splhigh(); /* prevent state changes and protect run queue */ 279 /* 280 * p_pctcpu is only for ps. 281 */ 282#if (FSHIFT >= CCPU_SHIFT) 283 p->p_pctcpu += (realstathz == 100)? 284 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 285 100 * (((fixpt_t) p->p_cpticks) 286 << (FSHIFT - CCPU_SHIFT)) / realstathz; 287#else 288 p->p_pctcpu += ((FSCALE - ccpu) * 289 (p->p_cpticks * FSCALE / realstathz)) >> FSHIFT; 290#endif 291 p->p_cpticks = 0; 292 newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice; 293 p->p_estcpu = min(newcpu, UCHAR_MAX); 294 resetpriority(p); 295 if (p->p_priority >= PUSER) { 296#define PPQ (128 / NQS) /* priorities per queue */ 297 if ((p != curproc) && 298#ifdef SMP 299 (u_char)p->p_oncpu == 0xff && /* idle */ 300#endif 301 p->p_stat == SRUN && 302 (p->p_flag & P_INMEM) && 303 (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { 304 remrq(p); 305 p->p_priority = p->p_usrpri; 306 setrunqueue(p); 307 } else 308 p->p_priority = p->p_usrpri; 309 } 310 splx(s); 311 } 312 vmmeter(); 313 wakeup((caddr_t)&lbolt); 314 timeout(schedcpu, (void *)0, hz); 315} 316 317/* 318 * Recalculate the priority of a process after it has slept for a while. 
319 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 320 * least six times the loadfactor will decay p_estcpu to zero. 321 */ 322static void 323updatepri(p) 324 register struct proc *p; 325{ 326 register unsigned int newcpu = p->p_estcpu; 327 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 328 329 if (p->p_slptime > 5 * loadfac) 330 p->p_estcpu = 0; 331 else { 332 p->p_slptime--; /* the first time was done in schedcpu */ 333 while (newcpu && --p->p_slptime) 334 newcpu = (int) decay_cpu(loadfac, newcpu); 335 p->p_estcpu = min(newcpu, UCHAR_MAX); 336 } 337 resetpriority(p); 338} 339 340/* 341 * We're only looking at 7 bits of the address; everything is 342 * aligned to 4, lots of things are aligned to greater powers 343 * of 2. Shift right by 8, i.e. drop the bottom 256 worth. 344 */ 345#define TABLESIZE 128 346static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; 347#define LOOKUP(x) (((intptr_t)(x) >> 8) & (TABLESIZE - 1)) 348 349/* 350 * During autoconfiguration or after a panic, a sleep will simply 351 * lower the priority briefly to allow interrupts, then return. 352 * The priority to be used (safepri) is machine-dependent, thus this 353 * value is initialized and maintained in the machine-dependent layers. 354 * This priority will typically be 0, or the lowest priority 355 * that is safe for use on the interrupt stack; it can be made 356 * higher to block network software interrupts after panics. 357 */ 358int safepri; 359 360void 361sleepinit() 362{ 363 int i; 364 365 hogticks = 2 * (hz / quantum); 366 for (i = 0; i < TABLESIZE; i++) 367 TAILQ_INIT(&slpque[i]); 368} 369 370/* 371 * General sleep call. Suspends the current process until a wakeup is 372 * performed on the specified identifier. The process will then be made 373 * runnable with the specified priority. Sleeps at most timo/hz seconds 374 * (0 means no timeout). 
 * If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 */
int
tsleep(ident, priority, wmesg, timo)
	void *ident;
	int priority, timo;
	const char *wmesg;
{
	struct proc *p = curproc;
	int s, sig, catch = priority & PCATCH;
	struct callout_handle thandle;

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return (0);
	}
	KASSERT(p != NULL, ("tsleep1"));
	KASSERT(ident != NULL && p->p_stat == SRUN, ("tsleep"));
	/*
	 * Process may be sitting on a slpque if asleep() was called, remove
	 * it before re-adding.
	 */
	if (p->p_wchan != NULL)
		unsleep(p);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
	if (timo)
		thandle = timeout(endtsleep, (void *)p, timo);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p))) {
			/* Signal already pending: do not sleep at all. */
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			/* Wakeup raced in while we were checking signals. */
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		/* endtsleep() fired: timeout unless a signal also arrived. */
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			return (EWOULDBLOCK);
		}
	} else if (timo)
		untimeout(endtsleep, (void *)p, thandle);
	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		if (p->p_sigacts->ps_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	return (0);
}

/*
 * asleep() - async sleep call.  Place process on wait queue and return
 * immediately without blocking.  The process stays runnable until await()
 * is called.  If ident is NULL, remove process from wait queue if it is still
 * on one.
 *
 * Only the most recent sleep condition is effective when making successive
 * calls to asleep() or when calling tsleep().
 *
 * The timeout, if any, is not initiated until await() is called.  The sleep
 * priority, signal, and timeout is specified in the asleep() call but may be
 * overridden in the await() call.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
asleep(void *ident, int priority, const char *wmesg, int timo)
{
	struct proc *p = curproc;
	int s;

	/*
	 * splhigh() while manipulating sleep structures and slpque.
	 *
	 * Remove preexisting wait condition (if any) and place process
	 * on appropriate slpque, but do not put process to sleep.
	 */

	s = splhigh();

	if (p->p_wchan != NULL)
		unsleep(p);

	if (ident) {
		p->p_wchan = ident;
		p->p_wmesg = wmesg;
		p->p_slptime = 0;
		p->p_asleep.as_priority = priority;
		p->p_asleep.as_timo = timo;
		TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
	}

	splx(s);

	return(0);
}

/*
 * await() - wait for async condition to occur.   The process blocks until
 * wakeup() is called on the most recent asleep() address.  If wakeup is called
 * prior to await(), await() winds up being a NOP.
 *
 * If await() is called more than once (without an intervening asleep() call),
 * await() is still effectively a NOP but it calls mi_switch() to give other
 * processes some cpu before returning.  The process is left runnable.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
await(int priority, int timo)
{
	struct proc *p = curproc;
	int s;

	s = splhigh();

	if (p->p_wchan != NULL) {
		struct callout_handle thandle;
		int sig;
		int catch;

		/*
		 * The call to await() can override defaults specified in
		 * the original asleep().
		 */
		if (priority < 0)
			priority = p->p_asleep.as_priority;
		if (timo < 0)
			timo = p->p_asleep.as_timo;

		/*
		 * Install timeout
		 */

		if (timo)
			thandle = timeout(endtsleep, (void *)p, timo);

		sig = 0;
		catch = priority & PCATCH;

		if (catch) {
			p->p_flag |= P_SINTR;
			if ((sig = CURSIG(p))) {
				/* Pending signal: abort the sleep. */
				if (p->p_wchan)
					unsleep(p);
				p->p_stat = SRUN;
				goto resume;
			}
			if (p->p_wchan == NULL) {
				/* Wakeup already happened; nothing to wait for. */
				catch = 0;
				goto resume;
			}
		}
		p->p_stat = SSLEEP;
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
resume:
		curpriority = p->p_usrpri;

		splx(s);
		p->p_flag &= ~P_SINTR;
		if (p->p_flag & P_TIMEOUT) {
			p->p_flag &= ~P_TIMEOUT;
			if (sig == 0) {
#ifdef KTRACE
				if (KTRPOINT(p, KTR_CSW))
					ktrcsw(p->p_tracep, 0, 0);
#endif
				return (EWOULDBLOCK);
			}
		} else if (timo)
			untimeout(endtsleep, (void *)p, thandle);
		if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			if (p->p_sigacts->ps_sigintr & sigmask(sig))
				return (EINTR);
			return (ERESTART);
		}
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
	} else {
		/*
		 * If as_priority is 0, await() has been called without an
		 * intervening asleep().  We are still effectively a NOP,
		 * but we call mi_switch() for safety.
		 */

		if (p->p_asleep.as_priority == 0) {
			p->p_stats->p_ru.ru_nvcsw++;
			mi_switch();
		}
		splx(s);
	}

	/*
	 * clear p_asleep.as_priority as an indication that await() has been
	 * called.  If await() is called again without an intervening asleep(),
	 * await() is still effectively a NOP but the above mi_switch() code
	 * is triggered as a safety.
	 */
	p->p_asleep.as_priority = 0;

	return (0);
}

/*
 * Implement timeout for tsleep or asleep()/await()
 *
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
static void
endtsleep(arg)
	void *arg;
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(p)
	register struct proc *p;
{
	int s;

	s = splhigh();
	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq);
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) {
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
				} else {
					/* Swapped out: ask the swapper to bring it in. */
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
				/* The queue was modified; rescan from the top. */
				goto restart;
			}
		}
	}
	splx(s);
}

/*
 * Make a process sleeping on the specified identifier runnable.
 * May wake more than one process if a target process is currently
 * swapped out.
 */
void
wakeup_one(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];

	for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) {
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
					/* Woke one in-memory process: done. */
					break;
				} else {
					/*
					 * Swapped out: request swapin and
					 * keep scanning for an in-memory
					 * candidate (hence "may wake more
					 * than one").
					 */
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
			}
		}
	}
	splx(s);
}

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	int x;

	/*
	 * XXX this spl is almost unnecessary.  It is partly to allow for
	 * sloppy callers that don't do it (issignal() via CURSIG() is the
	 * main offender).  It is partly to work around a bug in the i386
	 * cpu_switch() (the ipl is not preserved).  We ran for years
	 * without it.  I think there was only a interrupt latency problem.
	 * The main caller, tsleep(), does an splx() a couple of instructions
	 * after calling here.  The buggy caller, issignal(), usually calls
	 * here at spl0() and sometimes returns at splhigh().  The process
	 * then runs for a little too long at splhigh().  The ipl gets fixed
	 * when the process returns to user mode (or earlier).
	 *
	 * It would probably be better to always call here at spl0(). Callers
	 * are prepared to give up control to another process, so they must
	 * be prepared to be interrupted.  The clock stuff here may not
	 * actually need splstatclock().
	 */
	x = splstatclock();

#ifdef SIMPLELOCK_DEBUG
	if (p->p_simple_locks)
		printf("sleep: holding simple lock\n");
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microuptime(&switchtime);
	p->p_runtime += (switchtime.tv_usec - p->p_switchtime.tv_usec) +
	    (switchtime.tv_sec - p->p_switchtime.tv_sec) * (int64_t)1000000;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 */
	if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
	    p->p_runtime > p->p_limit->p_cpulimit) {
		rlim = &p->p_rlimit[RLIMIT_CPU];
		if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			/* Over soft limit: warn, then raise it a little. */
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max) {
				/* XXX: we should make a private copy */
				rlim->rlim_cur += 5;
			}
		}
	}

	/*
	 * Pick a new current process and record its start time.
	 */
	cnt.v_swtch++;
	cpu_switch(p);
	/* switchtime.tv_sec may be 0 if the new process updated it first. */
	if (switchtime.tv_sec)
		p->p_switchtime = switchtime;
	else
		microuptime(&p->p_switchtime);
	switchticks = ticks;

	splx(x);
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
/* ARGSUSED*/
static void
rqinit(dummy)
	void *dummy;
{
	register int i;

	/* Each queue head points at itself when the queue is empty. */
	for (i = 0; i < NQS; i++) {
		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
		rtqs[i].ph_link = rtqs[i].ph_rlink = (struct proc *)&rtqs[i];
		idqs[i].ph_link = idqs[i].ph_rlink = (struct proc *)&idqs[i];
	}
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
861 */ 862void 863setrunnable(p) 864 register struct proc *p; 865{ 866 register int s; 867 868 s = splhigh(); 869 switch (p->p_stat) { 870 case 0: 871 case SRUN: 872 case SZOMB: 873 default: 874 panic("setrunnable"); 875 case SSTOP: 876 case SSLEEP: 877 unsleep(p); /* e.g. when sending signals */ 878 break; 879 880 case SIDL: 881 break; 882 } 883 p->p_stat = SRUN; 884 if (p->p_flag & P_INMEM) 885 setrunqueue(p); 886 splx(s); 887 if (p->p_slptime > 1) 888 updatepri(p); 889 p->p_slptime = 0; 890 if ((p->p_flag & P_INMEM) == 0) { 891 p->p_flag |= P_SWAPINREQ; 892 wakeup((caddr_t)&proc0); 893 } 894 else 895 maybe_resched(p); 896} 897 898/* 899 * Compute the priority of a process when running in user mode. 900 * Arrange to reschedule if the resulting priority is better 901 * than that of the current process. 902 */ 903void 904resetpriority(p) 905 register struct proc *p; 906{ 907 register unsigned int newpriority; 908 909 if (p->p_rtprio.type == RTP_PRIO_NORMAL) { 910 newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; 911 newpriority = min(newpriority, MAXPRI); 912 p->p_usrpri = newpriority; 913 } 914 maybe_resched(p); 915} 916 917/* ARGSUSED */ 918static void sched_setup __P((void *dummy)); 919static void 920sched_setup(dummy) 921 void *dummy; 922{ 923 /* Kick off timeout driven events by calling first time. */ 924 roundrobin(NULL); 925 schedcpu(NULL); 926} 927SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) 928 929