/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 * $FreeBSD: head/sys/kern/kern_synch.c 57704 2000-03-02 22:03:49Z dufault $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#ifdef SMP
#include <machine/smp.h>
#endif

static void sched_setup __P((void *dummy));
SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)

u_char	curpriority;
int	hogticks;
int	lbolt;
int	sched_quantum;		/* Roundrobin scheduling quantum in ticks. */

static int	curpriority_cmp __P((struct proc *p));
static void	endtsleep __P((void *));
static void	maybe_resched __P((struct proc *chk));
static void	roundrobin __P((void *arg));
static void	schedcpu __P((void *arg));
static void	updatepri __P((struct proc *p));

static int
sysctl_kern_quantum SYSCTL_HANDLER_ARGS
{
	int error, new_val;

	new_val = sched_quantum * tick;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < tick)
		return (EINVAL);
	sched_quantum = new_val / tick;
	hogticks = 2 * sched_quantum;
	return (0);
}

SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW,
	0, sizeof sched_quantum, sysctl_kern_quantum, "I", "");

/*-
 * Compare priorities.  Return:
 *     <0: priority of p < current priority
 *      0: priority of p == current priority
 *     >0: priority of p > current priority
 * The priorities are the normal priorities or the normal realtime priorities
 * if p is on the same scheduler as curproc.  Otherwise the process on the
 * more realtimeish scheduler has lowest priority.  As usual, a higher
 * priority really means a lower priority.
 */
static int
curpriority_cmp(p)
	struct proc *p;
{
	int c_class, p_class;

	c_class = RTP_PRIO_BASE(curproc->p_rtprio.type);
	p_class = RTP_PRIO_BASE(p->p_rtprio.type);
	if (p_class != c_class)
		return (p_class - c_class);
	if (p_class == RTP_PRIO_NORMAL)
		return (((int)p->p_priority - (int)curpriority) / PPQ);
	return ((int)p->p_rtprio.prio - (int)curproc->p_rtprio.prio);
}

/*
 * Arrange to reschedule if necessary, taking the priorities and
 * schedulers into account.
 */
static void
maybe_resched(chk)
	struct proc *chk;
{
	struct proc *p = curproc; /* XXX */

	/*
	 * XXX idle scheduler still broken because a process stays on the
	 * idle scheduler during waits (such as when getting FS locks).  If
	 * a standard process becomes runaway cpu-bound, the system can lock
	 * up due to idle-scheduler processes in wakeup never getting any cpu.
	 */
	if (p == NULL) {
#if 0
		need_resched();
#endif
	} else if (chk == p) {
		/* We may need to yield if our priority has been raised. */
		if (curpriority_cmp(chk) > 0)
			need_resched();
	} else if (curpriority_cmp(chk) < 0)
		need_resched();
}

int
roundrobin_interval(void)
{
	return (sched_quantum);
}

/*
 * Force switch among equal priority processes every 100ms.
 */
/* ARGSUSED */
static void
roundrobin(arg)
	void *arg;
{
#ifndef SMP
	struct proc *p = curproc; /* XXX */
#endif

#ifdef SMP
	need_resched();
	forward_roundrobin();
#else
	if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type))
		need_resched();
#endif

	timeout(roundrobin, NULL, sched_quantum);
}
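
/*
 * Illustrative sketch (userland code, not part of this file; excluded
 * from compilation): reading the kern.quantum knob exported above.  The
 * sysctl handler reports and accepts the quantum in microseconds,
 * converting to and from scheduler ticks and rejecting values smaller
 * than one tick.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int quantum;
	size_t len = sizeof(quantum);

	if (sysctlbyname("kern.quantum", &quantum, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("scheduling quantum: %d microseconds\n", quantum);
	return (0);
}
#endif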

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that schedclock() updates p_estcpu and p_cpticks asynchronously.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *         For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *         For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *         ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *      ln(factor) =~ (-2.30/5*loadav), or
 *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *      power*ln(b/(b+1)) =~ -2.30, or
 *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *      loadav: 1       2       3       4
 *      power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
static int	fscale __unused = FSCALE;
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
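
/*
 * Illustrative sketch (userland code, excluded from compilation): verify
 * numerically that decay = (2*loadav) / (2*loadav + 1), applied
 * (5*loadav) times, reduces an estimate to roughly 10% of its starting
 * value, as the proof above argues.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	double loadav, decay, est;
	int i, power;

	for (loadav = 1.0; loadav <= 4.0; loadav += 1.0) {
		decay = (2.0 * loadav) / (2.0 * loadav + 1.0);
		power = (int)(5.0 * loadav);
		est = 1.0;
		for (i = 0; i < power; i++)
			est *= decay;
		/* est approaches 0.1: 0.132, 0.107, 0.099, 0.095 */
		printf("loadav %.0f: %.4f ** %d = %.4f\n",
		    loadav, decay, power, est);
	}
	return (0);
}
#endif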

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
static void
schedcpu(arg)
	void *arg;
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int realstathz, s;

	realstathz = stathz ? stathz : hz;
	LIST_FOREACH(p, &allproc, p_list) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splhigh();	/* prevent state changes and protect run queue */
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (realstathz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / realstathz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if ((p != curproc) &&
#ifdef SMP
			    p->p_oncpu == 0xff &&	/* idle */
#endif
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	wakeup((caddr_t)&lbolt);
	timeout(schedcpu, (void *)0, hz);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
static void
updatepri(p)
	register struct proc *p;
{
	register unsigned int newcpu = p->p_estcpu;
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * We're only looking at 7 bits of the address; everything is
 * aligned to 4, lots of things are aligned to greater powers
 * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
 */
#define	TABLESIZE	128
static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE];
#define	LOOKUP(x)	(((intptr_t)(x) >> 8) & (TABLESIZE - 1))

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

void
sleepinit(void)
{
	int i;

	sched_quantum = hz/10;
	hogticks = 2 * sched_quantum;
	for (i = 0; i < TABLESIZE; i++)
		TAILQ_INIT(&slpque[i]);
}
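
/*
 * Worked example for the LOOKUP() hash above; the address is an
 * arbitrary illustrative value:
 *
 *	LOOKUP(0x12345678):
 *		0x12345678 >> 8       = 0x00123456
 *		0x123456 & (128 - 1)  = 0x56 = bucket 86
 *
 * so wakeup(ident) only has to scan slpque[86] for processes whose
 * p_wchan exactly matches ident.
 */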

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 */
int
tsleep(ident, priority, wmesg, timo)
	void *ident;
	int priority, timo;
	const char *wmesg;
{
	struct proc *p = curproc;
	int s, sig, catch = priority & PCATCH;
	struct callout_handle thandle;

#ifdef KTRACE
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		splx(safepri);
		splx(s);
		return (0);
	}
	KASSERT(p != NULL, ("tsleep1"));
	KASSERT(ident != NULL && p->p_stat == SRUN, ("tsleep"));
	/*
	 * Process may be sitting on a slpque if asleep() was called, remove
	 * it before re-adding.
	 */
	if (p->p_wchan != NULL)
		unsleep(p);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
	if (timo)
		thandle = timeout(endtsleep, (void *)p, timo);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p))) {
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		if (p->p_wchan == 0) {
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			return (EWOULDBLOCK);
		}
	} else if (timo)
		untimeout(endtsleep, (void *)p, thandle);
	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	return (0);
}
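
/*
 * Illustrative sketch (hypothetical driver code, excluded from
 * compilation): the classic tsleep()/wakeup() handshake.  The xx_softc
 * structure and the xx_lock()/xx_unlock() names are invented for the
 * example.
 */
#if 0
struct xx_softc {
	int	sc_busy;		/* also serves as the wait channel */
};

static int
xx_lock(struct xx_softc *sc)
{
	int error, s;

	s = splhigh();
	while (sc->sc_busy) {
		/*
		 * Block until xx_unlock() calls wakeup() on the channel.
		 * PCATCH lets a signal interrupt the sleep, in which case
		 * tsleep() returns EINTR or ERESTART.
		 */
		error = tsleep(&sc->sc_busy, PRIBIO | PCATCH, "xxlck", 0);
		if (error) {
			splx(s);
			return (error);
		}
	}
	sc->sc_busy = 1;
	splx(s);
	return (0);
}

static void
xx_unlock(struct xx_softc *sc)
{

	sc->sc_busy = 0;
	wakeup(&sc->sc_busy);		/* make every sleeper runnable */
}
#endif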

/*
 * asleep() - async sleep call.  Place process on wait queue and return
 * immediately without blocking.  The process stays runnable until await()
 * is called.  If ident is NULL, remove process from wait queue if it is
 * still on one.
 *
 * Only the most recent sleep condition is effective when making successive
 * calls to asleep() or when calling tsleep().
 *
 * The timeout, if any, is not initiated until await() is called.  The sleep
 * priority, signal, and timeout are specified in the asleep() call but may
 * be overridden in the await() call.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
asleep(void *ident, int priority, const char *wmesg, int timo)
{
	struct proc *p = curproc;
	int s;

	/*
	 * splhigh() while manipulating sleep structures and slpque.
	 *
	 * Remove preexisting wait condition (if any) and place process
	 * on appropriate slpque, but do not put process to sleep.
	 */

	s = splhigh();

	if (p->p_wchan != NULL)
		unsleep(p);

	if (ident) {
		p->p_wchan = ident;
		p->p_wmesg = wmesg;
		p->p_slptime = 0;
		p->p_asleep.as_priority = priority;
		p->p_asleep.as_timo = timo;
		TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
	}

	splx(s);

	return(0);
}

/*
 * await() - wait for async condition to occur.  The process blocks until
 * wakeup() is called on the most recent asleep() address.  If wakeup is
 * called prior to await(), await() winds up being a NOP.
 *
 * If await() is called more than once (without an intervening asleep()
 * call), await() is still effectively a NOP but it calls mi_switch() to
 * give other processes some cpu before returning.  The process is left
 * runnable.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
await(int priority, int timo)
{
	struct proc *p = curproc;
	int s;

	s = splhigh();

	if (p->p_wchan != NULL) {
		struct callout_handle thandle;
		int sig;
		int catch;

		/*
		 * The call to await() can override defaults specified in
		 * the original asleep().
		 */
		if (priority < 0)
			priority = p->p_asleep.as_priority;
		if (timo < 0)
			timo = p->p_asleep.as_timo;

		/*
		 * Install timeout
		 */

		if (timo)
			thandle = timeout(endtsleep, (void *)p, timo);

		sig = 0;
		catch = priority & PCATCH;

		if (catch) {
			p->p_flag |= P_SINTR;
			if ((sig = CURSIG(p))) {
				if (p->p_wchan)
					unsleep(p);
				p->p_stat = SRUN;
				goto resume;
			}
			if (p->p_wchan == NULL) {
				catch = 0;
				goto resume;
			}
		}
		p->p_stat = SSLEEP;
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
resume:
		curpriority = p->p_usrpri;

		splx(s);
		p->p_flag &= ~P_SINTR;
		if (p->p_flag & P_TIMEOUT) {
			p->p_flag &= ~P_TIMEOUT;
			if (sig == 0) {
#ifdef KTRACE
				if (KTRPOINT(p, KTR_CSW))
					ktrcsw(p->p_tracep, 0, 0);
#endif
				return (EWOULDBLOCK);
			}
		} else if (timo)
			untimeout(endtsleep, (void *)p, thandle);
		if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
				return (EINTR);
			return (ERESTART);
		}
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
	} else {
		/*
		 * If as_priority is 0, await() has been called without an
		 * intervening asleep().  We are still effectively a NOP,
		 * but we call mi_switch() for safety.
		 */

		if (p->p_asleep.as_priority == 0) {
			p->p_stats->p_ru.ru_nvcsw++;
			mi_switch();
		}
		splx(s);
	}

	/*
	 * clear p_asleep.as_priority as an indication that await() has been
	 * called.  If await() is called again without an intervening
	 * asleep(), await() is still effectively a NOP but the above
	 * mi_switch() code is triggered as a safety.
	 */
	p->p_asleep.as_priority = 0;

	return (0);
}
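
/*
 * Illustrative fragment (hypothetical caller, excluded from compilation)
 * of the asleep()/await() split documented above: registering the wait
 * channel before testing the condition closes the window in which a
 * wakeup could arrive between the test and the block.  "sc" and
 * "sc_done" reuse the invented xx_softc from the earlier sketch.
 */
#if 0
	int error = 0;

	asleep(&sc->sc_done, PWAIT | PCATCH, "xxdone", hz);
	if (sc->sc_done == 0)
		error = await(-1, -1);	/* inherit priority/timo from asleep() */
	else
		asleep(NULL, 0, NULL, 0);	/* already done; cancel the wait */
#endif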

/*
 * Implement timeout for tsleep or asleep()/await()
 *
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
static void
endtsleep(arg)
	void *arg;
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	s = splhigh();
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	splx(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(p)
	register struct proc *p;
{
	int s;

	s = splhigh();
	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq);
		p->p_wchan = 0;
	}
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];
restart:
	TAILQ_FOREACH(p, qp, p_procq) {
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
				} else {
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
				goto restart;
			}
		}
	}
	splx(s);
}

/*
 * Make a process sleeping on the specified identifier runnable.
 * May wake more than one process if a target process is currently
 * swapped out.
 */
void
wakeup_one(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	qp = &slpque[LOOKUP(ident)];

	TAILQ_FOREACH(p, qp, p_procq) {
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
					break;
				} else {
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
			}
		}
	}
	splx(s);
}
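
/*
 * Illustrative sketch (hypothetical code, excluded from compilation):
 * when at most one waiter can make progress, wakeup_one() avoids the
 * thundering herd of a full wakeup().  "sc_token" is an invented field
 * on the hypothetical xx_softc from the earlier sketch.
 */
#if 0
static void
xx_release(struct xx_softc *sc)
{
	int s;

	s = splhigh();
	sc->sc_token = 1;
	wakeup_one(&sc->sc_token);	/* wake only the first matching sleeper */
	splx(s);
}
#endif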

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	struct timeval new_switchtime;
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	int x;

	/*
	 * XXX this spl is almost unnecessary.  It is partly to allow for
	 * sloppy callers that don't do it (issignal() via CURSIG() is the
	 * main offender).  It is partly to work around a bug in the i386
	 * cpu_switch() (the ipl is not preserved).  We ran for years
	 * without it.  I think there was only an interrupt latency problem.
	 * The main caller, tsleep(), does an splx() a couple of instructions
	 * after calling here.  The buggy caller, issignal(), usually calls
	 * here at spl0() and sometimes returns at splhigh().  The process
	 * then runs for a little too long at splhigh().  The ipl gets fixed
	 * when the process returns to user mode (or earlier).
	 *
	 * It would probably be better to always call here at spl0().  Callers
	 * are prepared to give up control to another process, so they must
	 * be prepared to be interrupted.  The clock stuff here may not
	 * actually need splstatclock().
	 */
	x = splstatclock();

#ifdef SIMPLELOCK_DEBUG
	if (p->p_simple_locks)
		printf("sleep: holding simple lock\n");
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microuptime(&new_switchtime);
	if (timevalcmp(&new_switchtime, &switchtime, <)) {
		printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
		    switchtime.tv_sec, switchtime.tv_usec,
		    new_switchtime.tv_sec, new_switchtime.tv_usec);
		new_switchtime = switchtime;
	} else {
		p->p_runtime += (new_switchtime.tv_usec - switchtime.tv_usec) +
		    (new_switchtime.tv_sec - switchtime.tv_sec) *
		    (int64_t)1000000;
	}

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 */
	if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
	    p->p_runtime > p->p_limit->p_cpulimit) {
		rlim = &p->p_rlimit[RLIMIT_CPU];
		if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max) {
				/* XXX: we should make a private copy */
				rlim->rlim_cur += 5;
			}
		}
	}

	/*
	 * Pick a new current process and record its start time.
	 */
	cnt.v_swtch++;
	switchtime = new_switchtime;
	cpu_switch(p);
	if (switchtime.tv_sec == 0)
		microuptime(&switchtime);
	switchticks = ticks;

	splx(x);
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	default:
		panic("setrunnable");
	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0) {
		p->p_flag |= P_SWAPINREQ;
		wakeup((caddr_t)&proc0);
	}
	else
		maybe_resched(p);
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
		newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT +
		    NICE_WEIGHT * p->p_nice;
		newpriority = min(newpriority, MAXPRI);
		p->p_usrpri = newpriority;
	}
	maybe_resched(p);
}
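
/*
 * Worked example for the formula above, using assumed values for the
 * constants (the real definitions live in <sys/param.h> and
 * <sys/proc.h>).  With PUSER = 50, INVERSE_ESTCPU_WEIGHT = 8 and
 * NICE_WEIGHT = 2:
 *
 *	p_estcpu =  80, p_nice =  0:	50 + 80/8 + 0  =  60
 *	p_estcpu =  80, p_nice = 20:	50 + 80/8 + 40 = 100
 *
 * Accumulated CPU time and a positive nice value both push the user-mode
 * priority toward larger (i.e. worse) values, clamped at MAXPRI.
 */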

/* ARGSUSED */
static void
sched_setup(dummy)
	void *dummy;
{
	/* Kick off timeout driven events by calling first time. */
	roundrobin(NULL);
	schedcpu(NULL);
}

/*
 * We adjust the priority of the current process.  The priority of
 * a process gets worse as it accumulates CPU time.  The cpu usage
 * estimator (p_estcpu) is increased here.  resetpriority() will
 * compute a different priority each time p_estcpu increases by
 * INVERSE_ESTCPU_WEIGHT (until MAXPRI is reached).  The cpu usage
 * estimator ramps up quite quickly when the process is running
 * (linearly), and decays away exponentially, at a rate which is
 * proportionally slower when the system is busy.  The basic principle
 * is that the system will 90% forget that the process used a lot of
 * CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin
 * among other processes.
 */
void
schedclock(p)
	struct proc *p;
{

	p->p_cpticks++;
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
		resetpriority(p);
		if (p->p_priority >= PUSER)
			p->p_priority = p->p_usrpri;
	}
}
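
/*
 * Illustrative sketch (userland code, excluded from compilation) of the
 * ramp/decay behaviour described above: p_estcpu rises by one per
 * statclock tick while the process runs, and schedcpu() decays it once
 * per second by loadfac / (loadfac + FSCALE).  FSCALE and the statclock
 * rate below are assumed values for the demonstration only, and the
 * ESTCPULIM() clamp is omitted.
 */
#if 0
#include <stdio.h>

#define	FSCALE	2048			/* assumed fixed-point scale */
#define	STATHZ	128			/* assumed statclock frequency */

int
main(void)
{
	long loadfac = 2 * (1 * FSCALE);	/* load average of 1.0 */
	long estcpu = 0;
	int sec;

	/* CPU-bound: climbs by STATHZ per second, decaying toward a limit. */
	for (sec = 1; sec <= 5; sec++) {
		estcpu += STATHZ;
		estcpu = (loadfac * estcpu) / (loadfac + FSCALE);
		printf("running, sec %d: estcpu %ld\n", sec, estcpu);
	}
	/* Sleeping: the same decay forgets ~90% in 5 * loadav seconds. */
	for (sec = 1; sec <= 5; sec++) {
		estcpu = (loadfac * estcpu) / (loadfac + FSCALE);
		printf("sleeping, sec %d: estcpu %ld\n", sec, estcpu);
	}
	return (0);
}
#endif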