/* kern_synch.c revision 1.46 */
1/* $OpenBSD: kern_synch.c,v 1.46 2002/10/15 20:17:22 art Exp $ */ 2/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ 3 4/*- 5 * Copyright (c) 1982, 1986, 1990, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 42 */ 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/proc.h> 47#include <sys/kernel.h> 48#include <sys/buf.h> 49#include <sys/signalvar.h> 50#include <sys/resourcevar.h> 51#include <uvm/uvm_extern.h> 52#include <sys/sched.h> 53#include <sys/timeout.h> 54 55#ifdef KTRACE 56#include <sys/ktrace.h> 57#endif 58 59#include <machine/cpu.h> 60 61u_char curpriority; /* usrpri of curproc */ 62int lbolt; /* once a second sleep address */ 63 64int whichqs; /* Bit mask summary of non-empty Q's. */ 65struct prochd qs[NQS]; 66 67void scheduler_start(void); 68 69void roundrobin(void *); 70void schedcpu(void *); 71void updatepri(struct proc *); 72void endtsleep(void *); 73 74void 75scheduler_start() 76{ 77 static struct timeout roundrobin_to; 78 static struct timeout schedcpu_to; 79 80 /* 81 * We avoid polluting the global namespace by keeping the scheduler 82 * timeouts static in this function. 83 * We setup the timeouts here and kick roundrobin and schedcpu once to 84 * make them do their job. 85 */ 86 87 timeout_set(&roundrobin_to, roundrobin, &roundrobin_to); 88 timeout_set(&schedcpu_to, schedcpu, &schedcpu_to); 89 90 roundrobin(&roundrobin_to); 91 schedcpu(&schedcpu_to); 92} 93 94/* 95 * Force switch among equal priority processes every 100ms. 
96 */ 97/* ARGSUSED */ 98void 99roundrobin(arg) 100 void *arg; 101{ 102 struct timeout *to = (struct timeout *)arg; 103 struct proc *p = curproc; 104 int s; 105 106 if (p != NULL) { 107 s = splstatclock(); 108 if (p->p_schedflags & PSCHED_SEENRR) { 109 /* 110 * The process has already been through a roundrobin 111 * without switching and may be hogging the CPU. 112 * Indicate that the process should yield. 113 */ 114 p->p_schedflags |= PSCHED_SHOULDYIELD; 115 } else { 116 p->p_schedflags |= PSCHED_SEENRR; 117 } 118 splx(s); 119 } 120 need_resched(); 121 timeout_add(to, hz / 10); 122} 123 124/* 125 * Constants for digital decay and forget: 126 * 90% of (p_estcpu) usage in 5 * loadav time 127 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 128 * Note that, as ps(1) mentions, this can let percentages 129 * total over 100% (I've seen 137.9% for 3 processes). 130 * 131 * Note that hardclock updates p_estcpu and p_cpticks independently. 132 * 133 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. 134 * That is, the system wants to compute a value of decay such 135 * that the following for loop: 136 * for (i = 0; i < (5 * loadavg); i++) 137 * p_estcpu *= decay; 138 * will compute 139 * p_estcpu *= 0.1; 140 * for all values of loadavg: 141 * 142 * Mathematically this loop can be expressed by saying: 143 * decay ** (5 * loadavg) ~= .1 144 * 145 * The system computes decay as: 146 * decay = (2 * loadavg) / (2 * loadavg + 1) 147 * 148 * We wish to prove that the system's computation of decay 149 * will always fulfill the equation: 150 * decay ** (5 * loadavg) ~= .1 151 * 152 * If we compute b as: 153 * b = 2 * loadavg 154 * then 155 * decay = b / (b + 1) 156 * 157 * We now need to prove two things: 158 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 159 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 160 * 161 * Facts: 162 * For x close to zero, exp(x) =~ 1 + x, since 163 * exp(x) = 0! + x**1/1! + x**2/2! + ... . 
164 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 165 * For x close to zero, ln(1+x) =~ x, since 166 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 167 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 168 * ln(.1) =~ -2.30 169 * 170 * Proof of (1): 171 * Solve (factor)**(power) =~ .1 given power (5*loadav): 172 * solving for factor, 173 * ln(factor) =~ (-2.30/5*loadav), or 174 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = 175 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 176 * 177 * Proof of (2): 178 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 179 * solving for power, 180 * power*ln(b/(b+1)) =~ -2.30, or 181 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 182 * 183 * Actual power values for the implemented algorithm are as follows: 184 * loadav: 1 2 3 4 185 * power: 5.68 10.32 14.94 19.55 186 */ 187 188/* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 189#define loadfactor(loadav) (2 * (loadav)) 190#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 191 192/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 193fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 194 195/* 196 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 197 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 198 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 199 * 200 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 201 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 202 * 203 * If you dont want to bother with the faster/more-accurate formula, you 204 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 205 * (more general) method of calculating the %age of CPU used by a process. 206 */ 207#define CCPU_SHIFT 11 208 209/* 210 * Recompute process priorities, every hz ticks. 
211 */ 212/* ARGSUSED */ 213void 214schedcpu(arg) 215 void *arg; 216{ 217 struct timeout *to = (struct timeout *)arg; 218 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 219 struct proc *p; 220 int s; 221 unsigned int newcpu; 222 int phz; 223 224 /* 225 * If we have a statistics clock, use that to calculate CPU 226 * time, otherwise revert to using the profiling clock (which, 227 * in turn, defaults to hz if there is no separate profiling 228 * clock available) 229 */ 230 phz = stathz ? stathz : profhz; 231 KASSERT(phz); 232 233 for (p = LIST_FIRST(&allproc); p != 0; p = LIST_NEXT(p, p_list)) { 234 /* 235 * Increment time in/out of memory and sleep time 236 * (if sleeping). We ignore overflow; with 16-bit int's 237 * (remember them?) overflow takes 45 days. 238 */ 239 p->p_swtime++; 240 if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 241 p->p_slptime++; 242 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 243 /* 244 * If the process has slept the entire second, 245 * stop recalculating its priority until it wakes up. 246 */ 247 if (p->p_slptime > 1) 248 continue; 249 s = splstatclock(); /* prevent state changes */ 250 /* 251 * p_pctcpu is only for ps. 252 */ 253#if (FSHIFT >= CCPU_SHIFT) 254 p->p_pctcpu += (phz == 100)? 
255 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 256 100 * (((fixpt_t) p->p_cpticks) 257 << (FSHIFT - CCPU_SHIFT)) / phz; 258#else 259 p->p_pctcpu += ((FSCALE - ccpu) * 260 (p->p_cpticks * FSCALE / phz)) >> FSHIFT; 261#endif 262 p->p_cpticks = 0; 263 newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu); 264 p->p_estcpu = newcpu; 265 resetpriority(p); 266 if (p->p_priority >= PUSER) { 267 if ((p != curproc) && 268 p->p_stat == SRUN && 269 (p->p_flag & P_INMEM) && 270 (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { 271 remrunqueue(p); 272 p->p_priority = p->p_usrpri; 273 setrunqueue(p); 274 } else 275 p->p_priority = p->p_usrpri; 276 } 277 splx(s); 278 } 279 uvm_meter(); 280 wakeup((caddr_t)&lbolt); 281 timeout_add(to, hz); 282} 283 284/* 285 * Recalculate the priority of a process after it has slept for a while. 286 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 287 * least six times the loadfactor will decay p_estcpu to zero. 288 */ 289void 290updatepri(p) 291 register struct proc *p; 292{ 293 register unsigned int newcpu = p->p_estcpu; 294 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 295 296 if (p->p_slptime > 5 * loadfac) 297 p->p_estcpu = 0; 298 else { 299 p->p_slptime--; /* the first time was done in schedcpu */ 300 while (newcpu && --p->p_slptime) 301 newcpu = (int) decay_cpu(loadfac, newcpu); 302 p->p_estcpu = newcpu; 303 } 304 resetpriority(p); 305} 306 307/* 308 * We're only looking at 7 bits of the address; everything is 309 * aligned to 4, lots of things are aligned to greater powers 310 * of 2. Shift right by 8, i.e. drop the bottom 256 worth. 311 */ 312#define TABLESIZE 128 313#define LOOKUP(x) (((long)(x) >> 8) & (TABLESIZE - 1)) 314struct slpque { 315 struct proc *sq_head; 316 struct proc **sq_tailp; 317} slpque[TABLESIZE]; 318 319/* 320 * During autoconfiguration or after a panic, a sleep will simply 321 * lower the priority briefly to allow interrupts, then return. 
322 * The priority to be used (safepri) is machine-dependent, thus this 323 * value is initialized and maintained in the machine-dependent layers. 324 * This priority will typically be 0, or the lowest priority 325 * that is safe for use on the interrupt stack; it can be made 326 * higher to block network software interrupts after panics. 327 */ 328int safepri; 329 330/* 331 * General sleep call. Suspends the current process until a wakeup is 332 * performed on the specified identifier. The process will then be made 333 * runnable with the specified priority. Sleeps at most timo/hz seconds 334 * (0 means no timeout). If pri includes PCATCH flag, signals are checked 335 * before and after sleeping, else signals are not checked. Returns 0 if 336 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a 337 * signal needs to be delivered, ERESTART is returned if the current system 338 * call should be restarted if possible, and EINTR is returned if the system 339 * call should be interrupted by the signal (return EINTR). 340 * 341 * The interlock is held until the scheduler_slock (XXX) is held. The 342 * interlock will be locked before returning back to the caller 343 * unless the PNORELOCK flag is specified, in which case the 344 * interlock will always be unlocked upon return. 345 */ 346int 347ltsleep(ident, priority, wmesg, timo, interlock) 348 void *ident; 349 int priority, timo; 350 const char *wmesg; 351 volatile struct simplelock *interlock; 352{ 353 struct proc *p = curproc; 354 struct slpque *qp; 355 int s, sig; 356 int catch = priority & PCATCH; 357 int relock = (priority & PNORELOCK) == 0; 358 359#ifdef KTRACE 360 if (KTRPOINT(p, KTR_CSW)) 361 ktrcsw(p, 1, 0); 362#endif 363 s = splhigh(); 364 if (cold || panicstr) { 365 /* 366 * After a panic, or during autoconfiguration, 367 * just give interrupts a chance, then just return; 368 * don't run any other procs or panic below, 369 * in case this is the idle process and already asleep. 
370 */ 371 splx(safepri); 372 splx(s); 373 if (interlock != NULL && relock == 0) 374 simple_unlock(interlock); 375 return (0); 376 } 377#ifdef DIAGNOSTIC 378 if (ident == NULL || p->p_stat != SRUN || p->p_back) 379 panic("tsleep"); 380#endif 381 p->p_wchan = ident; 382 p->p_wmesg = wmesg; 383 p->p_slptime = 0; 384 p->p_priority = priority & PRIMASK; 385 qp = &slpque[LOOKUP(ident)]; 386 if (qp->sq_head == 0) 387 qp->sq_head = p; 388 else 389 *qp->sq_tailp = p; 390 *(qp->sq_tailp = &p->p_forw) = 0; 391 if (timo) 392 timeout_add(&p->p_sleep_to, timo); 393 /* 394 * We can now release the interlock; the scheduler_slock 395 * is held, so a thread can't get in to do wakeup() before 396 * we do the switch. 397 * 398 * XXX We leave the code block here, after inserting ourselves 399 * on the sleep queue, because we might want a more clever 400 * data structure for the sleep queues at some point. 401 */ 402 if (interlock != NULL) 403 simple_unlock(interlock); 404 405 /* 406 * We put ourselves on the sleep queue and start our timeout 407 * before calling CURSIG, as we could stop there, and a wakeup 408 * or a SIGCONT (or both) could occur while we were stopped. 409 * A SIGCONT would cause us to be marked as SSLEEP 410 * without resuming us, thus we must be ready for sleep 411 * when CURSIG is called. If the wakeup happens while we're 412 * stopped, p->p_wchan will be 0 upon return from CURSIG. 
413 */ 414 if (catch) { 415 p->p_flag |= P_SINTR; 416 if ((sig = CURSIG(p)) != 0) { 417 if (p->p_wchan) 418 unsleep(p); 419 p->p_stat = SRUN; 420 goto resume; 421 } 422 if (p->p_wchan == 0) { 423 catch = 0; 424 goto resume; 425 } 426 } else 427 sig = 0; 428 p->p_stat = SSLEEP; 429 p->p_stats->p_ru.ru_nvcsw++; 430 mi_switch(); 431#ifdef DDB 432 /* handy breakpoint location after process "wakes" */ 433 __asm(".globl bpendtsleep\nbpendtsleep:"); 434#endif 435resume: 436 curpriority = p->p_usrpri; 437 splx(s); 438 p->p_flag &= ~P_SINTR; 439 if (p->p_flag & P_TIMEOUT) { 440 p->p_flag &= ~P_TIMEOUT; 441 if (sig == 0) { 442#ifdef KTRACE 443 if (KTRPOINT(p, KTR_CSW)) 444 ktrcsw(p, 0, 0); 445#endif 446 if (interlock != NULL && relock) 447 simple_lock(interlock); 448 return (EWOULDBLOCK); 449 } 450 } else if (timo) 451 timeout_del(&p->p_sleep_to); 452 if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) { 453#ifdef KTRACE 454 if (KTRPOINT(p, KTR_CSW)) 455 ktrcsw(p, 0, 0); 456#endif 457 if (interlock != NULL && relock) 458 simple_lock(interlock); 459 if (p->p_sigacts->ps_sigintr & sigmask(sig)) 460 return (EINTR); 461 return (ERESTART); 462 } 463#ifdef KTRACE 464 if (KTRPOINT(p, KTR_CSW)) 465 ktrcsw(p, 0, 0); 466#endif 467 if (interlock != NULL && relock) 468 simple_lock(interlock); 469 return (0); 470} 471 472/* 473 * Implement timeout for tsleep. 474 * If process hasn't been awakened (wchan non-zero), 475 * set timeout flag and undo the sleep. If proc 476 * is stopped, just unsleep so it will remain stopped. 477 */ 478void 479endtsleep(arg) 480 void *arg; 481{ 482 struct proc *p; 483 int s; 484 485 p = (struct proc *)arg; 486 s = splhigh(); 487 if (p->p_wchan) { 488 if (p->p_stat == SSLEEP) 489 setrunnable(p); 490 else 491 unsleep(p); 492 p->p_flag |= P_TIMEOUT; 493 } 494 splx(s); 495} 496 497/* 498 * Short-term, non-interruptable sleep. 
499 */ 500void 501sleep(ident, priority) 502 void *ident; 503 int priority; 504{ 505 register struct proc *p = curproc; 506 register struct slpque *qp; 507 register int s; 508 509#ifdef DIAGNOSTIC 510 if (priority > PZERO) { 511 printf("sleep called with priority %d > PZERO, wchan: %p\n", 512 priority, ident); 513 panic("old sleep"); 514 } 515#endif 516 s = splhigh(); 517 if (cold || panicstr) { 518 /* 519 * After a panic, or during autoconfiguration, 520 * just give interrupts a chance, then just return; 521 * don't run any other procs or panic below, 522 * in case this is the idle process and already asleep. 523 */ 524 splx(safepri); 525 splx(s); 526 return; 527 } 528#ifdef DIAGNOSTIC 529 if (ident == NULL || p->p_stat != SRUN || p->p_back) 530 panic("sleep"); 531#endif 532 p->p_wchan = ident; 533 p->p_wmesg = NULL; 534 p->p_slptime = 0; 535 p->p_priority = priority; 536 qp = &slpque[LOOKUP(ident)]; 537 if (qp->sq_head == 0) 538 qp->sq_head = p; 539 else 540 *qp->sq_tailp = p; 541 *(qp->sq_tailp = &p->p_forw) = 0; 542 p->p_stat = SSLEEP; 543 p->p_stats->p_ru.ru_nvcsw++; 544#ifdef KTRACE 545 if (KTRPOINT(p, KTR_CSW)) 546 ktrcsw(p, 1, 0); 547#endif 548 mi_switch(); 549#ifdef DDB 550 /* handy breakpoint location after process "wakes" */ 551 __asm(".globl bpendsleep\nbpendsleep:"); 552#endif 553#ifdef KTRACE 554 if (KTRPOINT(p, KTR_CSW)) 555 ktrcsw(p, 0, 0); 556#endif 557 curpriority = p->p_usrpri; 558 splx(s); 559} 560 561/* 562 * Remove a process from its wait queue 563 */ 564void 565unsleep(p) 566 register struct proc *p; 567{ 568 register struct slpque *qp; 569 register struct proc **hp; 570 int s; 571 572 s = splhigh(); 573 if (p->p_wchan) { 574 hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head; 575 while (*hp != p) 576 hp = &(*hp)->p_forw; 577 *hp = p->p_forw; 578 if (qp->sq_tailp == &p->p_forw) 579 qp->sq_tailp = hp; 580 p->p_wchan = 0; 581 } 582 splx(s); 583} 584 585/* 586 * Make all processes sleeping on the specified identifier runnable. 
587 */ 588void 589wakeup_n(ident, n) 590 void *ident; 591 int n; 592{ 593 struct slpque *qp; 594 struct proc *p, **q; 595 int s; 596 597 s = splhigh(); 598 qp = &slpque[LOOKUP(ident)]; 599restart: 600 for (q = &qp->sq_head; (p = *q) != NULL; ) { 601#ifdef DIAGNOSTIC 602 if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP)) 603 panic("wakeup"); 604#endif 605 if (p->p_wchan == ident) { 606 --n; 607 p->p_wchan = 0; 608 *q = p->p_forw; 609 if (qp->sq_tailp == &p->p_forw) 610 qp->sq_tailp = q; 611 if (p->p_stat == SSLEEP) { 612 /* OPTIMIZED EXPANSION OF setrunnable(p); */ 613 if (p->p_slptime > 1) 614 updatepri(p); 615 p->p_slptime = 0; 616 p->p_stat = SRUN; 617 618 /* 619 * Since curpriority is a user priority, 620 * p->p_priority is always better than 621 * curpriority. 622 */ 623 624 if ((p->p_flag & P_INMEM) != 0) { 625 setrunqueue(p); 626 need_resched(); 627 } else { 628 wakeup((caddr_t)&proc0); 629 } 630 /* END INLINE EXPANSION */ 631 632 if (n != 0) 633 goto restart; 634 else 635 break; 636 } 637 } else 638 q = &p->p_forw; 639 } 640 splx(s); 641} 642 643void 644wakeup(chan) 645 void *chan; 646{ 647 wakeup_n(chan, -1); 648} 649 650/* 651 * General yield call. Puts the current process back on its run queue and 652 * performs a voluntary context switch. 653 */ 654void 655yield() 656{ 657 struct proc *p = curproc; 658 int s; 659 660 s = splstatclock(); 661 p->p_priority = p->p_usrpri; 662 setrunqueue(p); 663 p->p_stats->p_ru.ru_nvcsw++; 664 mi_switch(); 665 splx(s); 666} 667 668/* 669 * General preemption call. Puts the current process back on its run queue 670 * and performs an involuntary context switch. If a process is supplied, 671 * we switch to that process. Otherwise, we use the normal process selection 672 * criteria. 673 */ 674void 675preempt(newp) 676 struct proc *newp; 677{ 678 struct proc *p = curproc; 679 int s; 680 681 /* 682 * XXX Switching to a specific process is not supported yet. 
683 */ 684 if (newp != NULL) 685 panic("preempt: cpu_preempt not yet implemented"); 686 687 s = splstatclock(); 688 p->p_priority = p->p_usrpri; 689 setrunqueue(p); 690 p->p_stats->p_ru.ru_nivcsw++; 691 mi_switch(); 692 splx(s); 693} 694 695 696/* 697 * Must be called at splstatclock() or higher. 698 */ 699void 700mi_switch() 701{ 702 struct proc *p = curproc; /* XXX */ 703 struct rlimit *rlim; 704 long s, u; 705 struct timeval tv; 706 707 splassert(IPL_STATCLOCK); 708 709 /* 710 * Compute the amount of time during which the current 711 * process was running, and add that to its total so far. 712 */ 713 microtime(&tv); 714 u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec); 715 s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec); 716 if (u < 0) { 717 u += 1000000; 718 s--; 719 } else if (u >= 1000000) { 720 u -= 1000000; 721 s++; 722 } 723 p->p_rtime.tv_usec = u; 724 p->p_rtime.tv_sec = s; 725 726 /* 727 * Check if the process exceeds its cpu resource allocation. 728 * If over max, kill it. In any case, if it has run for more 729 * than 10 minutes, reduce priority to give others a chance. 730 */ 731 rlim = &p->p_rlimit[RLIMIT_CPU]; 732 if (s >= rlim->rlim_cur) { 733 if (s >= rlim->rlim_max) 734 psignal(p, SIGKILL); 735 else { 736 psignal(p, SIGXCPU); 737 if (rlim->rlim_cur < rlim->rlim_max) 738 rlim->rlim_cur += 5; 739 } 740 } 741 if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) { 742 p->p_nice = NZERO + 4; 743 resetpriority(p); 744 } 745 746 747 /* 748 * Process is about to yield the CPU; clear the appropriate 749 * scheduling flags. 750 */ 751 p->p_schedflags &= ~PSCHED_SWITCHCLEAR; 752 753 /* 754 * Pick a new current process and record its start time. 755 */ 756 uvmexp.swtch++; 757 cpu_switch(p); 758 microtime(&runtime); 759} 760 761/* 762 * Initialize the (doubly-linked) run queues 763 * to be empty. 
764 */ 765void 766rqinit() 767{ 768 register int i; 769 770 for (i = 0; i < NQS; i++) 771 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; 772} 773 774/* 775 * Change process state to be runnable, 776 * placing it on the run queue if it is in memory, 777 * and awakening the swapper if it isn't in memory. 778 */ 779void 780setrunnable(p) 781 register struct proc *p; 782{ 783 register int s; 784 785 s = splhigh(); 786 switch (p->p_stat) { 787 case 0: 788 case SRUN: 789 case SZOMB: 790 case SDEAD: 791 default: 792 panic("setrunnable"); 793 case SSTOP: 794 /* 795 * If we're being traced (possibly because someone attached us 796 * while we were stopped), check for a signal from the debugger. 797 */ 798 if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) 799 p->p_siglist |= sigmask(p->p_xstat); 800 case SSLEEP: 801 unsleep(p); /* e.g. when sending signals */ 802 break; 803 case SIDL: 804 break; 805 } 806 p->p_stat = SRUN; 807 if (p->p_flag & P_INMEM) 808 setrunqueue(p); 809 splx(s); 810 if (p->p_slptime > 1) 811 updatepri(p); 812 p->p_slptime = 0; 813 if ((p->p_flag & P_INMEM) == 0) 814 wakeup((caddr_t)&proc0); 815 else if (p->p_priority < curpriority) 816 need_resched(); 817} 818 819/* 820 * Compute the priority of a process when running in user mode. 821 * Arrange to reschedule if the resulting priority is better 822 * than that of the current process. 823 */ 824void 825resetpriority(p) 826 register struct proc *p; 827{ 828 register unsigned int newpriority; 829 830 newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO); 831 newpriority = min(newpriority, MAXPRI); 832 p->p_usrpri = newpriority; 833 if (newpriority < curpriority) 834 need_resched(); 835} 836 837/* 838 * We adjust the priority of the current process. The priority of a process 839 * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu) 840 * is increased here. 
The formula for computing priorities (in kern_synch.c) 841 * will compute a different value each time p_estcpu increases. This can 842 * cause a switch, but unless the priority crosses a PPQ boundary the actual 843 * queue will not change. The cpu usage estimator ramps up quite quickly 844 * when the process is running (linearly), and decays away exponentially, at 845 * a rate which is proportionally slower when the system is busy. The basic 846 * principle is that the system will 90% forget that the process used a lot 847 * of CPU time in 5 * loadav seconds. This causes the system to favor 848 * processes which haven't run much recently, and to round-robin among other 849 * processes. 850 */ 851 852void 853schedclock(p) 854 struct proc *p; 855{ 856 p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); 857 resetpriority(p); 858 if (p->p_priority >= PUSER) 859 p->p_priority = p->p_usrpri; 860} 861 862#ifdef DDB 863#include <machine/db_machdep.h> 864 865#include <ddb/db_interface.h> 866#include <ddb/db_output.h> 867 868void 869db_show_all_procs(addr, haddr, count, modif) 870 db_expr_t addr; 871 int haddr; 872 db_expr_t count; 873 char *modif; 874{ 875 char *mode; 876 int doingzomb = 0; 877 struct proc *p, *pp; 878 879 if (modif[0] == 0) 880 modif[0] = 'n'; /* default == normal mode */ 881 882 mode = "mawn"; 883 while (*mode && *mode != modif[0]) 884 mode++; 885 if (*mode == 0 || *mode == 'm') { 886 db_printf("usage: show all procs [/a] [/n] [/w]\n"); 887 db_printf("\t/a == show process address info\n"); 888 db_printf("\t/n == show normal process info [default]\n"); 889 db_printf("\t/w == show process wait/emul info\n"); 890 return; 891 } 892 893 p = LIST_FIRST(&allproc); 894 895 switch (*mode) { 896 897 case 'a': 898 db_printf(" PID %-10s %18s %18s %18s\n", 899 "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP"); 900 break; 901 case 'n': 902 db_printf(" PID %5s %5s %5s S %10s %-9s %-16s\n", 903 "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND"); 904 break; 905 case 'w': 906 
db_printf(" PID %-16s %-8s %18s %s\n", 907 "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG"); 908 break; 909 } 910 911 while (p != 0) { 912 pp = p->p_pptr; 913 if (p->p_stat) { 914 915 db_printf("%c%5d ", p == curproc ? '*' : ' ', 916 p->p_pid); 917 918 switch (*mode) { 919 920 case 'a': 921 db_printf("%-10.10s %18p %18p %18p\n", 922 p->p_comm, p, p->p_addr, p->p_vmspace); 923 break; 924 925 case 'n': 926 db_printf("%5d %5d %5d %d %#10x " 927 "%-9.9s %-16s\n", 928 pp ? pp->p_pid : -1, p->p_pgrp->pg_id, 929 p->p_cred->p_ruid, p->p_stat, p->p_flag, 930 (p->p_wchan && p->p_wmesg) ? 931 p->p_wmesg : "", p->p_comm); 932 break; 933 934 case 'w': 935 db_printf("%-16s %-8s %18p %s\n", p->p_comm, 936 p->p_emul->e_name, p->p_wchan, 937 (p->p_wchan && p->p_wmesg) ? 938 p->p_wmesg : ""); 939 break; 940 941 } 942 } 943 p = LIST_NEXT(p, p_list); 944 if (p == 0 && doingzomb == 0) { 945 doingzomb = 1; 946 p = LIST_FIRST(&zombproc); 947 } 948 } 949} 950#endif 951