1/*- 2 * Copyright (c) 1982, 1986, 1990, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. --- 22 unchanged lines hidden (view full) --- 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 |
39 * $FreeBSD: head/sys/kern/kern_synch.c 83366 2001-09-12 08:38:13Z julian $ |
40 */ 41 42#include "opt_ddb.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/condvar.h> --- 54 unchanged lines hidden (view full) --- 102SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 103 0, sizeof sched_quantum, sysctl_kern_quantum, "I", ""); 104 105/* 106 * Arrange to reschedule if necessary, taking the priorities and 107 * schedulers into account. 108 */ 109void |
110maybe_resched(kg) 111 struct ksegrp *kg; |
112{ 113 114 mtx_assert(&sched_lock, MA_OWNED); |
115 if (kg->kg_pri.pri_level < curthread->td_ksegrp->kg_pri.pri_level) 116 curthread->td_kse->ke_flags |= KEF_NEEDRESCHED; |
117} 118 119int 120roundrobin_interval(void) 121{ 122 return (sched_quantum); 123} 124 --- 115 unchanged lines hidden (view full) --- 240 */ 241/* ARGSUSED */ 242static void 243schedcpu(arg) 244 void *arg; 245{ 246 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 247 register struct proc *p; |
248 register struct kse *ke; 249 register struct ksegrp *kg; |
250 register int realstathz; |
251 int awake; |
252 253 realstathz = stathz ? stathz : hz; 254 sx_slock(&allproc_lock); |
255 FOREACH_PROC_IN_SYSTEM(p) { |
256 mtx_lock_spin(&sched_lock); 257 p->p_swtime++; |
258 FOREACH_KSEGRP_IN_PROC(p, kg) { 259 awake = 0; 260 FOREACH_KSE_IN_GROUP(kg, ke) { 261 /* 262 * Increment time in/out of memory and sleep 263 * time (if sleeping). We ignore overflow; 264 * with 16-bit int's (remember them?) 265 * overflow takes 45 days. 266 */ 267 /* XXXKSE */ 268 /* if ((ke->ke_flags & KEF_ONRUNQ) == 0) */ 269 if (p->p_stat == SSLEEP || p->p_stat == SSTOP) { 270 ke->ke_slptime++; 271 } else { 272 ke->ke_slptime = 0; 273 awake = 1; 274 } |
275 |
276 /* 277 * pctcpu is only for ps? 278 * Do it per kse.. and add them up at the end? 279 * XXXKSE 280 */ 281 ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >> FSHIFT; 282 /* 283 * If the kse has been idle the entire second, 284 * stop recalculating its priority until 285 * it wakes up. 286 */ 287 if (ke->ke_slptime > 1) { 288 continue; 289 } 290 |
291#if (FSHIFT >= CCPU_SHIFT) |
292 ke->ke_pctcpu += (realstathz == 100) ? 293 ((fixpt_t) ke->ke_cpticks) << 294 (FSHIFT - CCPU_SHIFT) : 295 100 * (((fixpt_t) ke->ke_cpticks) << 296 (FSHIFT - CCPU_SHIFT)) / realstathz; |
297#else |
298 ke->ke_pctcpu += ((FSCALE - ccpu) * 299 (ke->ke_cpticks * FSCALE / realstathz)) >> 300 FSHIFT; |
301#endif |
302 ke->ke_cpticks = 0; 303 } /* end of kse loop */ 304 if (awake == 0) { 305 kg->kg_slptime++; 306 } else { 307 kg->kg_slptime = 0; 308 } 309 kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu); 310 resetpriority(kg); 311 if (kg->kg_pri.pri_level >= PUSER && 312 (p->p_sflag & PS_INMEM)) { 313 int changedqueue = 314 ((kg->kg_pri.pri_level / RQ_PPQ) != 315 (kg->kg_pri.pri_user / RQ_PPQ)); 316 317 kg->kg_pri.pri_level = kg->kg_pri.pri_user; 318 FOREACH_KSE_IN_GROUP(kg, ke) { 319 if ((ke->ke_oncpu == NOCPU) && /* idle */ 320 (p->p_stat == SRUN) && /* XXXKSE */ 321 changedqueue) { 322 remrunqueue(ke->ke_thread); 323 setrunqueue(ke->ke_thread); 324 } 325 } 326 } 327 } /* end of ksegrp loop */ |
328 mtx_unlock_spin(&sched_lock); |
329 } /* end of process loop */ |
330 sx_sunlock(&allproc_lock); 331 vmmeter(); 332 wakeup((caddr_t)&lbolt); 333 callout_reset(&schedcpu_callout, hz, schedcpu, NULL); 334} 335 336/* 337 * Recalculate the priority of a process after it has slept for a while. 338 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 339 * least six times the loadfactor will decay p_estcpu to zero. 340 */ 341void |
342updatepri(td) 343 register struct thread *td; |
344{ |
345 register struct ksegrp *kg; 346 register unsigned int newcpu; |
347 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 348 |
349 if (td == NULL) 350 return; 351 kg = td->td_ksegrp; 352 newcpu = kg->kg_estcpu; 353 if (kg->kg_slptime > 5 * loadfac) 354 kg->kg_estcpu = 0; |
355 else { |
356 kg->kg_slptime--; /* the first time was done in schedcpu */ 357 while (newcpu && --kg->kg_slptime) |
358 newcpu = decay_cpu(loadfac, newcpu); |
359 kg->kg_estcpu = newcpu; |
360 } |
361 resetpriority(td->td_ksegrp); |
362} 363 364/* 365 * We're only looking at 7 bits of the address; everything is 366 * aligned to 4, lots of things are aligned to greater powers 367 * of 2. Shift right by 8, i.e. drop the bottom 256 worth. 368 */ 369#define TABLESIZE 128 |
/* slpque: hash table of sleep queues, indexed by LOOKUP(wait channel). */
370static TAILQ_HEAD(slpquehead, thread) slpque[TABLESIZE];
/* LOOKUP() drops the low 8 address bits and masks into TABLESIZE buckets. */
/* NOTE(review): sleepinit()'s body and the head of msleep() are partially
   hidden in this view (20 lines elided); do not infer behavior for the
   elided region. */
371#define LOOKUP(x) (((intptr_t)(x) >> 8) & (TABLESIZE - 1)) 372 373void 374sleepinit(void) 375{ 376 int i; 377 378 sched_quantum = hz/10; --- 20 unchanged lines hidden (view full) --- 399int 400msleep(ident, mtx, priority, wmesg, timo) 401 void *ident; 402 struct mtx *mtx; 403 int priority, timo; 404 const char *wmesg; 405{ 406 struct proc *p = curproc;
/* msleep() body: sleeps the current thread on 'ident'.  NOTE(review): two
   regions of this function are elided in this view (20 lines of setup and
   26 lines of teardown); the visible code handles queueing, the signal
   race with CURSIG, and the endtsleep() callout race. */
407 struct thread *td = curthread;
408 int sig, catch = priority & PCATCH; 409 int rval = 0; 410 WITNESS_SAVE_DECL(mtx); 411 412#ifdef KTRACE 413 if (p && KTRPOINT(p, KTR_CSW)) 414 ktrcsw(p->p_tracep, 1, 0); 415#endif --- 20 unchanged lines hidden (view full) --- 436 mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); 437 WITNESS_SAVE(&mtx->mtx_object, mtx); 438 mtx_unlock_flags(mtx, MTX_NOSWITCH); 439 if (priority & PDROP) 440 mtx = NULL; 441 } 442 443 KASSERT(p != NULL, ("msleep1"));
444 KASSERT(ident != NULL && td->td_proc->p_stat == SRUN, ("msleep"));
445
/* Queue the thread on the sleep-channel hash bucket before any chance of
   stopping in CURSIG (see the comment below). */
446 td->td_wchan = ident; 447 td->td_wmesg = wmesg; 448 td->td_kse->ke_slptime = 0; /* XXXKSE */ 449 td->td_ksegrp->kg_slptime = 0; 450 td->td_ksegrp->kg_pri.pri_level = priority & PRIMASK; 451 CTR5(KTR_PROC, "msleep: thread %p (pid %d, %s) on %s (%p)", 452 td, p->p_pid, p->p_comm, wmesg, ident); 453 TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], td, td_slpq);
454 if (timo)
455 callout_reset(&td->td_slpcallout, timo, endtsleep, td);
456 /* 457 * We put ourselves on the sleep queue and start our timeout 458 * before calling CURSIG, as we could stop there, and a wakeup 459 * or a SIGCONT (or both) could occur while we were stopped. 460 * A SIGCONT would cause us to be marked as SSLEEP 461 * without resuming us, thus we must be ready for sleep 462 * when CURSIG is called. If the wakeup happens while we're
463 * stopped, td->td_wchan will be 0 upon return from CURSIG.
464 */ 465 if (catch) { 466 CTR3(KTR_PROC, "msleep caught: proc %p (pid %d, %s)", p, 467 p->p_pid, p->p_comm);
468 td->td_flags |= TDF_SINTR;
469 mtx_unlock_spin(&sched_lock); 470 PROC_LOCK(p); 471 sig = CURSIG(p); 472 mtx_lock_spin(&sched_lock); 473 PROC_UNLOCK_NOSWITCH(p); 474 if (sig != 0) {
475 if (td->td_wchan != NULL) 476 unsleep(td); 477 } else if (td->td_wchan == NULL)
478 catch = 0; 479 } else 480 sig = 0;
/* Still on the sleep queue: actually go to sleep and switch away. */
481 if (td->td_wchan != NULL) { 482 td->td_proc->p_stat = SSLEEP;
483 p->p_stats->p_ru.ru_nvcsw++; 484 mi_switch(); 485 }
486 CTR3(KTR_PROC, "msleep resume: proc %p (pid %d, %s)", td, p->p_pid,
487 p->p_comm);
488 KASSERT(td->td_proc->p_stat == SRUN, ("running but not SRUN")); 489 td->td_flags &= ~TDF_SINTR; 490 if (td->td_flags & TDF_TIMEOUT) { 491 td->td_flags &= ~TDF_TIMEOUT;
492 if (sig == 0) 493 rval = EWOULDBLOCK;
494 } else if (td->td_flags & TDF_TIMOFAIL) 495 td->td_flags &= ~TDF_TIMOFAIL; 496 else if (timo && callout_stop(&td->td_slpcallout) == 0) {
497 /* 498 * This isn't supposed to be pretty. If we are here, then 499 * the endtsleep() callout is currently executing on another 500 * CPU and is either spinning on the sched_lock or will be 501 * soon. If we don't synchronize here, there is a chance 502 * that this process may msleep() again before the callout 503 * has a chance to run and the callout may end up waking up 504 * the wrong msleep(). Yuck.
505 */
506 td->td_flags |= TDF_TIMEOUT;
507 p->p_stats->p_ru.ru_nivcsw++; 508 mi_switch(); 509 } 510 mtx_unlock_spin(&sched_lock); 511 512 if (rval == 0 && catch) { 513 PROC_LOCK(p); 514 /* XXX: shouldn't we always be calling CURSIG() */ --- 26 unchanged lines hidden (view full) --- 541 * set timeout flag and undo the sleep. If proc 542 * is stopped, just unsleep so it will remain stopped. 543 * MP-safe, called without the Giant mutex. 544 */ 545static void 546endtsleep(arg) 547 void *arg; 548{
549 register struct thread *td = arg; |
550 |
551 CTR3(KTR_PROC, "endtsleep: thread %p (pid %d, %s)", td, td->td_proc->p_pid, 552 td->td_proc->p_comm); |
553 mtx_lock_spin(&sched_lock); 554 /* 555 * This is the other half of the synchronization with msleep() 556 * described above. If the PS_TIMEOUT flag is set, we lost the 557 * race and just need to put the process back on the runqueue. 558 */ |
559 if ((td->td_flags & TDF_TIMEOUT) != 0) { 560 td->td_flags &= ~TDF_TIMEOUT; 561 setrunqueue(td); 562 } else if (td->td_wchan != NULL) { 563 if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */ 564 setrunnable(td); |
565 else |
566 unsleep(td); 567 td->td_flags |= TDF_TIMEOUT; 568 } else { 569 td->td_flags |= TDF_TIMOFAIL; 570 } |
571 mtx_unlock_spin(&sched_lock); 572} 573 574/* 575 * Remove a process from its wait queue 576 */ 577void |
578unsleep(struct thread *td) |
579{ 580 581 mtx_lock_spin(&sched_lock); |
582 if (td->td_wchan != NULL) { 583 TAILQ_REMOVE(&slpque[LOOKUP(td->td_wchan)], td, td_slpq); 584 td->td_wchan = NULL; |
585 } 586 mtx_unlock_spin(&sched_lock); 587} 588 589/* 590 * Make all processes sleeping on the specified identifier runnable. 591 */ 592void 593wakeup(ident) 594 register void *ident; 595{ 596 register struct slpquehead *qp; |
/* wakeup() body: walks the hash bucket for 'ident' and makes every thread
   sleeping on it runnable (inline expansion of setrunnable()).
   NOTE(review): the function's tail (6 lines, presumably loop close and
   sched_lock release) is elided in this view. */
597 register struct thread *td; 598 struct proc *p;
599 600 mtx_lock_spin(&sched_lock); 601 qp = &slpque[LOOKUP(ident)]; 602restart:
603 TAILQ_FOREACH(td, qp, td_slpq) { 604 p = td->td_proc; 605 if (td->td_wchan == ident) { 606 TAILQ_REMOVE(qp, td, td_slpq); 607 td->td_wchan = NULL; 608 if (td->td_proc->p_stat == SSLEEP) {
609 /* OPTIMIZED EXPANSION OF setrunnable(p); */
610 CTR3(KTR_PROC, "wakeup: thread %p (pid %d, %s)", 611 td, p->p_pid, p->p_comm); 612 if (td->td_ksegrp->kg_slptime > 1) 613 updatepri(td); 614 td->td_ksegrp->kg_slptime = 0; 615 td->td_kse->ke_slptime = 0; 616 td->td_proc->p_stat = SRUN;
617 if (p->p_sflag & PS_INMEM) {
618 setrunqueue(td); 619 maybe_resched(td->td_ksegrp);
/* The 'goto restart' below rescans the bucket because TAILQ_REMOVE plus
   the wakeup((caddr_t)&proc0) call may have perturbed the list. */
620 } else { 621 p->p_sflag |= PS_SWAPINREQ; 622 wakeup((caddr_t)&proc0); 623 } 624 /* END INLINE EXPANSION */ 625 goto restart; 626 } 627 } --- 6 unchanged lines hidden (view full) --- 634 * May wake more than one process if a target process is currently 635 * swapped out. 636 */ 637void 638wakeup_one(ident) 639 register void *ident; 640{ 641 register struct slpquehead *qp;
642 register struct thread *td; |
643 register struct proc *p; 644 645 mtx_lock_spin(&sched_lock); 646 qp = &slpque[LOOKUP(ident)]; 647 |
648 TAILQ_FOREACH(td, qp, td_slpq) { 649 p = td->td_proc; 650 if (td->td_wchan == ident) { 651 TAILQ_REMOVE(qp, td, td_slpq); 652 td->td_wchan = NULL; 653 if (td->td_proc->p_stat == SSLEEP) { |
654 /* OPTIMIZED EXPANSION OF setrunnable(p); */ 655 CTR3(KTR_PROC, "wakeup1: proc %p (pid %d, %s)", 656 p, p->p_pid, p->p_comm); |
657 if (td->td_ksegrp->kg_slptime > 1) 658 updatepri(td); 659 td->td_ksegrp->kg_slptime = 0; 660 td->td_kse->ke_slptime = 0; 661 td->td_proc->p_stat = SRUN; |
662 if (p->p_sflag & PS_INMEM) { |
663 setrunqueue(td); 664 maybe_resched(td->td_ksegrp); |
665 break; 666 } else { 667 p->p_sflag |= PS_SWAPINREQ; 668 wakeup((caddr_t)&proc0); 669 } 670 /* END INLINE EXPANSION */ 671 } 672 } 673 } 674 mtx_unlock_spin(&sched_lock); 675} 676 677/* 678 * The machine independent parts of mi_switch(). 679 */ 680void 681mi_switch() 682{ 683 struct timeval new_switchtime; |
/* mi_switch() body: machine-independent context switch.  NOTE(review):
   60 lines are elided in this view (accounting and switch preparation);
   the visible code saves/restores sched_lock state across cpu_switch(). */
684 struct thread *td = curthread; /* XXX */ 685 register struct proc *p = td->td_proc; /* XXX */
686#if 0 687 register struct rlimit *rlim; 688#endif 689 critical_t sched_crit; 690 u_int sched_nest; 691 692 mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); 693 --- 60 unchanged lines hidden (view full) --- 754 * Pick a new current process and record its start time. 755 */ 756 cnt.v_swtch++; 757 PCPU_SET(switchtime, new_switchtime); 758 CTR3(KTR_PROC, "mi_switch: old proc %p (pid %d, %s)", p, p->p_pid, 759 p->p_comm); 760 sched_crit = sched_lock.mtx_savecrit; 761 sched_nest = sched_lock.mtx_recurse;
/* Detach the KSE from its CPU before switching; KEF_NEEDRESCHED is
   consumed here since we are switching anyway. */
762 td->td_lastcpu = td->td_kse->ke_oncpu; 763 td->td_kse->ke_oncpu = NOCPU; 764 td->td_kse->ke_flags &= ~KEF_NEEDRESCHED;
765 cpu_switch();
/* Execution resumes here when this thread is switched back in. */
766 td->td_kse->ke_oncpu = PCPU_GET(cpuid);
767 sched_lock.mtx_savecrit = sched_crit; 768 sched_lock.mtx_recurse = sched_nest;
769 sched_lock.mtx_lock = (uintptr_t)td;
770 CTR3(KTR_PROC, "mi_switch: new proc %p (pid %d, %s)", p, p->p_pid, 771 p->p_comm); 772 if (PCPU_GET(switchtime.tv_sec) == 0) 773 microuptime(PCPU_PTR(switchtime)); 774 PCPU_SET(switchticks, ticks); 775} 776 777/* 778 * Change process state to be runnable, 779 * placing it on the run queue if it is in memory, 780 * and awakening the swapper if it isn't in memory. 781 */ 782void
783setrunnable(struct thread *td) |
784{ |
785 struct proc *p = td->td_proc; |
786 mtx_lock_spin(&sched_lock); 787 switch (p->p_stat) { |
788 case SZOMB: /* not a thread flag XXXKSE */ 789 panic("setrunnabl(1)"); 790 } 791 switch (td->td_proc->p_stat) { |
792 case 0: 793 case SRUN: |
794 case SWAIT: 795 default: |
796 panic("setrunnable(2)"); |
797 case SSTOP: 798 case SSLEEP: /* e.g. when sending signals */ |
799 if (td->td_flags & TDF_CVWAITQ) 800 cv_waitq_remove(td); |
801 else |
802 unsleep(td); |
803 break; 804 805 case SIDL: 806 break; 807 } |
808 td->td_proc->p_stat = SRUN; 809 if (td->td_ksegrp->kg_slptime > 1) 810 updatepri(td); 811 td->td_ksegrp->kg_slptime = 0; 812 td->td_kse->ke_slptime = 0; |
813 if ((p->p_sflag & PS_INMEM) == 0) { 814 p->p_sflag |= PS_SWAPINREQ; 815 wakeup((caddr_t)&proc0); 816 } else { |
817 setrunqueue(td); 818 maybe_resched(td->td_ksegrp); |
819 } 820 mtx_unlock_spin(&sched_lock); 821} 822 823/* 824 * Compute the priority of a process when running in user mode. 825 * Arrange to reschedule if the resulting priority is better 826 * than that of the current process. 827 */ 828void |
829resetpriority(kg) 830 register struct ksegrp *kg; |
831{ 832 register unsigned int newpriority; 833 834 mtx_lock_spin(&sched_lock); |
835 if (kg->kg_pri.pri_class == PRI_TIMESHARE) { 836 newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT + 837 NICE_WEIGHT * (kg->kg_nice - PRIO_MIN); |
838 newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), 839 PRI_MAX_TIMESHARE); |
840 kg->kg_pri.pri_user = newpriority; |
841 } |
842 maybe_resched(kg); |
843 mtx_unlock_spin(&sched_lock); 844} 845 846/* ARGSUSED */ 847static void 848sched_setup(dummy) 849 void *dummy; 850{ --- 16 unchanged lines hidden (view full) --- 867 * quite quickly when the process is running (linearly), and decays 868 * away exponentially, at a rate which is proportionally slower when 869 * the system is busy. The basic principle is that the system will 870 * 90% forget that the process used a lot of CPU time in 5 * loadav 871 * seconds. This causes the system to favor processes which haven't 872 * run much recently, and to round-robin among other processes. 873 */ 874void |
875schedclock(td) 876 struct thread *td; |
877{ |
878 struct kse *ke = td->td_kse; 879 struct ksegrp *kg = td->td_ksegrp; |
880 |
881 if (td) { 882 ke->ke_cpticks++; 883 kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1); 884 if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { 885 resetpriority(td->td_ksegrp); 886 if (kg->kg_pri.pri_level >= PUSER) 887 kg->kg_pri.pri_level = kg->kg_pri.pri_user; 888 } 889 } else { 890 panic("schedclock"); |
891 } 892} 893 894/* 895 * General purpose yield system call 896 */ 897int |
898yield(struct thread *td, struct yield_args *uap) |
899{ |
900 |
901 struct ksegrp *kg = td->td_ksegrp; 902 td->td_retval[0] = 0; 903 |
904 mtx_lock_spin(&sched_lock); 905 mtx_assert(&Giant, MA_NOTOWNED); 906#if 0 907 DROP_GIANT_NOSWITCH(); 908#endif |
909 kg->kg_pri.pri_level = PRI_MAX_TIMESHARE; 910 setrunqueue(td); 911 kg->kg_proc->p_stats->p_ru.ru_nvcsw++; |
912 mi_switch(); 913 mtx_unlock_spin(&sched_lock); 914#if 0 915 PICKUP_GIANT(); 916#endif 917 918 return (0); 919} 920 |