kern_synch.c revision 71288
1165138Syongari/*- 2165138Syongari * Copyright (c) 1982, 1986, 1990, 1991, 1993 3165138Syongari * The Regents of the University of California. All rights reserved. 4165138Syongari * (c) UNIX System Laboratories, Inc. 5165138Syongari * All or some portions of this file are derived from material licensed 6165138Syongari * to the University of California by American Telephone and Telegraph 7165138Syongari * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8165138Syongari * the permission of UNIX System Laboratories, Inc. 9165138Syongari * 10165138Syongari * Redistribution and use in source and binary forms, with or without 11165138Syongari * modification, are permitted provided that the following conditions 12165138Syongari * are met: 13165138Syongari * 1. Redistributions of source code must retain the above copyright 14165138Syongari * notice, this list of conditions and the following disclaimer. 15165138Syongari * 2. Redistributions in binary form must reproduce the above copyright 16165138Syongari * notice, this list of conditions and the following disclaimer in the 17165138Syongari * documentation and/or other materials provided with the distribution. 18165138Syongari * 3. All advertising materials mentioning features or use of this software 19165138Syongari * must display the following acknowledgement: 20165138Syongari * This product includes software developed by the University of 21165138Syongari * California, Berkeley and its contributors. 22165138Syongari * 4. Neither the name of the University nor the names of its contributors 23165138Syongari * may be used to endorse or promote products derived from this software 24165138Syongari * without specific prior written permission. 
25165138Syongari * 26165138Syongari * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27165138Syongari * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28165138Syongari * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29165138Syongari * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30165138Syongari * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31165138Syongari * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32165138Syongari * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33165138Syongari * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34165138Syongari * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35165138Syongari * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36165138Syongari * SUCH DAMAGE. 37165138Syongari * 38165138Syongari * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 39165138Syongari * $FreeBSD: head/sys/kern/kern_synch.c 71288 2001-01-20 02:57:59Z jhb $ 40165138Syongari */ 41165138Syongari 42165138Syongari#include "opt_ktrace.h" 43165138Syongari 44165138Syongari#include <sys/param.h> 45165138Syongari#include <sys/systm.h> 46165138Syongari#include <sys/proc.h> 47165138Syongari#include <sys/ipl.h> 48165138Syongari#include <sys/kernel.h> 49165138Syongari#include <sys/ktr.h> 50165138Syongari#include <sys/condvar.h> 51165138Syongari#include <sys/lock.h> 52165138Syongari#include <sys/mutex.h> 53165138Syongari#include <sys/signalvar.h> 54165138Syongari#include <sys/resourcevar.h> 55165138Syongari#include <sys/vmmeter.h> 56165138Syongari#include <sys/sysctl.h> 57165138Syongari#include <sys/sysproto.h> 58165138Syongari#include <vm/vm.h> 59165138Syongari#include <vm/vm_extern.h> 60165138Syongari#ifdef KTRACE 61165138Syongari#include <sys/uio.h> 62165138Syongari#include <sys/ktrace.h> 63165138Syongari#endif 
64165138Syongari 65165138Syongari#include <machine/cpu.h> 66165138Syongari#include <machine/smp.h> 67165138Syongari 68165138Syongaristatic void sched_setup __P((void *dummy)); 69165138SyongariSYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) 70165138Syongari 71165138Syongariu_char curpriority; 72165138Syongariint hogticks; 73165138Syongariint lbolt; 74165138Syongariint sched_quantum; /* Roundrobin scheduling quantum in ticks. */ 75165138Syongari 76165138Syongaristatic struct callout schedcpu_callout; 77165138Syongaristatic struct callout roundrobin_callout; 78165138Syongari 79165138Syongaristatic int curpriority_cmp __P((struct proc *p)); 80165138Syongaristatic void endtsleep __P((void *)); 81165138Syongaristatic void roundrobin __P((void *arg)); 82165138Syongaristatic void schedcpu __P((void *arg)); 83165138Syongari 84165138Syongaristatic int 85165138Syongarisysctl_kern_quantum(SYSCTL_HANDLER_ARGS) 86165138Syongari{ 87165138Syongari int error, new_val; 88165138Syongari 89165138Syongari new_val = sched_quantum * tick; 90165138Syongari error = sysctl_handle_int(oidp, &new_val, 0, req); 91165138Syongari if (error != 0 || req->newptr == NULL) 92165138Syongari return (error); 93165138Syongari if (new_val < tick) 94165138Syongari return (EINVAL); 95165138Syongari sched_quantum = new_val / tick; 96165138Syongari hogticks = 2 * sched_quantum; 97165138Syongari return (0); 98165138Syongari} 99165138Syongari 100165138SyongariSYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 101165138Syongari 0, sizeof sched_quantum, sysctl_kern_quantum, "I", ""); 102165138Syongari 103165138Syongari/*- 104165138Syongari * Compare priorities. 
Return: 105165138Syongari * <0: priority of p < current priority 106165138Syongari * 0: priority of p == current priority 107165138Syongari * >0: priority of p > current priority 108165138Syongari * The priorities are the normal priorities or the normal realtime priorities 109165138Syongari * if p is on the same scheduler as curproc. Otherwise the process on the 110165138Syongari * more realtimeish scheduler has lowest priority. As usual, a higher 111165138Syongari * priority really means a lower priority. 112165138Syongari */ 113165138Syongaristatic int 114165138Syongaricurpriority_cmp(p) 115165138Syongari struct proc *p; 116165138Syongari{ 117165138Syongari int c_class, p_class; 118165138Syongari 119165138Syongari c_class = RTP_PRIO_BASE(curproc->p_rtprio.type); 120165138Syongari p_class = RTP_PRIO_BASE(p->p_rtprio.type); 121165138Syongari if (p_class != c_class) 122165138Syongari return (p_class - c_class); 123165138Syongari if (p_class == RTP_PRIO_NORMAL) 124165138Syongari return (((int)p->p_priority - (int)curpriority) / PPQ); 125165138Syongari return ((int)p->p_rtprio.prio - (int)curproc->p_rtprio.prio); 126165138Syongari} 127165138Syongari 128165138Syongari/* 129165138Syongari * Arrange to reschedule if necessary, taking the priorities and 130165138Syongari * schedulers into account. 131165138Syongari */ 132165138Syongarivoid 133192736Syongarimaybe_resched(chk) 134192736Syongari struct proc *chk; 135192736Syongari{ 136198475Slulf struct proc *p = curproc; /* XXX */ 137192736Syongari 138165138Syongari /* 139165138Syongari * XXX idle scheduler still broken because proccess stays on idle 140165138Syongari * scheduler during waits (such as when getting FS locks). If a 141165138Syongari * standard process becomes runaway cpu-bound, the system can lockup 142165138Syongari * due to idle-scheduler processes in wakeup never getting any cpu. 
143193299Syongari */ 144173775Syongari if (p == PCPU_GET(idleproc)) { 145193299Syongari#if 0 146193299Syongari need_resched(); 147199012Syongari#endif 148165138Syongari } else if (chk == p) { 149165138Syongari /* We may need to yield if our priority has been raised. */ 150165138Syongari if (curpriority_cmp(chk) > 0) 151165138Syongari need_resched(); 152165138Syongari } else if (curpriority_cmp(chk) < 0) 153197592Syongari need_resched(); 154165138Syongari} 155165138Syongari 156165138Syongariint 157165138Syongariroundrobin_interval(void) 158165138Syongari{ 159165138Syongari return (sched_quantum); 160165138Syongari} 161165138Syongari 162165138Syongari/* 163165138Syongari * Force switch among equal priority processes every 100ms. 164165138Syongari */ 165165138Syongari/* ARGSUSED */ 166165138Syongaristatic void 167165138Syongariroundrobin(arg) 168165138Syongari void *arg; 169165138Syongari{ 170165138Syongari#ifndef SMP 171165138Syongari struct proc *p = curproc; /* XXX */ 172165138Syongari#endif 173165138Syongari 174165138Syongari#ifdef SMP 175165138Syongari need_resched(); 176165138Syongari forward_roundrobin(); 177165138Syongari#else 178165138Syongari if (p == PCPU_GET(idleproc) || RTP_PRIO_NEED_RR(p->p_rtprio.type)) 179165138Syongari need_resched(); 180165138Syongari#endif 181165138Syongari 182165138Syongari callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL); 183165138Syongari} 184165138Syongari 185165138Syongari/* 186165138Syongari * Constants for digital decay and forget: 187165138Syongari * 90% of (p_estcpu) usage in 5 * loadav time 188165138Syongari * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 189165138Syongari * Note that, as ps(1) mentions, this can let percentages 190165138Syongari * total over 100% (I've seen 137.9% for 3 processes). 191165138Syongari * 192165138Syongari * Note that schedclock() updates p_estcpu and p_cpticks asynchronously. 
193165138Syongari * 194165138Syongari * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. 195165138Syongari * That is, the system wants to compute a value of decay such 196165138Syongari * that the following for loop: 197165138Syongari * for (i = 0; i < (5 * loadavg); i++) 198165138Syongari * p_estcpu *= decay; 199165138Syongari * will compute 200165138Syongari * p_estcpu *= 0.1; 201165138Syongari * for all values of loadavg: 202165138Syongari * 203165138Syongari * Mathematically this loop can be expressed by saying: 204165138Syongari * decay ** (5 * loadavg) ~= .1 205165138Syongari * 206165138Syongari * The system computes decay as: 207165138Syongari * decay = (2 * loadavg) / (2 * loadavg + 1) 208165138Syongari * 209165138Syongari * We wish to prove that the system's computation of decay 210165138Syongari * will always fulfill the equation: 211165138Syongari * decay ** (5 * loadavg) ~= .1 212165138Syongari * 213165138Syongari * If we compute b as: 214165138Syongari * b = 2 * loadavg 215165138Syongari * then 216165138Syongari * decay = b / (b + 1) 217165138Syongari * 218165138Syongari * We now need to prove two things: 219165138Syongari * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 220165138Syongari * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 221165138Syongari * 222165138Syongari * Facts: 223165138Syongari * For x close to zero, exp(x) =~ 1 + x, since 224165138Syongari * exp(x) = 0! + x**1/1! + x**2/2! + ... . 225165138Syongari * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 226165138Syongari * For x close to zero, ln(1+x) =~ x, since 227165138Syongari * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 228165138Syongari * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 
229165138Syongari * ln(.1) =~ -2.30 230165138Syongari * 231165138Syongari * Proof of (1): 232165138Syongari * Solve (factor)**(power) =~ .1 given power (5*loadav): 233193293Syongari * solving for factor, 234193293Syongari * ln(factor) =~ (-2.30/5*loadav), or 235165138Syongari * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = 236165138Syongari * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 237165138Syongari * 238165138Syongari * Proof of (2): 239165138Syongari * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 240165138Syongari * solving for power, 241165138Syongari * power*ln(b/(b+1)) =~ -2.30, or 242165138Syongari * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 243165138Syongari * 244165138Syongari * Actual power values for the implemented algorithm are as follows: 245165138Syongari * loadav: 1 2 3 4 246165138Syongari * power: 5.68 10.32 14.94 19.55 247165138Syongari */ 248165138Syongari 249165138Syongari/* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 250165138Syongari#define loadfactor(loadav) (2 * (loadav)) 251165138Syongari#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 252165138Syongari 253165138Syongari/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 254165138Syongaristatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 255165138SyongariSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 256165138Syongari 257165138Syongari/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ 258165138Syongaristatic int fscale __unused = FSCALE; 259165138SyongariSYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); 260165138Syongari 261165138Syongari/* 262165138Syongari * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 263165138Syongari * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 264165138Syongari * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 * MP-safe, called without the Giant mutex.
 *
 * Walks the whole allproc list once per second: ages p_pctcpu, bumps
 * swap-in/sleep timers, decays p_estcpu via decay_cpu(), and migrates
 * user-priority processes between run queues when their recomputed
 * priority lands in a different queue.  Re-arms itself via callout.
 */
/* ARGSUSED */
static void
schedcpu(arg)
	void *arg;
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	register struct proc *p;
	register int realstathz, s;

	/* stathz may be 0 before the stat clock is started; fall back to hz. */
	realstathz = stathz ? stathz : hz;
	ALLPROC_LOCK(AP_SHARED);
	LIST_FOREACH(p, &allproc, p_list) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		if (p->p_stat == SWAIT)
			continue;
		 */
		mtx_enter(&sched_lock, MTX_SPIN);
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1) {
			mtx_exit(&sched_lock, MTX_SPIN);
			continue;
		}

		/*
		 * prevent state changes and protect run queue
		 */
		s = splhigh();

		/*
		 * p_pctcpu is only for ps.
		 */
#if (FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (realstathz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / realstathz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			/*
			 * Only requeue when the process is actually on a
			 * run queue (SRUN, in memory, not running on a cpu)
			 * and its new priority falls in a different queue;
			 * otherwise just record the new priority.
			 */
			if ((p != curproc) &&
#ifdef SMP
			    p->p_oncpu == 0xff &&	/* idle */
#endif
			    p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		mtx_exit(&sched_lock, MTX_SPIN);
		splx(s);
	}
	ALLPROC_LOCK(AP_RELEASE);
	vmmeter();
	/* Kick anyone sleeping on the once-per-second lbolt channel. */
	wakeup((caddr_t)&lbolt);
	callout_reset(&schedcpu_callout, hz, schedcpu, NULL);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
357193293Syongari * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 358193293Syongari * least six times the loadfactor will decay p_estcpu to zero. 359193293Syongari */ 360193293Syongarivoid 361193293Syongariupdatepri(p) 362193293Syongari register struct proc *p; 363193293Syongari{ 364193293Syongari register unsigned int newcpu = p->p_estcpu; 365193293Syongari register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 366193293Syongari 367193293Syongari if (p->p_slptime > 5 * loadfac) 368193293Syongari p->p_estcpu = 0; 369193293Syongari else { 370193293Syongari p->p_slptime--; /* the first time was done in schedcpu */ 371193293Syongari while (newcpu && --p->p_slptime) 372193293Syongari newcpu = decay_cpu(loadfac, newcpu); 373193293Syongari p->p_estcpu = newcpu; 374193293Syongari } 375193293Syongari resetpriority(p); 376193293Syongari} 377193293Syongari 378193293Syongari/* 379193293Syongari * We're only looking at 7 bits of the address; everything is 380193293Syongari * aligned to 4, lots of things are aligned to greater powers 381193293Syongari * of 2. Shift right by 8, i.e. drop the bottom 256 worth. 382193293Syongari */ 383193293Syongari#define TABLESIZE 128 384193293Syongaristatic TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; 385165138Syongari#define LOOKUP(x) (((intptr_t)(x) >> 8) & (TABLESIZE - 1)) 386165138Syongari 387165138Syongarivoid 388165138Syongarisleepinit(void) 389165138Syongari{ 390165138Syongari int i; 391165138Syongari 392165138Syongari sched_quantum = hz/10; 393165138Syongari hogticks = 2 * sched_quantum; 394165138Syongari for (i = 0; i < TABLESIZE; i++) 395165138Syongari TAILQ_INIT(&slpque[i]); 396165138Syongari} 397165138Syongari 398165138Syongari/* 399165138Syongari * General sleep call. Suspends the current process until a wakeup is 400165138Syongari * performed on the specified identifier. The process will then be made 401165138Syongari * runnable with the specified priority. 
 Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 *
 * The mutex argument is exited before the caller is suspended, and
 * entered before msleep returns.  If priority includes the PDROP
 * flag the mutex is not entered before returning.
 */
int
msleep(ident, mtx, priority, wmesg, timo)
	void *ident;
	struct mtx *mtx;
	int priority, timo;
	const char *wmesg;
{
	struct proc *p = curproc;
	int s, sig, catch = priority & PCATCH;
	int rval = 0;
	WITNESS_SAVE_DECL(mtx);

#ifdef KTRACE
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	WITNESS_SLEEP(0, mtx);
	mtx_enter(&sched_lock, MTX_SPIN);
	s = splhigh();
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		if (mtx != NULL && priority & PDROP)
			mtx_exit(mtx, MTX_DEF | MTX_NOSWITCH);
		mtx_exit(&sched_lock, MTX_SPIN);
		splx(s);
		return (0);
	}

	DROP_GIANT_NOSWITCH();

	if (mtx != NULL) {
		mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
		WITNESS_SAVE(mtx, mtx);
		mtx_exit(mtx, MTX_DEF | MTX_NOSWITCH);
		if (priority & PDROP)
			mtx = NULL;
	}

	KASSERT(p != NULL, ("msleep1"));
	KASSERT(ident != NULL && p->p_stat == SRUN, ("msleep"));
	/*
	 * Process may be sitting on a slpque if asleep() was called, remove
	 * it before re-adding.
	 */
	if (p->p_wchan != NULL)
		unsleep(p);

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;
	CTR4(KTR_PROC, "msleep: proc %p (pid %d, %s), schedlock %p",
		p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_slpq);
	if (timo)
		callout_reset(&p->p_slpcallout, timo, endtsleep, p);
	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		CTR4(KTR_PROC,
		    "msleep caught: proc %p (pid %d, %s), schedlock %p",
			p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
		p->p_flag |= P_SINTR;
		mtx_exit(&sched_lock, MTX_SPIN);
		if ((sig = CURSIG(p))) {
			/* Signal pending: abort the sleep before it starts. */
			mtx_enter(&sched_lock, MTX_SPIN);
			if (p->p_wchan)
				unsleep(p);
			p->p_stat = SRUN;
			goto resume;
		}
		mtx_enter(&sched_lock, MTX_SPIN);
		if (p->p_wchan == 0) {
			/* Already awakened while checking signals. */
			catch = 0;
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR4(KTR_PROC,
	    "msleep resume: proc %p (pid %d, %s), schedlock %p",
		p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
resume:
	curpriority = p->p_usrpri;
	splx(s);
	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		/* endtsleep() fired: the timeout expired before wakeup. */
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			rval = EWOULDBLOCK;
			mtx_exit(&sched_lock, MTX_SPIN);
			goto out;
		}
	} else if (timo)
		callout_stop(&p->p_slpcallout);
	mtx_exit(&sched_lock, MTX_SPIN);

	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
		goto out;
	}
out:
	/*
	 * NOTE(review): on the signal path above ktrcsw() has already been
	 * called once before the goto; it runs again here — confirm this
	 * double trace record is intended.
	 */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	if (mtx != NULL) {
		mtx_enter(mtx, MTX_DEF);
		WITNESS_RESTORE(mtx, mtx);
	}
	return (rval);
}

/*
 * asleep() - async sleep call.  Place process on wait queue and return
 * immediately without blocking.  The process stays runnable until mawait()
 * is called.  If ident is NULL, remove process from wait queue if it is still
 * on one.
 *
 * Only the most recent sleep condition is effective when making successive
 * calls to asleep() or when calling msleep().
 *
 * The timeout, if any, is not initiated until mawait() is called.  The sleep
 * priority, signal, and timeout is specified in the asleep() call but may be
 * overriden in the mawait() call.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
asleep(void *ident, int priority, const char *wmesg, int timo)
{
	struct proc *p = curproc;
	int s;

	/*
	 * obtain sched_lock while manipulating sleep structures and slpque.
	 *
	 * Remove preexisting wait condition (if any) and place process
	 * on appropriate slpque, but do not put process to sleep.
	 */

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);

	if (p->p_wchan != NULL)
		unsleep(p);

	if (ident) {
		/* Record the sleep parameters for the later mawait(). */
		p->p_wchan = ident;
		p->p_wmesg = wmesg;
		p->p_slptime = 0;
		p->p_asleep.as_priority = priority;
		p->p_asleep.as_timo = timo;
		TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_slpq);
	}

	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);

	return(0);
}

/*
 * mawait() - wait for async condition to occur.   The process blocks until
 * wakeup() is called on the most recent asleep() address.  If wakeup is called
 * prior to mawait(), mawait() winds up being a NOP.
 *
 * If mawait() is called more then once (without an intervening asleep() call),
 * mawait() is still effectively a NOP but it calls mi_switch() to give other
 * processes some cpu before returning.  The process is left runnable.
 *
 * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
 */

int
mawait(struct mtx *mtx, int priority, int timo)
{
	struct proc *p = curproc;
	int rval = 0;
	int s;
	WITNESS_SAVE_DECL(mtx);

	WITNESS_SLEEP(0, mtx);
	mtx_enter(&sched_lock, MTX_SPIN);
	DROP_GIANT_NOSWITCH();
	if (mtx != NULL) {
		mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
		WITNESS_SAVE(mtx, mtx);
		mtx_exit(mtx, MTX_DEF | MTX_NOSWITCH);
		if (priority & PDROP)
			mtx = NULL;
	}

	s = splhigh();

	/* Still on a sleep queue: the wakeup has not happened yet. */
	if (p->p_wchan != NULL) {
		int sig;
		int catch;

		/*
		 * The call to mawait() can override defaults specified in
		 * the original asleep().
		 */
		if (priority < 0)
			priority = p->p_asleep.as_priority;
		if (timo < 0)
			timo = p->p_asleep.as_timo;

		/*
		 * Install timeout
		 */

		if (timo)
			callout_reset(&p->p_slpcallout, timo, endtsleep, p);

		sig = 0;
		catch = priority & PCATCH;

		if (catch) {
			p->p_flag |= P_SINTR;
			mtx_exit(&sched_lock, MTX_SPIN);
			if ((sig = CURSIG(p))) {
				/* Pending signal: abort the sleep. */
				mtx_enter(&sched_lock, MTX_SPIN);
				if (p->p_wchan)
					unsleep(p);
				p->p_stat = SRUN;
				goto resume;
			}
			mtx_enter(&sched_lock, MTX_SPIN);
			if (p->p_wchan == NULL) {
				/* Awakened while checking signals. */
				catch = 0;
				goto resume;
			}
		}
		p->p_stat = SSLEEP;
		p->p_stats->p_ru.ru_nvcsw++;
		mi_switch();
resume:
		curpriority = p->p_usrpri;

		splx(s);
		p->p_flag &= ~P_SINTR;
		if (p->p_flag & P_TIMEOUT) {
			/* Timeout fired before the wakeup arrived. */
			p->p_flag &= ~P_TIMEOUT;
			if (sig == 0) {
#ifdef KTRACE
				if (KTRPOINT(p, KTR_CSW))
					ktrcsw(p->p_tracep, 0, 0);
#endif
				rval = EWOULDBLOCK;
				mtx_exit(&sched_lock, MTX_SPIN);
				goto out;
			}
		} else if (timo)
			callout_stop(&p->p_slpcallout);
		mtx_exit(&sched_lock, MTX_SPIN);

		if (catch && (sig != 0 || (sig = CURSIG(p)))) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p->p_tracep, 0, 0);
#endif
			if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
				rval = EINTR;
			else
				rval = ERESTART;
			goto out;
		}
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p->p_tracep, 0, 0);
#endif
	} else {
		/*
		 * If as_priority is 0, mawait() has been called without an
		 * intervening asleep().  We are still effectively a NOP,
		 * but we call mi_switch() for safety.
		 */

		if (p->p_asleep.as_priority == 0) {
			p->p_stats->p_ru.ru_nvcsw++;
			mi_switch();
		}
		mtx_exit(&sched_lock, MTX_SPIN);
		splx(s);
	}

	/*
	 * clear p_asleep.as_priority as an indication that mawait() has been
	 * called.  If mawait() is called again without an intervening asleep(),
	 * mawait() is still effectively a NOP but the above mi_switch() code
	 * is triggered as a safety.
	 */
	p->p_asleep.as_priority = 0;

out:
	PICKUP_GIANT();
	if (mtx != NULL) {
		mtx_enter(mtx, MTX_DEF);
		WITNESS_RESTORE(mtx, mtx);
	}
	return (rval);
}

/*
 * Implement timeout for msleep or asleep()/mawait()
 *
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.
 * is stopped, just unsleep so it will remain stopped.
 * MP-safe, called without the Giant mutex.
 */
static void
endtsleep(arg)
	void *arg;
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	CTR4(KTR_PROC,
	    "endtsleep: proc %p (pid %d, %s), schedlock %p",
	    p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	/*
	 * Only act if the process is still on a sleep queue; a non-NULL
	 * p_wchan means no wakeup has raced with this timeout.
	 */
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			/* Stopped (or otherwise not SSLEEP): just detach
			 * from the sleep queue so it stays where it is. */
			unsleep(p);
		/*
		 * P_TIMEOUT tells the sleeper's resume path that it woke
		 * because of the timeout (it maps this to EWOULDBLOCK).
		 */
		p->p_flag |= P_TIMEOUT;
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * Remove a process from its wait queue.
 *
 * Safe to call on a process that is not asleep: the p_wchan check
 * makes the removal a no-op in that case.
 */
void
unsleep(p)
	register struct proc *p;
{
	int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	if (p->p_wchan) {
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_slpq);
		p->p_wchan = 0;	/* mark as no longer waiting */
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	qp = &slpque[LOOKUP(ident)];	/* hash bucket for this wait channel */
restart:
	TAILQ_FOREACH(p, qp, p_slpq) {
		/* The bucket is shared; match on the exact channel. */
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_slpq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				CTR4(KTR_PROC,
				        "wakeup: proc %p (pid %d, %s), schedlock %p",
					p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
				} else {
					/* Swapped out: ask the swapper
					 * (proc0) to bring it back in. */
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
				/*
				 * TAILQ_REMOVE above (and the recursive
				 * wakeup of proc0) invalidate the iterator,
				 * so rescan the bucket from the top.
				 */
				goto restart;
			}
		}
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * Make a process sleeping on the specified identifier runnable.
 * May wake more than one process if a target process is currently
 * swapped out.
 */
void
wakeup_one(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	qp = &slpque[LOOKUP(ident)];

	TAILQ_FOREACH(p, qp, p_slpq) {
		/* Bucket is hash-shared; match the exact wait channel. */
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_slpq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				CTR4(KTR_PROC,
				        "wakeup1: proc %p (pid %d, %s), schedlock %p",
					p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
					/*
					 * Deliberately only stop after waking
					 * an in-memory process; a swapped-out
					 * match falls through and the scan
					 * continues (hence "may wake more
					 * than one" in the header comment).
					 */
					break;
				} else {
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
			}
		}
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	struct timeval new_switchtime;
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;	/* NOTE(review): only referenced in
					 * the #if 0 CPU-limit block below;
					 * unused in active code. */
	int x;

	/*
	 * XXX this spl is almost unnecessary.  It is partly to allow for
	 * sloppy callers that don't do it (issignal() via CURSIG() is the
	 * main offender).  It is partly to work around a bug in the i386
	 * cpu_switch() (the ipl is not preserved).  We ran for years
	 * without it.  I think there was only a interrupt latency problem.
	 * The main caller, msleep(), does an splx() a couple of instructions
	 * after calling here.  The buggy caller, issignal(), usually calls
	 * here at spl0() and sometimes returns at splhigh().  The process
	 * then runs for a little too long at splhigh().  The ipl gets fixed
	 * when the process returns to user mode (or earlier).
	 *
	 * It would probably be better to always call here at spl0(). Callers
	 * are prepared to give up control to another process, so they must
	 * be prepared to be interrupted.  The clock stuff here may not
	 * actually need splstatclock().
	 */
	x = splstatclock();

	/* Callers must already hold sched_lock across the switch. */
	mtx_assert(&sched_lock, MA_OWNED);

#ifdef SIMPLELOCK_DEBUG
	if (p->p_simple_locks)
		printf("sleep: holding simple lock\n");
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microuptime(&new_switchtime);
	if (timevalcmp(&new_switchtime, PCPU_PTR(switchtime), <)) {
		/* Clock went backwards: keep the old stamp, charge nothing. */
#if 0
		/* XXX: This doesn't play well with sched_lock right now. */
		printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
		    PCPU_GET(switchtime.tv_sec), PCPU_GET(switchtime.tv_usec),
		    new_switchtime.tv_sec, new_switchtime.tv_usec);
#endif
		new_switchtime = PCPU_GET(switchtime);
	} else {
		/* p_runtime is accumulated in microseconds. */
		p->p_runtime += (new_switchtime.tv_usec - PCPU_GET(switchtime.tv_usec)) +
		    (new_switchtime.tv_sec - PCPU_GET(switchtime.tv_sec)) *
		    (int64_t)1000000;
	}

#if 0
	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 *
	 * XXX drop sched_lock, pickup Giant
	 */
	if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
	    p->p_runtime > p->p_limit->p_cpulimit) {
		rlim = &p->p_rlimit[RLIMIT_CPU];
		if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
			killproc(p, "exceeded maximum CPU limit");
		} else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max) {
				/* XXX: we should make a private copy */
				rlim->rlim_cur += 5;
			}
		}
	}
#endif

	/*
	 * Pick a new current process and record its start time.
	 */
	cnt.v_swtch++;
	PCPU_SET(switchtime, new_switchtime);
	CTR4(KTR_PROC, "mi_switch: old proc %p (pid %d, %s), schedlock %p",
	    p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	/* Actual context switch; does not return until rescheduled. */
	cpu_switch();
	CTR4(KTR_PROC, "mi_switch: new proc %p (pid %d, %s), schedlock %p",
	    p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	if (PCPU_GET(switchtime.tv_sec) == 0)
		microuptime(PCPU_PTR(switchtime));
	PCPU_SET(switchticks, ticks);
	splx(x);
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SWAIT:
	default:
		/* Already runnable, dead, or in an invalid state. */
		panic("setrunnable");
	case SSTOP:
	case SSLEEP:			/* e.g. when sending signals */
		/* Detach from whichever wait queue the process is on. */
		if (p->p_flag & P_CVWAITQ)
			cv_waitq_remove(p);
		else
			unsleep(p);
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	/*
	 * NOTE(review): splx() here precedes the mtx_exit() below, so the
	 * spl nesting crosses the mutex hold -- this mirrors the historical
	 * ordering; confirm intent before reordering.
	 */
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0) {
		/* Swapped out: request swap-in and poke the swapper. */
		p->p_flag |= P_SWAPINREQ;
		wakeup((caddr_t)&proc0);
	}
	else
		maybe_resched(p);
	mtx_exit(&sched_lock, MTX_SPIN);
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	mtx_enter(&sched_lock, MTX_SPIN);
	/* Only timeshare processes get a recomputed priority;
	 * realtime/idle priorities are left alone. */
	if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
		/* Priority worsens with accumulated CPU (p_estcpu)
		 * and with nice value, clamped at MAXPRI. */
		newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT +
		    NICE_WEIGHT * (p->p_nice - PRIO_MIN);
		newpriority = min(newpriority, MAXPRI);
		p->p_usrpri = newpriority;
	}
	maybe_resched(p);
	mtx_exit(&sched_lock, MTX_SPIN);
}

/*
 * One-time scheduler initialization: set up the callouts and prime
 * the periodic events by invoking them once directly.
 */
/* ARGSUSED */
static void
sched_setup(dummy)
	void *dummy;
{

	callout_init(&schedcpu_callout, 1);
	callout_init(&roundrobin_callout, 0);

	/* Kick off timeout driven events by calling first time. */
	roundrobin(NULL);
	schedcpu(NULL);
}

/*
 * We adjust the priority of the current process.  The priority of
 * a process gets worse as it accumulates CPU time.  The cpu usage
 * estimator (p_estcpu) is increased here.  resetpriority() will
 * compute a different priority each time p_estcpu increases by
 * INVERSE_ESTCPU_WEIGHT
 * (until MAXPRI is reached).  The cpu usage estimator ramps up
 * quite quickly when the process is running (linearly), and decays
 * away exponentially, at a rate which is proportionally slower when
 * the system is busy.
 * The basic principle is that the system will
 * 90% forget that the process used a lot of CPU time in 5 * loadav
 * seconds.  This causes the system to favor processes which haven't
 * run much recently, and to round-robin among other processes.
 */
void
schedclock(p)
	struct proc *p;
{

	/* Charge one tick of CPU to this process. */
	p->p_cpticks++;
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	/* Recompute priority only every INVERSE_ESTCPU_WEIGHT ticks,
	 * since resetpriority() cannot change until then. */
	if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
		resetpriority(p);
		/* Propagate the new user priority unless the process is
		 * currently at an elevated (kernel) priority. */
		if (p->p_priority >= PUSER)
			p->p_priority = p->p_usrpri;
	}
}

/*
 * General purpose yield system call.
 *
 * Drops to the worst priority (MAXPRI), requeues the process, and
 * switches away, giving every other runnable process a chance to run.
 * Always returns 0.
 */
int
yield(struct proc *p, struct yield_args *uap)
{
	int s;

	p->p_retval[0] = 0;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	/* Release Giant without switching; mi_switch() does the switch. */
	DROP_GIANT_NOSWITCH();
	p->p_priority = MAXPRI;
	setrunqueue(p);
	p->p_stats->p_ru.ru_nvcsw++;	/* count as a voluntary switch */
	mi_switch();
	mtx_exit(&sched_lock, MTX_SPIN);
	PICKUP_GIANT();
	splx(s);

	return (0);
}