/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30103285Sikob * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31103285Sikob * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32103285Sikob * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33103285Sikob * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34103285Sikob * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35227843Smarius * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36227843Smarius * SUCH DAMAGE. 37227843Smarius * 38103285Sikob * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 39103285Sikob * $FreeBSD: head/sys/kern/kern_synch.c 66716 2000-10-06 02:20:21Z jhb $ 40103285Sikob */ 41103285Sikob 42193066Sjamie#include "opt_ktrace.h" 43103285Sikob 44129879Sphk#include <sys/param.h> 45103285Sikob#include <sys/systm.h> 46103285Sikob#include <sys/proc.h> 47103285Sikob#include <sys/kernel.h> 48169806Ssimokawa#include <sys/ktr.h> 49103285Sikob#include <sys/signalvar.h> 50170374Ssimokawa#include <sys/resourcevar.h> 51170374Ssimokawa#include <sys/vmmeter.h> 52127468Ssimokawa#include <sys/sysctl.h> 53117067Ssimokawa#include <vm/vm.h> 54117067Ssimokawa#include <vm/vm_extern.h> 55103285Sikob#ifdef KTRACE 56103285Sikob#include <sys/uio.h> 57113584Ssimokawa#include <sys/ktrace.h> 58103285Sikob#endif 59127468Ssimokawa 60127468Ssimokawa#include <machine/cpu.h> 61127468Ssimokawa#include <machine/ipl.h> 62127468Ssimokawa#include <machine/smp.h> 63127468Ssimokawa#include <machine/mutex.h> 64127468Ssimokawa 65127468Ssimokawastatic void sched_setup __P((void *dummy)); 66103285SikobSYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) 67103285Sikob 68110072Ssimokawau_char curpriority; 69103285Sikobint hogticks; 70103285Sikobint lbolt; 71127468Ssimokawaint sched_quantum; /* Roundrobin scheduling quantum in ticks. 
*/ 72103285Sikob 73116376Ssimokawastatic int curpriority_cmp __P((struct proc *p)); 74116376Ssimokawastatic void endtsleep __P((void *)); 75116376Ssimokawastatic void maybe_resched __P((struct proc *chk)); 76116376Ssimokawastatic void roundrobin __P((void *arg)); 77116376Ssimokawastatic void schedcpu __P((void *arg)); 78116376Ssimokawastatic void updatepri __P((struct proc *p)); 79116376Ssimokawa 80188704Ssbrunostatic int 81103285Sikobsysctl_kern_quantum(SYSCTL_HANDLER_ARGS) 82108281Ssimokawa{ 83109736Ssimokawa int error, new_val; 84109736Ssimokawa 85109736Ssimokawa new_val = sched_quantum * tick; 86120850Ssimokawa error = sysctl_handle_int(oidp, &new_val, 0, req); 87120850Ssimokawa if (error != 0 || req->newptr == NULL) 88103285Sikob return (error); 89110195Ssimokawa if (new_val < tick) 90110269Ssimokawa return (EINVAL); 91110195Ssimokawa sched_quantum = new_val / tick; 92103285Sikob hogticks = 2 * sched_quantum; 93103285Sikob return (0); 94103285Sikob} 95103285Sikob 96125238SsimokawaSYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 97125238Ssimokawa 0, sizeof sched_quantum, sysctl_kern_quantum, "I", ""); 98124169Ssimokawa 99124169Ssimokawa/*- 100124169Ssimokawa * Compare priorities. Return: 101170374Ssimokawa * <0: priority of p < current priority 102103285Sikob * 0: priority of p == current priority 103124169Ssimokawa * >0: priority of p > current priority 104103285Sikob * The priorities are the normal priorities or the normal realtime priorities 105212413Savg * if p is on the same scheduler as curproc. Otherwise the process on the 106124169Ssimokawa * more realtimeish scheduler has lowest priority. As usual, a higher 107124169Ssimokawa * priority really means a lower priority. 
108124169Ssimokawa */ 109124169Ssimokawastatic int 110124169Ssimokawacurpriority_cmp(p) 111124169Ssimokawa struct proc *p; 112169806Ssimokawa{ 113106543Ssimokawa int c_class, p_class; 114124169Ssimokawa 115106543Ssimokawa c_class = RTP_PRIO_BASE(curproc->p_rtprio.type); 116124169Ssimokawa p_class = RTP_PRIO_BASE(p->p_rtprio.type); 117170374Ssimokawa if (p_class != c_class) 118103285Sikob return (p_class - c_class); 119103285Sikob if (p_class == RTP_PRIO_NORMAL) 120103285Sikob return (((int)p->p_priority - (int)curpriority) / PPQ); 121125238Ssimokawa return ((int)p->p_rtprio.prio - (int)curproc->p_rtprio.prio); 122125238Ssimokawa} 123103285Sikob 124103285Sikob/* 125108642Ssimokawa * Arrange to reschedule if necessary, taking the priorities and 126116978Ssimokawa * schedulers into account. 127103285Sikob */ 128103285Sikobstatic void 129103285Sikobmaybe_resched(chk) 130103285Sikob struct proc *chk; 131103285Sikob{ 132227843Smarius struct proc *p = curproc; /* XXX */ 133103285Sikob 134124251Ssimokawa /* 135124251Ssimokawa * XXX idle scheduler still broken because proccess stays on idle 136124251Ssimokawa * scheduler during waits (such as when getting FS locks). If a 137124251Ssimokawa * standard process becomes runaway cpu-bound, the system can lockup 138103285Sikob * due to idle-scheduler processes in wakeup never getting any cpu. 139124251Ssimokawa */ 140124251Ssimokawa if (p == idleproc) { 141124251Ssimokawa#if 0 142124251Ssimokawa need_resched(); 143124251Ssimokawa#endif 144124251Ssimokawa } else if (chk == p) { 145124251Ssimokawa /* We may need to yield if our priority has been raised. 
*/ 146114909Ssimokawa if (curpriority_cmp(chk) > 0) 147114909Ssimokawa need_resched(); 148114909Ssimokawa } else if (curpriority_cmp(chk) < 0) 149114909Ssimokawa need_resched(); 150106813Ssimokawa} 151103285Sikob 152103285Sikobint 153103285Sikobroundrobin_interval(void) 154103285Sikob{ 155103285Sikob return (sched_quantum); 156103285Sikob} 157103285Sikob 158110072Ssimokawa/* 159103285Sikob * Force switch among equal priority processes every 100ms. 160106810Ssimokawa */ 161110072Ssimokawa/* ARGSUSED */ 162103285Sikobstatic void 163103285Sikobroundrobin(arg) 164110072Ssimokawa void *arg; 165110072Ssimokawa{ 166110072Ssimokawa#ifndef SMP 167110193Ssimokawa struct proc *p = curproc; /* XXX */ 168120660Ssimokawa#endif 169103285Sikob 170110072Ssimokawa#ifdef SMP 171110072Ssimokawa need_resched(); 172106810Ssimokawa forward_roundrobin(); 173103285Sikob#else 174106813Ssimokawa if (p == idleproc || RTP_PRIO_NEED_RR(p->p_rtprio.type)) 175103285Sikob need_resched(); 176110072Ssimokawa#endif 177110072Ssimokawa 178110072Ssimokawa timeout(roundrobin, NULL, sched_quantum); 179110582Ssimokawa} 180110072Ssimokawa 181110072Ssimokawa/* 182110072Ssimokawa * Constants for digital decay and forget: 183110072Ssimokawa * 90% of (p_estcpu) usage in 5 * loadav time 184110072Ssimokawa * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 185170374Ssimokawa * Note that, as ps(1) mentions, this can let percentages 186110193Ssimokawa * total over 100% (I've seen 137.9% for 3 processes). 187110582Ssimokawa * 188110072Ssimokawa * Note that schedclock() updates p_estcpu and p_cpticks asynchronously. 189170374Ssimokawa * 190110072Ssimokawa * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. 
191110072Ssimokawa * That is, the system wants to compute a value of decay such 192110072Ssimokawa * that the following for loop: 193110072Ssimokawa * for (i = 0; i < (5 * loadavg); i++) 194110072Ssimokawa * p_estcpu *= decay; 195110072Ssimokawa * will compute 196110072Ssimokawa * p_estcpu *= 0.1; 197110072Ssimokawa * for all values of loadavg: 198103285Sikob * 199103285Sikob * Mathematically this loop can be expressed by saying: 200103285Sikob * decay ** (5 * loadavg) ~= .1 201103285Sikob * 202103285Sikob * The system computes decay as: 203103285Sikob * decay = (2 * loadavg) / (2 * loadavg + 1) 204103285Sikob * 205170374Ssimokawa * We wish to prove that the system's computation of decay 206103285Sikob * will always fulfill the equation: 207103285Sikob * decay ** (5 * loadavg) ~= .1 208103285Sikob * 209103285Sikob * If we compute b as: 210103285Sikob * b = 2 * loadavg 211167632Ssimokawa * then 212167632Ssimokawa * decay = b / (b + 1) 213103285Sikob * 214103285Sikob * We now need to prove two things: 215120660Ssimokawa * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 216103285Sikob * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 217103285Sikob * 218103285Sikob * Facts: 219103285Sikob * For x close to zero, exp(x) =~ 1 + x, since 220124251Ssimokawa * exp(x) = 0! + x**1/1! + x**2/2! + ... . 221103285Sikob * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 222103285Sikob * For x close to zero, ln(1+x) =~ x, since 223170425Ssimokawa * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 224170425Ssimokawa * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 
225170425Ssimokawa * ln(.1) =~ -2.30 226170425Ssimokawa * 227170425Ssimokawa * Proof of (1): 228170425Ssimokawa * Solve (factor)**(power) =~ .1 given power (5*loadav): 229170425Ssimokawa * solving for factor, 230170425Ssimokawa * ln(factor) =~ (-2.30/5*loadav), or 231170425Ssimokawa * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = 232170425Ssimokawa * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 233170425Ssimokawa * 234103285Sikob * Proof of (2): 235103285Sikob * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 236103285Sikob * solving for power, 237103285Sikob * power*ln(b/(b+1)) =~ -2.30, or 238103285Sikob * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 239120660Ssimokawa * 240120660Ssimokawa * Actual power values for the implemented algorithm are as follows: 241120660Ssimokawa * loadav: 1 2 3 4 242120660Ssimokawa * power: 5.68 10.32 14.94 19.55 243103285Sikob */ 244120660Ssimokawa 245103285Sikob/* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 246120660Ssimokawa#define loadfactor(loadav) (2 * (loadav)) 247120660Ssimokawa#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 248120660Ssimokawa 249120660Ssimokawa/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 250124251Ssimokawastatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 251124251SsimokawaSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 252103285Sikob 253103285Sikob/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ 254106790Ssimokawastatic int fscale __unused = FSCALE; 255103285SikobSYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); 256103285Sikob 257103285Sikob/* 258103285Sikob * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 259103285Sikob * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 260108655Ssimokawa * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 
261108655Ssimokawa * 262170374Ssimokawa * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 263103285Sikob * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 264103285Sikob * 265170374Ssimokawa * If you don't want to bother with the faster/more-accurate formula, you 266103285Sikob * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 267170374Ssimokawa * (more general) method of calculating the %age of CPU used by a process. 268130460Sdfr */ 269103285Sikob#define CCPU_SHIFT 11 270103285Sikob 271103285Sikob/* 272103285Sikob * Recompute process priorities, every hz ticks. 273103285Sikob */ 274103285Sikob/* ARGSUSED */ 275103285Sikobstatic void 276103285Sikobschedcpu(arg) 277103285Sikob void *arg; 278103285Sikob{ 279103285Sikob register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 280103285Sikob register struct proc *p; 281103285Sikob register int realstathz, s; 282170374Ssimokawa 283170374Ssimokawa realstathz = stathz ? stathz : hz; 284170374Ssimokawa LIST_FOREACH(p, &allproc, p_list) { 285170374Ssimokawa /* 286170374Ssimokawa * Increment time in/out of memory and sleep time 287170374Ssimokawa * (if sleeping). We ignore overflow; with 16-bit int's 288170374Ssimokawa * (remember them?) overflow takes 45 days. 289170374Ssimokawa if (p->p_stat == SWAIT) 290103285Sikob continue; 291103285Sikob */ 292103285Sikob mtx_enter(&sched_lock, MTX_SPIN); 293103285Sikob p->p_swtime++; 294170374Ssimokawa if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 295170374Ssimokawa p->p_slptime++; 296170374Ssimokawa p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 297170374Ssimokawa /* 298170374Ssimokawa * If the process has slept the entire second, 299170374Ssimokawa * stop recalculating its priority until it wakes up. 
300170374Ssimokawa */ 301170374Ssimokawa if (p->p_slptime > 1) { 302170374Ssimokawa mtx_exit(&sched_lock, MTX_SPIN); 303170374Ssimokawa continue; 304170374Ssimokawa } 305170374Ssimokawa 306170374Ssimokawa /* 307170374Ssimokawa * prevent state changes and protect run queue 308103285Sikob */ 309103285Sikob s = splhigh(); 310103285Sikob 311106790Ssimokawa /* 312106790Ssimokawa * p_pctcpu is only for ps. 313106790Ssimokawa */ 314103285Sikob#if (FSHIFT >= CCPU_SHIFT) 315103285Sikob p->p_pctcpu += (realstathz == 100)? 316170374Ssimokawa ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 317170374Ssimokawa 100 * (((fixpt_t) p->p_cpticks) 318170374Ssimokawa << (FSHIFT - CCPU_SHIFT)) / realstathz; 319170374Ssimokawa#else 320103285Sikob p->p_pctcpu += ((FSCALE - ccpu) * 321170374Ssimokawa (p->p_cpticks * FSCALE / realstathz)) >> FSHIFT; 322103285Sikob#endif 323170374Ssimokawa p->p_cpticks = 0; 324170374Ssimokawa p->p_estcpu = decay_cpu(loadfac, p->p_estcpu); 325103285Sikob resetpriority(p); 326103285Sikob if (p->p_priority >= PUSER) { 327103285Sikob if ((p != curproc) && 328103285Sikob#ifdef SMP 329103285Sikob p->p_oncpu == 0xff && /* idle */ 330103285Sikob#endif 331106790Ssimokawa p->p_stat == SRUN && 332125238Ssimokawa (p->p_flag & P_INMEM) && 333125238Ssimokawa (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { 334125238Ssimokawa remrunqueue(p); 335125238Ssimokawa p->p_priority = p->p_usrpri; 336125238Ssimokawa setrunqueue(p); 337125238Ssimokawa } else 338103285Sikob p->p_priority = p->p_usrpri; 339125238Ssimokawa } 340103285Sikob mtx_exit(&sched_lock, MTX_SPIN); 341108281Ssimokawa splx(s); 342125238Ssimokawa } 343103285Sikob vmmeter(); 344106790Ssimokawa wakeup((caddr_t)&lbolt); 345110577Ssimokawa timeout(schedcpu, (void *)0, hz); 346170374Ssimokawa} 347110577Ssimokawa 348170374Ssimokawa/* 349170374Ssimokawa * Recalculate the priority of a process after it has slept for a while. 
350110577Ssimokawa * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 351110577Ssimokawa * least six times the loadfactor will decay p_estcpu to zero. 352170374Ssimokawa */ 353111040Ssimokawastatic void 354110577Ssimokawaupdatepri(p) 355120660Ssimokawa register struct proc *p; 356120660Ssimokawa{ 357110577Ssimokawa register unsigned int newcpu = p->p_estcpu; 358110577Ssimokawa register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 359110577Ssimokawa 360170374Ssimokawa if (p->p_slptime > 5 * loadfac) 361110577Ssimokawa p->p_estcpu = 0; 362111040Ssimokawa else { 363171513Ssimokawa p->p_slptime--; /* the first time was done in schedcpu */ 364110577Ssimokawa while (newcpu && --p->p_slptime) 365169119Ssimokawa newcpu = decay_cpu(loadfac, newcpu); 366170427Ssimokawa p->p_estcpu = newcpu; 367170427Ssimokawa } 368170427Ssimokawa resetpriority(p); 369110577Ssimokawa} 370110577Ssimokawa 371110577Ssimokawa/* 372110577Ssimokawa * We're only looking at 7 bits of the address; everything is 373170374Ssimokawa * aligned to 4, lots of things are aligned to greater powers 374170374Ssimokawa * of 2. Shift right by 8, i.e. drop the bottom 256 worth. 375170374Ssimokawa */ 376110577Ssimokawa#define TABLESIZE 128 377249291Swillstatic TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; 378170374Ssimokawa#define LOOKUP(x) (((intptr_t)(x) >> 8) & (TABLESIZE - 1)) 379170374Ssimokawa 380110577Ssimokawa#if 0 381110577Ssimokawa/* 382171513Ssimokawa * During autoconfiguration or after a panic, a sleep will simply 383111040Ssimokawa * lower the priority briefly to allow interrupts, then return. 384170374Ssimokawa * The priority to be used (safepri) is machine-dependent, thus this 385170374Ssimokawa * value is initialized and maintained in the machine-dependent layers. 
386170374Ssimokawa * This priority will typically be 0, or the lowest priority 387170374Ssimokawa * that is safe for use on the interrupt stack; it can be made 388110577Ssimokawa * higher to block network software interrupts after panics. 389110577Ssimokawa */ 390170374Ssimokawaint safepri; 391110577Ssimokawa#endif 392110577Ssimokawa 393110577Ssimokawavoid 394110577Ssimokawasleepinit(void) 395170374Ssimokawa{ 396110577Ssimokawa int i; 397110577Ssimokawa 398121463Ssimokawa sched_quantum = hz/10; 399121463Ssimokawa hogticks = 2 * sched_quantum; 400121463Ssimokawa for (i = 0; i < TABLESIZE; i++) 401121463Ssimokawa TAILQ_INIT(&slpque[i]); 402121463Ssimokawa} 403121463Ssimokawa 404170374Ssimokawa/* 405170374Ssimokawa * General sleep call. Suspends the current process until a wakeup is 406170374Ssimokawa * performed on the specified identifier. The process will then be made 407170374Ssimokawa * runnable with the specified priority. Sleeps at most timo/hz seconds 408121463Ssimokawa * (0 means no timeout). If pri includes PCATCH flag, signals are checked 409170374Ssimokawa * before and after sleeping, else signals are not checked. Returns 0 if 410110577Ssimokawa * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a 411110577Ssimokawa * signal needs to be delivered, ERESTART is returned if the current system 412110577Ssimokawa * call should be restarted if possible, and EINTR is returned if the system 413103285Sikob * call should be interrupted by the signal (return EINTR). 414103285Sikob * 415103285Sikob * The mutex argument is exited before the caller is suspended, and 416103285Sikob * entered before msleep returns. If priority includes the PDROP 417118455Ssimokawa * flag the mutex is not entered before returning. 
418103285Sikob */ 419118455Ssimokawaint 420103285Sikobmsleep(ident, mtx, priority, wmesg, timo) 421103285Sikob void *ident; 422103285Sikob struct mtx *mtx; 423103285Sikob int priority, timo; 424103285Sikob const char *wmesg; 425103285Sikob{ 426116978Ssimokawa struct proc *p = curproc; 427103285Sikob int s, sig, catch = priority & PCATCH; 428118455Ssimokawa struct callout_handle thandle; 429118455Ssimokawa int rval = 0; 430103285Sikob WITNESS_SAVE_DECL(mtx); 431118455Ssimokawa 432118455Ssimokawa#ifdef KTRACE 433187993Ssbruno if (p && KTRPOINT(p, KTR_CSW)) 434187993Ssbruno ktrcsw(p->p_tracep, 1, 0); 435187993Ssbruno#endif 436187993Ssbruno WITNESS_SLEEP(0, mtx); 437187993Ssbruno mtx_enter(&sched_lock, MTX_SPIN); 438187993Ssbruno 439187993Ssbruno if (mtx != NULL) { 440187993Ssbruno KASSERT(mtx->mtx_recurse == 0, 441187993Ssbruno ("sleeping on recursed mutex %s", mtx->mtx_description)); 442187993Ssbruno WITNESS_SAVE(mtx, mtx); 443187993Ssbruno mtx_exit(mtx, MTX_DEF | MTX_NOSWITCH); 444187993Ssbruno if (priority & PDROP) 445187993Ssbruno mtx = NULL; 446187993Ssbruno } 447187993Ssbruno 448187993Ssbruno s = splhigh(); 449187993Ssbruno if (cold || panicstr) { 450187993Ssbruno /* 451187993Ssbruno * After a panic, or during autoconfiguration, 452187993Ssbruno * just give interrupts a chance, then just return; 453187993Ssbruno * don't run any other procs or panic below, 454187993Ssbruno * in case this is the idle process and already asleep. 455187993Ssbruno */ 456187993Ssbruno mtx_exit(&sched_lock, MTX_SPIN); 457187993Ssbruno splx(s); 458170374Ssimokawa return (0); 459171513Ssimokawa } 460170374Ssimokawa 461170374Ssimokawa KASSERT(p != NULL, ("tsleep1")); 462170374Ssimokawa KASSERT(ident != NULL && p->p_stat == SRUN, ("tsleep")); 463170374Ssimokawa /* 464108853Ssimokawa * Process may be sitting on a slpque if asleep() was called, remove 465110577Ssimokawa * it before re-adding. 
466110577Ssimokawa */ 467110193Ssimokawa if (p->p_wchan != NULL) 468169806Ssimokawa unsleep(p); 469172836Sjulian 470169806Ssimokawa p->p_wchan = ident; 471169806Ssimokawa p->p_wmesg = wmesg; 472103285Sikob p->p_slptime = 0; 473103285Sikob p->p_priority = priority & PRIMASK; 474103285Sikob p->p_nativepri = p->p_priority; 475103285Sikob CTR4(KTR_PROC, "tsleep: proc %p (pid %d, %s), schedlock %p", 476103285Sikob p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock); 477103285Sikob TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq); 478103285Sikob if (timo) 479187993Ssbruno thandle = timeout(endtsleep, (void *)p, timo); 480169117Ssimokawa /* 481187993Ssbruno * We put ourselves on the sleep queue and start our timeout 482103285Sikob * before calling CURSIG, as we could stop there, and a wakeup 483103285Sikob * or a SIGCONT (or both) could occur while we were stopped. 484103285Sikob * A SIGCONT would cause us to be marked as SSLEEP 485103285Sikob * without resuming us, thus we must be ready for sleep 486103285Sikob * when CURSIG is called. If the wakeup happens while we're 487103285Sikob * stopped, p->p_wchan will be 0 upon return from CURSIG. 
488103285Sikob */ 489103285Sikob if (catch) { 490103285Sikob CTR4(KTR_PROC, 491212413Savg "tsleep caught: proc %p (pid %d, %s), schedlock %p", 492103285Sikob p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock); 493103285Sikob p->p_flag |= P_SINTR; 494103285Sikob if ((sig = CURSIG(p))) { 495103285Sikob if (p->p_wchan) 496103285Sikob unsleep(p); 497103285Sikob p->p_stat = SRUN; 498103285Sikob goto resume; 499103285Sikob } 500103285Sikob if (p->p_wchan == 0) { 501103285Sikob catch = 0; 502103285Sikob goto resume; 503103285Sikob } 504103285Sikob } else 505106790Ssimokawa sig = 0; 506116978Ssimokawa p->p_stat = SSLEEP; 507116978Ssimokawa p->p_stats->p_ru.ru_nvcsw++; 508116978Ssimokawa mi_switch(); 509116978Ssimokawa CTR4(KTR_PROC, 510116978Ssimokawa "tsleep resume: proc %p (pid %d, %s), schedlock %p", 511116978Ssimokawa p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock); 512116978Ssimokawaresume: 513116978Ssimokawa curpriority = p->p_usrpri; 514116978Ssimokawa splx(s); 515116978Ssimokawa p->p_flag &= ~P_SINTR; 516116978Ssimokawa if (p->p_flag & P_TIMEOUT) { 517116978Ssimokawa p->p_flag &= ~P_TIMEOUT; 518116978Ssimokawa if (sig == 0) { 519103285Sikob#ifdef KTRACE 520103285Sikob if (KTRPOINT(p, KTR_CSW)) 521103285Sikob ktrcsw(p->p_tracep, 0, 0); 522103285Sikob#endif 523118455Ssimokawa rval = EWOULDBLOCK; 524103285Sikob goto out; 525103285Sikob } 526169806Ssimokawa } else if (timo) 527111078Ssimokawa untimeout(endtsleep, (void *)p, thandle); 528118455Ssimokawa if (catch && (sig != 0 || (sig = CURSIG(p)))) { 529103285Sikob#ifdef KTRACE 530103285Sikob if (KTRPOINT(p, KTR_CSW)) 531169806Ssimokawa ktrcsw(p->p_tracep, 0, 0); 532170374Ssimokawa#endif 533169806Ssimokawa if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 534170374Ssimokawa rval = EINTR; 535170374Ssimokawa else 536170374Ssimokawa rval = ERESTART; 537170374Ssimokawa goto out; 538169806Ssimokawa } 539178915Ssimokawaout: 540178915Ssimokawa mtx_exit(&sched_lock, MTX_SPIN); 541178915Ssimokawa#ifdef KTRACE 
542118455Ssimokawa if (KTRPOINT(p, KTR_CSW)) 543118455Ssimokawa ktrcsw(p->p_tracep, 0, 0); 544106790Ssimokawa#endif 545118455Ssimokawa if (mtx != NULL) { 546118455Ssimokawa mtx_enter(mtx, MTX_DEF); 547111078Ssimokawa WITNESS_RESTORE(mtx, mtx); 548169806Ssimokawa } 549169806Ssimokawa return (rval); 550169806Ssimokawa} 551111078Ssimokawa 552178915Ssimokawa/* 553169806Ssimokawa * asleep() - async sleep call. Place process on wait queue and return 554111078Ssimokawa * immediately without blocking. The process stays runnable until await() 555111078Ssimokawa * is called. If ident is NULL, remove process from wait queue if it is still 556111078Ssimokawa * on one. 557111078Ssimokawa * 558169806Ssimokawa * Only the most recent sleep condition is effective when making successive 559169806Ssimokawa * calls to asleep() or when calling tsleep(). 560169806Ssimokawa * 561169806Ssimokawa * The timeout, if any, is not initiated until await() is called. The sleep 562171513Ssimokawa * priority, signal, and timeout is specified in the asleep() call but may be 563170374Ssimokawa * overriden in the await() call. 564103285Sikob * 565103285Sikob * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>> 566103285Sikob */ 567103285Sikob 568103285Sikobint 569103285Sikobasleep(void *ident, int priority, const char *wmesg, int timo) 570103285Sikob{ 571103285Sikob struct proc *p = curproc; 572103285Sikob int s; 573106790Ssimokawa 574110577Ssimokawa /* 575110577Ssimokawa * obtain sched_lock while manipulating sleep structures and slpque. 576110798Ssimokawa * 577110577Ssimokawa * Remove preexisting wait condition (if any) and place process 578110577Ssimokawa * on appropriate slpque, but do not put process to sleep. 
579110577Ssimokawa */ 580110577Ssimokawa 581110577Ssimokawa s = splhigh(); 582170374Ssimokawa mtx_enter(&sched_lock, MTX_SPIN); 583111942Ssimokawa 584170374Ssimokawa if (p->p_wchan != NULL) 585110577Ssimokawa unsleep(p); 586170374Ssimokawa 587113584Ssimokawa if (ident) { 588110577Ssimokawa p->p_wchan = ident; 589110577Ssimokawa p->p_wmesg = wmesg; 590110577Ssimokawa p->p_slptime = 0; 591110798Ssimokawa p->p_asleep.as_priority = priority; 592110798Ssimokawa p->p_asleep.as_timo = timo; 593110798Ssimokawa TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq); 594170374Ssimokawa } 595170374Ssimokawa 596110798Ssimokawa mtx_exit(&sched_lock, MTX_SPIN); 597110798Ssimokawa splx(s); 598170374Ssimokawa 599170374Ssimokawa return(0); 600170374Ssimokawa} 601110798Ssimokawa 602110798Ssimokawa/* 603110798Ssimokawa * await() - wait for async condition to occur. The process blocks until 604110798Ssimokawa * wakeup() is called on the most recent asleep() address. If wakeup is called 605171513Ssimokawa * priority to await(), await() winds up being a NOP. 606170374Ssimokawa * 607171513Ssimokawa * If await() is called more then once (without an intervening asleep() call), 608170374Ssimokawa * await() is still effectively a NOP but it calls mi_switch() to give other 609170374Ssimokawa * processes some cpu before returning. The process is left runnable. 
610170374Ssimokawa * 611170374Ssimokawa * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>> 612249291Swill */ 613170374Ssimokawa 614170374Ssimokawaint 615170374Ssimokawaawait(int priority, int timo) 616170374Ssimokawa{ 617171513Ssimokawa struct proc *p = curproc; 618170374Ssimokawa int rval = 0; 619170374Ssimokawa int s; 620170374Ssimokawa 621110798Ssimokawa mtx_enter(&sched_lock, MTX_SPIN); 622110798Ssimokawa 623116376Ssimokawa s = splhigh(); 624116376Ssimokawa 625103285Sikob if (p->p_wchan != NULL) { 626103285Sikob struct callout_handle thandle; 627103285Sikob int sig; 628103285Sikob int catch; 629103285Sikob 630103285Sikob /* 631103285Sikob * The call to await() can override defaults specified in 632103285Sikob * the original asleep(). 633103285Sikob */ 634103285Sikob if (priority < 0) 635103285Sikob priority = p->p_asleep.as_priority; 636103285Sikob if (timo < 0) 637103285Sikob timo = p->p_asleep.as_timo; 638103285Sikob 639103285Sikob /* 640103285Sikob * Install timeout 641103285Sikob */ 642103285Sikob 643103285Sikob if (timo) 644103285Sikob thandle = timeout(endtsleep, (void *)p, timo); 645103285Sikob 646103285Sikob sig = 0; 647103285Sikob catch = priority & PCATCH; 648103285Sikob 649103285Sikob if (catch) { 650103285Sikob p->p_flag |= P_SINTR; 651103285Sikob if ((sig = CURSIG(p))) { 652103285Sikob if (p->p_wchan) 653103285Sikob unsleep(p); 654103285Sikob p->p_stat = SRUN; 655103285Sikob goto resume; 656103285Sikob } 657103285Sikob if (p->p_wchan == NULL) { 658103285Sikob catch = 0; 659103285Sikob goto resume; 660103285Sikob } 661103285Sikob } 662103285Sikob p->p_stat = SSLEEP; 663103285Sikob p->p_stats->p_ru.ru_nvcsw++; 664103285Sikob mi_switch(); 665103285Sikobresume: 666116376Ssimokawa curpriority = p->p_usrpri; 667113584Ssimokawa 668116376Ssimokawa splx(s); 669116376Ssimokawa p->p_flag &= ~P_SINTR; 670116376Ssimokawa if (p->p_flag & P_TIMEOUT) { 671116376Ssimokawa p->p_flag &= ~P_TIMEOUT; 672116376Ssimokawa if (sig == 0) { 673116376Ssimokawa#ifdef KTRACE 
674116376Ssimokawa if (KTRPOINT(p, KTR_CSW)) 675116376Ssimokawa ktrcsw(p->p_tracep, 0, 0); 676116376Ssimokawa#endif 677116376Ssimokawa rval = EWOULDBLOCK; 678116376Ssimokawa goto out; 679116376Ssimokawa } 680116376Ssimokawa } else if (timo) 681116376Ssimokawa untimeout(endtsleep, (void *)p, thandle); 682116376Ssimokawa if (catch && (sig != 0 || (sig = CURSIG(p)))) { 683116376Ssimokawa#ifdef KTRACE 684116376Ssimokawa if (KTRPOINT(p, KTR_CSW)) 685116376Ssimokawa ktrcsw(p->p_tracep, 0, 0); 686116376Ssimokawa#endif 687116376Ssimokawa if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 688116376Ssimokawa rval = EINTR; 689189928Ssbruno else 690189928Ssbruno rval = ERESTART; 691116376Ssimokawa goto out; 692116376Ssimokawa } 693116376Ssimokawa#ifdef KTRACE 694116376Ssimokawa if (KTRPOINT(p, KTR_CSW)) 695116376Ssimokawa ktrcsw(p->p_tracep, 0, 0); 696116376Ssimokawa#endif 697116376Ssimokawa } else { 698116376Ssimokawa /* 699116376Ssimokawa * If as_priority is 0, await() has been called without an 700116376Ssimokawa * intervening asleep(). We are still effectively a NOP, 701116376Ssimokawa * but we call mi_switch() for safety. 702116376Ssimokawa */ 703116376Ssimokawa 704116376Ssimokawa if (p->p_asleep.as_priority == 0) { 705116376Ssimokawa p->p_stats->p_ru.ru_nvcsw++; 706116376Ssimokawa mi_switch(); 707116376Ssimokawa } 708116376Ssimokawa splx(s); 709116376Ssimokawa } 710116376Ssimokawa 711116376Ssimokawa /* 712116376Ssimokawa * clear p_asleep.as_priority as an indication that await() has been 713116376Ssimokawa * called. If await() is called again without an intervening asleep(), 714116376Ssimokawa * await() is still effectively a NOP but the above mi_switch() code 715116376Ssimokawa * is triggered as a safety. 
 */
	p->p_asleep.as_priority = 0;

out:
	mtx_exit(&sched_lock, MTX_SPIN);

	return (rval);
}

/*
 * Implement timeout for tsleep or asleep()/await()
 *
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 *
 * Runs as a timeout callback: tsleep()/await() arm it and cancel it
 * with untimeout() when the sleep ends normally.
 */
static void
endtsleep(arg)
	void *arg;		/* struct proc * of the sleeping process */
{
	register struct proc *p;
	int s;

	p = (struct proc *)arg;
	CTR4(KTR_PROC,
	        "endtsleep: proc %p (pid %d, %s), schedlock %p",
		p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	/*
	 * Only act if the process is still on a wait channel; a normal
	 * wakeup() clears p_wchan before this callback fires.
	 */
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);	/* e.g. SSTOP: stay stopped */
		/* Lets the sleeper distinguish timeout from wakeup. */
		p->p_flag |= P_TIMEOUT;
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * Remove a process from its wait queue, if it is on one.
 * sched_lock is acquired here; callers need not hold it.
 */
void
unsleep(p)
	register struct proc *p;
{
	int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	if (p->p_wchan) {
		/* Unlink from the hashed sleep-queue bucket for p_wchan. */
		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq);
		p->p_wchan = 0;
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	qp = &slpque[LOOKUP(ident)];
restart:
	TAILQ_FOREACH(p, qp, p_procq) {
		/* Buckets are hashed, so check the exact channel. */
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				CTR4(KTR_PROC,
				        "wakeup: proc %p (pid %d, %s), schedlock %p",
					p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
				} else {
					/* Swapped out: ask the swapper to bring it in. */
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
				/*
				 * The queue was modified (and the nested
				 * wakeup of proc0 may have modified it too),
				 * so the scan must restart from the head.
				 */
				goto restart;
			}
		}
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * Make a process sleeping on the specified identifier runnable.
 * May wake more than one process if a target process is currently
 * swapped out.
 */
void
wakeup_one(ident)
	register void *ident;
{
	register struct slpquehead *qp;
	register struct proc *p;
	int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	qp = &slpque[LOOKUP(ident)];

	TAILQ_FOREACH(p, qp, p_procq) {
		if (p->p_wchan == ident) {
			TAILQ_REMOVE(qp, p, p_procq);
			p->p_wchan = 0;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED EXPANSION OF setrunnable(p); */
				CTR4(KTR_PROC,
				        "wakeup1: proc %p (pid %d, %s), schedlock %p",
					p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_slptime = 0;
				p->p_stat = SRUN;
				if (p->p_flag & P_INMEM) {
					setrunqueue(p);
					maybe_resched(p);
					/*
					 * Stop after waking one in-memory
					 * process; a swapped-out process
					 * (below) doesn't count, so the
					 * scan continues in that case.
					 */
					break;
				} else {
					p->p_flag |= P_SWAPINREQ;
					wakeup((caddr_t)&proc0);
				}
				/* END INLINE EXPANSION */
			}
		}
	}
	mtx_exit(&sched_lock, MTX_SPIN);
	splx(s);
}

/*
 * The machine independent parts of mi_switch().
 * Must be called at splstatclock() or higher.
 */
void
mi_switch()
{
	struct timeval new_switchtime;
	register struct proc *p = curproc;	/* XXX */
	register struct rlimit *rlim;
	int giantreleased;	/* recursion depth of Giant we dropped */
	int x;
	WITNESS_SAVE_DECL(Giant);

	/*
	 * XXX this spl is almost unnecessary.  It is partly to allow for
	 * sloppy callers that don't do it (issignal() via CURSIG() is the
	 * main offender).  It is partly to work around a bug in the i386
	 * cpu_switch() (the ipl is not preserved).  We ran for years
	 * without it.  I think there was only a interrupt latency problem.
	 * The main caller, tsleep(), does an splx() a couple of instructions
	 * after calling here.  The buggy caller, issignal(), usually calls
	 * here at spl0() and sometimes returns at splhigh().  The process
	 * then runs for a little too long at splhigh().  The ipl gets fixed
	 * when the process returns to user mode (or earlier).
	 *
	 * It would probably be better to always call here at spl0(). Callers
	 * are prepared to give up control to another process, so they must
	 * be prepared to be interrupted.  The clock stuff here may not
	 * actually need splstatclock().
	 */
	x = splstatclock();

	CTR4(KTR_PROC, "mi_switch: old proc %p (pid %d, %s), schedlock %p",
		p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);

	/*
	 * Fully release Giant (it may be held recursively), remembering
	 * how many times so it can be re-entered below after the switch.
	 */
	WITNESS_SAVE(&Giant, Giant);
	for (giantreleased = 0; mtx_owned(&Giant); giantreleased++)
		mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH);

#ifdef SIMPLELOCK_DEBUG
	if (p->p_simple_locks)
		printf("sleep: holding simple lock\n");
#endif
	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microuptime(&new_switchtime);
	if (timevalcmp(&new_switchtime, &switchtime, <)) {
		/* Clock went backwards: complain and don't charge time. */
		printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
		    switchtime.tv_sec, switchtime.tv_usec,
		    new_switchtime.tv_sec, new_switchtime.tv_usec);
		new_switchtime = switchtime;
	} else {
		/* p_runtime accumulates in microseconds. */
		p->p_runtime += (new_switchtime.tv_usec - switchtime.tv_usec) +
		    (new_switchtime.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
	}

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.
	 *
	 * XXX drop sched_lock, pickup Giant
	 */
	if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
	    p->p_runtime > p->p_limit->p_cpulimit) {
		rlim = &p->p_rlimit[RLIMIT_CPU];
		if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
			/* Hard limit exceeded: the process is killed. */
			killproc(p, "exceeded maximum CPU limit");
		} else {
			/* Soft limit: warn, then stretch it 5s at a time. */
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max) {
				/* XXX: we should make a private copy */
				rlim->rlim_cur += 5;
			}
		}
	}

	/*
	 * Pick a new current process and record its start time.
	 */
	cnt.v_swtch++;
	switchtime = new_switchtime;
	CTR4(KTR_PROC, "mi_switch: old proc %p (pid %d, %s), schedlock %p",
		p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	cpu_switch();
	/* We resume here when this process is switched back in. */
	CTR4(KTR_PROC, "mi_switch: new proc %p (pid %d, %s), schedlock %p",
		p, p->p_pid, p->p_comm, (void *) sched_lock.mtx_lock);
	if (switchtime.tv_sec == 0)
		microuptime(&switchtime);
	switchticks = ticks;
	mtx_exit(&sched_lock, MTX_SPIN);
	/* Re-enter Giant to the recursion depth we released above. */
	while (giantreleased--)
		mtx_enter(&Giant, MTX_DEF);
	WITNESS_RESTORE(&Giant, Giant);

	splx(x);
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	mtx_enter(&sched_lock, MTX_SPIN);
	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SZOMB:
	case SWAIT:
	default:
		/* Already runnable, dead, or invalid: caller bug. */
		panic("setrunnable");
	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g.
					   when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & P_INMEM)
		setrunqueue(p);
	/*
	 * NOTE(review): the spl is dropped here while sched_lock is still
	 * held (mtx_exit comes only at the end) -- confirm this ordering is
	 * intended relative to other sched_lock/spl users in this file.
	 */
	splx(s);
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0) {
		/* Swapped out: request swap-in and poke the swapper. */
		p->p_flag |= P_SWAPINREQ;
		wakeup((caddr_t)&proc0);
	}
	else
		maybe_resched(p);
	mtx_exit(&sched_lock, MTX_SPIN);
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(p)
	register struct proc *p;
{
	register unsigned int newpriority;

	mtx_enter(&sched_lock, MTX_SPIN);
	if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
		/*
		 * Timeshare priority: base PUSER, worsened by recent cpu
		 * usage (p_estcpu) and by nice, clamped to MAXPRI.
		 * Realtime/idle classes keep their fixed priority.
		 */
		newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT +
		    NICE_WEIGHT * (p->p_nice - PRIO_MIN);
		newpriority = min(newpriority, MAXPRI);
		p->p_usrpri = newpriority;
	}
	/* Called for all classes: preempt if p now beats curproc. */
	maybe_resched(p);
	mtx_exit(&sched_lock, MTX_SPIN);
}

/*
 * One-time scheduler startup hook -- presumably run via SYSINIT
 * (registration not visible in this chunk; verify against the
 * declarations earlier in the file).
 */
/* ARGSUSED */
static void
sched_setup(dummy)
	void *dummy;
{
	/* Kick off timeout driven events by calling first time.
	 * Each of these re-arms its own timeout on every call.
	 */
	roundrobin(NULL);
	schedcpu(NULL);
}

/*
 * We adjust the priority of the current process.  The priority of
 * a process gets worse as it accumulates CPU time.  The cpu usage
 * estimator (p_estcpu) is increased here.  resetpriority() will
 * compute a different priority each time p_estcpu increases by
 * INVERSE_ESTCPU_WEIGHT
 * (until MAXPRI is reached).  The cpu usage estimator ramps up
 * quite quickly when the process is running (linearly), and decays
 * away exponentially, at a rate which is proportionally slower when
 * the system is busy.  The basic principle is that the system will
 * 90% forget that the process used a lot of CPU time in 5 * loadav
 * seconds.  This causes the system to favor processes which haven't
 * run much recently, and to round-robin among other processes.
 *
 * Presumably invoked from the statistics clock interrupt for the
 * running process -- caller not visible in this chunk; confirm.
 */
void
schedclock(p)
	struct proc *p;
{

	p->p_cpticks++;
	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
	/* Recompute priority only when estcpu crosses a weight boundary. */
	if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
		resetpriority(p);
		if (p->p_priority >= PUSER)
			p->p_priority = p->p_usrpri;
	}
}