kern_synch.c revision 82096
1298948Sadrian/*- 2298948Sadrian * Copyright (c) 1982, 1986, 1990, 1991, 1993 3298948Sadrian * The Regents of the University of California. All rights reserved. 4298948Sadrian * (c) UNIX System Laboratories, Inc. 5298948Sadrian * All or some portions of this file are derived from material licensed 6298948Sadrian * to the University of California by American Telephone and Telegraph 7298948Sadrian * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8298948Sadrian * the permission of UNIX System Laboratories, Inc. 9298948Sadrian * 10298948Sadrian * Redistribution and use in source and binary forms, with or without 11298948Sadrian * modification, are permitted provided that the following conditions 12298948Sadrian * are met: 13298948Sadrian * 1. Redistributions of source code must retain the above copyright 14298948Sadrian * notice, this list of conditions and the following disclaimer. 15298948Sadrian * 2. Redistributions in binary form must reproduce the above copyright 16298948Sadrian * notice, this list of conditions and the following disclaimer in the 17298948Sadrian * documentation and/or other materials provided with the distribution. 18298948Sadrian * 3. All advertising materials mentioning features or use of this software 19298948Sadrian * must display the following acknowledgement: 20298948Sadrian * This product includes software developed by the University of 21298948Sadrian * California, Berkeley and its contributors. 22298948Sadrian * 4. Neither the name of the University nor the names of its contributors 23298948Sadrian * may be used to endorse or promote products derived from this software 24298948Sadrian * without specific prior written permission. 
25298948Sadrian * 26298948Sadrian * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27298948Sadrian * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28298948Sadrian * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29298948Sadrian * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30298948Sadrian * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31298948Sadrian * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32298948Sadrian * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33298948Sadrian * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34298948Sadrian * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35298948Sadrian * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36298948Sadrian * SUCH DAMAGE. 37298948Sadrian * 38298948Sadrian * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 39298948Sadrian * $FreeBSD: head/sys/kern/kern_synch.c 82096 2001-08-21 20:09:05Z jhb $ 40298948Sadrian */ 41298948Sadrian 42298948Sadrian#include "opt_ddb.h" 43298948Sadrian#include "opt_ktrace.h" 44298948Sadrian 45298948Sadrian#include <sys/param.h> 46298948Sadrian#include <sys/systm.h> 47298948Sadrian#include <sys/condvar.h> 48298948Sadrian#include <sys/kernel.h> 49298948Sadrian#include <sys/ktr.h> 50298948Sadrian#include <sys/lock.h> 51298948Sadrian#include <sys/mutex.h> 52298948Sadrian#include <sys/proc.h> 53298948Sadrian#include <sys/resourcevar.h> 54298948Sadrian#include <sys/signalvar.h> 55298948Sadrian#include <sys/smp.h> 56298948Sadrian#include <sys/sx.h> 57298948Sadrian#include <sys/sysctl.h> 58298948Sadrian#include <sys/sysproto.h> 59298948Sadrian#include <sys/vmmeter.h> 60298948Sadrian#include <vm/vm.h> 61298948Sadrian#include <vm/vm_extern.h> 62298948Sadrian#ifdef DDB 63298948Sadrian#include <ddb/ddb.h> 64298948Sadrian#endif 
65298948Sadrian#ifdef KTRACE 66298948Sadrian#include <sys/uio.h> 67298948Sadrian#include <sys/ktrace.h> 68298948Sadrian#endif 69298948Sadrian 70298948Sadrian#include <machine/cpu.h> 71298948Sadrian 72298948Sadrianstatic void sched_setup __P((void *dummy)); 73298948SadrianSYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) 74298948Sadrian 75298948Sadrianint hogticks; 76298948Sadrianint lbolt; 77298948Sadrianint sched_quantum; /* Roundrobin scheduling quantum in ticks. */ 78298948Sadrian 79298948Sadrianstatic struct callout schedcpu_callout; 80298948Sadrianstatic struct callout roundrobin_callout; 81298948Sadrian 82298948Sadrianstatic void endtsleep __P((void *)); 83298948Sadrianstatic void roundrobin __P((void *arg)); 84298948Sadrianstatic void schedcpu __P((void *arg)); 85298948Sadrian 86298948Sadrianstatic int 87298948Sadriansysctl_kern_quantum(SYSCTL_HANDLER_ARGS) 88298948Sadrian{ 89298948Sadrian int error, new_val; 90298948Sadrian 91298948Sadrian new_val = sched_quantum * tick; 92298948Sadrian error = sysctl_handle_int(oidp, &new_val, 0, req); 93298948Sadrian if (error != 0 || req->newptr == NULL) 94298948Sadrian return (error); 95298948Sadrian if (new_val < tick) 96298948Sadrian return (EINVAL); 97298948Sadrian sched_quantum = new_val / tick; 98298948Sadrian hogticks = 2 * sched_quantum; 99298948Sadrian return (0); 100298948Sadrian} 101298948Sadrian 102298948SadrianSYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 103298948Sadrian 0, sizeof sched_quantum, sysctl_kern_quantum, "I", ""); 104298948Sadrian 105298948Sadrian/* 106298948Sadrian * Arrange to reschedule if necessary, taking the priorities and 107298948Sadrian * schedulers into account. 
108298948Sadrian */ 109298948Sadrianvoid 110298948Sadrianmaybe_resched(p) 111298948Sadrian struct proc *p; 112298948Sadrian{ 113298948Sadrian 114298948Sadrian mtx_assert(&sched_lock, MA_OWNED); 115298948Sadrian if (p->p_pri.pri_level < curproc->p_pri.pri_level) 116298948Sadrian curproc->p_sflag |= PS_NEEDRESCHED; 117298948Sadrian} 118298948Sadrian 119298948Sadrianint 120298948Sadrianroundrobin_interval(void) 121298948Sadrian{ 122298948Sadrian return (sched_quantum); 123298948Sadrian} 124298948Sadrian 125298948Sadrian/* 126298948Sadrian * Force switch among equal priority processes every 100ms. 127298948Sadrian * We don't actually need to force a context switch of the current process. 128298948Sadrian * The act of firing the event triggers a context switch to softclock() and 129298948Sadrian * then switching back out again which is equivalent to a preemption, thus 130298948Sadrian * no further work is needed on the local CPU. 131298948Sadrian */ 132298948Sadrian/* ARGSUSED */ 133298948Sadrianstatic void 134298948Sadrianroundrobin(arg) 135298948Sadrian void *arg; 136298948Sadrian{ 137298948Sadrian 138298948Sadrian#ifdef SMP 139298948Sadrian mtx_lock_spin(&sched_lock); 140298948Sadrian forward_roundrobin(); 141298948Sadrian mtx_unlock_spin(&sched_lock); 142298948Sadrian#endif 143298948Sadrian 144298948Sadrian callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL); 145298948Sadrian} 146298948Sadrian 147298948Sadrian/* 148298948Sadrian * Constants for digital decay and forget: 149298948Sadrian * 90% of (p_estcpu) usage in 5 * loadav time 150298948Sadrian * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 151298948Sadrian * Note that, as ps(1) mentions, this can let percentages 152298948Sadrian * total over 100% (I've seen 137.9% for 3 processes). 153298948Sadrian * 154298948Sadrian * Note that schedclock() updates p_estcpu and p_cpticks asynchronously. 
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *         For x close to zero, exp(x) =~ 1 + x, since
 *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *         For x close to zero, ln(1+x) =~ x, since
 *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
191298948Sadrian * ln(.1) =~ -2.30 192298948Sadrian * 193298948Sadrian * Proof of (1): 194298948Sadrian * Solve (factor)**(power) =~ .1 given power (5*loadav): 195298948Sadrian * solving for factor, 196298948Sadrian * ln(factor) =~ (-2.30/5*loadav), or 197298948Sadrian * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = 198298948Sadrian * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 199298948Sadrian * 200298948Sadrian * Proof of (2): 201298948Sadrian * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 202298948Sadrian * solving for power, 203298948Sadrian * power*ln(b/(b+1)) =~ -2.30, or 204298948Sadrian * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 205298948Sadrian * 206298948Sadrian * Actual power values for the implemented algorithm are as follows: 207298948Sadrian * loadav: 1 2 3 4 208298948Sadrian * power: 5.68 10.32 14.94 19.55 209298948Sadrian */ 210298948Sadrian 211298948Sadrian/* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 212298948Sadrian#define loadfactor(loadav) (2 * (loadav)) 213298948Sadrian#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 214298948Sadrian 215298948Sadrian/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 216298948Sadrianstatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 217298948SadrianSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 218298948Sadrian 219298948Sadrian/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ 220298948Sadrianstatic int fscale __unused = FSCALE; 221298948SadrianSYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); 222298948Sadrian 223298948Sadrian/* 224298948Sadrian * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 225298948Sadrian * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 226298948Sadrian * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 
227298948Sadrian * 228298948Sadrian * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 229298948Sadrian * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 230298948Sadrian * 231298948Sadrian * If you don't want to bother with the faster/more-accurate formula, you 232298948Sadrian * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 233298948Sadrian * (more general) method of calculating the %age of CPU used by a process. 234298948Sadrian */ 235298948Sadrian#define CCPU_SHIFT 11 236298948Sadrian 237298948Sadrian/* 238298948Sadrian * Recompute process priorities, every hz ticks. 239298948Sadrian * MP-safe, called without the Giant mutex. 240298948Sadrian */ 241298948Sadrian/* ARGSUSED */ 242298948Sadrianstatic void 243298948Sadrianschedcpu(arg) 244298948Sadrian void *arg; 245298948Sadrian{ 246298948Sadrian register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 247298948Sadrian register struct proc *p; 248298948Sadrian register int realstathz; 249298948Sadrian 250298948Sadrian realstathz = stathz ? stathz : hz; 251298948Sadrian sx_slock(&allproc_lock); 252298948Sadrian LIST_FOREACH(p, &allproc, p_list) { 253298948Sadrian /* 254298948Sadrian * Increment time in/out of memory and sleep time 255298948Sadrian * (if sleeping). We ignore overflow; with 16-bit int's 256298948Sadrian * (remember them?) overflow takes 45 days. 257298948Sadrian */ 258298948Sadrian mtx_lock_spin(&sched_lock); 259298948Sadrian p->p_swtime++; 260298948Sadrian if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 261298948Sadrian p->p_slptime++; 262298948Sadrian p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 263298948Sadrian /* 264298948Sadrian * If the process has slept the entire second, 265298948Sadrian * stop recalculating its priority until it wakes up. 
266298948Sadrian */ 267298948Sadrian if (p->p_slptime > 1) { 268298948Sadrian mtx_unlock_spin(&sched_lock); 269298948Sadrian continue; 270298948Sadrian } 271298948Sadrian 272298948Sadrian /* 273298948Sadrian * p_pctcpu is only for ps. 274298948Sadrian */ 275298948Sadrian#if (FSHIFT >= CCPU_SHIFT) 276298948Sadrian p->p_pctcpu += (realstathz == 100)? 277298948Sadrian ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 278298948Sadrian 100 * (((fixpt_t) p->p_cpticks) 279298948Sadrian << (FSHIFT - CCPU_SHIFT)) / realstathz; 280298948Sadrian#else 281298948Sadrian p->p_pctcpu += ((FSCALE - ccpu) * 282298948Sadrian (p->p_cpticks * FSCALE / realstathz)) >> FSHIFT; 283298948Sadrian#endif 284298948Sadrian p->p_cpticks = 0; 285298948Sadrian p->p_estcpu = decay_cpu(loadfac, p->p_estcpu); 286298948Sadrian resetpriority(p); 287298948Sadrian if (p->p_pri.pri_level >= PUSER) { 288298948Sadrian if (p->p_oncpu == NOCPU && /* idle */ 289298948Sadrian p->p_stat == SRUN && 290298948Sadrian (p->p_sflag & PS_INMEM) && 291298948Sadrian (p->p_pri.pri_level / RQ_PPQ) != 292298948Sadrian (p->p_pri.pri_user / RQ_PPQ)) { 293298948Sadrian remrunqueue(p); 294298948Sadrian p->p_pri.pri_level = p->p_pri.pri_user; 295298948Sadrian setrunqueue(p); 296298948Sadrian } else 297298948Sadrian p->p_pri.pri_level = p->p_pri.pri_user; 298298948Sadrian } 299298948Sadrian mtx_unlock_spin(&sched_lock); 300298948Sadrian } 301298948Sadrian sx_sunlock(&allproc_lock); 302298948Sadrian vmmeter(); 303298948Sadrian wakeup((caddr_t)&lbolt); 304298948Sadrian callout_reset(&schedcpu_callout, hz, schedcpu, NULL); 305298948Sadrian} 306298948Sadrian 307298948Sadrian/* 308298948Sadrian * Recalculate the priority of a process after it has slept for a while. 309298948Sadrian * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 310298948Sadrian * least six times the loadfactor will decay p_estcpu to zero. 
311298948Sadrian */ 312298948Sadrianvoid 313298948Sadrianupdatepri(p) 314298948Sadrian register struct proc *p; 315298948Sadrian{ 316298948Sadrian register unsigned int newcpu = p->p_estcpu; 317298948Sadrian register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 318298948Sadrian 319298948Sadrian if (p->p_slptime > 5 * loadfac) 320298948Sadrian p->p_estcpu = 0; 321298948Sadrian else { 322298948Sadrian p->p_slptime--; /* the first time was done in schedcpu */ 323298948Sadrian while (newcpu && --p->p_slptime) 324298948Sadrian newcpu = decay_cpu(loadfac, newcpu); 325298948Sadrian p->p_estcpu = newcpu; 326298948Sadrian } 327298948Sadrian resetpriority(p); 328298948Sadrian} 329298948Sadrian 330298948Sadrian/* 331298948Sadrian * We're only looking at 7 bits of the address; everything is 332298948Sadrian * aligned to 4, lots of things are aligned to greater powers 333298948Sadrian * of 2. Shift right by 8, i.e. drop the bottom 256 worth. 334298948Sadrian */ 335298948Sadrian#define TABLESIZE 128 336298948Sadrianstatic TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; 337298948Sadrian#define LOOKUP(x) (((intptr_t)(x) >> 8) & (TABLESIZE - 1)) 338298948Sadrian 339298948Sadrianvoid 340298948Sadriansleepinit(void) 341298948Sadrian{ 342298948Sadrian int i; 343298948Sadrian 344298948Sadrian sched_quantum = hz/10; 345298948Sadrian hogticks = 2 * sched_quantum; 346298948Sadrian for (i = 0; i < TABLESIZE; i++) 347298948Sadrian TAILQ_INIT(&slpque[i]); 348298948Sadrian} 349298948Sadrian 350298948Sadrian/* 351298948Sadrian * General sleep call. Suspends the current process until a wakeup is 352298948Sadrian * performed on the specified identifier. The process will then be made 353298948Sadrian * runnable with the specified priority. Sleeps at most timo/hz seconds 354298948Sadrian * (0 means no timeout). If pri includes PCATCH flag, signals are checked 355298948Sadrian * before and after sleeping, else signals are not checked. 
Returns 0 if 356298948Sadrian * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a 357298948Sadrian * signal needs to be delivered, ERESTART is returned if the current system 358298948Sadrian * call should be restarted if possible, and EINTR is returned if the system 359298948Sadrian * call should be interrupted by the signal (return EINTR). 360298948Sadrian * 361298948Sadrian * The mutex argument is exited before the caller is suspended, and 362298948Sadrian * entered before msleep returns. If priority includes the PDROP 363298948Sadrian * flag the mutex is not entered before returning. 364298948Sadrian */ 365298948Sadrianint 366298948Sadrianmsleep(ident, mtx, priority, wmesg, timo) 367298948Sadrian void *ident; 368298948Sadrian struct mtx *mtx; 369298948Sadrian int priority, timo; 370298948Sadrian const char *wmesg; 371298948Sadrian{ 372298948Sadrian struct proc *p = curproc; 373298948Sadrian int sig, catch = priority & PCATCH; 374298948Sadrian int rval = 0; 375298948Sadrian WITNESS_SAVE_DECL(mtx); 376298948Sadrian 377298948Sadrian#ifdef KTRACE 378298948Sadrian if (p && KTRPOINT(p, KTR_CSW)) 379298948Sadrian ktrcsw(p->p_tracep, 1, 0); 380298948Sadrian#endif 381298948Sadrian WITNESS_SLEEP(0, &mtx->mtx_object); 382298948Sadrian KASSERT(timo != 0 || mtx_owned(&Giant) || mtx != NULL, 383298948Sadrian ("sleeping without a mutex")); 384298948Sadrian mtx_lock_spin(&sched_lock); 385298948Sadrian if (cold || panicstr) { 386298948Sadrian /* 387298948Sadrian * After a panic, or during autoconfiguration, 388298948Sadrian * just give interrupts a chance, then just return; 389298948Sadrian * don't run any other procs or panic below, 390298948Sadrian * in case this is the idle process and already asleep. 
391298948Sadrian */ 392298948Sadrian if (mtx != NULL && priority & PDROP) 393298948Sadrian mtx_unlock_flags(mtx, MTX_NOSWITCH); 394298948Sadrian mtx_unlock_spin(&sched_lock); 395298948Sadrian return (0); 396298948Sadrian } 397298948Sadrian 398298948Sadrian DROP_GIANT_NOSWITCH(); 399298948Sadrian 400298948Sadrian if (mtx != NULL) { 401298948Sadrian mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); 402298948Sadrian WITNESS_SAVE(&mtx->mtx_object, mtx); 403298948Sadrian mtx_unlock_flags(mtx, MTX_NOSWITCH); 404298948Sadrian if (priority & PDROP) 405298948Sadrian mtx = NULL; 406298948Sadrian } 407298948Sadrian 408298948Sadrian KASSERT(p != NULL, ("msleep1")); 409298948Sadrian KASSERT(ident != NULL && p->p_stat == SRUN, ("msleep")); 410298948Sadrian 411298948Sadrian p->p_wchan = ident; 412298948Sadrian p->p_wmesg = wmesg; 413298948Sadrian p->p_slptime = 0; 414298948Sadrian p->p_pri.pri_level = priority & PRIMASK; 415298948Sadrian CTR5(KTR_PROC, "msleep: proc %p (pid %d, %s) on %s (%p)", p, p->p_pid, 416298948Sadrian p->p_comm, wmesg, ident); 417298948Sadrian TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_slpq); 418298948Sadrian if (timo) 419298948Sadrian callout_reset(&p->p_slpcallout, timo, endtsleep, p); 420298948Sadrian /* 421298948Sadrian * We put ourselves on the sleep queue and start our timeout 422298948Sadrian * before calling CURSIG, as we could stop there, and a wakeup 423298948Sadrian * or a SIGCONT (or both) could occur while we were stopped. 424298948Sadrian * A SIGCONT would cause us to be marked as SSLEEP 425298948Sadrian * without resuming us, thus we must be ready for sleep 426298948Sadrian * when CURSIG is called. If the wakeup happens while we're 427298948Sadrian * stopped, p->p_wchan will be 0 upon return from CURSIG. 
428298948Sadrian */ 429298948Sadrian if (catch) { 430298948Sadrian CTR3(KTR_PROC, "msleep caught: proc %p (pid %d, %s)", p, 431298948Sadrian p->p_pid, p->p_comm); 432298948Sadrian p->p_sflag |= PS_SINTR; 433298948Sadrian mtx_unlock_spin(&sched_lock); 434298948Sadrian PROC_LOCK(p); 435298948Sadrian sig = CURSIG(p); 436298948Sadrian mtx_lock_spin(&sched_lock); 437298948Sadrian PROC_UNLOCK_NOSWITCH(p); 438298948Sadrian if (sig != 0) { 439298948Sadrian if (p->p_wchan != NULL) 440298948Sadrian unsleep(p); 441298948Sadrian } else if (p->p_wchan == NULL) 442298948Sadrian catch = 0; 443298948Sadrian } else 444298948Sadrian sig = 0; 445298948Sadrian if (p->p_wchan != NULL) { 446298948Sadrian p->p_stat = SSLEEP; 447298948Sadrian p->p_stats->p_ru.ru_nvcsw++; 448298948Sadrian mi_switch(); 449298948Sadrian } 450298948Sadrian CTR3(KTR_PROC, "msleep resume: proc %p (pid %d, %s)", p, p->p_pid, 451298948Sadrian p->p_comm); 452298948Sadrian KASSERT(p->p_stat == SRUN, ("running but not SRUN")); 453298948Sadrian p->p_sflag &= ~PS_SINTR; 454298948Sadrian if (p->p_sflag & PS_TIMEOUT) { 455298948Sadrian p->p_sflag &= ~PS_TIMEOUT; 456298948Sadrian if (sig == 0) 457298948Sadrian rval = EWOULDBLOCK; 458298948Sadrian } else if (p->p_sflag & PS_TIMOFAIL) 459298948Sadrian p->p_sflag &= ~PS_TIMOFAIL; 460298948Sadrian else if (timo && callout_stop(&p->p_slpcallout) == 0) { 461298948Sadrian /* 462298948Sadrian * This isn't supposed to be pretty. If we are here, then 463298948Sadrian * the endtsleep() callout is currently executing on another 464298948Sadrian * CPU and is either spinning on the sched_lock or will be 465298948Sadrian * soon. If we don't synchronize here, there is a chance 466298948Sadrian * that this process may msleep() again before the callout 467298948Sadrian * has a chance to run and the callout may end up waking up 468298948Sadrian * the wrong msleep(). Yuck. 
469298948Sadrian */ 470298948Sadrian p->p_sflag |= PS_TIMEOUT; 471298948Sadrian p->p_stats->p_ru.ru_nivcsw++; 472298948Sadrian mi_switch(); 473298948Sadrian } 474298948Sadrian mtx_unlock_spin(&sched_lock); 475298948Sadrian 476298948Sadrian if (rval == 0 && catch) { 477298948Sadrian PROC_LOCK(p); 478298948Sadrian /* XXX: shouldn't we always be calling CURSIG() */ 479298948Sadrian if (sig != 0 || (sig = CURSIG(p))) { 480298948Sadrian if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 481298948Sadrian rval = EINTR; 482298948Sadrian else 483298948Sadrian rval = ERESTART; 484298948Sadrian } 485298948Sadrian PROC_UNLOCK(p); 486298948Sadrian } 487298948Sadrian PICKUP_GIANT(); 488298948Sadrian#ifdef KTRACE 489298948Sadrian mtx_lock(&Giant); 490298948Sadrian if (KTRPOINT(p, KTR_CSW)) 491298948Sadrian ktrcsw(p->p_tracep, 0, 0); 492298948Sadrian mtx_unlock(&Giant); 493298948Sadrian#endif 494298948Sadrian if (mtx != NULL) { 495298948Sadrian mtx_lock(mtx); 496298948Sadrian WITNESS_RESTORE(&mtx->mtx_object, mtx); 497298948Sadrian } 498298948Sadrian return (rval); 499298948Sadrian} 500298948Sadrian 501298948Sadrian/* 502298948Sadrian * Implement timeout for msleep() 503298948Sadrian * 504298948Sadrian * If process hasn't been awakened (wchan non-zero), 505298948Sadrian * set timeout flag and undo the sleep. If proc 506298948Sadrian * is stopped, just unsleep so it will remain stopped. 507298948Sadrian * MP-safe, called without the Giant mutex. 508298948Sadrian */ 509298948Sadrianstatic void 510298948Sadrianendtsleep(arg) 511298948Sadrian void *arg; 512298948Sadrian{ 513298948Sadrian register struct proc *p; 514298948Sadrian 515298948Sadrian p = (struct proc *)arg; 516298948Sadrian CTR3(KTR_PROC, "endtsleep: proc %p (pid %d, %s)", p, p->p_pid, 517298948Sadrian p->p_comm); 518298948Sadrian mtx_lock_spin(&sched_lock); 519298948Sadrian /* 520298948Sadrian * This is the other half of the synchronization with msleep() 521298948Sadrian * described above. 
If the PS_TIMEOUT flag is set, we lost the 522298948Sadrian * race and just need to put the process back on the runqueue. 523298948Sadrian */ 524298948Sadrian if ((p->p_sflag & PS_TIMEOUT) != 0) { 525298948Sadrian p->p_sflag &= ~PS_TIMEOUT; 526298948Sadrian setrunqueue(p); 527298948Sadrian } else if (p->p_wchan != NULL) { 528298948Sadrian if (p->p_stat == SSLEEP) 529298948Sadrian setrunnable(p); 530298948Sadrian else 531298948Sadrian unsleep(p); 532298948Sadrian p->p_sflag |= PS_TIMEOUT; 533298948Sadrian } else 534298948Sadrian p->p_sflag |= PS_TIMOFAIL; 535298948Sadrian mtx_unlock_spin(&sched_lock); 536298948Sadrian} 537298948Sadrian 538298948Sadrian/* 539298948Sadrian * Remove a process from its wait queue 540298948Sadrian */ 541298948Sadrianvoid 542298948Sadrianunsleep(p) 543298948Sadrian register struct proc *p; 544298948Sadrian{ 545298948Sadrian 546298948Sadrian mtx_lock_spin(&sched_lock); 547298948Sadrian if (p->p_wchan != NULL) { 548298948Sadrian TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_slpq); 549298948Sadrian p->p_wchan = NULL; 550298948Sadrian } 551298948Sadrian mtx_unlock_spin(&sched_lock); 552298948Sadrian} 553298948Sadrian 554298948Sadrian/* 555298948Sadrian * Make all processes sleeping on the specified identifier runnable. 
556298948Sadrian */ 557298948Sadrianvoid 558298948Sadrianwakeup(ident) 559298948Sadrian register void *ident; 560298948Sadrian{ 561298948Sadrian register struct slpquehead *qp; 562298948Sadrian register struct proc *p; 563298948Sadrian 564298948Sadrian mtx_lock_spin(&sched_lock); 565298948Sadrian qp = &slpque[LOOKUP(ident)]; 566298948Sadrianrestart: 567298948Sadrian TAILQ_FOREACH(p, qp, p_slpq) { 568298948Sadrian if (p->p_wchan == ident) { 569298948Sadrian TAILQ_REMOVE(qp, p, p_slpq); 570298948Sadrian p->p_wchan = NULL; 571298948Sadrian if (p->p_stat == SSLEEP) { 572298948Sadrian /* OPTIMIZED EXPANSION OF setrunnable(p); */ 573298948Sadrian CTR3(KTR_PROC, "wakeup: proc %p (pid %d, %s)", 574298948Sadrian p, p->p_pid, p->p_comm); 575298948Sadrian if (p->p_slptime > 1) 576298948Sadrian updatepri(p); 577298948Sadrian p->p_slptime = 0; 578298948Sadrian p->p_stat = SRUN; 579298948Sadrian if (p->p_sflag & PS_INMEM) { 580298948Sadrian setrunqueue(p); 581298948Sadrian maybe_resched(p); 582298948Sadrian } else { 583298948Sadrian p->p_sflag |= PS_SWAPINREQ; 584298948Sadrian wakeup((caddr_t)&proc0); 585298948Sadrian } 586298948Sadrian /* END INLINE EXPANSION */ 587298948Sadrian goto restart; 588298948Sadrian } 589298948Sadrian } 590298948Sadrian } 591298948Sadrian mtx_unlock_spin(&sched_lock); 592298948Sadrian} 593298948Sadrian 594298948Sadrian/* 595298948Sadrian * Make a process sleeping on the specified identifier runnable. 596298948Sadrian * May wake more than one process if a target process is currently 597298948Sadrian * swapped out. 
598298948Sadrian */ 599298948Sadrianvoid 600298948Sadrianwakeup_one(ident) 601298948Sadrian register void *ident; 602298948Sadrian{ 603298948Sadrian register struct slpquehead *qp; 604298948Sadrian register struct proc *p; 605298948Sadrian 606298948Sadrian mtx_lock_spin(&sched_lock); 607298948Sadrian qp = &slpque[LOOKUP(ident)]; 608298948Sadrian 609298948Sadrian TAILQ_FOREACH(p, qp, p_slpq) { 610298948Sadrian if (p->p_wchan == ident) { 611298948Sadrian TAILQ_REMOVE(qp, p, p_slpq); 612298948Sadrian p->p_wchan = NULL; 613298948Sadrian if (p->p_stat == SSLEEP) { 614298948Sadrian /* OPTIMIZED EXPANSION OF setrunnable(p); */ 615298948Sadrian CTR3(KTR_PROC, "wakeup1: proc %p (pid %d, %s)", 616298948Sadrian p, p->p_pid, p->p_comm); 617298948Sadrian if (p->p_slptime > 1) 618298948Sadrian updatepri(p); 619298948Sadrian p->p_slptime = 0; 620298948Sadrian p->p_stat = SRUN; 621298948Sadrian if (p->p_sflag & PS_INMEM) { 622298948Sadrian setrunqueue(p); 623298948Sadrian maybe_resched(p); 624298948Sadrian break; 625298948Sadrian } else { 626298948Sadrian p->p_sflag |= PS_SWAPINREQ; 627298948Sadrian wakeup((caddr_t)&proc0); 628298948Sadrian } 629298948Sadrian /* END INLINE EXPANSION */ 630298948Sadrian } 631298948Sadrian } 632298948Sadrian } 633298948Sadrian mtx_unlock_spin(&sched_lock); 634298948Sadrian} 635298948Sadrian 636298948Sadrian/* 637298948Sadrian * The machine independent parts of mi_switch(). 
638298948Sadrian */ 639298948Sadrianvoid 640298948Sadrianmi_switch() 641298948Sadrian{ 642298948Sadrian struct timeval new_switchtime; 643298948Sadrian register struct proc *p = curproc; /* XXX */ 644298948Sadrian#if 0 645298948Sadrian register struct rlimit *rlim; 646298948Sadrian#endif 647298948Sadrian critical_t sched_crit; 648298948Sadrian u_int sched_nest; 649298948Sadrian 650298948Sadrian mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); 651298948Sadrian 652298948Sadrian /* 653298948Sadrian * Compute the amount of time during which the current 654298948Sadrian * process was running, and add that to its total so far. 655298948Sadrian */ 656298948Sadrian microuptime(&new_switchtime); 657298948Sadrian if (timevalcmp(&new_switchtime, PCPU_PTR(switchtime), <)) { 658298948Sadrian#if 0 659298948Sadrian /* XXX: This doesn't play well with sched_lock right now. */ 660298948Sadrian printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n", 661298948Sadrian PCPU_GET(switchtime.tv_sec), PCPU_GET(switchtime.tv_usec), 662298948Sadrian new_switchtime.tv_sec, new_switchtime.tv_usec); 663298948Sadrian#endif 664298948Sadrian new_switchtime = PCPU_GET(switchtime); 665298948Sadrian } else { 666298948Sadrian p->p_runtime += (new_switchtime.tv_usec - PCPU_GET(switchtime.tv_usec)) + 667298948Sadrian (new_switchtime.tv_sec - PCPU_GET(switchtime.tv_sec)) * 668298948Sadrian (int64_t)1000000; 669298948Sadrian } 670298948Sadrian 671298948Sadrian#ifdef DDB 672298948Sadrian /* 673298948Sadrian * Don't perform context switches from the debugger. 674298948Sadrian */ 675298948Sadrian if (db_active) 676298948Sadrian db_error("Context switches not allowed in the debugger."); 677298948Sadrian#endif 678298948Sadrian 679298948Sadrian#if 0 680298948Sadrian /* 681298948Sadrian * Check if the process exceeds its cpu resource allocation. 682298948Sadrian * If over max, kill it. 
683298948Sadrian * 684298948Sadrian * XXX drop sched_lock, pickup Giant 685298948Sadrian */ 686298948Sadrian if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY && 687298948Sadrian p->p_runtime > p->p_limit->p_cpulimit) { 688298948Sadrian rlim = &p->p_rlimit[RLIMIT_CPU]; 689298948Sadrian if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) { 690298948Sadrian mtx_unlock_spin(&sched_lock); 691298948Sadrian PROC_LOCK(p); 692298948Sadrian killproc(p, "exceeded maximum CPU limit"); 693298948Sadrian mtx_lock_spin(&sched_lock); 694298948Sadrian PROC_UNLOCK_NOSWITCH(p); 695298948Sadrian } else { 696298948Sadrian mtx_unlock_spin(&sched_lock); 697298948Sadrian PROC_LOCK(p); 698298948Sadrian psignal(p, SIGXCPU); 699298948Sadrian mtx_lock_spin(&sched_lock); 700298948Sadrian PROC_UNLOCK_NOSWITCH(p); 701298948Sadrian if (rlim->rlim_cur < rlim->rlim_max) { 702298948Sadrian /* XXX: we should make a private copy */ 703298948Sadrian rlim->rlim_cur += 5; 704298948Sadrian } 705298948Sadrian } 706298948Sadrian } 707298948Sadrian#endif 708298948Sadrian 709298948Sadrian /* 710298948Sadrian * Pick a new current process and record its start time. 
711298948Sadrian */ 712298948Sadrian cnt.v_swtch++; 713298948Sadrian PCPU_SET(switchtime, new_switchtime); 714298948Sadrian CTR3(KTR_PROC, "mi_switch: old proc %p (pid %d, %s)", p, p->p_pid, 715298948Sadrian p->p_comm); 716298948Sadrian sched_crit = sched_lock.mtx_savecrit; 717298948Sadrian sched_nest = sched_lock.mtx_recurse; 718298948Sadrian p->p_lastcpu = p->p_oncpu; 719298948Sadrian p->p_oncpu = NOCPU; 720298948Sadrian p->p_sflag &= ~PS_NEEDRESCHED; 721298948Sadrian cpu_switch(); 722298948Sadrian p->p_oncpu = PCPU_GET(cpuid); 723298948Sadrian sched_lock.mtx_savecrit = sched_crit; 724298948Sadrian sched_lock.mtx_recurse = sched_nest; 725298948Sadrian sched_lock.mtx_lock = (uintptr_t)p; 726298948Sadrian CTR3(KTR_PROC, "mi_switch: new proc %p (pid %d, %s)", p, p->p_pid, 727298948Sadrian p->p_comm); 728298948Sadrian if (PCPU_GET(switchtime.tv_sec) == 0) 729298948Sadrian microuptime(PCPU_PTR(switchtime)); 730298948Sadrian PCPU_SET(switchticks, ticks); 731298948Sadrian} 732298948Sadrian 733298948Sadrian/* 734298948Sadrian * Change process state to be runnable, 735298948Sadrian * placing it on the run queue if it is in memory, 736298948Sadrian * and awakening the swapper if it isn't in memory. 737298948Sadrian */ 738298948Sadrianvoid 739298948Sadriansetrunnable(p) 740298948Sadrian register struct proc *p; 741298948Sadrian{ 742298948Sadrian 743298948Sadrian mtx_lock_spin(&sched_lock); 744298948Sadrian switch (p->p_stat) { 745298948Sadrian case 0: 746298948Sadrian case SRUN: 747298948Sadrian case SZOMB: 748298948Sadrian case SWAIT: 749298948Sadrian default: 750298948Sadrian panic("setrunnable"); 751298948Sadrian case SSTOP: 752298948Sadrian case SSLEEP: /* e.g. 
when sending signals */ 753298948Sadrian if (p->p_sflag & PS_CVWAITQ) 754298948Sadrian cv_waitq_remove(p); 755298948Sadrian else 756298948Sadrian unsleep(p); 757298948Sadrian break; 758298948Sadrian 759298948Sadrian case SIDL: 760298948Sadrian break; 761298948Sadrian } 762298948Sadrian p->p_stat = SRUN; 763298948Sadrian if (p->p_slptime > 1) 764298948Sadrian updatepri(p); 765298948Sadrian p->p_slptime = 0; 766298948Sadrian if ((p->p_sflag & PS_INMEM) == 0) { 767298948Sadrian p->p_sflag |= PS_SWAPINREQ; 768298948Sadrian wakeup((caddr_t)&proc0); 769298948Sadrian } else { 770298948Sadrian setrunqueue(p); 771298948Sadrian maybe_resched(p); 772298948Sadrian } 773298948Sadrian mtx_unlock_spin(&sched_lock); 774298948Sadrian} 775298948Sadrian 776298948Sadrian/* 777298948Sadrian * Compute the priority of a process when running in user mode. 778298948Sadrian * Arrange to reschedule if the resulting priority is better 779298948Sadrian * than that of the current process. 780298948Sadrian */ 781298948Sadrianvoid 782298948Sadrianresetpriority(p) 783298948Sadrian register struct proc *p; 784298948Sadrian{ 785298948Sadrian register unsigned int newpriority; 786298948Sadrian 787298948Sadrian mtx_lock_spin(&sched_lock); 788298948Sadrian if (p->p_pri.pri_class == PRI_TIMESHARE) { 789298948Sadrian newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT + 790298948Sadrian NICE_WEIGHT * (p->p_nice - PRIO_MIN); 791298948Sadrian newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), 792298948Sadrian PRI_MAX_TIMESHARE); 793298948Sadrian p->p_pri.pri_user = newpriority; 794298948Sadrian } 795298948Sadrian maybe_resched(p); 796298948Sadrian mtx_unlock_spin(&sched_lock); 797298948Sadrian} 798298948Sadrian 799298948Sadrian/* ARGSUSED */ 800298948Sadrianstatic void 801298948Sadriansched_setup(dummy) 802298948Sadrian void *dummy; 803298948Sadrian{ 804298948Sadrian 805298948Sadrian callout_init(&schedcpu_callout, 1); 806298948Sadrian callout_init(&roundrobin_callout, 0); 807298948Sadrian 
808298948Sadrian /* Kick off timeout driven events by calling first time. */ 809298948Sadrian roundrobin(NULL); 810298948Sadrian schedcpu(NULL); 811298948Sadrian} 812298948Sadrian 813298948Sadrian/* 814298948Sadrian * We adjust the priority of the current process. The priority of 815298948Sadrian * a process gets worse as it accumulates CPU time. The cpu usage 816298948Sadrian * estimator (p_estcpu) is increased here. resetpriority() will 817298948Sadrian * compute a different priority each time p_estcpu increases by 818298948Sadrian * INVERSE_ESTCPU_WEIGHT 819298948Sadrian * (until MAXPRI is reached). The cpu usage estimator ramps up 820298948Sadrian * quite quickly when the process is running (linearly), and decays 821298948Sadrian * away exponentially, at a rate which is proportionally slower when 822298948Sadrian * the system is busy. The basic principle is that the system will 823298948Sadrian * 90% forget that the process used a lot of CPU time in 5 * loadav 824298948Sadrian * seconds. This causes the system to favor processes which haven't 825298948Sadrian * run much recently, and to round-robin among other processes. 
826298948Sadrian */ 827298948Sadrianvoid 828298948Sadrianschedclock(p) 829298948Sadrian struct proc *p; 830298948Sadrian{ 831298948Sadrian 832298948Sadrian p->p_cpticks++; 833298948Sadrian p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); 834298948Sadrian if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { 835298948Sadrian resetpriority(p); 836298948Sadrian if (p->p_pri.pri_level >= PUSER) 837298948Sadrian p->p_pri.pri_level = p->p_pri.pri_user; 838298948Sadrian } 839298948Sadrian} 840298948Sadrian 841298948Sadrian/* 842298948Sadrian * General purpose yield system call 843298948Sadrian */ 844298948Sadrianint 845298948Sadrianyield(struct proc *p, struct yield_args *uap) 846298948Sadrian{ 847298948Sadrian 848298948Sadrian p->p_retval[0] = 0; 849298948Sadrian 850298948Sadrian mtx_lock_spin(&sched_lock); 851298948Sadrian DROP_GIANT_NOSWITCH(); 852298948Sadrian p->p_pri.pri_level = PRI_MAX_TIMESHARE; 853298948Sadrian setrunqueue(p); 854298948Sadrian p->p_stats->p_ru.ru_nvcsw++; 855298948Sadrian mi_switch(); 856298948Sadrian mtx_unlock_spin(&sched_lock); 857298948Sadrian PICKUP_GIANT(); 858298948Sadrian 859298948Sadrian return (0); 860298948Sadrian} 861298948Sadrian