/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_4bsd.c 135295 2004-09-16 07:12:59Z julian $");

#define kse td_sched

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <machine/smp.h>

/*
 * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
 * the range 100-256 Hz (approximately).
 */
#define	ESTCPULIM(e) \
    min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
    RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
#ifdef SMP
#define	INVERSE_ESTCPU_WEIGHT	(8 * smp_cpus)
#else
#define	INVERSE_ESTCPU_WEIGHT	8	/* 1 / (priorities per estcpu level). */
#endif
#define	NICE_WEIGHT	1		/* Priorities per nice level. */
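
/*
 * Worked example of the clamp above (illustrative only, assuming the
 * stock values PRIO_MAX - PRIO_MIN == 40 and RQ_PPQ == 4 from
 * <sys/runq.h>): on a uniprocessor INVERSE_ESTCPU_WEIGHT is 8, so
 *	ESTCPULIM(e) = min(e, 8 * (1 * 40 - 4) + 8 - 1) = min(e, 295).
 * The clamp keeps the estcpu / 8 term in resetpriority() below from
 * pushing a priority far past the timesharing range.
 */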

/*
 * The schedulable entity that can be given a context to run.
 * A process may have several of these.  Probably one per processor
 * but possibly a few more.  In this universe they are grouped
 * with a KSEG that contains the priority and niceness
 * for the group.
 */
struct kse {
	TAILQ_ENTRY(kse) ke_kglist;	/* (*) Queue of KSEs in ke_ksegrp. */
	TAILQ_ENTRY(kse) ke_kgrlist;	/* (*) Queue of KSEs in this state. */
	TAILQ_ENTRY(kse) ke_procq;	/* (j/z) Run queue. */
	struct thread	*ke_thread;	/* (*) Active associated thread. */
	fixpt_t		ke_pctcpu;	/* (j) %cpu during p_swtime. */
	u_char		ke_oncpu;	/* (j) Which cpu we are on. */
	char		ke_rqindex;	/* (j) Run queue index. */
	enum {
		KES_THREAD = 0x0,	/* slaved to thread state */
		KES_ONRUNQ
	} ke_state;			/* (j) KSE status. */
	int		ke_cpticks;	/* (j) Ticks of cpu time. */
	struct runq	*ke_runq;	/* runq the kse is currently on */
};

#define	ke_proc		ke_thread->td_proc
#define	ke_ksegrp	ke_thread->td_ksegrp

#define	td_kse	td_sched

/* flags kept in td_flags */
#define	TDF_DIDRUN	TDF_SCHED0	/* KSE actually ran. */
#define	TDF_EXIT	TDF_SCHED1	/* KSE is being killed. */
#define	TDF_BOUND	TDF_SCHED2

#define	ke_flags	ke_thread->td_flags
#define	KEF_DIDRUN	TDF_DIDRUN	/* KSE actually ran. */
#define	KEF_EXIT	TDF_EXIT	/* KSE is being killed. */
#define	KEF_BOUND	TDF_BOUND	/* stuck to one CPU */

#define	SKE_RUNQ_PCPU(ke)						\
    ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq)

struct kg_sched {
	struct thread	*skg_last_assigned; /* (j) Last thread assigned to */
					    /* the system scheduler. */
	int	skg_avail_opennings;	/* (j) Num unfilled slots in group. */
	int	skg_concurrency;	/* (j) Num KSEs requested in group. */
	int	skg_runq_kses;		/* (j) Num KSEs on runq. */
};
#define	kg_last_assigned	kg_sched->skg_last_assigned
#define	kg_avail_opennings	kg_sched->skg_avail_opennings
#define	kg_concurrency		kg_sched->skg_concurrency
#define	kg_runq_kses		kg_sched->skg_runq_kses

/*
 * KSE_CAN_MIGRATE macro returns true if the kse can migrate between
 * cpus.
 */
#define	KSE_CAN_MIGRATE(ke)						\
    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)

static struct kse kse0;
static struct kg_sched kg_sched0;

static int	sched_tdcnt;	/* Total runnable threads in the system. */
static int	sched_quantum;	/* Roundrobin scheduling quantum in ticks. */
#define	SCHED_QUANTUM	(hz / 10)	/* Default sched quantum */

static struct callout roundrobin_callout;

static void	slot_fill(struct ksegrp *kg);
static struct kse *sched_choose(void);	/* XXX Should be thread * */

static void	setup_runqs(void);
static void	roundrobin(void *arg);
static void	schedcpu(void);
static void	schedcpu_thread(void);
static void	sched_setup(void *dummy);
static void	maybe_resched(struct thread *td);
static void	updatepri(struct ksegrp *kg);
static void	resetpriority(struct ksegrp *kg);
#ifdef SMP
static int	forward_wakeup(int cpunum);
#endif

static struct kproc_desc sched_kp = {
	"schedcpu",
	schedcpu_thread,
	NULL
};
SYSINIT(schedcpu, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &sched_kp)
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

/*
 * Global run queue.
 */
static struct runq runq;

#ifdef SMP
/*
 * Per-CPU run queues
 */
static struct runq runq_pcpu[MAXCPU];
#endif

static void
setup_runqs(void)
{
#ifdef SMP
	int i;

	for (i = 0; i < MAXCPU; ++i)
		runq_init(&runq_pcpu[i]);
#endif

	runq_init(&runq);
}

static int
sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;

	new_val = sched_quantum * tick;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < tick)
		return (EINVAL);
	sched_quantum = new_val / tick;
	hogticks = 2 * sched_quantum;
	return (0);
}

SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
    "Scheduler name");

SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof sched_quantum, sysctl_kern_quantum, "I",
    "Roundrobin scheduling quantum in microseconds");
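
/*
 * Tuning example (illustrative only): the sysctl above reports the
 * quantum in microseconds, converting to and from ticks in the handler.
 * With hz == 100, tick is 10000 us and the default SCHED_QUANTUM of
 * hz / 10 ticks reads back as 100000; something like
 *	# sysctl kern.sched.quantum=20000
 * would set a 2-tick quantum, while any value shorter than one tick
 * is rejected with EINVAL.
 */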

#ifdef SMP
/* Enable forwarding of wakeups to all other cpus */
SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");

static int forward_wakeup_enabled = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
	   &forward_wakeup_enabled, 0,
	   "Forwarding of wakeup to idle CPUs");

static int forward_wakeups_requested = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
	   &forward_wakeups_requested, 0,
	   "Requests for Forwarding of wakeup to idle CPUs");

static int forward_wakeups_delivered = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
	   &forward_wakeups_delivered, 0,
	   "Completed Forwarding of wakeup to idle CPUs");

static int forward_wakeup_use_mask = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
	   &forward_wakeup_use_mask, 0,
	   "Use the mask of idle cpus");

static int forward_wakeup_use_loop = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
	   &forward_wakeup_use_loop, 0,
	   "Use a loop to find idle cpus");

static int forward_wakeup_use_single = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
	   &forward_wakeup_use_single, 0,
	   "Only signal one idle cpu");

static int forward_wakeup_use_htt = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
	   &forward_wakeup_use_htt, 0,
	   "account for htt");

#endif
static int sched_followon = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
	   &sched_followon, 0,
	   "allow threads to share a quantum");

static int sched_pfollowons = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, pfollowons, CTLFLAG_RD,
	   &sched_pfollowons, 0,
	   "number of followons done to a different ksegrp");

static int sched_kgfollowons = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, kgfollowons, CTLFLAG_RD,
	   &sched_kgfollowons, 0,
	   "number of followons done in a ksegrp");

/*
 * Arrange to reschedule if necessary, taking the priorities and
 * schedulers into account.
 */
static void
maybe_resched(struct thread *td)
{

	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
}

/*
 * Force switch among equal priority processes every 100ms.
 * We don't actually need to force a context switch of the current process.
 * The act of firing the event triggers a context switch to softclock() and
 * then switching back out again which is equivalent to a preemption, thus
 * no further work is needed on the local CPU.
 */
/* ARGSUSED */
static void
roundrobin(void *arg)
{

#ifdef SMP
	mtx_lock_spin(&sched_lock);
	forward_roundrobin();
	mtx_unlock_spin(&sched_lock);
#endif

	callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL);
}

/*
 * Constants for digital decay and forget:
 *	90% of (kg_estcpu) usage in 5 * loadav time
 *	95% of (ke_pctcpu) usage in 60 seconds (load insensitive)
 *	Note that, as ps(1) mentions, this can let percentages
 *	total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that schedclock() updates kg_estcpu and p_cpticks asynchronously.
 *
 * We wish to decay away 90% of kg_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *	for (i = 0; i < (5 * loadavg); i++)
 *		kg_estcpu *= decay;
 * will compute
 *	kg_estcpu *= 0.1;
 * for all values of loadavg.
 *
 * Mathematically this loop can be expressed by saying:
 *	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *	b = 2 * loadavg
 * then
 *	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *	    exp(x) = x**0/0! + x**1/1! + x**2/2! + ... .
 *	    therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *	    ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *	    therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	solving for factor,
 *	ln(factor) =~ (-2.30/5*loadav), or
 *	factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	    exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	solving for power,
 *	power*ln(b/(b+1)) =~ -2.30, or
 *	power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
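
/*
 * A worked decay step (illustrative only, assuming FSHIFT == 11 so
 * FSCALE == 2048): with a load average of 1.0, ldavg[0] == 2048 and
 * loadfactor() gives 4096, so
 *	decay_cpu(4096, 90) = (4096 * 90) / (4096 + 2048) = 60,
 * i.e. a decay factor of 2/3 per second.  Five such steps take an
 * estcpu of 90 down through 60, 40, 26, 17 to 11, matching the ~90%
 * decay in 5 * loadav seconds claimed above, since (2/3)**5 =~ 0.13.
 */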

/* decay 95% of `ke_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11

/*
 * Recompute process priorities, every hz ticks.
 * MP-safe, called without the Giant mutex.
 */
/* ARGSUSED */
static void
schedcpu(void)
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct thread *td;
	struct proc *p;
	struct kse *ke;
	struct ksegrp *kg;
	int awake, realstathz;

	realstathz = stathz ? stathz : hz;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		/*
		 * Prevent state changes and protect run queue.
		 */
		mtx_lock_spin(&sched_lock);
		/*
		 * Increment time in/out of memory.  We ignore overflow; with
		 * 16-bit int's (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		FOREACH_KSEGRP_IN_PROC(p, kg) {
			awake = 0;
			FOREACH_THREAD_IN_GROUP(kg, td) {
				ke = td->td_kse;
				/*
				 * Increment sleep time (if sleeping).  We
				 * ignore overflow, as above.
				 */
				/*
				 * The kse slptimes are not touched in wakeup
				 * because the thread may not HAVE a KSE.
				 */
				if (ke->ke_state == KES_ONRUNQ) {
					awake = 1;
					ke->ke_flags &= ~KEF_DIDRUN;
				} else if ((ke->ke_state == KES_THREAD) &&
				    (TD_IS_RUNNING(td))) {
					awake = 1;
					/* Do not clear KEF_DIDRUN */
				} else if (ke->ke_flags & KEF_DIDRUN) {
					awake = 1;
					ke->ke_flags &= ~KEF_DIDRUN;
				}

				/*
				 * ke_pctcpu is only for ps and ttyinfo().
				 * Do it per kse, and add them up at the end?
				 * XXXKSE
				 */
				ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >>
				    FSHIFT;
				/*
				 * If the kse has been idle the entire second,
				 * stop recalculating its priority until
				 * it wakes up.
				 */
				if (ke->ke_cpticks == 0)
					continue;
#if	(FSHIFT >= CCPU_SHIFT)
				ke->ke_pctcpu += (realstathz == 100)
				    ? ((fixpt_t) ke->ke_cpticks) <<
				    (FSHIFT - CCPU_SHIFT) :
				    100 * (((fixpt_t) ke->ke_cpticks)
				    << (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
				ke->ke_pctcpu += ((FSCALE - ccpu) *
				    (ke->ke_cpticks *
				    FSCALE / realstathz)) >> FSHIFT;
#endif
				ke->ke_cpticks = 0;
			} /* end of kse loop */
			/*
			 * If there are ANY running threads in this KSEGRP,
			 * then don't count it as sleeping.
			 */
			if (awake) {
				if (kg->kg_slptime > 1) {
					/*
					 * In an ideal world, this should not
					 * happen, because whoever woke us
					 * up from the long sleep should have
					 * unwound the slptime and reset our
					 * priority before we run at the stale
					 * priority.  Should KASSERT at some
					 * point when all the cases are fixed.
					 */
					updatepri(kg);
				}
				kg->kg_slptime = 0;
			} else
				kg->kg_slptime++;
			if (kg->kg_slptime > 1)
				continue;
			kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
			resetpriority(kg);
			FOREACH_THREAD_IN_GROUP(kg, td) {
				if (td->td_priority >= PUSER) {
					sched_prio(td, kg->kg_user_pri);
				}
			}
		} /* end of ksegrp loop */
		mtx_unlock_spin(&sched_lock);
	} /* end of process loop */
	sx_sunlock(&allproc_lock);
}
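
/*
 * A sanity check on the ke_pctcpu arithmetic above (illustrative,
 * assuming stathz == 128 and FSHIFT == CCPU_SHIFT == 11): a thread
 * running flat out accumulates ke_cpticks == 128 per second, so each
 * pass adds 100 * (128 << 0) / 128 == 100 after the ccpu decay.  The
 * fixed point of x = (x * ccpu >> FSHIFT) + 100 is x == 2048 == FSCALE,
 * which ps(1) reports as 100%.
 */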

/*
 * Main loop for a kthread that executes schedcpu once a second.
 */
static void
schedcpu_thread(void)
{
	int nowake;

	for (;;) {
		schedcpu();
		tsleep(&nowake, curthread->td_priority, "-", hz);
	}
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max kg_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay kg_estcpu to zero.
 */
static void
updatepri(struct ksegrp *kg)
{
	register fixpt_t loadfac;
	register unsigned int newcpu;

	loadfac = loadfactor(averunnable.ldavg[0]);
	if (kg->kg_slptime > 5 * loadfac)
		kg->kg_estcpu = 0;
	else {
		newcpu = kg->kg_estcpu;
		kg->kg_slptime--;	/* was incremented in schedcpu() */
		while (newcpu && --kg->kg_slptime)
			newcpu = decay_cpu(loadfac, newcpu);
		kg->kg_estcpu = newcpu;
	}
	resetpriority(kg);
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
static void
resetpriority(struct ksegrp *kg)
{
	register unsigned int newpriority;
	struct thread *td;

	if (kg->kg_pri_class == PRI_TIMESHARE) {
		newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT +
		    NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN);
		newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
		    PRI_MAX_TIMESHARE);
		kg->kg_user_pri = newpriority;
	}
	FOREACH_THREAD_IN_GROUP(kg, td) {
		maybe_resched(td);	/* XXXKSE silly */
	}
}
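
/*
 * Worked example (illustrative only, assuming the stock 5.x values
 * PUSER == PRI_MIN_TIMESHARE == 160, PRI_MAX_TIMESHARE == 223 and
 * PRIO_MIN == -20): a thread at nice 0 with kg_estcpu == 80 gets
 *	160 + 80 / 8 + 1 * (0 - (-20)) = 190,
 * while one at nice +20 with estcpu pinned at the 295 clamp would
 * compute 160 + 36 + 40 = 236 and be clamped back to 223.
 */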

/* ARGSUSED */
static void
sched_setup(void *dummy)
{
	setup_runqs();

	if (sched_quantum == 0)
		sched_quantum = SCHED_QUANTUM;
	hogticks = 2 * sched_quantum;

	callout_init(&roundrobin_callout, CALLOUT_MPSAFE);

	/* Kick off timeout driven events by calling first time. */
	roundrobin(NULL);

	/* Account for thread0. */
	sched_tdcnt++;
}

/* External interfaces start here */
/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	ksegrp0.kg_sched = &kg_sched0;
	thread0.td_sched = &kse0;
	kse0.ke_thread = &thread0;
	kse0.ke_oncpu = NOCPU; /* wrong.. can we use PCPU(cpuid) yet? */
	kse0.ke_state = KES_THREAD;
	kg_sched0.skg_concurrency = 1;
	kg_sched0.skg_avail_opennings = 0; /* we are already running */
}

int
sched_runnable(void)
{
#ifdef SMP
	return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
#else
	return runq_check(&runq);
#endif
}

int
sched_rr_interval(void)
{
	if (sched_quantum == 0)
		sched_quantum = SCHED_QUANTUM;
	return (sched_quantum);
}

/*
 * We adjust the priority of the current process.  The priority of
 * a process gets worse as it accumulates CPU time.  The cpu usage
 * estimator (kg_estcpu) is increased here.  resetpriority() will
 * compute a different priority each time kg_estcpu increases by
 * INVERSE_ESTCPU_WEIGHT (until MAXPRI is reached).  The cpu usage
 * estimator ramps up quite quickly when the process is running
 * (linearly), and decays away exponentially, at a rate which is
 * proportionally slower when the system is busy.  The basic principle
 * is that the system will 90% forget that the process used a lot of
 * CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin
 * among other processes.
 */
void
sched_clock(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = td->td_ksegrp;
	ke = td->td_kse;

	ke->ke_cpticks++;
	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
	if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
		resetpriority(kg);
		if (td->td_priority >= PUSER)
			td->td_priority = kg->kg_user_pri;
	}
}
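
/*
 * Pacing example (illustrative, assuming stathz == 128): a CPU-bound
 * thread has kg_estcpu bumped 128 times a second, so with
 * INVERSE_ESTCPU_WEIGHT == 8 on a uniprocessor its user priority is
 * recomputed (and typically worsened by one step) every 8 statclock
 * ticks, i.e. roughly every 62ms, until the decay in schedcpu()
 * balances the increment or the ESTCPULIM clamp is hit.
 */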

/*
 * Charge a child's scheduling CPU usage to the parent.
 *
 * XXXKSE assume only one thread & kse & ksegrp keep estcpu in each ksegrp.
 * Charge it to the ksegrp that did the wait since process estcpu is sum of
 * all ksegrps, this is strictly as expected.  Assume that the child process
 * aggregated all the estcpu into the 'built-in' ksegrp.
 */
void
sched_exit(struct proc *p, struct thread *td)
{
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
	sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct thread *childtd)
{

	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu +
	    childtd->td_ksegrp->kg_estcpu);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{
	if ((child->td_proc->p_flag & P_NOLOAD) == 0)
		sched_tdcnt--;
}

void
sched_fork(struct thread *td, struct thread *childtd)
{
	sched_fork_ksegrp(td, childtd->td_ksegrp);
	sched_fork_thread(td, childtd);
}

void
sched_fork_ksegrp(struct thread *td, struct ksegrp *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	child->kg_estcpu = td->td_ksegrp->kg_estcpu;
}

void
sched_fork_thread(struct thread *td, struct thread *childtd)
{
	sched_newthread(childtd);
}

void
sched_nice(struct proc *p, int nice)
{
	struct ksegrp *kg;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	p->p_nice = nice;
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		resetpriority(kg);
	}
}

void
sched_class(struct ksegrp *kg, int class)
{
	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_pri_class = class;
}

/*
 * Adjust the priority of a thread.
 * This may include moving the thread within the KSEGRP,
 * changing the assignment of a kse to the thread,
 * and moving a KSE in the system run queue.
 */
void
sched_prio(struct thread *td, u_char prio)
{

	mtx_assert(&sched_lock, MA_OWNED);
	if (TD_ON_RUNQ(td)) {
		adjustrunqueue(td, prio);
	} else {
		td->td_priority = prio;
	}
}

void
sched_sleep(struct thread *td)
{

	mtx_assert(&sched_lock, MA_OWNED);
	td->td_ksegrp->kg_slptime = 0;
	td->td_base_pri = td->td_priority;
}

static void remrunqueue(struct thread *td);

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct proc *p;

	ke = td->td_kse;
	p = td->td_proc;

	mtx_assert(&sched_lock, MA_OWNED);

	if ((p->p_flag & P_NOLOAD) == 0)
		sched_tdcnt--;
	/*
	 * We are volunteering to switch out so we get to nominate
	 * a successor for the rest of our quantum.
	 * First try another thread in our ksegrp, and then look for
	 * other ksegrps in our process.
	 */
	if (sched_followon &&
	    (p->p_flag & P_HADTHREADS) &&
	    (flags & SW_VOL) &&
	    newtd == NULL) {
		/* let's schedule another thread from this process */
		kg = td->td_ksegrp;
		if ((newtd = TAILQ_FIRST(&kg->kg_runq))) {
			remrunqueue(newtd);
			sched_kgfollowons++;
		} else {
			FOREACH_KSEGRP_IN_PROC(p, kg) {
				if ((newtd = TAILQ_FIRST(&kg->kg_runq))) {
					sched_pfollowons++;
					remrunqueue(newtd);
					break;
				}
			}
		}
	}

	/*
	 * The thread we are about to run needs to be counted as if it
	 * had been added to the run queue and selected.  It came from:
	 *   a preemption,
	 *   an upcall, or
	 *   a followon.
	 * Do this before saving curthread so that the slot count
	 * doesn't give an overly optimistic view when that happens.
	 */
	if (newtd) {
		KASSERT((newtd->td_inhibitors == 0),
			("trying to run inhibited thread"));
		newtd->td_ksegrp->kg_avail_opennings--;
		newtd->td_kse->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(newtd);
		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
			sched_tdcnt++;
	}

	td->td_lastcpu = td->td_oncpu;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_pflags &= ~TDP_OWEPREEMPT;
	td->td_oncpu = NOCPU;
	/*
	 * At the last moment, if this thread is still marked RUNNING,
	 * then put it back on the run queue as it has not been suspended
	 * or stopped or anything else similar.  We never put the idle
	 * threads on the run queue, however.
	 */
	if (td == PCPU_GET(idlethread))
		TD_SET_CAN_RUN(td);
	else {
		td->td_ksegrp->kg_avail_opennings++;
		if (TD_IS_RUNNING(td)) {
			/* Put us back on the run queue (kse and all). */
			setrunqueue(td, SRQ_OURSELF|SRQ_YIELDING);
		} else if (p->p_flag & P_HADTHREADS) {
			/*
			 * We will not be on the run queue.  So we must be
			 * sleeping or similar.  As it's available,
			 * someone else can use the KSE if they need it.
			 */
			slot_fill(td->td_ksegrp);
		}
	}
	if (newtd == NULL)
		newtd = choosethread();
	if (td != newtd)
		cpu_switch(td, newtd);
	sched_lock.mtx_lock = (uintptr_t)td;
	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_wakeup(struct thread *td)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = td->td_ksegrp;
	if (kg->kg_slptime > 1)
		updatepri(kg);
	kg->kg_slptime = 0;
	setrunqueue(td, SRQ_BORING);
}

#ifdef SMP
/* enable HTT_2 if you have a 2-way HTT cpu.*/
static int
forward_wakeup(int cpunum)
{
	cpumask_t map, me, dontuse;
	cpumask_t map2;
	struct pcpu *pc;
	cpumask_t id, map3;

	mtx_assert(&sched_lock, MA_OWNED);

	CTR0(KTR_RUNQ, "forward_wakeup()");

	if ((!forward_wakeup_enabled) ||
	     (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
		return (0);
	if (!smp_started || cold || panicstr)
		return (0);

	forward_wakeups_requested++;

	/*
	 * Check the idle mask we received against what we calculated
	 * before in the old version.
	 */
	me = PCPU_GET(cpumask);

	/*
	 * Don't bother if we should be doing it ourselves.
	 */
	if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
		return (0);
	dontuse = me | stopped_cpus | hlt_cpus_mask;
	map3 = 0;
	if (forward_wakeup_use_loop) {
		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
			id = pc->pc_cpumask;
			if ((id & dontuse) == 0 &&
			    pc->pc_curthread == pc->pc_idlethread) {
				map3 |= id;
			}
		}
	}

	if (forward_wakeup_use_mask) {
		map = 0;
		map = idle_cpus_mask & ~dontuse;

		/* If they are both on, compare and use loop if different */
		if (forward_wakeup_use_loop) {
			if (map != map3) {
				printf("map (%02X) != map3 (%02X)\n",
				    map, map3);
				map = map3;
			}
		}
	} else {
		map = map3;
	}
	/* If we only allow a specific CPU, then mask off all the others */
	if (cpunum != NOCPU) {
		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
		map &= (1 << cpunum);
	} else {
		/* Try to choose an idle die. */
		if (forward_wakeup_use_htt) {
			map2 = (map & (map >> 1)) & 0x5555;
			if (map2) {
				map = map2;
			}
		}

		/* set only one bit */
		if (forward_wakeup_use_single) {
			map = map & ((~map) + 1);
		}
	}
	if (map) {
		forward_wakeups_delivered++;
		ipi_selected(map, IPI_AST);
		return (1);
	}
	if (cpunum == NOCPU)
		printf("forward_wakeup: Idle processor not found\n");
	return (0);
}
#endif
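
/*
 * The two mask tricks above, spelled out (illustrative values):
 * "map & (map >> 1) & 0x5555" keeps only even bits whose odd sibling
 * is also set, so with HTT pairs numbered (0,1), (2,3), ... an idle
 * mask of 0x0e (cpus 1, 2 and 3) yields 0x04: only the (2,3) die is
 * wholly idle.  "map & ((~map) + 1)" is the two's-complement trick
 * that isolates the lowest set bit: 0x0c becomes 0x04.
 */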

void
sched_add(struct thread *td, int flags)
{
	struct kse *ke;
#ifdef SMP
	int forwarded = 0;
	int cpu;
#endif

	ke = td->td_kse;
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

#ifdef SMP
	if (KSE_CAN_MIGRATE(ke)) {
		CTR2(KTR_RUNQ,
		    "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
		cpu = NOCPU;
		ke->ke_runq = &runq;
	} else {
		if (!SKE_RUNQ_PCPU(ke))
			ke->ke_runq = &runq_pcpu[(cpu = PCPU_GET(cpuid))];
		else
			cpu = td->td_lastcpu;
		CTR3(KTR_RUNQ,
		    "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
	}
#else
	CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
	ke->ke_runq = &runq;
#endif
	/*
	 * If we are yielding (on the way out anyhow) or the thread
	 * being saved is US, then don't try to be smart about preemption
	 * or kicking off another CPU, as it won't help and may hinder.
	 * In the YIELDING case, we are about to run whoever is being
	 * put in the queue anyhow, and in the OURSELF case, we are
	 * putting ourselves on the run queue which also only happens
	 * when we are about to yield.
	 */
	if ((flags & SRQ_YIELDING) == 0) {
#ifdef SMP
		cpumask_t me = PCPU_GET(cpumask);
		int idle = idle_cpus_mask & me;
		/*
		 * Only try to kick off another CPU if the thread is
		 * unpinned or pinned to another cpu, and there are
		 * other available and idle CPUs.  If we are idle,
		 * or it's an interrupt, then skip straight to preemption.
		 */
		if ((!idle) && ((flags & SRQ_INTR) == 0) &&
		    (idle_cpus_mask & ~(hlt_cpus_mask | me)) &&
		    (KSE_CAN_MIGRATE(ke) ||
		     ke->ke_runq != &runq_pcpu[PCPU_GET(cpuid)])) {
			forwarded = forward_wakeup(cpu);
		}
		/*
		 * If we failed to kick off another cpu, then look to see
		 * if we should preempt this CPU.  Only allow this if it
		 * is not pinned or IS pinned to this CPU.  If we are the
		 * idle thread, we also try to preempt, as it will be
		 * quicker and, being idle, we won't lose in doing so.
		 */
		if ((!forwarded) &&
		    (ke->ke_runq == &runq ||
		     ke->ke_runq == &runq_pcpu[PCPU_GET(cpuid)]))
#endif
		{
			if (maybe_preempt(td))
				return;
		}
	}
	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
		sched_tdcnt++;
	td->td_ksegrp->kg_avail_opennings--;
	runq_add(ke->ke_runq, ke);
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;
	maybe_resched(td);
}

void
sched_rem(struct thread *td)
{
	struct kse *ke;

	ke = td->td_kse;
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_rem: process swapped out"));
	KASSERT((ke->ke_state == KES_ONRUNQ),
	    ("sched_rem: KSE not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);

	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
		sched_tdcnt--;
	td->td_ksegrp->kg_avail_opennings++;
	runq_remove(ke->ke_runq, ke);

	ke->ke_state = KES_THREAD;
	td->td_ksegrp->kg_runq_kses--;
}

/*
 * Select threads to run.
 * Notice that the running threads still consume a slot.
 */
struct kse *
sched_choose(void)
{
	struct kse *ke;
	struct runq *rq;

#ifdef SMP
	struct kse *kecpu;

	rq = &runq;
	ke = runq_choose(&runq);
	kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);

	if (ke == NULL ||
	    (kecpu != NULL &&
	     kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) {
		CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu,
		    PCPU_GET(cpuid));
		ke = kecpu;
		rq = &runq_pcpu[PCPU_GET(cpuid)];
	} else {
		CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke);
	}

#else
	rq = &runq;
	ke = runq_choose(&runq);
#endif

	if (ke != NULL) {
		runq_remove(rq, ke);
		ke->ke_state = KES_THREAD;
		ke->ke_ksegrp->kg_runq_kses--;

		KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		    ("sched_choose: process swapped out"));
	}
	return (ke);
}
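
/*
 * Selection example (illustrative): with a priority-140 thread on this
 * CPU's runq_pcpu[] and a priority-130 thread on the global runq, the
 * global thread wins because lower td_priority values are better; on a
 * tie the global queue is preferred, since the pcpu candidate is only
 * taken when its priority is strictly less.
 */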

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;
	/*
	 * XXX we cheat slightly on the locking here to avoid locking in
	 * the usual case.  Setting td_priority here is essentially an
	 * incomplete workaround for not setting it properly elsewhere.
	 * Now that some interrupt handlers are threads, not setting it
	 * properly elsewhere can clobber it in the window between setting
	 * it here and returning to user mode, so don't waste time setting
	 * it perfectly here.
	 */
	kg = td->td_ksegrp;
	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(TD_IS_RUNNING(td),
	    ("sched_bind: cannot bind non-running thread"));

	ke = td->td_kse;

	ke->ke_flags |= KEF_BOUND;
#ifdef SMP
	ke->ke_runq = &runq_pcpu[cpu];
	if (PCPU_GET(cpuid) == cpu)
		return;

	ke->ke_state = KES_THREAD;

	mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread* td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_load(void)
{
	return (sched_tdcnt);
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}
int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}
int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct kse));
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	struct kse *ke;

	ke = td->td_kse;
	return (ke->ke_pctcpu);
}
#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"