/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_4bsd.c 179297 2008-05-25 01:44:58Z jb $");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"
#include "opt_kdtrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <machine/pcb.h>
#include <machine/smp.h>

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
int				dtrace_vtime_active;
dtrace_vtime_switch_func_t	dtrace_vtime_switch_func;
#endif

/*
 * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
 * the range 100-256 Hz (approximately).
 */
#define	ESTCPULIM(e) \
    min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
    RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
#ifdef SMP
#define	INVERSE_ESTCPU_WEIGHT	(8 * smp_cpus)
#else
#define	INVERSE_ESTCPU_WEIGHT	8	/* 1 / (priorities per estcpu level). */
#endif
#define	NICE_WEIGHT		1	/* Priorities per nice level. */
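
/*
 * Editor's note, not part of the original file: a worked example of the
 * ESTCPULIM() clamp above, assuming the stock constants PRIO_MIN == -20,
 * PRIO_MAX == 20 (sys/resource.h) and RQ_PPQ == 4 (sys/runq.h).  On a
 * uniprocessor, INVERSE_ESTCPU_WEIGHT is 8, so:
 *
 *	ESTCPULIM(e) = min(e, 8 * (1 * (20 - (-20)) - 4) + 8 - 1)
 *	             = min(e, 8 * 36 + 7) = min(e, 295)
 *
 * i.e. td_estcpu saturates at 295, so the estcpu term in resetpriority()
 * contributes at most 295 / 8 = 36 priority levels: one full nice range
 * less one run-queue slot.
 */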
/*
 * The schedulable entity that runs a context.
 * This is an extension to the thread structure and is tailored to
 * the requirements of this scheduler.
 */
struct td_sched {
	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
	int		ts_cpticks;	/* (j) Ticks of cpu time. */
	int		ts_slptime;	/* (j) Seconds !RUNNING. */
	struct runq	*ts_runq;	/* runq the thread is currently on. */
};

/* Flags kept in td_flags. */
#define	TDF_DIDRUN	TDF_SCHED0	/* thread actually ran. */
#define	TDF_BOUND	TDF_SCHED1	/* Bound to one CPU. */

#define	SKE_RUNQ_PCPU(ts)						\
    ((ts)->ts_runq != 0 && (ts)->ts_runq != &runq)

static struct td_sched td_sched0;
struct mtx sched_lock;

static int	sched_tdcnt;	/* Total runnable threads in the system. */
static int	sched_quantum;	/* Roundrobin scheduling quantum in ticks. */
#define	SCHED_QUANTUM	(hz / 10)	/* Default sched quantum */

static void	setup_runqs(void);
static void	schedcpu(void);
static void	schedcpu_thread(void);
static void	sched_priority(struct thread *td, u_char prio);
static void	sched_setup(void *dummy);
static void	maybe_resched(struct thread *td);
static void	updatepri(struct thread *td);
static void	resetpriority(struct thread *td);
static void	resetpriority_thread(struct thread *td);
#ifdef SMP
static int	forward_wakeup(int cpunum);
#endif

static struct kproc_desc sched_kp = {
	"schedcpu",
	schedcpu_thread,
	NULL
};
SYSINIT(schedcpu, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start,
    &sched_kp);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);

/*
 * Global run queue.
 */
static struct runq runq;

#ifdef SMP
/*
 * Per-CPU run queues
 */
static struct runq runq_pcpu[MAXCPU];
#endif

static void
setup_runqs(void)
{
#ifdef SMP
	int i;

	for (i = 0; i < MAXCPU; ++i)
		runq_init(&runq_pcpu[i]);
#endif

	runq_init(&runq);
}

static int
sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
{
	int error, new_val;

	new_val = sched_quantum * tick;
	error = sysctl_handle_int(oidp, &new_val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (new_val < tick)
		return (EINVAL);
	sched_quantum = new_val / tick;
	hogticks = 2 * sched_quantum;
	return (0);
}

SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
    "Scheduler name");

SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof sched_quantum, sysctl_kern_quantum, "I",
    "Roundrobin scheduling quantum in microseconds");

#ifdef SMP
/* Enable forwarding of wakeups to all other cpus */
SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");

static int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");

static int forward_wakeup_enabled = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
    &forward_wakeup_enabled, 0,
    "Forwarding of wakeup to idle CPUs");

static int forward_wakeups_requested = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
    &forward_wakeups_requested, 0,
    "Requests for Forwarding of wakeup to idle CPUs");

static int forward_wakeups_delivered = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
    &forward_wakeups_delivered, 0,
    "Completed Forwarding of wakeup to idle CPUs");

static int forward_wakeup_use_mask = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
    &forward_wakeup_use_mask, 0,
    "Use the mask of idle cpus");

static int forward_wakeup_use_loop = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
    &forward_wakeup_use_loop, 0,
    "Use a loop to find idle cpus");

static int forward_wakeup_use_single = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
    &forward_wakeup_use_single, 0,
    "Only signal one idle cpu");

static int forward_wakeup_use_htt = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
    &forward_wakeup_use_htt, 0,
    "account for htt");

#endif
#if 0
static int sched_followon = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
    &sched_followon, 0,
    "allow threads to share a quantum");
#endif

static __inline void
sched_load_add(void)
{
	sched_tdcnt++;
	CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
}

static __inline void
sched_load_rem(void)
{
	sched_tdcnt--;
	CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
}

/*
 * Arrange to reschedule if necessary, taking the priorities and
 * schedulers into account.
 */
static void
maybe_resched(struct thread *td)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
}

/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the current thread should immediately be preempted in
 * favor of the new thread.  If so, it switches to the new thread and
 * eventually returns true.  If not, it returns false so that the caller
 * may place the thread on an appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The kernel is in the throes of crashing (panicstr).
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.
	 *    Note that this prevents curthread from trying to preempt itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyways, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - If the new thread's priority is not a realtime priority and
	 *    the current thread's priority is not an idle priority and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("maybe_preempt: trying to run inhibited thread"));
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
	    TD_IS_INHIBITED(ctd))
		return (0);
#ifndef FULL_PREEMPTION
	if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
		return (0);
#endif

	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_owepreempt = 1;
		return (0);
	}
	/*
	 * Thread is runnable but not yet put on system run queue.
	 */
	MPASS(ctd->td_lock == td->td_lock);
	MPASS(TD_ON_RUNQ(td));
	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_name);
	mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, td);
	/*
	 * td's lock pointer may have changed.  We have to return with it
	 * locked.
	 */
	spinlock_enter();
	thread_unlock(ctd);
	thread_lock(td);
	spinlock_exit();
	return (1);
#else
	return (0);
#endif
}

/*
 * Constants for digital decay and forget:
 *	90% of (td_estcpu) usage in 5 * loadav time
 *	95% of (ts_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that schedclock() updates td_estcpu and p_cpticks asynchronously.
 *
 * We wish to decay away 90% of td_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		td_estcpu *= decay;
 * will compute
 * 	td_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	  solving for factor,
 *	  ln(factor) =~ (-2.30/5*loadav), or
 *	  factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *	      exp(-1/b) =~ (b-1)/b =~ b/(b+1).  QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	  solving for power,
 *	  power*ln(b/(b+1)) =~ -2.30, or
 *	  power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav: 1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
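
/*
 * Editor's sketch, not part of the original file: a standalone userland
 * check of the decay behaviour derived above, kept under #if 0 so the
 * kernel build is unaffected.  It assumes FSHIFT == 11 (FSCALE == 2048),
 * and decay_demo() is a copy of the decay_cpu() macro.  With a load
 * average of 1.0 the fixed-point load factor is 2 * FSCALE, so each step
 * multiplies estcpu by 2/3, and roughly 5.68 steps cut it to 10%.
 */
#if 0
#include <stdio.h>

#define	FSCALE_DEMO	2048	/* assumed: (1 << FSHIFT) with FSHIFT == 11 */
#define	decay_demo(loadfac, cpu) \
    (((loadfac) * (cpu)) / ((loadfac) + FSCALE_DEMO))

int
main(void)
{
	int loadfac = 2 * FSCALE_DEMO;	/* loadfactor() for loadavg 1.0 */
	int estcpu = 255;		/* maximum estcpu, see ESTCPULIM */
	int i;

	for (i = 1; i <= 6; i++) {
		estcpu = decay_demo(loadfac, estcpu);
		printf("step %d: estcpu = %d\n", i, estcpu);
	}
	/* Prints 170, 113, 75, 50, 33, 22: below 10% of 255 by step 6. */
	return (0);
}
#endif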
/* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
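
/*
 * Editor's note, not part of the original file: a worked check of the
 * CCPU_SHIFT choice, assuming FSHIFT == 11 (FSCALE == 2048).  With
 * stathz == 100, FSHIFT - CCPU_SHIFT == 0 and the update in schedcpu()
 * below reduces to
 *
 *	ts_pctcpu = ((ts_pctcpu * ccpu) >> FSHIFT) + ts_cpticks;
 *
 * A thread burning a full second of CPU (ts_cpticks == 100) converges on
 *
 *	pctcpu = 100 / (1 - 0.95122...) ~= 2050 ~= FSCALE,
 *
 * i.e. a fixed-point value of about 1.0, which ps(1) reports as ~100%.
 */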
/*
 * Recompute process priorities, every hz ticks.
 * MP-safe, called without the Giant mutex.
 */
/* ARGSUSED */
static void
schedcpu(void)
{
	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct thread *td;
	struct proc *p;
	struct td_sched *ts;
	int awake, realstathz;

	realstathz = stathz ? stathz : hz;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		FOREACH_THREAD_IN_PROC(p, td) {
			awake = 0;
			thread_lock(td);
			ts = td->td_sched;
			/*
			 * Increment sleep time (if sleeping).  We
			 * ignore overflow, as above.
			 */
			/*
			 * The td_sched slptimes are not touched in wakeup
			 * because the thread may not HAVE everything in
			 * memory? XXX I think this is out of date.
			 */
			if (TD_ON_RUNQ(td)) {
				awake = 1;
				td->td_flags &= ~TDF_DIDRUN;
			} else if (TD_IS_RUNNING(td)) {
				awake = 1;
				/* Do not clear TDF_DIDRUN */
			} else if (td->td_flags & TDF_DIDRUN) {
				awake = 1;
				td->td_flags &= ~TDF_DIDRUN;
			}

			/*
			 * ts_pctcpu is only for ps and ttyinfo().
			 */
			ts->ts_pctcpu = (ts->ts_pctcpu * ccpu) >> FSHIFT;
			/*
			 * If the td_sched has been idle the entire second,
			 * stop recalculating its priority until
			 * it wakes up.
			 */
			if (ts->ts_cpticks != 0) {
#if	(FSHIFT >= CCPU_SHIFT)
				ts->ts_pctcpu += (realstathz == 100)
				    ? ((fixpt_t) ts->ts_cpticks) <<
				    (FSHIFT - CCPU_SHIFT) :
				    100 * (((fixpt_t) ts->ts_cpticks)
				    << (FSHIFT - CCPU_SHIFT)) / realstathz;
#else
				ts->ts_pctcpu += ((FSCALE - ccpu) *
				    (ts->ts_cpticks *
				    FSCALE / realstathz)) >> FSHIFT;
#endif
				ts->ts_cpticks = 0;
			}
			/*
			 * If there are ANY running threads in this process,
			 * then don't count it as sleeping.
			 * XXX: this is broken.
			 */
			if (awake) {
				if (ts->ts_slptime > 1) {
					/*
					 * In an ideal world, this should not
					 * happen, because whoever woke us
					 * up from the long sleep should have
					 * unwound the slptime and reset our
					 * priority before we run at the stale
					 * priority.  Should KASSERT at some
					 * point when all the cases are fixed.
					 */
					updatepri(td);
				}
				ts->ts_slptime = 0;
			} else
				ts->ts_slptime++;
			if (ts->ts_slptime > 1) {
				thread_unlock(td);
				continue;
			}
			td->td_estcpu = decay_cpu(loadfac, td->td_estcpu);
			resetpriority(td);
			resetpriority_thread(td);
			thread_unlock(td);
		} /* end of thread loop */
		PROC_UNLOCK(p);
	} /* end of process loop */
	sx_sunlock(&allproc_lock);
}

/*
 * Main loop for a kthread that executes schedcpu once a second.
 */
static void
schedcpu_thread(void)
{

	for (;;) {
		schedcpu();
		pause("-", hz);
	}
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max td_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay td_estcpu to zero.
 */
static void
updatepri(struct thread *td)
{
	struct td_sched *ts;
	fixpt_t loadfac;
	unsigned int newcpu;

	ts = td->td_sched;
	loadfac = loadfactor(averunnable.ldavg[0]);
	if (ts->ts_slptime > 5 * loadfac)
		td->td_estcpu = 0;
	else {
		newcpu = td->td_estcpu;
		ts->ts_slptime--;	/* was incremented in schedcpu() */
		while (newcpu && --ts->ts_slptime)
			newcpu = decay_cpu(loadfac, newcpu);
		td->td_estcpu = newcpu;
	}
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
static void
resetpriority(struct thread *td)
{
	register unsigned int newpriority;

	if (td->td_pri_class == PRI_TIMESHARE) {
		newpriority = PUSER + td->td_estcpu / INVERSE_ESTCPU_WEIGHT +
		    NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN);
		newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
		    PRI_MAX_TIMESHARE);
		sched_user_prio(td, newpriority);
	}
}

/*
 * Update the thread's priority when the associated process's user
 * priority changes.
 */
static void
resetpriority_thread(struct thread *td)
{

	/* Only change threads with a time sharing user priority. */
	if (td->td_priority < PRI_MIN_TIMESHARE ||
	    td->td_priority > PRI_MAX_TIMESHARE)
		return;

	/* XXX the whole needresched thing is broken, but not silly. */
	maybe_resched(td);

	sched_prio(td, td->td_user_pri);
}

/* ARGSUSED */
static void
sched_setup(void *dummy)
{
	setup_runqs();

	if (sched_quantum == 0)
		sched_quantum = SCHED_QUANTUM;
	hogticks = 2 * sched_quantum;

	/* Account for thread0. */
	sched_load_add();
}

/* External interfaces start here */

/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	thread0.td_sched = &td_sched0;
	thread0.td_lock = &sched_lock;
	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
}

int
sched_runnable(void)
{
#ifdef SMP
	return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
#else
	return runq_check(&runq);
#endif
}

int
sched_rr_interval(void)
{
	if (sched_quantum == 0)
		sched_quantum = SCHED_QUANTUM;
	return (sched_quantum);
}

/*
 * We adjust the priority of the current process.  The priority of
 * a process gets worse as it accumulates CPU time.  The cpu usage
 * estimator (td_estcpu) is increased here.  resetpriority() will
 * compute a different priority each time td_estcpu increases by
 * INVERSE_ESTCPU_WEIGHT (until MAXPRI is reached).  The cpu usage
 * estimator ramps up quite quickly when the process is running
 * (linearly), and decays away exponentially, at a rate which is
 * proportionally slower when the system is busy.  The basic principle
 * is that the system will 90% forget that the process used a lot of
 * CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin
 * among other processes.
 */
void
sched_clock(struct thread *td)
{
	struct td_sched *ts;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	ts = td->td_sched;

	ts->ts_cpticks++;
	td->td_estcpu = ESTCPULIM(td->td_estcpu + 1);
	if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
		resetpriority(td);
		resetpriority_thread(td);
	}

	/*
	 * Force a context switch if the current thread has used up a full
	 * quantum (default quantum is 100ms).
	 */
	if (!TD_IS_IDLETHREAD(td) &&
	    ticks - PCPU_GET(switchticks) >= sched_quantum)
		td->td_flags |= TDF_NEEDRESCHED;
}

/*
 * Charge a child's scheduling CPU usage to its parent.
 */
void
sched_exit(struct proc *p, struct thread *td)
{

	CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
	    td, td->td_name, td->td_priority);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{

	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
	    child, child->td_name, child->td_priority);
	thread_lock(td);
	td->td_estcpu = ESTCPULIM(td->td_estcpu + child->td_estcpu);
	thread_unlock(td);
	mtx_lock_spin(&sched_lock);
	if ((child->td_proc->p_flag & P_NOLOAD) == 0)
		sched_load_rem();
	mtx_unlock_spin(&sched_lock);
}

void
sched_fork(struct thread *td, struct thread *childtd)
{
	sched_fork_thread(td, childtd);
}

void
sched_fork_thread(struct thread *td, struct thread *childtd)
{
	struct td_sched *ts;

	childtd->td_estcpu = td->td_estcpu;
	childtd->td_lock = &sched_lock;
	childtd->td_cpuset = cpuset_ref(td->td_cpuset);
	ts = childtd->td_sched;
	bzero(ts, sizeof(*ts));
}

void
sched_nice(struct proc *p, int nice)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		resetpriority(td);
		resetpriority_thread(td);
		thread_unlock(td);
	}
}

void
sched_class(struct thread *td, int class)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_pri_class = class;
}

/*
 * Adjust the priority of a thread.
 */
static void
sched_priority(struct thread *td, u_char prio)
{
	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
	    td, td->td_name, td->td_priority, prio, curthread,
	    curthread->td_name);

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	if (td->td_priority == prio)
		return;
	td->td_priority = prio;
	if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) {
		sched_rem(td);
		sched_add(td, SRQ_BORING);
	}
}
/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

	td->td_flags |= TDF_BORROWING;
	sched_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio, the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_prio(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}
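
/*
 * Editor's note, not part of the original file: a worked example of the
 * lend/unlend pair above, assuming the stock priority layout of this era
 * where the timesharing range is 160-223.  Suppose a timesharing thread
 * with td_base_pri == td_user_pri == 180 holds a lock and is lent
 * priority 80 by a blocked waiter: sched_lend_prio() sets TDF_BORROWING
 * and drops td_priority to 80.  If sched_unlend_prio(td, 120) is called
 * next, base_pri computes to 180 and 120 < 180, so the thread stays
 * borrowing and is simply re-lent 120.  Once sched_unlend_prio(td, 200)
 * runs, 200 >= 180, so TDF_BORROWING is cleared and sched_prio() restores
 * the thread's own priority of 180.
 */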
void
sched_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/*
	 * If the thread is borrowing another thread's priority, don't ever
	 * lower the priority.
	 */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_priority(td, prio);

	/*
	 * If the thread is on a turnstile, then let the turnstile update
	 * its state.
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
sched_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_base_user_pri = prio;
	if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
		return;
	oldprio = td->td_user_pri;
	td->td_user_pri = prio;
}

void
sched_lend_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_flags |= TDF_UBORROWING;
	oldprio = td->td_user_pri;
	td->td_user_pri = prio;
}

void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	base_pri = td->td_base_user_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_UBORROWING;
		sched_user_prio(td, base_pri);
	} else {
		sched_lend_user_prio(td, prio);
	}
}

void
sched_sleep(struct thread *td, int pri)
{

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_slptick = ticks;
	td->td_sched->ts_slptime = 0;
	if (pri)
		sched_prio(td, pri);
	if (TD_IS_SUSPENDED(td) || pri <= PSOCK)
		td->td_flags |= TDF_CANSWAP;
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct td_sched *ts;
	struct proc *p;

	ts = td->td_sched;
	p = td->td_proc;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	/*
	 * Switch to the sched lock to fix things up and pick
	 * a new thread.
	 */
	if (td->td_lock != &sched_lock) {
		mtx_lock_spin(&sched_lock);
		thread_unlock(td);
	}

	if ((p->p_flag & P_NOLOAD) == 0)
		sched_load_rem();

	if (newtd)
		newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED);

	td->td_lastcpu = td->td_oncpu;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;
	td->td_oncpu = NOCPU;
	/*
	 * At the last moment, if this thread is still marked RUNNING,
	 * then put it back on the run queue as it has not been suspended
	 * or stopped or any thing else similar.  We never put the idle
	 * threads on the run queue, however.
	 */
	if (td->td_flags & TDF_IDLETD) {
		TD_SET_CAN_RUN(td);
#ifdef SMP
		idle_cpus_mask &= ~PCPU_GET(cpumask);
#endif
	} else {
		if (TD_IS_RUNNING(td)) {
			/* Put us back on the run queue. */
			sched_add(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
		}
	}
	if (newtd) {
		/*
		 * The thread we are about to run needs to be counted
		 * as if it had been added to the run queue and selected.
		 * It came from:
		 * * A preemption
		 * * An upcall
		 * * A followon
		 */
		KASSERT((newtd->td_inhibitors == 0),
		    ("trying to run inhibited thread"));
		newtd->td_flags |= TDF_DIDRUN;
		TD_SET_RUNNING(newtd);
		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
			sched_load_add();
	} else {
		newtd = choosethread();
	}
	MPASS(newtd->td_lock == &sched_lock);

	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
		/* I feel sleepy */
		lock_profile_release_lock(&sched_lock.lock_object);
#ifdef KDTRACE_HOOKS
		/*
		 * If DTrace has set the active vtime enum to anything
		 * other than INACTIVE (0), then it should have set the
		 * function to call.
		 */
		if (dtrace_vtime_active)
			(*dtrace_vtime_switch_func)(newtd);
#endif

		cpu_switch(td, newtd, td->td_lock);
		lock_profile_obtain_lock_success(&sched_lock.lock_object,
		    0, 0, __FILE__, __LINE__);
		/*
		 * Where am I?  What year is it?
		 * We are in the same thread that went to sleep above,
		 * but any amount of time may have passed.  All our context
		 * will still be available as will local variables.
		 * PCPU values however may have changed as we may have
		 * changed CPU so don't trust cached values of them.
		 * New threads will go to fork_exit() instead of here
		 * so if you change things here you may need to change
		 * things there too.
		 * If the thread above was exiting it will never wake
		 * up again here, so either it has saved everything it
		 * needed to, or the thread_wait() or wait() will
		 * need to reap it.
		 */
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}

#ifdef SMP
	if (td->td_flags & TDF_IDLETD)
		idle_cpus_mask |= PCPU_GET(cpumask);
#endif
	sched_lock.mtx_lock = (uintptr_t)td;
	td->td_oncpu = PCPU_GET(cpuid);
	MPASS(td->td_lock == &sched_lock);
}

void
sched_wakeup(struct thread *td)
{
	struct td_sched *ts;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	ts = td->td_sched;
	td->td_flags &= ~TDF_CANSWAP;
	if (ts->ts_slptime > 1) {
		updatepri(td);
		resetpriority(td);
	}
	td->td_slptick = ticks;
	ts->ts_slptime = 0;
	sched_add(td, SRQ_BORING);
}

#ifdef SMP
/* Enable HTT_2 if you have a 2-way HTT cpu. */
static int
forward_wakeup(int cpunum)
{
	cpumask_t map, me, dontuse;
	cpumask_t map2;
	struct pcpu *pc;
	cpumask_t id, map3;

	mtx_assert(&sched_lock, MA_OWNED);

	CTR0(KTR_RUNQ, "forward_wakeup()");

	if ((!forward_wakeup_enabled) ||
	    (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
		return (0);
	if (!smp_started || cold || panicstr)
		return (0);

	forward_wakeups_requested++;

	/*
	 * Check the idle mask we received against what we calculated
	 * before in the old version.
	 */
	me = PCPU_GET(cpumask);

	/*
	 * Don't bother if we should be doing it ourselves.
	 */
	if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
		return (0);

	dontuse = me | stopped_cpus | hlt_cpus_mask;
	map3 = 0;
	if (forward_wakeup_use_loop) {
		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
			id = pc->pc_cpumask;
			if ((id & dontuse) == 0 &&
			    pc->pc_curthread == pc->pc_idlethread) {
				map3 |= id;
			}
		}
	}

	if (forward_wakeup_use_mask) {
		map = 0;
		map = idle_cpus_mask & ~dontuse;

		/* If they are both on, compare and use loop if different. */
		if (forward_wakeup_use_loop) {
			if (map != map3) {
				printf("map (%02X) != map3 (%02X)\n",
				    map, map3);
				map = map3;
			}
		}
	} else {
		map = map3;
	}
	/* If we only allow a specific CPU, then mask off all the others. */
	if (cpunum != NOCPU) {
		KASSERT((cpunum <= mp_maxcpus),
		    ("forward_wakeup: bad cpunum."));
		map &= (1 << cpunum);
	} else {
		/* Try to choose an idle die. */
		if (forward_wakeup_use_htt) {
			map2 = (map & (map >> 1)) & 0x5555;
			if (map2) {
				map = map2;
			}
		}

		/* Set only one bit (isolate the lowest set bit of map). */
		if (forward_wakeup_use_single) {
			map = map & ((~map) + 1);
		}
	}
	if (map) {
		forward_wakeups_delivered++;
		ipi_selected(map, IPI_AST);
		return (1);
	}
	if (cpunum == NOCPU)
		printf("forward_wakeup: Idle processor not found\n");
	return (0);
}
#endif

#ifdef SMP
static void kick_other_cpu(int pri, int cpuid);

static void
kick_other_cpu(int pri, int cpuid)
{
	struct pcpu *pcpu = pcpu_find(cpuid);
	int cpri = pcpu->pc_curthread->td_priority;

	if (idle_cpus_mask & pcpu->pc_cpumask) {
		forward_wakeups_delivered++;
		ipi_selected(pcpu->pc_cpumask, IPI_AST);
		return;
	}

	if (pri >= cpri)
		return;

#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
#if !defined(FULL_PREEMPTION)
	if (pri <= PRI_MAX_ITHD)
#endif /* ! FULL_PREEMPTION */
	{
		ipi_selected(pcpu->pc_cpumask, IPI_PREEMPT);
		return;
	}
#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */

	pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
	ipi_selected(pcpu->pc_cpumask, IPI_AST);
	return;
}
#endif /* SMP */

void
sched_add(struct thread *td, int flags)
#ifdef SMP
{
	struct td_sched *ts;
	int forwarded = 0;
	int cpu;
	int single_cpu = 0;

	ts = td->td_sched;
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("sched_add: trying to run inhibited thread"));
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("sched_add: bad thread state"));
	KASSERT(td->td_flags & TDF_INMEM,
	    ("sched_add: thread swapped out"));
	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_name, td->td_priority, curthread,
	    curthread->td_name);
	/*
	 * Now that the thread is moving to the run-queue, set the lock
	 * to the scheduler's lock.
	 */
	if (td->td_lock != &sched_lock) {
		mtx_lock_spin(&sched_lock);
		thread_lock_set(td, &sched_lock);
	}
	TD_SET_RUNQ(td);

	if (td->td_pinned != 0) {
		cpu = td->td_lastcpu;
		ts->ts_runq = &runq_pcpu[cpu];
		single_cpu = 1;
		CTR3(KTR_RUNQ,
		    "sched_add: Put td_sched:%p(td:%p) on cpu%d runq",
		    ts, td, cpu);
	} else if ((td)->td_flags & TDF_BOUND) {
		/* Find CPU from bound runq. */
		KASSERT(SKE_RUNQ_PCPU(ts),
		    ("sched_add: bound td_sched not on cpu runq"));
		cpu = ts->ts_runq - &runq_pcpu[0];
		single_cpu = 1;
		CTR3(KTR_RUNQ,
		    "sched_add: Put td_sched:%p(td:%p) on cpu%d runq",
		    ts, td, cpu);
	} else {
		CTR2(KTR_RUNQ,
		    "sched_add: adding td_sched:%p (td:%p) to gbl runq",
		    ts, td);
		cpu = NOCPU;
		ts->ts_runq = &runq;
	}

	if (single_cpu && (cpu != PCPU_GET(cpuid))) {
		kick_other_cpu(td->td_priority, cpu);
	} else {
		if (!single_cpu) {
			cpumask_t me = PCPU_GET(cpumask);
			int idle = idle_cpus_mask & me;

			if (!idle && ((flags & SRQ_INTR) == 0) &&
			    (idle_cpus_mask & ~(hlt_cpus_mask | me)))
				forwarded = forward_wakeup(cpu);
		}

		if (!forwarded) {
			if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td))
				return;
			else
				maybe_resched(td);
		}
	}

	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
		sched_load_add();
	runq_add(ts->ts_runq, td, flags);
}
#else /* SMP */
{
	struct td_sched *ts;

	ts = td->td_sched;
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("sched_add: trying to run inhibited thread"));
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("sched_add: bad thread state"));
	KASSERT(td->td_flags & TDF_INMEM,
	    ("sched_add: thread swapped out"));
	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_name, td->td_priority, curthread,
	    curthread->td_name);
	/*
	 * Now that the thread is moving to the run-queue, set the lock
	 * to the scheduler's lock.
	 */
	if (td->td_lock != &sched_lock) {
		mtx_lock_spin(&sched_lock);
		thread_lock_set(td, &sched_lock);
	}
	TD_SET_RUNQ(td);
	CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq",
	    ts, td);
	ts->ts_runq = &runq;

	/*
	 * If we are yielding (on the way out anyhow) or the thread being
	 * saved is US, then don't try to be smart about preemption or
	 * kicking off another CPU, as it won't help and may hinder.  In
	 * the YIELDING case, we are about to run whoever is being put in
	 * the queue anyhow, and in the OURSELF case, we are putting
	 * ourselves on the run queue, which also only happens when we are
	 * about to yield.
	 */
	if ((flags & SRQ_YIELDING) == 0) {
		if (maybe_preempt(td))
			return;
	}
	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
		sched_load_add();
	runq_add(ts->ts_runq, td, flags);
	maybe_resched(td);
}
#endif /* SMP */

void
sched_rem(struct thread *td)
{
	struct td_sched *ts;

	ts = td->td_sched;
	KASSERT(td->td_flags & TDF_INMEM,
	    ("sched_rem: thread swapped out"));
	KASSERT(TD_ON_RUNQ(td),
	    ("sched_rem: thread not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);
	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
	    td, td->td_name, td->td_priority, curthread,
	    curthread->td_name);

	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
		sched_load_rem();
	runq_remove(ts->ts_runq, td);
	TD_SET_CAN_RUN(td);
}

/*
 * Select threads to run.  Notice that the running threads still consume
 * a slot.
 */
struct thread *
sched_choose(void)
{
	struct thread *td;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef SMP
	struct thread *tdcpu;

	rq = &runq;
	td = runq_choose_fuzz(&runq, runq_fuzz);
	tdcpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);

	if (td == NULL ||
	    (tdcpu != NULL &&
	     tdcpu->td_priority < td->td_priority)) {
		CTR2(KTR_RUNQ, "choosing td %p from pcpu runq %d", tdcpu,
		    PCPU_GET(cpuid));
		td = tdcpu;
		rq = &runq_pcpu[PCPU_GET(cpuid)];
	} else {
		CTR1(KTR_RUNQ, "choosing td_sched %p from main runq", td);
	}

#else
	rq = &runq;
	td = runq_choose(&runq);
#endif

	if (td) {
		runq_remove(rq, td);
		td->td_flags |= TDF_DIDRUN;

		KASSERT(td->td_flags & TDF_INMEM,
		    ("sched_choose: thread swapped out"));
		return (td);
	}
	return (PCPU_GET(idlethread));
}

void
sched_preempt(struct thread *td)
{
	thread_lock(td);
	if (td->td_critnest > 1)
		td->td_owepreempt = 1;
	else
		mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
	thread_unlock(td);
}

void
sched_userret(struct thread *td)
{
	/*
	 * XXX we cheat slightly on the locking here to avoid locking in
	 * the usual case.  Setting td_priority here is essentially an
	 * incomplete workaround for not setting it properly elsewhere.
	 * Now that some interrupt handlers are threads, not setting it
	 * properly elsewhere can clobber it in the window between setting
	 * it here and returning to user mode, so don't waste time setting
	 * it perfectly here.
	 */
	KASSERT((td->td_flags & TDF_BORROWING) == 0,
	    ("thread with borrowed priority returning to userland"));
	if (td->td_priority != td->td_user_pri) {
		thread_lock(td);
		td->td_priority = td->td_user_pri;
		td->td_base_pri = td->td_user_pri;
		thread_unlock(td);
	}
}

void
sched_bind(struct thread *td, int cpu)
{
	struct td_sched *ts;

	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_RUNNING(td),
	    ("sched_bind: cannot bind non-running thread"));

	ts = td->td_sched;

	td->td_flags |= TDF_BOUND;
#ifdef SMP
	ts->ts_runq = &runq_pcpu[cpu];
	if (PCPU_GET(cpuid) == cpu)
		return;

	mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread* td)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	td->td_flags &= ~TDF_BOUND;
}

int
sched_is_bound(struct thread *td)
{
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	return (td->td_flags & TDF_BOUND);
}

void
sched_relinquish(struct thread *td)
{
	thread_lock(td);
	mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
	thread_unlock(td);
}

int
sched_load(void)
{
	return (sched_tdcnt);
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	struct td_sched *ts;

	ts = td->td_sched;
	return (ts->ts_pctcpu);
}

void
sched_tick(void)
{
}

/*
 * The actual idle process.
 */
void
sched_idletd(void *dummy)
{

	for (;;) {
		mtx_assert(&Giant, MA_NOTOWNED);

		while (sched_runnable() == 0)
			cpu_idle(0);

		mtx_lock_spin(&sched_lock);
		mi_switch(SW_VOL | SWT_IDLE, NULL);
		mtx_unlock_spin(&sched_lock);
	}
}

/*
 * A CPU is entering for the first time or a thread is exiting.
 */
void
sched_throw(struct thread *td)
{
	/*
	 * Correct spinlock nesting.  The idle thread context that we are
	 * borrowing was created so that it would start out with a single
	 * spin lock (sched_lock) held in fork_trampoline().  Since we've
	 * explicitly acquired locks in this function, the nesting count
	 * is now 2 rather than 1.  Since we are nested, calling
	 * spinlock_exit() will simply adjust the counts without allowing
	 * spin lock using code to interrupt us.
	 */
	if (td == NULL) {
		mtx_lock_spin(&sched_lock);
		spinlock_exit();
	} else {
		lock_profile_release_lock(&sched_lock.lock_object);
		MPASS(td->td_lock == &sched_lock);
	}
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
	PCPU_SET(switchtime, cpu_ticks());
	PCPU_SET(switchticks, ticks);
	cpu_throw(td, choosethread());	/* doesn't return */
}

void
sched_fork_exit(struct thread *td)
{

	/*
	 * Finish setting up thread glue so that it begins execution in a
	 * non-nested critical section with sched_lock held but not recursed.
	 */
	td->td_oncpu = PCPU_GET(cpuid);
	sched_lock.mtx_lock = (uintptr_t)td;
	lock_profile_obtain_lock_success(&sched_lock.lock_object,
	    0, 0, __FILE__, __LINE__);
	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
}

void
sched_affinity(struct thread *td)
{
}