sched_ule.c revision 165620
/*-
 * Copyright (c) 2002-2006, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 165620 2006-12-29 10:37:07Z jeff $");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)
static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1 << 10;

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	enum {
		TSS_THREAD = 0x0,	/* slaved to thread state */
		TSS_ONRUNQ
	} ts_state;			/* (j) thread sched specific status. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */

	/* originally from kg_sched */
	int		skg_slptime;	/* Number of ticks we vol. slept */
	int		skg_runtime;	/* Number of ticks we were running */
};
#define	ts_assign	ts_procq.tqe_next
/* flags kept in ts_flags */
#define	TSF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	TSF_BOUND	0x0002		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	TSF_HOLD	0x0008		/* Thread is temporarily bound. */
#define	TSF_REMOVED	0x0010		/* Thread was removed while ASSIGNED */
#define	TSF_INTERNAL	0x0020		/* Thread added due to migration. */
#define	TSF_PREEMPTED	0x0040		/* Thread was preempted */
#define	TSF_DIDRUN	0x2000		/* Thread actually ran. */
#define	TSF_EXIT	0x4000		/* Thread is being killed. */

static struct td_sched td_sched0;
/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower (better) priorities to threads that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_INTERACTIVE		(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))
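
/*
 * A worked example of the two macro families above, assuming the stock
 * <sys/priority.h> timeshare range of PRI_MIN_TIMESHARE 160 and
 * PRI_MAX_TIMESHARE 223 (SCHED_PRI_RANGE == 64) and the nice range of
 * -20..20 (SCHED_PRI_NRESV == 41, SCHED_PRI_NHALF == 20), with the
 * default slice_min == 1 and slice_max == 10 before sched_initticks()
 * rescales them:
 *
 *	SCHED_PRI_INTERACT(15) == 15 * 64 / 100 == 9, so an interactivity
 *	score of 15 with nice 0 yields priority 160 + 9 + 0 == 169;
 *	a score of 85 with nice 5 yields 160 + 54 + 5 == 219.
 *
 *	SCHED_SLICE_NICE(0) == 10 ticks for the least nice thread,
 *	SCHED_SLICE_NICE(5) == 10 - (5 * 10) / 19 == 8 ticks, and
 *	SCHED_SLICE_NICE(19) == 0 at the far edge of the window.
 *
 * All of this is integer arithmetic, so the scaled values truncate
 * toward zero.
 */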

/*
 * This macro determines whether or not the thread belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(td)						\
    (sched_interact_score(td) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(td, ts)						\
    ((ts->ts_thread->td_flags & TDF_BORROWING) ||			\
     (ts->ts_flags & TSF_PREEMPTED) || SCHED_INTERACTIVE(td))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;		/* Queue of IDLE threads. */
	struct runq	tdq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*tdq_next;		/* Next timeshare queue. */
	struct runq	*tdq_curr;		/* Current queue. */
	int		tdq_load_timeshare;	/* Load for timeshare. */
	int		tdq_load;		/* Aggregate load. */
	short		tdq_nice[SCHED_PRI_NRESV]; /* Threads in each nice bin. */
	short		tdq_nicemin;		/* Least nice. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;		/* Next in tdq group. */
	struct tdq_group *tdq_group;		/* Our processor group. */
	volatile struct td_sched *tdq_assigned;	/* assigned by another CPU. */
#else
	int		tdq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int	tdg_cpus;		/* Count of CPUs in this tdq group. */
	cpumask_t tdg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t tdg_idlemask;		/* Idle cpus in this group. */
	cpumask_t tdg_mask;		/* Bit mask for first cpu. */
	int	tdg_load;		/* Total load of this group. */
	int	tdg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};
#endif

/*
 * One thread queue per processor.
 */
#ifdef SMP
static cpumask_t tdq_idle;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static struct td_sched *sched_choose(void);	/* XXX Should be thread * */
static void sched_slice(struct td_sched *);
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);

/* Operations on per processor queues */
static struct td_sched *tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
static void tdq_nice_add(struct tdq *, int);
static void tdq_nice_rem(struct tdq *, int);
void tdq_print(int cpu);
#ifdef SMP
static int tdq_transfer(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *, int);
static void tdq_assign(struct tdq *);
static struct td_sched *tdq_steal(struct tdq *, int);
#define	THREAD_CAN_MIGRATE(ts)						\
    ((ts)->ts_thread->td_pinned == 0 && ((ts)->ts_flags & TSF_BOUND) == 0)
#endif

void
tdq_print(int cpu)
{
	struct tdq *tdq;
	int i;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload: %d\n", tdq->tdq_load);
	printf("\tload TIMESHARE: %d\n", tdq->tdq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
	printf("\tnicemin:\t%d\n", tdq->tdq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (tdq->tdq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, tdq->tdq_nice[i]);
}

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
	}
#endif
	if (ts->ts_flags & TSF_PREEMPTED)
		flags |= SRQ_PREEMPTED;
	runq_add(ts->ts_runq, ts, flags);
}

static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
	}
#endif
	runq_remove(ts->ts_runq, ts);
}

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->tdq_load_timeshare++;
	tdq->tdq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
	if (ts->ts_thread->td_pri_class == PRI_TIMESHARE)
		tdq_nice_add(tdq, ts->ts_thread->td_proc->p_nice);
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->tdq_load_timeshare--;
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
	if (ts->ts_thread->td_pri_class == PRI_TIMESHARE)
		tdq_nice_rem(tdq, ts->ts_thread->td_proc->p_nice);
}

static void
tdq_nice_add(struct tdq *tdq, int nice)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	tdq->tdq_nice[nice + SCHED_PRI_NHALF]++;
	if (nice < tdq->tdq_nicemin || tdq->tdq_load_timeshare == 1)
		tdq->tdq_nicemin = nice;
}

static void
tdq_nice_rem(struct tdq *tdq, int nice)
{
	int n;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	n = nice + SCHED_PRI_NHALF;
	tdq->tdq_nice[n]--;
	KASSERT(tdq->tdq_nice[n] >= 0, ("Negative nice count."));

	/*
	 * If this wasn't the smallest nice value or there are more in
	 * this bucket we can just return.  Otherwise we have to recalculate
	 * the smallest nice.
	 */
	if (nice != tdq->tdq_nicemin ||
	    tdq->tdq_nice[n] != 0 ||
	    tdq->tdq_load_timeshare == 0)
		return;

	for (; n < SCHED_PRI_NRESV; n++)
		if (tdq->tdq_nice[n]) {
			tdq->tdq_nicemin = n - SCHED_PRI_NHALF;
			return;
		}
}
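
/*
 * For illustration, with the stock nice range of -20..20: "normalize to
 * zero" maps nice values onto the 41 tdq_nice[] buckets 0..40, with
 * bucket 20 counting the nice 0 threads.  If the only nice -5 thread
 * leaves a queue that still holds two nice 0 threads, the loop above
 * starts at bucket 15, finds bucket 20 occupied, and sets tdq_nicemin
 * back to 0.
 */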

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 */
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}
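
/*
 * Example of the imbalance arithmetic in sched_balance_pair(): with
 * high_load == 7 and low_load == 2, diff is 5, move rounds up to 3, and
 * the pair settles at loads 4 and 5.  The min() against transferable
 * matters because pinned and bound threads inflate the load count but
 * can never be moved.
 */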

static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	ts->ts_state = TSS_THREAD;
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
	tdq_notify(ts, cpu);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try and steal threads from another cpu in
	 * the group before idling.
	 */
	if (tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts == NULL)
				continue;
			ts->ts_state = TSS_THREAD;
			tdq_runq_rem(steal, ts);
			tdq_load_rem(steal, ts);
			ts->ts_cpu = PCPU_GET(cpuid);
			ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
			sched_add(ts->ts_thread, SRQ_YIELDING);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask != tdg->tdg_cpumask)
		return (1);
	atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
}

static void
tdq_assign(struct tdq *tdq)
{
	struct td_sched *nts;
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts, (uintptr_t)NULL));
	for (; ts != NULL; ts = nts) {
		nts = ts->ts_assign;
		tdq->tdq_group->tdg_load--;
		tdq->tdq_load--;
		ts->ts_flags &= ~TSF_ASSIGNED;
		if (ts->ts_flags & TSF_REMOVED) {
			ts->ts_flags &= ~TSF_REMOVED;
			continue;
		}
		ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
		sched_add(ts->ts_thread, SRQ_YIELDING);
	}
}

static void
tdq_notify(struct td_sched *ts, int cpu)
{
	struct tdq *tdq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	tdq = TDQ_CPU(cpu);
	/* XXX */
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (tdq_idle & tdq->tdq_group->tdg_mask))
		atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
	tdq->tdq_group->tdg_load++;
	tdq->tdq_load++;
	ts->ts_cpu = cpu;
	ts->ts_flags |= TSF_ASSIGNED;
	prio = ts->ts_thread->td_priority;

	/*
	 * Place a thread on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts->ts_assign, (uintptr_t)ts));
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ts->ts_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}
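
/*
 * The do/while loops in tdq_notify() and tdq_assign() above implement a
 * lock-free singly linked list with ts_assign as the link field:
 * tdq_notify() pushes one td_sched onto tdq_assigned, retrying whenever
 * another cpu wins the race and changes the head, and tdq_assign()
 * later takes the entire chain with a single compare-and-set against
 * NULL before walking it.  In outline, minus the volatile casts, the
 * push is:
 *
 *	do {
 *		ts->ts_assign = tdq->tdq_assigned;
 *	} while (!atomic_cmpset_ptr(&tdq->tdq_assigned,
 *	    ts->ts_assign, ts));
 */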

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 */
	if ((ts = runq_steal(tdq->tdq_next)) != NULL)
		return (ts);
	if ((ts = runq_steal(tdq->tdq_curr)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class)
{
	struct tdq_group *ntdg;
	struct tdq_group *tdg;
	struct tdq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = TDQ_CPU(ts->ts_cpu);
	ntdg = old->tdq_group;
	tdg = tdq->tdq_group;
	if (tdq_idle) {
		if (tdq_idle & ntdg->tdg_mask) {
			cpu = ffs(ntdg->tdg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "tdq_transfer: %p found old cpu %X "
				    "in idlemask.", ts, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(tdq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p found %X "
			    "in idlemask.", ts, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->tdq_load < tdq->tdq_load) {
		cpu = ts->ts_cpu + 1;
		CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X "
		    "load less than ours.", ts, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= tdg_maxid; idx++) {
		ntdg = TDQ_GROUP(idx);
		if (ntdg->tdg_load /*+ (ntdg->tdg_cpus * 2)*/ < tdg->tdg_load) {
			cpu = ffs(ntdg->tdg_cpumask);
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less "
			    "than ours.", ts, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (tdg->tdg_idlemask) {
		cpu = ffs(tdg->tdg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in "
			    "group.", ts, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ts->ts_runq = NULL;
	tdq_notify(ts, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */

static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct runq *swap;
	struct td_sched *ts;
	int nice;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ts = runq_choose(tdq->tdq_curr);
		if (ts == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = tdq->tdq_curr;
			tdq->tdq_curr = tdq->tdq_next;
			tdq->tdq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the td_sched is in a
		 * TIMESHARE td_sched group and its nice was too far out
		 * of the range that receives slices.
		 */
		nice = ts->ts_thread->td_proc->p_nice + (0 - tdq->tdq_nicemin);
#if 0
		if (ts->ts_slice == 0 || (nice > SCHED_SLICE_NTHRESH &&
		    ts->ts_thread->td_proc->p_nice != 0)) {
			runq_remove(ts->ts_runq, ts);
			sched_slice(ts);
			ts->ts_runq = tdq->tdq_next;
			runq_add(ts->ts_runq, ts, 0);
			continue;
		}
#endif
		return (ts);
	}

	return (runq_choose(&tdq->tdq_idle));
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_timeshare[0]);
	runq_init(&tdq->tdq_timeshare[1]);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_curr = &tdq->tdq_timeshare[0];
	tdq->tdq_next = &tdq->tdq_timeshare[1];
	tdq->tdq_load = 0;
	tdq->tdq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value
	 * in case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *ksq;

		ksq = &tdq_cpu[i];
		ksq->tdq_assigned = NULL;
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *tdg;
		struct tdq *ksq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			ksq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			ksq->tdq_transferable = 0;
			ksq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, ksq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	slice_min = (realstathz/100);	/* 10ms */
	slice_max = (realstathz/7);	/* ~140ms */

	tickincr = (hz << 10) / realstathz;
	/*
	 * XXX This does not work for values of stathz that are much
	 * larger than hz.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}
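
/*
 * For example, with hz == 1000 and stathz == 128 (typical but
 * configuration dependent values), tickincr == (1000 << 10) / 128 ==
 * 8000, so each stat clock tick charges the running thread 8000 units,
 * or about 7.8 hz ticks in the 10-bit fixed point representation that
 * skg_runtime and skg_slptime use throughout this file.
 */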

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(td));
	pri += SCHED_PRI_BASE;
	pri += td->td_proc->p_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	sched_user_prio(td, pri);

	return;
}

/*
 * Calculate a time slice based on the properties of the process
 * and the runq that we're on.  This is only for PRI_TIMESHARE threads.
 */
static void
sched_slice(struct td_sched *ts)
{
	struct tdq *tdq;
	struct thread *td;

	td = ts->ts_thread;
	tdq = TDQ_CPU(ts->ts_cpu);

	if (td->td_flags & TDF_BORROWING) {
		ts->ts_slice = SCHED_SLICE_MIN;
		return;
	}

	/*
	 * Rationale:
	 * Threads in interactive procs get a minimal slice so that we
	 * quickly notice if it abuses its advantage.
	 *
	 * Threads in non-interactive procs are assigned a slice that is
	 * based on the proc's nice value relative to the least nice procs
	 * on the run queue for this cpu.
	 *
	 * If the thread is less nice than all others it gets the maximum
	 * slice and other threads will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice td_sched on the run queue.  Slice size is determined by
	 * the td_sched distance from the last nice thread.
	 *
	 * If the td_sched is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 procs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
	 */
	if (!SCHED_INTERACTIVE(td)) {
		int nice;

		nice = td->td_proc->p_nice + (0 - tdq->tdq_nicemin);
		if (tdq->tdq_load_timeshare == 0 ||
		    td->td_proc->p_nice < tdq->tdq_nicemin)
			ts->ts_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ts->ts_slice = SCHED_SLICE_NICE(nice);
		else if (td->td_proc->p_nice == 0)
			ts->ts_slice = SCHED_SLICE_MIN;
		else
			ts->ts_slice = SCHED_SLICE_MIN; /* 0 */
	} else
		ts->ts_slice = SCHED_SLICE_INTERACTIVE;

	return;
}
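
/*
 * To make the slice window concrete, assume the default slice_min == 1
 * and slice_max == 10 and a queue whose least nice thread is nice -10
 * (tdq_nicemin == -10).  A non-interactive nice -15 thread beats the
 * minimum and gets SCHED_SLICE_MAX (10 ticks); a nice -5 thread sits 5
 * points into the window and gets SCHED_SLICE_NICE(5) == 8 ticks; a
 * nice +10 thread is 20 points away, past SCHED_SLICE_NTHRESH (19),
 * and falls through to SCHED_SLICE_MIN.  The trailing "0" comment on
 * that last branch records that a zero slice was originally intended
 * for it.
 */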

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct thread *td)
{
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us back into the range of [3/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX]
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		td->td_sched->skg_runtime /= 2;
		td->td_sched->skg_slptime /= 2;
		return;
	}
	td->td_sched->skg_runtime = (td->td_sched->skg_runtime / 5) * 4;
	td->td_sched->skg_slptime = (td->td_sched->skg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct thread *td)
{
	int ratio;
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		td->td_sched->skg_runtime /= ratio;
		td->td_sched->skg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct thread *td)
{
	int div;

	if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) {
		div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div)));
	}
	if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) {
		div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF);
		return (td->td_sched->skg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);

}
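
/*
 * Example scores from sched_interact_score(), in the same << 10 units
 * as skg_runtime and skg_slptime, with SCHED_INTERACT_HALF == 50: a
 * thread that slept 3072 units and ran 1024 scores 1024 / (3072 / 50)
 * == 16, under SCHED_INTERACT_THRESH (30) and therefore interactive.
 * Invert the ratio (ran 3072, slept 1024) and it scores
 * 50 + (50 - 1024 / (3072 / 50)) == 84, firmly non-interactive.
 */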

/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL;	/* XXX */
	thread0.td_sched = &td_sched0;
	td_sched0.ts_thread = &thread0;
	td_sched0.ts_state = TSS_THREAD;
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

static void
sched_pctcpu_update(struct td_sched *ts)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ts->ts_ltick > ticks - SCHED_CPU_TICKS) {
		/*
		 * Shift the tick count out so that the divide doesn't
		 * round away our results.
		 */
		ts->ts_ticks <<= 10;
		ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) *
		    SCHED_CPU_TICKS;
		ts->ts_ticks >>= 10;
	} else
		ts->ts_ticks = 0;
	ts->ts_ltick = ticks;
	ts->ts_ftick = ts->ts_ltick - SCHED_CPU_TICKS;
}
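
/*
 * Example of the rescaling in sched_pctcpu_update(), assuming hz ==
 * 1000 so that SCHED_CPU_TICKS == 10000: a thread first ticked 20000
 * ticks ago with ts_ticks == 5000 becomes
 *
 *	((5000 << 10) / 20000) * 10000 >> 10 == 2500
 *
 * scaling the count down to the last SCHED_CPU_TICKS window, while the
 * shift in and out keeps the intermediate divide from rounding the
 * result away.
 */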

void
sched_thread_priority(struct thread *td, u_char prio)
{
	struct td_sched *ts;

	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
	    curthread->td_proc->p_comm);
	ts = td->td_sched;
	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_priority == prio)
		return;
	if (TD_ON_RUNQ(td)) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  We still call adjustrunqueue below in case kse
		 * needs to fix things up.
		 */
		if (prio < td->td_priority && ts->ts_runq != NULL &&
		    (ts->ts_flags & TSF_ASSIGNED) == 0 &&
		    ts->ts_runq != TDQ_CPU(ts->ts_cpu)->tdq_curr) {
			runq_remove(ts->ts_runq, ts);
			ts->ts_runq = TDQ_CPU(ts->ts_cpu)->tdq_curr;
			runq_add(ts->ts_runq, ts, 0);
		}
		/*
		 * Hold this td_sched on this cpu so that sched_prio() doesn't
		 * cause excessive migration.  We only want migration to
		 * happen as the result of a wakeup.
		 */
		ts->ts_flags |= TSF_HOLD;
		adjustrunqueue(td, prio);
		ts->ts_flags &= ~TSF_HOLD;
	} else
		td->td_priority = prio;
}

/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

	td->td_flags |= TDF_BORROWING;
	sched_thread_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio, the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_thread_priority(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}

void
sched_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/*
	 * If the thread is borrowing another thread's priority, don't
	 * ever lower the priority.
	 */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_thread_priority(td, prio);

	/*
	 * If the thread is on a turnstile, then let the turnstile update
	 * its state.
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
sched_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_base_user_pri = prio;
	if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
		return;
	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_lend_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_flags |= TDF_UBORROWING;

	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	base_pri = td->td_base_user_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_UBORROWING;
		sched_user_prio(td, base_pri);
	} else
		sched_lend_user_prio(td, prio);
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct tdq *ksq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = td->td_sched;
	ksq = TDQ_SELF();

	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;

	/*
	 * If the thread has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if (td == PCPU_GET(idlethread)) {
		TD_SET_CAN_RUN(td);
	} else if ((ts->ts_flags & TSF_ASSIGNED) == 0) {
		/* We are ending our run so make our slot available again */
		tdq_load_rem(ksq, ts);
		if (TD_IS_RUNNING(td)) {
			/*
			 * Don't allow the thread to migrate
			 * from a preemption.
			 */
			ts->ts_flags |= TSF_HOLD;
			setrunqueue(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
			ts->ts_flags &= ~TSF_HOLD;
		}
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread account for it as if it had been
		 * added to the run queue and then chosen.
		 */
		newtd->td_sched->ts_flags |= TSF_DIDRUN;
		newtd->td_sched->ts_runq = ksq->tdq_curr;
		TD_SET_RUNNING(newtd);
		tdq_load_add(TDQ_SELF(), newtd->td_sched);
	} else
		newtd = choosethread();
	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif

		cpu_switch(td, newtd);
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}

	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct td_sched *ts;
	struct thread *td;
	struct tdq *tdq;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running threads.
	 */
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_pri_class == PRI_TIMESHARE) {
			ts = td->td_sched;
			if (ts->ts_runq == NULL)
				continue;
			tdq = TDQ_CPU(ts->ts_cpu);
			tdq_nice_rem(tdq, p->p_nice);
			tdq_nice_add(tdq, nice);
		}
	}
	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		sched_priority(td);
		td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_sched->ts_slptime = ticks;
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Record how long we slept.  Interactivity behavior is modeled
	 * from this per-thread sleep and run time history.
	 */
	if (td->td_sched->ts_slptime) {
		int hzticks;

		hzticks = (ticks - td->td_sched->ts_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			td->td_sched->skg_slptime = SCHED_SLP_RUN_MAX;
			td->td_sched->skg_runtime = 1;
		} else {
			td->td_sched->skg_slptime += hzticks;
			sched_interact_update(td);
		}
		sched_priority(td);
		sched_slice(td->td_sched);
		td->td_sched->ts_slptime = 0;
	}
	setrunqueue(td, SRQ_BORING);
}
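
/*
 * Illustrative sketch (compiled out; the helper is hypothetical): the
 * sleep credit computed by sched_wakeup() above.  With hz = 1000, a
 * half-second sleep contributes (500 << 10) to skg_slptime; only a
 * much longer sleep reaches SCHED_SLP_RUN_MAX and resets the history:
 */
#if 0
static int
example_wakeup_credit(int slept_at, int now)
{
	int hzticks;

	/* Same shift sched_wakeup() applies to the tick delta. */
	hzticks = (now - slept_at) << 10;
	if (hzticks >= SCHED_SLP_RUN_MAX)
		return (SCHED_SLP_RUN_MAX);	/* history saturates */
	return (hzticks);			/* added to skg_slptime */
}
#endif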

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_fork_thread(td, child);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct td_sched *ts;
	struct td_sched *ts2;

	child->td_sched->skg_slptime = td->td_sched->skg_slptime;
	child->td_sched->skg_runtime = td->td_sched->skg_runtime;
	child->td_user_pri = td->td_user_pri;
	child->td_base_user_pri = td->td_base_user_pri;
	sched_interact_fork(child);
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);

	sched_newthread(child);

	ts = td->td_sched;
	ts2 = child->td_sched;
	ts2->ts_slice = 1;	/* Attempt to quickly learn interactivity. */
	ts2->ts_cpu = ts->ts_cpu;
	ts2->ts_runq = NULL;

	/* Grab our parent's cpu estimation information. */
	ts2->ts_ticks = ts->ts_ticks;
	ts2->ts_ltick = ts->ts_ltick;
	ts2->ts_ftick = ts->ts_ftick;
}

void
sched_class(struct thread *td, int class)
{
	struct tdq *tdq;
	struct td_sched *ts;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(td->td_pri_class);
	ts = td->td_sched;
	if (!((ts->ts_state != TSS_ONRUNQ &&
	    ts->ts_state != TSS_THREAD) || ts->ts_runq == NULL)) {
		tdq = TDQ_CPU(ts->ts_cpu);

#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because it could be changing to or from an interrupt
		 * class.
		 */
		if (ts->ts_state == TSS_ONRUNQ) {
			if (THREAD_CAN_MIGRATE(ts)) {
				tdq->tdq_transferable--;
				tdq->tdq_group->tdg_transferable--;
			}
			if (THREAD_CAN_MIGRATE(ts)) {
				tdq->tdq_transferable++;
				tdq->tdq_group->tdg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE) {
			tdq->tdq_load_timeshare--;
			tdq_nice_rem(tdq, td->td_proc->p_nice);
		}
		if (nclass == PRI_TIMESHARE) {
			tdq->tdq_load_timeshare++;
			tdq_nice_add(tdq, td->td_proc->p_nice);
		}
	}

	td->td_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *child)
{

	CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	sched_exit_thread(FIRST_THREAD_IN_PROC(p), child);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{
	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	td->td_sched->skg_runtime += child->td_sched->skg_runtime;
	sched_interact_update(td);
	tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched);
}
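
/*
 * Illustrative sketch (compiled out; the helper is hypothetical): the
 * heart of the parent/child accounting in sched_exit_thread() above.
 * Folding the child's run time back into the parent keeps a process
 * from laundering CPU use through repeated fork/exit:
 */
#if 0
static void
example_exit_charge(struct thread *parent, struct thread *child)
{
	parent->td_sched->skg_runtime += child->td_sched->skg_runtime;
	sched_interact_update(parent);	/* re-clamp the history window */
}
#endif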

void
sched_userret(struct thread *td)
{
	/*
	 * XXX we cheat slightly on the locking here to avoid locking in
	 * the usual case.  Setting td_priority here is essentially an
	 * incomplete workaround for not setting it properly elsewhere.
	 * Now that some interrupt handlers are threads, not setting it
	 * properly elsewhere can clobber it in the window between setting
	 * it here and returning to user mode, so don't waste time setting
	 * it perfectly here.
	 */
	KASSERT((td->td_flags & TDF_BORROWING) == 0,
	    ("thread with borrowed priority returning to userland"));
	if (td->td_priority != td->td_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = td->td_user_pri;
		td->td_base_pri = td->td_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

void
sched_clock(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
#ifdef SMP
	if (ticks >= bal_tick)
		sched_balance();
	if (ticks >= gbal_tick && balance_groups)
		sched_balance_groups();
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (tdq->tdq_assigned)
		tdq_assign(tdq);	/* Potentially sets NEEDRESCHED */
#endif
	ts = td->td_sched;

	/* Adjust ticks for pctcpu */
	ts->ts_ticks++;
	ts->ts_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ts->ts_ftick + SCHED_CPU_TICKS + hz < ts->ts_ltick)
		sched_pctcpu_update(ts);

	if (td->td_flags & TDF_IDLETD)
		return;
	/*
	 * We only do slicing for TIMESHARE threads.
	 */
	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the thread so that we can compute our
	 * interactivity.
	 */
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);

	/*
	 * We used up one time slice.
	 */
	if (--ts->ts_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	tdq_load_rem(tdq, ts);
	sched_priority(td);
	sched_slice(ts);
	if (SCHED_CURR(td, ts))
		ts->ts_runq = tdq->tdq_curr;
	else
		ts->ts_runq = tdq->tdq_next;
	tdq_load_add(tdq, ts);
	td->td_flags |= TDF_NEEDRESCHED;
}
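
/*
 * Illustrative sketch (compiled out; the helper is hypothetical): what
 * slice expiry in sched_clock() amounts to.  An interactive thread is
 * requeued on tdq_curr and runs again soon; a batch thread drops to
 * tdq_next and waits for the queue switch:
 */
#if 0
static void
example_slice_expiry(struct tdq *tdq, struct td_sched *ts)
{
	if (--ts->ts_slice > 0)
		return;				/* slice not used up yet */
	sched_priority(ts->ts_thread);		/* from interactivity score */
	sched_slice(ts);			/* hand out a fresh slice */
	ts->ts_runq = SCHED_CURR(ts->ts_thread, ts) ?
	    tdq->tdq_curr : tdq->tdq_next;
}
#endif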

int
sched_runnable(void)
{
	struct tdq *tdq;
	int load;

	load = 1;

	tdq = TDQ_SELF();
#ifdef SMP
	if (tdq->tdq_assigned) {
		mtx_lock_spin(&sched_lock);
		tdq_assign(tdq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (tdq->tdq_load > 0)
			goto out;
	} else
		if (tdq->tdq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

struct td_sched *
sched_choose(void)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
#ifdef SMP
restart:
	if (tdq->tdq_assigned)
		tdq_assign(tdq);
#endif
	ts = tdq_choose(tdq);
	if (ts) {
#ifdef SMP
		if (ts->ts_thread->td_pri_class == PRI_IDLE)
			if (tdq_idled(tdq) == 0)
				goto restart;
#endif
		tdq_runq_rem(tdq, ts);
		ts->ts_state = TSS_THREAD;
		ts->ts_flags &= ~TSF_PREEMPTED;
		return (ts);
	}
#ifdef SMP
	if (tdq_idled(tdq) == 0)
		goto restart;
#endif
	return (NULL);
}
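
/*
 * Illustrative sketch (compiled out; a rough, hypothetical rendering):
 * choosethread() in kern_switch.c (included at the bottom of this
 * file) is the consumer of sched_choose().  When NULL comes back the
 * CPU has nothing to run and falls back to its idle thread:
 */
#if 0
static struct thread *
example_choosethread(void)
{
	struct td_sched *ts;

	ts = sched_choose();
	if (ts != NULL)
		return (ts->ts_thread);
	return (PCPU_GET(idlethread));		/* nothing runnable */
}
#endif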

void
sched_add(struct thread *td, int flags)
{
	struct tdq *tdq;
	struct td_sched *ts;
	int preemptive;
	int canmigrate;
	int class;

	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	canmigrate = 1;
	preemptive = !(flags & SRQ_YIELDING);
	class = PRI_BASE(td->td_pri_class);
	tdq = TDQ_SELF();
	ts->ts_flags &= ~TSF_INTERNAL;
#ifdef SMP
	if (ts->ts_flags & TSF_ASSIGNED) {
		if (ts->ts_flags & TSF_REMOVED)
			ts->ts_flags &= ~TSF_REMOVED;
		return;
	}
	canmigrate = THREAD_CAN_MIGRATE(ts);
	/*
	 * Don't migrate running threads here.  Force the long term balancer
	 * to do it.
	 */
	if (ts->ts_flags & TSF_HOLD) {
		ts->ts_flags &= ~TSF_HOLD;
		canmigrate = 0;
	}
#endif
	KASSERT(ts->ts_state != TSS_ONRUNQ,
	    ("sched_add: thread %p (%s) already in run queue", td,
	    td->td_proc->p_comm));
	KASSERT(td->td_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ts->ts_runq == NULL,
	    ("sched_add: thread %p is still assigned to a run queue", td));
	if (flags & SRQ_PREEMPTED)
		ts->ts_flags |= TSF_PREEMPTED;
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ts->ts_runq = tdq->tdq_curr;
		ts->ts_slice = SCHED_SLICE_MAX;
		if (canmigrate)
			ts->ts_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
		if (SCHED_CURR(td, ts))
			ts->ts_runq = tdq->tdq_curr;
		else
			ts->ts_runq = tdq->tdq_next;
		break;
	case PRI_IDLE:
		/*
		 * This is for priority propagation.
		 */
		if (ts->ts_thread->td_priority < PRI_MIN_IDLE)
			ts->ts_runq = tdq->tdq_curr;
		else
			ts->ts_runq = &tdq->tdq_idle;
		ts->ts_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	/*
	 * If this thread is pinned or bound, notify the target cpu.
	 */
	if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid)) {
		ts->ts_runq = NULL;
		tdq_notify(ts, ts->ts_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (tdq->tdq_group->tdg_idlemask ==
		    tdq->tdq_group->tdg_cpumask)
			atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		tdq->tdq_group->tdg_idlemask &= ~PCPU_GET(cpumask);
	} else if (canmigrate && tdq->tdq_load > 1 && class != PRI_ITHD)
		if (tdq_transfer(tdq, ts, class))
			return;
	ts->ts_cpu = PCPU_GET(cpuid);
#endif
	if (td->td_priority < curthread->td_priority &&
	    ts->ts_runq == tdq->tdq_curr)
		curthread->td_flags |= TDF_NEEDRESCHED;
	if (preemptive && maybe_preempt(td))
		return;
	ts->ts_state = TSS_ONRUNQ;

	tdq_runq_add(tdq, ts, flags);
	tdq_load_add(tdq, ts);
}
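
/*
 * Illustrative sketch (compiled out; the helper is hypothetical): the
 * class-to-runq mapping performed by the switch in sched_add() above,
 * collected in one place:
 */
#if 0
static struct runq *
example_class_to_runq(struct tdq *tdq, struct thread *td, struct td_sched *ts)
{
	switch (PRI_BASE(td->td_pri_class)) {
	case PRI_ITHD:			/* interrupt and realtime threads */
	case PRI_REALTIME:		/* always go to the current queue */
		return (tdq->tdq_curr);
	case PRI_TIMESHARE:		/* interactive threads stay current */
		return (SCHED_CURR(td, ts) ? tdq->tdq_curr : tdq->tdq_next);
	case PRI_IDLE:			/* idle queue unless prio was lent */
		return (td->td_priority < PRI_MIN_IDLE ?
		    tdq->tdq_curr : &tdq->tdq_idle);
	default:
		return (NULL);
	}
}
#endif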

void
sched_rem(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	ts->ts_flags &= ~TSF_PREEMPTED;
	if (ts->ts_flags & TSF_ASSIGNED) {
		ts->ts_flags |= TSF_REMOVED;
		return;
	}
	KASSERT((ts->ts_state == TSS_ONRUNQ),
	    ("sched_rem: thread not on run queue"));

	ts->ts_state = TSS_THREAD;
	tdq = TDQ_CPU(ts->ts_cpu);
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct td_sched *ts;

	pctcpu = 0;
	ts = td->td_sched;
	if (ts == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ts->ts_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ts->ts_ftick + SCHED_CPU_TICKS < ts->ts_ltick ||
		    ts->ts_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ts);
		/* How many rticks per second? */
		rtick = min(ts->ts_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}
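
/*
 * Illustrative sketch (compiled out; the helper is hypothetical): the
 * fixed-point conversion in sched_pctcpu() above.  With realstathz =
 * 128 and rtick = 64 (the thread ran for half of the last second's
 * stathz ticks), the result is FSCALE / 2, which ps(1) reports as 50%:
 */
#if 0
static fixpt_t
example_pctcpu(int rtick, int realstathz)
{
	return ((FSCALE * ((FSCALE * rtick) / realstathz)) >> FSHIFT);
}
#endif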

void
sched_bind(struct thread *td, int cpu)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	ts->ts_flags |= TSF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ts->ts_state = TSS_THREAD;
	tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts);
	tdq_notify(ts, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_sched->ts_flags &= ~TSF_BOUND;
}

int
sched_is_bound(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	return (td->td_sched->ts_flags & TSF_BOUND);
}

void
sched_relinquish(struct thread *td)
{
	mtx_lock_spin(&sched_lock);
	if (td->td_pri_class == PRI_TIMESHARE)
		sched_prio(td, PRI_MAX_TIMESHARE);
	mi_switch(SW_VOL, NULL);
	mtx_unlock_spin(&sched_lock);
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= tdg_maxid; i++)
		total += TDQ_GROUP(i)->tdg_load;
	return (total);
#else
	return (TDQ_SELF()->tdq_sysload);
#endif
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}

void
sched_tick(void)
{
}
#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"