sched_ule.c revision 164939
1109864Sjeff/*- 2146955Sjeff * Copyright (c) 2002-2005, Jeffrey Roberson <jeff@freebsd.org> 3109864Sjeff * All rights reserved. 4109864Sjeff * 5109864Sjeff * Redistribution and use in source and binary forms, with or without 6109864Sjeff * modification, are permitted provided that the following conditions 7109864Sjeff * are met: 8109864Sjeff * 1. Redistributions of source code must retain the above copyright 9109864Sjeff * notice unmodified, this list of conditions, and the following 10109864Sjeff * disclaimer. 11109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright 12109864Sjeff * notice, this list of conditions and the following disclaimer in the 13109864Sjeff * documentation and/or other materials provided with the distribution. 14109864Sjeff * 15109864Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16109864Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17109864Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18109864Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19109864Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20109864Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21109864Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22109864Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23109864Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24109864Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25109864Sjeff */ 26109864Sjeff 27116182Sobrien#include <sys/cdefs.h> 28116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 164939 2006-12-06 06:55:59Z julian $"); 29116182Sobrien 30147565Speter#include "opt_hwpmc_hooks.h" 31147565Speter#include "opt_sched.h" 32134649Sscottl 33109864Sjeff#include <sys/param.h> 34109864Sjeff#include <sys/systm.h> 35131929Smarcel#include <sys/kdb.h> 36109864Sjeff#include <sys/kernel.h> 37109864Sjeff#include <sys/ktr.h> 38109864Sjeff#include <sys/lock.h> 39109864Sjeff#include <sys/mutex.h> 40109864Sjeff#include <sys/proc.h> 41112966Sjeff#include <sys/resource.h> 42122038Sjeff#include <sys/resourcevar.h> 43109864Sjeff#include <sys/sched.h> 44109864Sjeff#include <sys/smp.h> 45109864Sjeff#include <sys/sx.h> 46109864Sjeff#include <sys/sysctl.h> 47109864Sjeff#include <sys/sysproto.h> 48139453Sjhb#include <sys/turnstile.h> 49161599Sdavidxu#include <sys/umtx.h> 50109864Sjeff#include <sys/vmmeter.h> 51109864Sjeff#ifdef KTRACE 52109864Sjeff#include <sys/uio.h> 53109864Sjeff#include <sys/ktrace.h> 54109864Sjeff#endif 55109864Sjeff 56145256Sjkoshy#ifdef HWPMC_HOOKS 57145256Sjkoshy#include <sys/pmckern.h> 58145256Sjkoshy#endif 59145256Sjkoshy 60109864Sjeff#include <machine/cpu.h> 61121790Sjeff#include <machine/smp.h> 62109864Sjeff 63109864Sjeff/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 64109864Sjeff/* XXX This is bogus compatability crap for ps */ 65109864Sjeffstatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 66109864SjeffSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 67109864Sjeff 68109864Sjeffstatic void sched_setup(void *dummy); 69109864SjeffSYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 70109864Sjeff 71153533Sdavidxustatic void sched_initticks(void *dummy); 72153533SdavidxuSYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL) 73153533Sdavidxu 74132589Sscottlstatic SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 75113357Sjeff 76132589SscottlSYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 77132589Sscottl "Scheduler name"); 78130881Sscottl 79113357Sjeffstatic int slice_min = 1; 80113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 81113357Sjeff 82116365Sjeffstatic int slice_max = 10; 83113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 84113357Sjeff 85111857Sjeffint realstathz; 86153533Sdavidxuint tickincr = 1 << 10; 87111857Sjeff 88109864Sjeff/* 89146954Sjeff * The following datastructures are allocated within their parent structure 90146954Sjeff * but are scheduler specific. 91134791Sjulian */ 92146954Sjeff/* 93164936Sjulian * Thread scheduler specific section. 94164936Sjulian * fields int he thread structure that are specific to this scheduler. 95146954Sjeff */ 96164936Sjulianstruct td_sched { 97164936Sjulian TAILQ_ENTRY(td_sched) ts_procq; /* (j/z) Run queue. */ 98164936Sjulian int ts_flags; /* (j) TSF_* flags. */ 99164936Sjulian struct thread *ts_thread; /* (*) Active associated thread. */ 100164936Sjulian fixpt_t ts_pctcpu; /* (j) %cpu during p_swtime. */ 101164936Sjulian u_char ts_rqindex; /* (j) Run queue index. */ 102134791Sjulian enum { 103164936Sjulian TSS_THREAD = 0x0, /* slaved to thread state */ 104164936Sjulian TSS_ONRUNQ 105164936Sjulian } ts_state; /* (j) thread sched specific status. */ 106164936Sjulian int ts_slptime; 107164936Sjulian int ts_slice; 108164936Sjulian struct runq *ts_runq; 109164936Sjulian u_char ts_cpu; /* CPU that we have affinity for. */ 110134791Sjulian /* The following variables are only used for pctcpu calculation */ 111164936Sjulian int ts_ltick; /* Last tick that we were running on */ 112164936Sjulian int ts_ftick; /* First tick that we were running on */ 113164936Sjulian int ts_ticks; /* Tick count */ 114134791Sjulian 115163709Sjb /* originally from kg_sched */ 116163709Sjb int skg_slptime; /* Number of ticks we vol. slept */ 117163709Sjb int skg_runtime; /* Number of ticks we were running */ 118134791Sjulian}; 119164936Sjulian#define ts_assign ts_procq.tqe_next 120164936Sjulian/* flags kept in ts_flags */ 121164936Sjulian#define TSF_ASSIGNED 0x0001 /* Thread is being migrated. */ 122164936Sjulian#define TSF_BOUND 0x0002 /* Thread can not migrate. */ 123164936Sjulian#define TSF_XFERABLE 0x0004 /* Thread was added as transferable. */ 124164936Sjulian#define TSF_HOLD 0x0008 /* Thread is temporarily bound. */ 125164936Sjulian#define TSF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ 126164936Sjulian#define TSF_INTERNAL 0x0020 /* Thread added due to migration. */ 127164936Sjulian#define TSF_PREEMPTED 0x0040 /* Thread was preempted */ 128164936Sjulian#define TSF_DIDRUN 0x02000 /* Thread actually ran. */ 129164936Sjulian#define TSF_EXIT 0x04000 /* Thread is being killed. */ 130121790Sjeff 131164936Sjulianstatic struct td_sched td_sched0; 132109864Sjeff 133109864Sjeff/* 134116642Sjeff * The priority is primarily determined by the interactivity score. Thus, we 135116642Sjeff * give lower(better) priorities to kse groups that use less CPU. The nice 136116642Sjeff * value is then directly added to this to allow nice to have some effect 137116642Sjeff * on latency. 138111857Sjeff * 139111857Sjeff * PRI_RANGE: Total priority range for timeshare threads. 140116642Sjeff * PRI_NRESV: Number of nice values. 141111857Sjeff * PRI_BASE: The start of the dynamic range. 142109864Sjeff */ 143111857Sjeff#define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 144121869Sjeff#define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 145121869Sjeff#define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 146116642Sjeff#define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 147113357Sjeff#define SCHED_PRI_INTERACT(score) \ 148116642Sjeff ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 149109864Sjeff 150109864Sjeff/* 151111857Sjeff * These determine the interactivity of a process. 152109864Sjeff * 153110645Sjeff * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 154110645Sjeff * before throttling back. 155121868Sjeff * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 156116365Sjeff * INTERACT_MAX: Maximum interactivity value. Smaller is better. 157111857Sjeff * INTERACT_THRESH: Threshhold for placement on the current runq. 158109864Sjeff */ 159121126Sjeff#define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 160121868Sjeff#define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 161116365Sjeff#define SCHED_INTERACT_MAX (100) 162116365Sjeff#define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 163121126Sjeff#define SCHED_INTERACT_THRESH (30) 164111857Sjeff 165109864Sjeff/* 166109864Sjeff * These parameters and macros determine the size of the time slice that is 167109864Sjeff * granted to each thread. 168109864Sjeff * 169109864Sjeff * SLICE_MIN: Minimum time slice granted, in units of ticks. 170109864Sjeff * SLICE_MAX: Maximum time slice granted. 171109864Sjeff * SLICE_RANGE: Range of available time slices scaled by hz. 172112966Sjeff * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 173112966Sjeff * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 174121871Sjeff * SLICE_NTHRESH: The nice cutoff point for slice assignment. 175109864Sjeff */ 176113357Sjeff#define SCHED_SLICE_MIN (slice_min) 177113357Sjeff#define SCHED_SLICE_MAX (slice_max) 178125299Sjeff#define SCHED_SLICE_INTERACTIVE (slice_max) 179121871Sjeff#define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 180111857Sjeff#define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 181109864Sjeff#define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 182112966Sjeff#define SCHED_SLICE_NICE(nice) \ 183121871Sjeff (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 184109864Sjeff 185109864Sjeff/* 186134791Sjulian * This macro determines whether or not the thread belongs on the current or 187109864Sjeff * next run queue. 188109864Sjeff */ 189163709Sjb#define SCHED_INTERACTIVE(td) \ 190163709Sjb (sched_interact_score(td) < SCHED_INTERACT_THRESH) 191164936Sjulian#define SCHED_CURR(td, ts) \ 192164936Sjulian ((ts->ts_thread->td_flags & TDF_BORROWING) || \ 193164936Sjulian (ts->ts_flags & TSF_PREEMPTED) || SCHED_INTERACTIVE(td)) 194109864Sjeff 195109864Sjeff/* 196109864Sjeff * Cpu percentage computation macros and defines. 197109864Sjeff * 198109864Sjeff * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 199109864Sjeff * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 200109864Sjeff */ 201109864Sjeff 202112971Sjeff#define SCHED_CPU_TIME 10 203109864Sjeff#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 204109864Sjeff 205109864Sjeff/* 206164936Sjulian * tdq - per processor runqs and statistics. 207109864Sjeff */ 208164936Sjulianstruct tdq { 209113357Sjeff struct runq ksq_idle; /* Queue of IDLE threads. */ 210113357Sjeff struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 211113357Sjeff struct runq *ksq_next; /* Next timeshare queue. */ 212113357Sjeff struct runq *ksq_curr; /* Current queue. */ 213121896Sjeff int ksq_load_timeshare; /* Load for timeshare. */ 214113357Sjeff int ksq_load; /* Aggregate load. */ 215164936Sjulian short ksq_nice[SCHED_PRI_NRESV]; /* threadss in each nice bin. */ 216113357Sjeff short ksq_nicemin; /* Least nice. */ 217110267Sjeff#ifdef SMP 218123433Sjeff int ksq_transferable; 219164936Sjulian LIST_ENTRY(tdq) ksq_siblings; /* Next in tdq group. */ 220164936Sjulian struct tdq_group *ksq_group; /* Our processor group. */ 221164936Sjulian volatile struct td_sched *ksq_assigned; /* assigned by another CPU. */ 222125289Sjeff#else 223125289Sjeff int ksq_sysload; /* For loadavg, !ITHD load. */ 224110267Sjeff#endif 225109864Sjeff}; 226109864Sjeff 227123433Sjeff#ifdef SMP 228109864Sjeff/* 229164936Sjulian * tdq groups are groups of processors which can cheaply share threads. When 230123433Sjeff * one processor in the group goes idle it will check the runqs of the other 231123433Sjeff * processors in its group prior to halting and waiting for an interrupt. 232123433Sjeff * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 233123433Sjeff * In a numa environment we'd want an idle bitmap per group and a two tiered 234123433Sjeff * load balancer. 235123433Sjeff */ 236164936Sjulianstruct tdq_group { 237164936Sjulian int ksg_cpus; /* Count of CPUs in this tdq group. */ 238127498Smarcel cpumask_t ksg_cpumask; /* Mask of cpus in this group. */ 239127498Smarcel cpumask_t ksg_idlemask; /* Idle cpus in this group. */ 240127498Smarcel cpumask_t ksg_mask; /* Bit mask for first cpu. */ 241123487Sjeff int ksg_load; /* Total load of this group. */ 242123433Sjeff int ksg_transferable; /* Transferable load of this group. */ 243164936Sjulian LIST_HEAD(, tdq) ksg_members; /* Linked list of all members. */ 244123433Sjeff}; 245123433Sjeff#endif 246123433Sjeff 247123433Sjeff/* 248109864Sjeff * One kse queue per processor. 249109864Sjeff */ 250110028Sjeff#ifdef SMP 251164936Sjulianstatic cpumask_t tdq_idle; 252123487Sjeffstatic int ksg_maxid; 253164936Sjulianstatic struct tdq tdq_cpu[MAXCPU]; 254164936Sjulianstatic struct tdq_group tdq_groups[MAXCPU]; 255129982Sjeffstatic int bal_tick; 256129982Sjeffstatic int gbal_tick; 257139334Sjeffstatic int balance_groups; 258129982Sjeff 259164936Sjulian#define TDQ_SELF() (&tdq_cpu[PCPU_GET(cpuid)]) 260164936Sjulian#define TDQ_CPU(x) (&tdq_cpu[(x)]) 261164936Sjulian#define TDQ_ID(x) ((x) - tdq_cpu) 262164936Sjulian#define TDQ_GROUP(x) (&tdq_groups[(x)]) 263123433Sjeff#else /* !SMP */ 264164936Sjulianstatic struct tdq tdq_cpu; 265129982Sjeff 266164936Sjulian#define TDQ_SELF() (&tdq_cpu) 267164936Sjulian#define TDQ_CPU(x) (&tdq_cpu) 268110028Sjeff#endif 269109864Sjeff 270164936Sjulianstatic struct td_sched *sched_choose(void); /* XXX Should be thread * */ 271164936Sjulianstatic void sched_slice(struct td_sched *); 272163709Sjbstatic void sched_priority(struct thread *); 273146954Sjeffstatic void sched_thread_priority(struct thread *, u_char); 274163709Sjbstatic int sched_interact_score(struct thread *); 275163709Sjbstatic void sched_interact_update(struct thread *); 276163709Sjbstatic void sched_interact_fork(struct thread *); 277164936Sjulianstatic void sched_pctcpu_update(struct td_sched *); 278109864Sjeff 279110267Sjeff/* Operations on per processor queues */ 280164936Sjulianstatic struct td_sched * tdq_choose(struct tdq *); 281164936Sjulianstatic void tdq_setup(struct tdq *); 282164936Sjulianstatic void tdq_load_add(struct tdq *, struct td_sched *); 283164936Sjulianstatic void tdq_load_rem(struct tdq *, struct td_sched *); 284164936Sjulianstatic __inline void tdq_runq_add(struct tdq *, struct td_sched *, int); 285164936Sjulianstatic __inline void tdq_runq_rem(struct tdq *, struct td_sched *); 286164936Sjulianstatic void tdq_nice_add(struct tdq *, int); 287164936Sjulianstatic void tdq_nice_rem(struct tdq *, int); 288164936Sjulianvoid tdq_print(int cpu); 289110267Sjeff#ifdef SMP 290164936Sjulianstatic int tdq_transfer(struct tdq *, struct td_sched *, int); 291164936Sjulianstatic struct td_sched *runq_steal(struct runq *); 292129982Sjeffstatic void sched_balance(void); 293129982Sjeffstatic void sched_balance_groups(void); 294164936Sjulianstatic void sched_balance_group(struct tdq_group *); 295164936Sjulianstatic void sched_balance_pair(struct tdq *, struct tdq *); 296164936Sjulianstatic void tdq_move(struct tdq *, int); 297164936Sjulianstatic int tdq_idled(struct tdq *); 298164936Sjulianstatic void tdq_notify(struct td_sched *, int); 299164936Sjulianstatic void tdq_assign(struct tdq *); 300164936Sjulianstatic struct td_sched *tdq_steal(struct tdq *, int); 301164936Sjulian#define THREAD_CAN_MIGRATE(ts) \ 302164936Sjulian ((ts)->ts_thread->td_pinned == 0 && ((ts)->ts_flags & TSF_BOUND) == 0) 303121790Sjeff#endif 304110028Sjeff 305113357Sjeffvoid 306164936Sjuliantdq_print(int cpu) 307110267Sjeff{ 308164936Sjulian struct tdq *tdq; 309113357Sjeff int i; 310112994Sjeff 311164936Sjulian tdq = TDQ_CPU(cpu); 312112994Sjeff 313164936Sjulian printf("tdq:\n"); 314164936Sjulian printf("\tload: %d\n", tdq->ksq_load); 315164936Sjulian printf("\tload TIMESHARE: %d\n", tdq->ksq_load_timeshare); 316121896Sjeff#ifdef SMP 317164936Sjulian printf("\tload transferable: %d\n", tdq->ksq_transferable); 318121896Sjeff#endif 319164936Sjulian printf("\tnicemin:\t%d\n", tdq->ksq_nicemin); 320113357Sjeff printf("\tnice counts:\n"); 321121869Sjeff for (i = 0; i < SCHED_PRI_NRESV; i++) 322164936Sjulian if (tdq->ksq_nice[i]) 323113357Sjeff printf("\t\t%d = %d\n", 324164936Sjulian i - SCHED_PRI_NHALF, tdq->ksq_nice[i]); 325113357Sjeff} 326112994Sjeff 327122744Sjeffstatic __inline void 328164936Sjuliantdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags) 329122744Sjeff{ 330122744Sjeff#ifdef SMP 331164936Sjulian if (THREAD_CAN_MIGRATE(ts)) { 332164936Sjulian tdq->ksq_transferable++; 333164936Sjulian tdq->ksq_group->ksg_transferable++; 334164936Sjulian ts->ts_flags |= TSF_XFERABLE; 335123433Sjeff } 336122744Sjeff#endif 337164936Sjulian if (ts->ts_flags & TSF_PREEMPTED) 338148856Sdavidxu flags |= SRQ_PREEMPTED; 339164936Sjulian runq_add(ts->ts_runq, ts, flags); 340122744Sjeff} 341122744Sjeff 342122744Sjeffstatic __inline void 343164936Sjuliantdq_runq_rem(struct tdq *tdq, struct td_sched *ts) 344122744Sjeff{ 345122744Sjeff#ifdef SMP 346164936Sjulian if (ts->ts_flags & TSF_XFERABLE) { 347164936Sjulian tdq->ksq_transferable--; 348164936Sjulian tdq->ksq_group->ksg_transferable--; 349164936Sjulian ts->ts_flags &= ~TSF_XFERABLE; 350123433Sjeff } 351122744Sjeff#endif 352164936Sjulian runq_remove(ts->ts_runq, ts); 353122744Sjeff} 354122744Sjeff 355113357Sjeffstatic void 356164936Sjuliantdq_load_add(struct tdq *tdq, struct td_sched *ts) 357113357Sjeff{ 358121896Sjeff int class; 359115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 360164936Sjulian class = PRI_BASE(ts->ts_thread->td_pri_class); 361121896Sjeff if (class == PRI_TIMESHARE) 362164936Sjulian tdq->ksq_load_timeshare++; 363164936Sjulian tdq->ksq_load++; 364164936Sjulian CTR1(KTR_SCHED, "load: %d", tdq->ksq_load); 365164936Sjulian if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0) 366123487Sjeff#ifdef SMP 367164936Sjulian tdq->ksq_group->ksg_load++; 368125289Sjeff#else 369164936Sjulian tdq->ksq_sysload++; 370123487Sjeff#endif 371164936Sjulian if (ts->ts_thread->td_pri_class == PRI_TIMESHARE) 372164936Sjulian tdq_nice_add(tdq, ts->ts_thread->td_proc->p_nice); 373110267Sjeff} 374113357Sjeff 375112994Sjeffstatic void 376164936Sjuliantdq_load_rem(struct tdq *tdq, struct td_sched *ts) 377110267Sjeff{ 378121896Sjeff int class; 379115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 380164936Sjulian class = PRI_BASE(ts->ts_thread->td_pri_class); 381121896Sjeff if (class == PRI_TIMESHARE) 382164936Sjulian tdq->ksq_load_timeshare--; 383164936Sjulian if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0) 384123487Sjeff#ifdef SMP 385164936Sjulian tdq->ksq_group->ksg_load--; 386125289Sjeff#else 387164936Sjulian tdq->ksq_sysload--; 388123487Sjeff#endif 389164936Sjulian tdq->ksq_load--; 390164936Sjulian CTR1(KTR_SCHED, "load: %d", tdq->ksq_load); 391164936Sjulian ts->ts_runq = NULL; 392164936Sjulian if (ts->ts_thread->td_pri_class == PRI_TIMESHARE) 393164936Sjulian tdq_nice_rem(tdq, ts->ts_thread->td_proc->p_nice); 394110267Sjeff} 395110267Sjeff 396113357Sjeffstatic void 397164936Sjuliantdq_nice_add(struct tdq *tdq, int nice) 398110267Sjeff{ 399115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 400113357Sjeff /* Normalize to zero. */ 401164936Sjulian tdq->ksq_nice[nice + SCHED_PRI_NHALF]++; 402164936Sjulian if (nice < tdq->ksq_nicemin || tdq->ksq_load_timeshare == 1) 403164936Sjulian tdq->ksq_nicemin = nice; 404110267Sjeff} 405110267Sjeff 406113357Sjeffstatic void 407164936Sjuliantdq_nice_rem(struct tdq *tdq, int nice) 408110267Sjeff{ 409113357Sjeff int n; 410113357Sjeff 411115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 412113357Sjeff /* Normalize to zero. */ 413113357Sjeff n = nice + SCHED_PRI_NHALF; 414164936Sjulian tdq->ksq_nice[n]--; 415164936Sjulian KASSERT(tdq->ksq_nice[n] >= 0, ("Negative nice count.")); 416113357Sjeff 417113357Sjeff /* 418113357Sjeff * If this wasn't the smallest nice value or there are more in 419113357Sjeff * this bucket we can just return. Otherwise we have to recalculate 420113357Sjeff * the smallest nice. 421113357Sjeff */ 422164936Sjulian if (nice != tdq->ksq_nicemin || 423164936Sjulian tdq->ksq_nice[n] != 0 || 424164936Sjulian tdq->ksq_load_timeshare == 0) 425113357Sjeff return; 426113357Sjeff 427121869Sjeff for (; n < SCHED_PRI_NRESV; n++) 428164936Sjulian if (tdq->ksq_nice[n]) { 429164936Sjulian tdq->ksq_nicemin = n - SCHED_PRI_NHALF; 430113357Sjeff return; 431113357Sjeff } 432110267Sjeff} 433110267Sjeff 434113357Sjeff#ifdef SMP 435116069Sjeff/* 436122744Sjeff * sched_balance is a simple CPU load balancing algorithm. It operates by 437116069Sjeff * finding the least loaded and most loaded cpu and equalizing their load 438116069Sjeff * by migrating some processes. 439116069Sjeff * 440116069Sjeff * Dealing only with two CPUs at a time has two advantages. Firstly, most 441116069Sjeff * installations will only have 2 cpus. Secondly, load balancing too much at 442116069Sjeff * once can have an unpleasant effect on the system. The scheduler rarely has 443116069Sjeff * enough information to make perfect decisions. So this algorithm chooses 444116069Sjeff * algorithm simplicity and more gradual effects on load in larger systems. 445116069Sjeff * 446116069Sjeff * It could be improved by considering the priorities and slices assigned to 447116069Sjeff * each task prior to balancing them. There are many pathological cases with 448116069Sjeff * any approach and so the semi random algorithm below may work as well as any. 449116069Sjeff * 450116069Sjeff */ 451121790Sjeffstatic void 452129982Sjeffsched_balance(void) 453116069Sjeff{ 454164936Sjulian struct tdq_group *high; 455164936Sjulian struct tdq_group *low; 456164936Sjulian struct tdq_group *ksg; 457123487Sjeff int cnt; 458123487Sjeff int i; 459123487Sjeff 460139334Sjeff bal_tick = ticks + (random() % (hz * 2)); 461123487Sjeff if (smp_started == 0) 462139334Sjeff return; 463123487Sjeff low = high = NULL; 464123487Sjeff i = random() % (ksg_maxid + 1); 465123487Sjeff for (cnt = 0; cnt <= ksg_maxid; cnt++) { 466164936Sjulian ksg = TDQ_GROUP(i); 467123487Sjeff /* 468123487Sjeff * Find the CPU with the highest load that has some 469123487Sjeff * threads to transfer. 470123487Sjeff */ 471123487Sjeff if ((high == NULL || ksg->ksg_load > high->ksg_load) 472123487Sjeff && ksg->ksg_transferable) 473123487Sjeff high = ksg; 474123487Sjeff if (low == NULL || ksg->ksg_load < low->ksg_load) 475123487Sjeff low = ksg; 476123487Sjeff if (++i > ksg_maxid) 477123487Sjeff i = 0; 478123487Sjeff } 479123487Sjeff if (low != NULL && high != NULL && high != low) 480123487Sjeff sched_balance_pair(LIST_FIRST(&high->ksg_members), 481123487Sjeff LIST_FIRST(&low->ksg_members)); 482123487Sjeff} 483123487Sjeff 484123487Sjeffstatic void 485129982Sjeffsched_balance_groups(void) 486123487Sjeff{ 487123487Sjeff int i; 488123487Sjeff 489139334Sjeff gbal_tick = ticks + (random() % (hz * 2)); 490129982Sjeff mtx_assert(&sched_lock, MA_OWNED); 491123487Sjeff if (smp_started) 492123487Sjeff for (i = 0; i <= ksg_maxid; i++) 493164936Sjulian sched_balance_group(TDQ_GROUP(i)); 494123487Sjeff} 495123487Sjeff 496123487Sjeffstatic void 497164936Sjuliansched_balance_group(struct tdq_group *ksg) 498123487Sjeff{ 499164936Sjulian struct tdq *tdq; 500164936Sjulian struct tdq *high; 501164936Sjulian struct tdq *low; 502123487Sjeff int load; 503123487Sjeff 504123487Sjeff if (ksg->ksg_transferable == 0) 505123487Sjeff return; 506123487Sjeff low = NULL; 507123487Sjeff high = NULL; 508164936Sjulian LIST_FOREACH(tdq, &ksg->ksg_members, ksq_siblings) { 509164936Sjulian load = tdq->ksq_load; 510123487Sjeff if (high == NULL || load > high->ksq_load) 511164936Sjulian high = tdq; 512123487Sjeff if (low == NULL || load < low->ksq_load) 513164936Sjulian low = tdq; 514123487Sjeff } 515123487Sjeff if (high != NULL && low != NULL && high != low) 516123487Sjeff sched_balance_pair(high, low); 517123487Sjeff} 518123487Sjeff 519123487Sjeffstatic void 520164936Sjuliansched_balance_pair(struct tdq *high, struct tdq *low) 521123487Sjeff{ 522123433Sjeff int transferable; 523116069Sjeff int high_load; 524116069Sjeff int low_load; 525116069Sjeff int move; 526116069Sjeff int diff; 527116069Sjeff int i; 528116069Sjeff 529116069Sjeff /* 530123433Sjeff * If we're transfering within a group we have to use this specific 531164936Sjulian * tdq's transferable count, otherwise we can steal from other members 532123433Sjeff * of the group. 533123433Sjeff */ 534123487Sjeff if (high->ksq_group == low->ksq_group) { 535123487Sjeff transferable = high->ksq_transferable; 536123487Sjeff high_load = high->ksq_load; 537123487Sjeff low_load = low->ksq_load; 538123487Sjeff } else { 539123487Sjeff transferable = high->ksq_group->ksg_transferable; 540123487Sjeff high_load = high->ksq_group->ksg_load; 541123487Sjeff low_load = low->ksq_group->ksg_load; 542123487Sjeff } 543123433Sjeff if (transferable == 0) 544123487Sjeff return; 545123433Sjeff /* 546122744Sjeff * Determine what the imbalance is and then adjust that to how many 547123433Sjeff * kses we actually have to give up (transferable). 548122744Sjeff */ 549123487Sjeff diff = high_load - low_load; 550116069Sjeff move = diff / 2; 551116069Sjeff if (diff & 0x1) 552116069Sjeff move++; 553123433Sjeff move = min(move, transferable); 554116069Sjeff for (i = 0; i < move; i++) 555164936Sjulian tdq_move(high, TDQ_ID(low)); 556116069Sjeff return; 557116069Sjeff} 558116069Sjeff 559121790Sjeffstatic void 560164936Sjuliantdq_move(struct tdq *from, int cpu) 561116069Sjeff{ 562164936Sjulian struct tdq *tdq; 563164936Sjulian struct tdq *to; 564164936Sjulian struct td_sched *ts; 565116069Sjeff 566164936Sjulian tdq = from; 567164936Sjulian to = TDQ_CPU(cpu); 568164936Sjulian ts = tdq_steal(tdq, 1); 569164936Sjulian if (ts == NULL) { 570164936Sjulian struct tdq_group *ksg; 571123433Sjeff 572164936Sjulian ksg = tdq->ksq_group; 573164936Sjulian LIST_FOREACH(tdq, &ksg->ksg_members, ksq_siblings) { 574164936Sjulian if (tdq == from || tdq->ksq_transferable == 0) 575123433Sjeff continue; 576164936Sjulian ts = tdq_steal(tdq, 1); 577123433Sjeff break; 578123433Sjeff } 579164936Sjulian if (ts == NULL) 580164936Sjulian panic("tdq_move: No threads available with a " 581123433Sjeff "transferable count of %d\n", 582123433Sjeff ksg->ksg_transferable); 583123433Sjeff } 584164936Sjulian if (tdq == to) 585123433Sjeff return; 586164936Sjulian ts->ts_state = TSS_THREAD; 587164936Sjulian tdq_runq_rem(tdq, ts); 588164936Sjulian tdq_load_rem(tdq, ts); 589164936Sjulian tdq_notify(ts, cpu); 590116069Sjeff} 591110267Sjeff 592123433Sjeffstatic int 593164936Sjuliantdq_idled(struct tdq *tdq) 594121790Sjeff{ 595164936Sjulian struct tdq_group *ksg; 596164936Sjulian struct tdq *steal; 597164936Sjulian struct td_sched *ts; 598123433Sjeff 599164936Sjulian ksg = tdq->ksq_group; 600123433Sjeff /* 601123433Sjeff * If we're in a cpu group, try and steal kses from another cpu in 602123433Sjeff * the group before idling. 603123433Sjeff */ 604123433Sjeff if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 605123433Sjeff LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 606164936Sjulian if (steal == tdq || steal->ksq_transferable == 0) 607123433Sjeff continue; 608164936Sjulian ts = tdq_steal(steal, 0); 609164936Sjulian if (ts == NULL) 610123433Sjeff continue; 611164936Sjulian ts->ts_state = TSS_THREAD; 612164936Sjulian tdq_runq_rem(steal, ts); 613164936Sjulian tdq_load_rem(steal, ts); 614164936Sjulian ts->ts_cpu = PCPU_GET(cpuid); 615164936Sjulian ts->ts_flags |= TSF_INTERNAL | TSF_HOLD; 616164936Sjulian sched_add(ts->ts_thread, SRQ_YIELDING); 617123433Sjeff return (0); 618123433Sjeff } 619123433Sjeff } 620123433Sjeff /* 621123433Sjeff * We only set the idled bit when all of the cpus in the group are 622164936Sjulian * idle. Otherwise we could get into a situation where a thread bounces 623123433Sjeff * back and forth between two idle cores on seperate physical CPUs. 624123433Sjeff */ 625123433Sjeff ksg->ksg_idlemask |= PCPU_GET(cpumask); 626123433Sjeff if (ksg->ksg_idlemask != ksg->ksg_cpumask) 627123433Sjeff return (1); 628164936Sjulian atomic_set_int(&tdq_idle, ksg->ksg_mask); 629123433Sjeff return (1); 630121790Sjeff} 631121790Sjeff 632121790Sjeffstatic void 633164936Sjuliantdq_assign(struct tdq *tdq) 634121790Sjeff{ 635164936Sjulian struct td_sched *nts; 636164936Sjulian struct td_sched *ts; 637121790Sjeff 638121790Sjeff do { 639164936Sjulian *(volatile struct td_sched **)&ts = tdq->ksq_assigned; 640164936Sjulian } while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->ksq_assigned, 641164936Sjulian (uintptr_t)ts, (uintptr_t)NULL)); 642164936Sjulian for (; ts != NULL; ts = nts) { 643164936Sjulian nts = ts->ts_assign; 644164936Sjulian tdq->ksq_group->ksg_load--; 645164936Sjulian tdq->ksq_load--; 646164936Sjulian ts->ts_flags &= ~TSF_ASSIGNED; 647164936Sjulian if (ts->ts_flags & TSF_REMOVED) { 648164936Sjulian ts->ts_flags &= ~TSF_REMOVED; 649148603Sdavidxu continue; 650148603Sdavidxu } 651164936Sjulian ts->ts_flags |= TSF_INTERNAL | TSF_HOLD; 652164936Sjulian sched_add(ts->ts_thread, SRQ_YIELDING); 653121790Sjeff } 654121790Sjeff} 655121790Sjeff 656121790Sjeffstatic void 657164936Sjuliantdq_notify(struct td_sched *ts, int cpu) 658121790Sjeff{ 659164936Sjulian struct tdq *tdq; 660121790Sjeff struct thread *td; 661121790Sjeff struct pcpu *pcpu; 662139334Sjeff int class; 663133427Sjeff int prio; 664121790Sjeff 665164936Sjulian tdq = TDQ_CPU(cpu); 666139334Sjeff /* XXX */ 667164936Sjulian class = PRI_BASE(ts->ts_thread->td_pri_class); 668139334Sjeff if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 669164936Sjulian (tdq_idle & tdq->ksq_group->ksg_mask)) 670164936Sjulian atomic_clear_int(&tdq_idle, tdq->ksq_group->ksg_mask); 671164936Sjulian tdq->ksq_group->ksg_load++; 672164936Sjulian tdq->ksq_load++; 673164936Sjulian ts->ts_cpu = cpu; 674164936Sjulian ts->ts_flags |= TSF_ASSIGNED; 675164936Sjulian prio = ts->ts_thread->td_priority; 676121790Sjeff 677121790Sjeff /* 678164936Sjulian * Place a thread on another cpu's queue and force a resched. 679121790Sjeff */ 680121790Sjeff do { 681164936Sjulian *(volatile struct td_sched **)&ts->ts_assign = tdq->ksq_assigned; 682164936Sjulian } while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->ksq_assigned, 683164936Sjulian (uintptr_t)ts->ts_assign, (uintptr_t)ts)); 684133427Sjeff /* 685133427Sjeff * Without sched_lock we could lose a race where we set NEEDRESCHED 686133427Sjeff * on a thread that is switched out before the IPI is delivered. This 687133427Sjeff * would lead us to miss the resched. This will be a problem once 688133427Sjeff * sched_lock is pushed down. 689133427Sjeff */ 690121790Sjeff pcpu = pcpu_find(cpu); 691121790Sjeff td = pcpu->pc_curthread; 692164936Sjulian if (ts->ts_thread->td_priority < td->td_priority || 693121790Sjeff td == pcpu->pc_idlethread) { 694121790Sjeff td->td_flags |= TDF_NEEDRESCHED; 695121790Sjeff ipi_selected(1 << cpu, IPI_AST); 696121790Sjeff } 697121790Sjeff} 698121790Sjeff 699164936Sjulianstatic struct td_sched * 700121790Sjeffrunq_steal(struct runq *rq) 701121790Sjeff{ 702121790Sjeff struct rqhead *rqh; 703121790Sjeff struct rqbits *rqb; 704164936Sjulian struct td_sched *ts; 705121790Sjeff int word; 706121790Sjeff int bit; 707121790Sjeff 708121790Sjeff mtx_assert(&sched_lock, MA_OWNED); 709121790Sjeff rqb = &rq->rq_status; 710121790Sjeff for (word = 0; word < RQB_LEN; word++) { 711121790Sjeff if (rqb->rqb_bits[word] == 0) 712121790Sjeff continue; 713121790Sjeff for (bit = 0; bit < RQB_BPW; bit++) { 714123231Speter if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 715121790Sjeff continue; 716121790Sjeff rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 717164936Sjulian TAILQ_FOREACH(ts, rqh, ts_procq) { 718164936Sjulian if (THREAD_CAN_MIGRATE(ts)) 719164936Sjulian return (ts); 720121790Sjeff } 721121790Sjeff } 722121790Sjeff } 723121790Sjeff return (NULL); 724121790Sjeff} 725121790Sjeff 726164936Sjulianstatic struct td_sched * 727164936Sjuliantdq_steal(struct tdq *tdq, int stealidle) 728121790Sjeff{ 729164936Sjulian struct td_sched *ts; 730121790Sjeff 731123433Sjeff /* 732123433Sjeff * Steal from next first to try to get a non-interactive task that 733123433Sjeff * may not have run for a while. 734123433Sjeff */ 735164936Sjulian if ((ts = runq_steal(tdq->ksq_next)) != NULL) 736164936Sjulian return (ts); 737164936Sjulian if ((ts = runq_steal(tdq->ksq_curr)) != NULL) 738164936Sjulian return (ts); 739123433Sjeff if (stealidle) 740164936Sjulian return (runq_steal(&tdq->ksq_idle)); 741123433Sjeff return (NULL); 742121790Sjeff} 743123433Sjeff 744123433Sjeffint 745164936Sjuliantdq_transfer(struct tdq *tdq, struct td_sched *ts, int class) 746123433Sjeff{ 747164936Sjulian struct tdq_group *nksg; 748164936Sjulian struct tdq_group *ksg; 749164936Sjulian struct tdq *old; 750123433Sjeff int cpu; 751139334Sjeff int idx; 752123433Sjeff 753123685Sjeff if (smp_started == 0) 754123685Sjeff return (0); 755123433Sjeff cpu = 0; 756123433Sjeff /* 757133427Sjeff * If our load exceeds a certain threshold we should attempt to 758133427Sjeff * reassign this thread. The first candidate is the cpu that 759133427Sjeff * originally ran the thread. If it is idle, assign it there, 760133427Sjeff * otherwise, pick an idle cpu. 761133427Sjeff * 762133427Sjeff * The threshold at which we start to reassign kses has a large impact 763123685Sjeff * on the overall performance of the system. Tuned too high and 764123685Sjeff * some CPUs may idle. Too low and there will be excess migration 765128055Scognet * and context switches. 766123685Sjeff */ 767164936Sjulian old = TDQ_CPU(ts->ts_cpu); 768139334Sjeff nksg = old->ksq_group; 769164936Sjulian ksg = tdq->ksq_group; 770164936Sjulian if (tdq_idle) { 771164936Sjulian if (tdq_idle & nksg->ksg_mask) { 772139334Sjeff cpu = ffs(nksg->ksg_idlemask); 773139334Sjeff if (cpu) { 774139334Sjeff CTR2(KTR_SCHED, 775164936Sjulian "tdq_transfer: %p found old cpu %X " 776164936Sjulian "in idlemask.", ts, cpu); 777133427Sjeff goto migrate; 778139334Sjeff } 779133427Sjeff } 780123433Sjeff /* 781123433Sjeff * Multiple cpus could find this bit simultaneously 782123433Sjeff * but the race shouldn't be terrible. 783123433Sjeff */ 784164936Sjulian cpu = ffs(tdq_idle); 785139334Sjeff if (cpu) { 786164936Sjulian CTR2(KTR_SCHED, "tdq_transfer: %p found %X " 787164936Sjulian "in idlemask.", ts, cpu); 788133427Sjeff goto migrate; 789139334Sjeff } 790123433Sjeff } 791139334Sjeff idx = 0; 792139334Sjeff#if 0 793164936Sjulian if (old->ksq_load < tdq->ksq_load) { 794164936Sjulian cpu = ts->ts_cpu + 1; 795164936Sjulian CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X " 796164936Sjulian "load less than ours.", ts, cpu); 797139334Sjeff goto migrate; 798139334Sjeff } 799123433Sjeff /* 800139334Sjeff * No new CPU was found, look for one with less load. 801139334Sjeff */ 802139334Sjeff for (idx = 0; idx <= ksg_maxid; idx++) { 803164936Sjulian nksg = TDQ_GROUP(idx); 804139334Sjeff if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) { 805139334Sjeff cpu = ffs(nksg->ksg_cpumask); 806164936Sjulian CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less " 807164936Sjulian "than ours.", ts, cpu); 808139334Sjeff goto migrate; 809139334Sjeff } 810139334Sjeff } 811139334Sjeff#endif 812139334Sjeff /* 813123433Sjeff * If another cpu in this group has idled, assign a thread over 814123433Sjeff * to them after checking to see if there are idled groups. 815123433Sjeff */ 816133427Sjeff if (ksg->ksg_idlemask) { 817123433Sjeff cpu = ffs(ksg->ksg_idlemask); 818139334Sjeff if (cpu) { 819164936Sjulian CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in " 820164936Sjulian "group.", ts, cpu); 821133427Sjeff goto migrate; 822139334Sjeff } 823123433Sjeff } 824133427Sjeff return (0); 825133427Sjeffmigrate: 826133427Sjeff /* 827123433Sjeff * Now that we've found an idle CPU, migrate the thread. 828123433Sjeff */ 829133427Sjeff cpu--; 830164936Sjulian ts->ts_runq = NULL; 831164936Sjulian tdq_notify(ts, cpu); 832133427Sjeff 833133427Sjeff return (1); 834123433Sjeff} 835123433Sjeff 836121790Sjeff#endif /* SMP */ 837121790Sjeff 838117326Sjeff/* 839121790Sjeff * Pick the highest priority task we have and return it. 840117326Sjeff */ 841117326Sjeff 842164936Sjulianstatic struct td_sched * 843164936Sjuliantdq_choose(struct tdq *tdq) 844110267Sjeff{ 845137067Sjeff struct runq *swap; 846164936Sjulian struct td_sched *ts; 847137067Sjeff int nice; 848110267Sjeff 849115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 850113357Sjeff swap = NULL; 851112994Sjeff 852113357Sjeff for (;;) { 853164936Sjulian ts = runq_choose(tdq->ksq_curr); 854164936Sjulian if (ts == NULL) { 855113357Sjeff /* 856131473Sjhb * We already swapped once and didn't get anywhere. 857113357Sjeff */ 858113357Sjeff if (swap) 859113357Sjeff break; 860164936Sjulian swap = tdq->ksq_curr; 861164936Sjulian tdq->ksq_curr = tdq->ksq_next; 862164936Sjulian tdq->ksq_next = swap; 863113357Sjeff continue; 864113357Sjeff } 865113357Sjeff /* 866164936Sjulian * If we encounter a slice of 0 the td_sched is in a 867164936Sjulian * TIMESHARE td_sched group and its nice was too far out 868113357Sjeff * of the range that receives slices. 869113357Sjeff */ 870164936Sjulian nice = ts->ts_thread->td_proc->p_nice + (0 - tdq->ksq_nicemin); 871150442Sdavidxu#if 0 872164936Sjulian if (ts->ts_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 873164936Sjulian ts->ts_thread->td_proc->p_nice != 0)) { 874164936Sjulian runq_remove(ts->ts_runq, ts); 875164936Sjulian sched_slice(ts); 876164936Sjulian ts->ts_runq = tdq->ksq_next; 877164936Sjulian runq_add(ts->ts_runq, ts, 0); 878113357Sjeff continue; 879113357Sjeff } 880150442Sdavidxu#endif 881164936Sjulian return (ts); 882110267Sjeff } 883110267Sjeff 884164936Sjulian return (runq_choose(&tdq->ksq_idle)); 885110267Sjeff} 886110267Sjeff 887109864Sjeffstatic void 888164936Sjuliantdq_setup(struct tdq *tdq) 889110028Sjeff{ 890164936Sjulian runq_init(&tdq->ksq_timeshare[0]); 891164936Sjulian runq_init(&tdq->ksq_timeshare[1]); 892164936Sjulian runq_init(&tdq->ksq_idle); 893164936Sjulian tdq->ksq_curr = &tdq->ksq_timeshare[0]; 894164936Sjulian tdq->ksq_next = &tdq->ksq_timeshare[1]; 895164936Sjulian tdq->ksq_load = 0; 896164936Sjulian tdq->ksq_load_timeshare = 0; 897110028Sjeff} 898110028Sjeff 899110028Sjeffstatic void 900109864Sjeffsched_setup(void *dummy) 901109864Sjeff{ 902117313Sjeff#ifdef SMP 903109864Sjeff int i; 904117313Sjeff#endif 905109864Sjeff 906153533Sdavidxu /* 907153533Sdavidxu * To avoid divide-by-zero, we set realstathz a dummy value 908153533Sdavidxu * in case which sched_clock() called before sched_initticks(). 909153533Sdavidxu */ 910153533Sdavidxu realstathz = hz; 911116946Sjeff slice_min = (hz/100); /* 10ms */ 912116946Sjeff slice_max = (hz/7); /* ~140ms */ 913111857Sjeff 914117237Sjeff#ifdef SMP 915123487Sjeff balance_groups = 0; 916123433Sjeff /* 917164936Sjulian * Initialize the tdqs. 918123433Sjeff */ 919123433Sjeff for (i = 0; i < MAXCPU; i++) { 920164936Sjulian struct tdq *ksq; 921123433Sjeff 922164936Sjulian ksq = &tdq_cpu[i]; 923123433Sjeff ksq->ksq_assigned = NULL; 924164936Sjulian tdq_setup(&tdq_cpu[i]); 925123433Sjeff } 926117237Sjeff if (smp_topology == NULL) { 927164936Sjulian struct tdq_group *ksg; 928164936Sjulian struct tdq *ksq; 929139334Sjeff int cpus; 930123433Sjeff 931139334Sjeff for (cpus = 0, i = 0; i < MAXCPU; i++) { 932139334Sjeff if (CPU_ABSENT(i)) 933139334Sjeff continue; 934164936Sjulian ksq = &tdq_cpu[i]; 935164936Sjulian ksg = &tdq_groups[cpus]; 936123433Sjeff /* 937164936Sjulian * Setup a tdq group with one member. 938123433Sjeff */ 939123433Sjeff ksq->ksq_transferable = 0; 940123433Sjeff ksq->ksq_group = ksg; 941123433Sjeff ksg->ksg_cpus = 1; 942123433Sjeff ksg->ksg_idlemask = 0; 943123433Sjeff ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 944123487Sjeff ksg->ksg_load = 0; 945123433Sjeff ksg->ksg_transferable = 0; 946123433Sjeff LIST_INIT(&ksg->ksg_members); 947123433Sjeff LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 948139334Sjeff cpus++; 949117237Sjeff } 950139334Sjeff ksg_maxid = cpus - 1; 951117237Sjeff } else { 952164936Sjulian struct tdq_group *ksg; 953123433Sjeff struct cpu_group *cg; 954117237Sjeff int j; 955113357Sjeff 956117237Sjeff for (i = 0; i < smp_topology->ct_count; i++) { 957117237Sjeff cg = &smp_topology->ct_group[i]; 958164936Sjulian ksg = &tdq_groups[i]; 959123433Sjeff /* 960123433Sjeff * Initialize the group. 961123433Sjeff */ 962123433Sjeff ksg->ksg_idlemask = 0; 963123487Sjeff ksg->ksg_load = 0; 964123433Sjeff ksg->ksg_transferable = 0; 965123433Sjeff ksg->ksg_cpus = cg->cg_count; 966123433Sjeff ksg->ksg_cpumask = cg->cg_mask; 967123433Sjeff LIST_INIT(&ksg->ksg_members); 968123433Sjeff /* 969123433Sjeff * Find all of the group members and add them. 970123433Sjeff */ 971123433Sjeff for (j = 0; j < MAXCPU; j++) { 972123433Sjeff if ((cg->cg_mask & (1 << j)) != 0) { 973123433Sjeff if (ksg->ksg_mask == 0) 974123433Sjeff ksg->ksg_mask = 1 << j; 975164936Sjulian tdq_cpu[j].ksq_transferable = 0; 976164936Sjulian tdq_cpu[j].ksq_group = ksg; 977123433Sjeff LIST_INSERT_HEAD(&ksg->ksg_members, 978164936Sjulian &tdq_cpu[j], ksq_siblings); 979123433Sjeff } 980123433Sjeff } 981123487Sjeff if (ksg->ksg_cpus > 1) 982123487Sjeff balance_groups = 1; 983117237Sjeff } 984123487Sjeff ksg_maxid = smp_topology->ct_count - 1; 985117237Sjeff } 986123487Sjeff /* 987123487Sjeff * Stagger the group and global load balancer so they do not 988123487Sjeff * interfere with each other. 989123487Sjeff */ 990129982Sjeff bal_tick = ticks + hz; 991123487Sjeff if (balance_groups) 992129982Sjeff gbal_tick = ticks + (hz / 2); 993117237Sjeff#else 994164936Sjulian tdq_setup(TDQ_SELF()); 995116069Sjeff#endif 996117237Sjeff mtx_lock_spin(&sched_lock); 997164936Sjulian tdq_load_add(TDQ_SELF(), &td_sched0); 998117237Sjeff mtx_unlock_spin(&sched_lock); 999109864Sjeff} 1000109864Sjeff 1001153533Sdavidxu/* ARGSUSED */ 1002153533Sdavidxustatic void 1003153533Sdavidxusched_initticks(void *dummy) 1004153533Sdavidxu{ 1005153533Sdavidxu mtx_lock_spin(&sched_lock); 1006153533Sdavidxu realstathz = stathz ? stathz : hz; 1007153533Sdavidxu slice_min = (realstathz/100); /* 10ms */ 1008153533Sdavidxu slice_max = (realstathz/7); /* ~140ms */ 1009153533Sdavidxu 1010153533Sdavidxu tickincr = (hz << 10) / realstathz; 1011153533Sdavidxu /* 1012153533Sdavidxu * XXX This does not work for values of stathz that are much 1013153533Sdavidxu * larger than hz. 1014153533Sdavidxu */ 1015153533Sdavidxu if (tickincr == 0) 1016153533Sdavidxu tickincr = 1; 1017153533Sdavidxu mtx_unlock_spin(&sched_lock); 1018153533Sdavidxu} 1019153533Sdavidxu 1020153533Sdavidxu 1021109864Sjeff/* 1022109864Sjeff * Scale the scheduling priority according to the "interactivity" of this 1023109864Sjeff * process. 1024109864Sjeff */ 1025113357Sjeffstatic void 1026163709Sjbsched_priority(struct thread *td) 1027109864Sjeff{ 1028109864Sjeff int pri; 1029109864Sjeff 1030163709Sjb if (td->td_pri_class != PRI_TIMESHARE) 1031113357Sjeff return; 1032109864Sjeff 1033163709Sjb pri = SCHED_PRI_INTERACT(sched_interact_score(td)); 1034111857Sjeff pri += SCHED_PRI_BASE; 1035163709Sjb pri += td->td_proc->p_nice; 1036109864Sjeff 1037109864Sjeff if (pri > PRI_MAX_TIMESHARE) 1038109864Sjeff pri = PRI_MAX_TIMESHARE; 1039109864Sjeff else if (pri < PRI_MIN_TIMESHARE) 1040109864Sjeff pri = PRI_MIN_TIMESHARE; 1041109864Sjeff 1042163709Sjb sched_user_prio(td, pri); 1043109864Sjeff 1044113357Sjeff return; 1045109864Sjeff} 1046109864Sjeff 1047109864Sjeff/* 1048164936Sjulian * Calculate a time slice based on the properties of the process 1049164936Sjulian * and the runq that we're on. This is only for PRI_TIMESHARE threads. 1050109864Sjeff */ 1051112966Sjeffstatic void 1052164936Sjuliansched_slice(struct td_sched *ts) 1053109864Sjeff{ 1054164936Sjulian struct tdq *tdq; 1055163709Sjb struct thread *td; 1056109864Sjeff 1057164936Sjulian td = ts->ts_thread; 1058164936Sjulian tdq = TDQ_CPU(ts->ts_cpu); 1059109864Sjeff 1060163709Sjb if (td->td_flags & TDF_BORROWING) { 1061164936Sjulian ts->ts_slice = SCHED_SLICE_MIN; 1062138842Sjeff return; 1063138842Sjeff } 1064138842Sjeff 1065112966Sjeff /* 1066112966Sjeff * Rationale: 1067164936Sjulian * Threads in interactive procs get a minimal slice so that we 1068112966Sjeff * quickly notice if it abuses its advantage. 1069112966Sjeff * 1070164936Sjulian * Threads in non-interactive procs are assigned a slice that is 1071164936Sjulian * based on the procs nice value relative to the least nice procs 1072112966Sjeff * on the run queue for this cpu. 1073112966Sjeff * 1074164936Sjulian * If the thread is less nice than all others it gets the maximum 1075164936Sjulian * slice and other threads will adjust their slice relative to 1076112966Sjeff * this when they first expire. 1077112966Sjeff * 1078112966Sjeff * There is 20 point window that starts relative to the least 1079164936Sjulian * nice td_sched on the run queue. Slice size is determined by 1080164936Sjulian * the td_sched distance from the last nice thread. 1081112966Sjeff * 1082164936Sjulian * If the td_sched is outside of the window it will get no slice 1083121871Sjeff * and will be reevaluated each time it is selected on the 1084164936Sjulian * run queue. The exception to this is nice 0 procs when 1085121871Sjeff * a nice -20 is running. They are always granted a minimum 1086121871Sjeff * slice. 1087112966Sjeff */ 1088163709Sjb if (!SCHED_INTERACTIVE(td)) { 1089112966Sjeff int nice; 1090112966Sjeff 1091164936Sjulian nice = td->td_proc->p_nice + (0 - tdq->ksq_nicemin); 1092164936Sjulian if (tdq->ksq_load_timeshare == 0 || 1093164936Sjulian td->td_proc->p_nice < tdq->ksq_nicemin) 1094164936Sjulian ts->ts_slice = SCHED_SLICE_MAX; 1095121871Sjeff else if (nice <= SCHED_SLICE_NTHRESH) 1096164936Sjulian ts->ts_slice = SCHED_SLICE_NICE(nice); 1097163709Sjb else if (td->td_proc->p_nice == 0) 1098164936Sjulian ts->ts_slice = SCHED_SLICE_MIN; 1099112966Sjeff else 1100164936Sjulian ts->ts_slice = SCHED_SLICE_MIN; /* 0 */ 1101112966Sjeff } else 1102164936Sjulian ts->ts_slice = SCHED_SLICE_INTERACTIVE; 1103112966Sjeff 1104112966Sjeff return; 1105109864Sjeff} 1106109864Sjeff 1107121868Sjeff/* 1108121868Sjeff * This routine enforces a maximum limit on the amount of scheduling history 1109121868Sjeff * kept. It is called after either the slptime or runtime is adjusted. 1110121868Sjeff * This routine will not operate correctly when slp or run times have been 1111121868Sjeff * adjusted to more than double their maximum. 1112121868Sjeff */ 1113116463Sjeffstatic void 1114163709Sjbsched_interact_update(struct thread *td) 1115116463Sjeff{ 1116121868Sjeff int sum; 1117121605Sjeff 1118163709Sjb sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1119121868Sjeff if (sum < SCHED_SLP_RUN_MAX) 1120121868Sjeff return; 1121121868Sjeff /* 1122121868Sjeff * If we have exceeded by more than 1/5th then the algorithm below 1123121868Sjeff * will not bring us back into range. Dividing by two here forces 1124133427Sjeff * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1125121868Sjeff */ 1126127850Sjeff if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1127163709Sjb td->td_sched->skg_runtime /= 2; 1128163709Sjb td->td_sched->skg_slptime /= 2; 1129121868Sjeff return; 1130116463Sjeff } 1131163709Sjb td->td_sched->skg_runtime = (td->td_sched->skg_runtime / 5) * 4; 1132163709Sjb td->td_sched->skg_slptime = (td->td_sched->skg_slptime / 5) * 4; 1133116463Sjeff} 1134116463Sjeff 1135121868Sjeffstatic void 1136163709Sjbsched_interact_fork(struct thread *td) 1137121868Sjeff{ 1138121868Sjeff int ratio; 1139121868Sjeff int sum; 1140121868Sjeff 1141163709Sjb sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime; 1142121868Sjeff if (sum > SCHED_SLP_RUN_FORK) { 1143121868Sjeff ratio = sum / SCHED_SLP_RUN_FORK; 1144163709Sjb td->td_sched->skg_runtime /= ratio; 1145163709Sjb td->td_sched->skg_slptime /= ratio; 1146121868Sjeff } 1147121868Sjeff} 1148121868Sjeff 1149111857Sjeffstatic int 1150163709Sjbsched_interact_score(struct thread *td) 1151111857Sjeff{ 1152116365Sjeff int div; 1153111857Sjeff 1154163709Sjb if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) { 1155163709Sjb div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF); 1156116365Sjeff return (SCHED_INTERACT_HALF + 1157163709Sjb (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div))); 1158163709Sjb } if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) { 1159163709Sjb div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF); 1160163709Sjb return (td->td_sched->skg_runtime / div); 1161111857Sjeff } 1162111857Sjeff 1163116365Sjeff /* 1164116365Sjeff * This can happen if slptime and runtime are 0. 1165116365Sjeff */ 1166116365Sjeff return (0); 1167111857Sjeff 1168111857Sjeff} 1169111857Sjeff 1170113357Sjeff/* 1171134791Sjulian * Very early in the boot some setup of scheduler-specific 1172134791Sjulian * parts of proc0 and of soem scheduler resources needs to be done. 1173134791Sjulian * Called from: 1174134791Sjulian * proc0_init() 1175134791Sjulian */ 1176134791Sjulianvoid 1177134791Sjulianschedinit(void) 1178134791Sjulian{ 1179134791Sjulian /* 1180134791Sjulian * Set up the scheduler specific parts of proc0. 1181134791Sjulian */ 1182136167Sjulian proc0.p_sched = NULL; /* XXX */ 1183164936Sjulian thread0.td_sched = &td_sched0; 1184164936Sjulian td_sched0.ts_thread = &thread0; 1185164936Sjulian td_sched0.ts_state = TSS_THREAD; 1186134791Sjulian} 1187134791Sjulian 1188134791Sjulian/* 1189113357Sjeff * This is only somewhat accurate since given many processes of the same 1190113357Sjeff * priority they will switch when their slices run out, which will be 1191113357Sjeff * at most SCHED_SLICE_MAX. 1192113357Sjeff */ 1193109864Sjeffint 1194109864Sjeffsched_rr_interval(void) 1195109864Sjeff{ 1196109864Sjeff return (SCHED_SLICE_MAX); 1197109864Sjeff} 1198109864Sjeff 1199121790Sjeffstatic void 1200164936Sjuliansched_pctcpu_update(struct td_sched *ts) 1201109864Sjeff{ 1202109864Sjeff /* 1203109864Sjeff * Adjust counters and watermark for pctcpu calc. 1204116365Sjeff */ 1205164936Sjulian if (ts->ts_ltick > ticks - SCHED_CPU_TICKS) { 1206120272Sjeff /* 1207120272Sjeff * Shift the tick count out so that the divide doesn't 1208120272Sjeff * round away our results. 1209120272Sjeff */ 1210164936Sjulian ts->ts_ticks <<= 10; 1211164936Sjulian ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) * 1212120272Sjeff SCHED_CPU_TICKS; 1213164936Sjulian ts->ts_ticks >>= 10; 1214120272Sjeff } else 1215164936Sjulian ts->ts_ticks = 0; 1216164936Sjulian ts->ts_ltick = ticks; 1217164936Sjulian ts->ts_ftick = ts->ts_ltick - SCHED_CPU_TICKS; 1218109864Sjeff} 1219109864Sjeff 1220109864Sjeffvoid 1221139453Sjhbsched_thread_priority(struct thread *td, u_char prio) 1222109864Sjeff{ 1223164936Sjulian struct td_sched *ts; 1224109864Sjeff 1225139316Sjeff CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 1226139316Sjeff td, td->td_proc->p_comm, td->td_priority, prio, curthread, 1227139316Sjeff curthread->td_proc->p_comm); 1228164936Sjulian ts = td->td_sched; 1229109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1230139453Sjhb if (td->td_priority == prio) 1231139453Sjhb return; 1232109864Sjeff if (TD_ON_RUNQ(td)) { 1233121605Sjeff /* 1234121605Sjeff * If the priority has been elevated due to priority 1235121605Sjeff * propagation, we may have to move ourselves to a new 1236121605Sjeff * queue. We still call adjustrunqueue below in case kse 1237121605Sjeff * needs to fix things up. 1238121605Sjeff */ 1239164936Sjulian if (prio < td->td_priority && ts->ts_runq != NULL && 1240164936Sjulian (ts->ts_flags & TSF_ASSIGNED) == 0 && 1241164936Sjulian ts->ts_runq != TDQ_CPU(ts->ts_cpu)->ksq_curr) { 1242164936Sjulian runq_remove(ts->ts_runq, ts); 1243164936Sjulian ts->ts_runq = TDQ_CPU(ts->ts_cpu)->ksq_curr; 1244164936Sjulian runq_add(ts->ts_runq, ts, 0); 1245121605Sjeff } 1246133555Sjeff /* 1247164936Sjulian * Hold this td_sched on this cpu so that sched_prio() doesn't 1248133555Sjeff * cause excessive migration. We only want migration to 1249133555Sjeff * happen as the result of a wakeup. 1250133555Sjeff */ 1251164936Sjulian ts->ts_flags |= TSF_HOLD; 1252119488Sdavidxu adjustrunqueue(td, prio); 1253164936Sjulian ts->ts_flags &= ~TSF_HOLD; 1254121605Sjeff } else 1255119488Sdavidxu td->td_priority = prio; 1256109864Sjeff} 1257109864Sjeff 1258139453Sjhb/* 1259139453Sjhb * Update a thread's priority when it is lent another thread's 1260139453Sjhb * priority. 1261139453Sjhb */ 1262109864Sjeffvoid 1263139453Sjhbsched_lend_prio(struct thread *td, u_char prio) 1264139453Sjhb{ 1265139453Sjhb 1266139453Sjhb td->td_flags |= TDF_BORROWING; 1267139453Sjhb sched_thread_priority(td, prio); 1268139453Sjhb} 1269139453Sjhb 1270139453Sjhb/* 1271139453Sjhb * Restore a thread's priority when priority propagation is 1272139453Sjhb * over. The prio argument is the minimum priority the thread 1273139453Sjhb * needs to have to satisfy other possible priority lending 1274139453Sjhb * requests. If the thread's regular priority is less 1275139453Sjhb * important than prio, the thread will keep a priority boost 1276139453Sjhb * of prio. 1277139453Sjhb */ 1278139453Sjhbvoid 1279139453Sjhbsched_unlend_prio(struct thread *td, u_char prio) 1280139453Sjhb{ 1281139453Sjhb u_char base_pri; 1282139453Sjhb 1283139453Sjhb if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1284139453Sjhb td->td_base_pri <= PRI_MAX_TIMESHARE) 1285163709Sjb base_pri = td->td_user_pri; 1286139453Sjhb else 1287139453Sjhb base_pri = td->td_base_pri; 1288139453Sjhb if (prio >= base_pri) { 1289139455Sjhb td->td_flags &= ~TDF_BORROWING; 1290139453Sjhb sched_thread_priority(td, base_pri); 1291139453Sjhb } else 1292139453Sjhb sched_lend_prio(td, prio); 1293139453Sjhb} 1294139453Sjhb 1295139453Sjhbvoid 1296139453Sjhbsched_prio(struct thread *td, u_char prio) 1297139453Sjhb{ 1298139453Sjhb u_char oldprio; 1299139453Sjhb 1300139453Sjhb /* First, update the base priority. */ 1301139453Sjhb td->td_base_pri = prio; 1302139453Sjhb 1303139453Sjhb /* 1304139455Sjhb * If the thread is borrowing another thread's priority, don't 1305139453Sjhb * ever lower the priority. 1306139453Sjhb */ 1307139453Sjhb if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1308139453Sjhb return; 1309139453Sjhb 1310139453Sjhb /* Change the real priority. */ 1311139453Sjhb oldprio = td->td_priority; 1312139453Sjhb sched_thread_priority(td, prio); 1313139453Sjhb 1314139453Sjhb /* 1315139453Sjhb * If the thread is on a turnstile, then let the turnstile update 1316139453Sjhb * its state. 1317139453Sjhb */ 1318139453Sjhb if (TD_ON_LOCK(td) && oldprio != prio) 1319139453Sjhb turnstile_adjust(td, oldprio); 1320139453Sjhb} 1321139455Sjhb 1322139453Sjhbvoid 1323163709Sjbsched_user_prio(struct thread *td, u_char prio) 1324161599Sdavidxu{ 1325161599Sdavidxu u_char oldprio; 1326161599Sdavidxu 1327163709Sjb td->td_base_user_pri = prio; 1328164939Sjulian if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) 1329164939Sjulian return; 1330163709Sjb oldprio = td->td_user_pri; 1331163709Sjb td->td_user_pri = prio; 1332163709Sjb 1333161599Sdavidxu if (TD_ON_UPILOCK(td) && oldprio != prio) 1334161599Sdavidxu umtx_pi_adjust(td, oldprio); 1335161599Sdavidxu} 1336161599Sdavidxu 1337161599Sdavidxuvoid 1338161599Sdavidxusched_lend_user_prio(struct thread *td, u_char prio) 1339161599Sdavidxu{ 1340161599Sdavidxu u_char oldprio; 1341161599Sdavidxu 1342161599Sdavidxu td->td_flags |= TDF_UBORROWING; 1343161599Sdavidxu 1344164091Smaxim oldprio = td->td_user_pri; 1345163709Sjb td->td_user_pri = prio; 1346161599Sdavidxu 1347161599Sdavidxu if (TD_ON_UPILOCK(td) && oldprio != prio) 1348161599Sdavidxu umtx_pi_adjust(td, oldprio); 1349161599Sdavidxu} 1350161599Sdavidxu 1351161599Sdavidxuvoid 1352161599Sdavidxusched_unlend_user_prio(struct thread *td, u_char prio) 1353161599Sdavidxu{ 1354161599Sdavidxu u_char base_pri; 1355161599Sdavidxu 1356163709Sjb base_pri = td->td_base_user_pri; 1357161599Sdavidxu if (prio >= base_pri) { 1358161599Sdavidxu td->td_flags &= ~TDF_UBORROWING; 1359163709Sjb sched_user_prio(td, base_pri); 1360161599Sdavidxu } else 1361161599Sdavidxu sched_lend_user_prio(td, prio); 1362161599Sdavidxu} 1363161599Sdavidxu 1364161599Sdavidxuvoid 1365135051Sjuliansched_switch(struct thread *td, struct thread *newtd, int flags) 1366109864Sjeff{ 1367164936Sjulian struct tdq *ksq; 1368164936Sjulian struct td_sched *ts; 1369109864Sjeff 1370109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1371109864Sjeff 1372164936Sjulian ts = td->td_sched; 1373164936Sjulian ksq = TDQ_SELF(); 1374109864Sjeff 1375133555Sjeff td->td_lastcpu = td->td_oncpu; 1376113339Sjulian td->td_oncpu = NOCPU; 1377132266Sjhb td->td_flags &= ~TDF_NEEDRESCHED; 1378144777Sups td->td_owepreempt = 0; 1379109864Sjeff 1380123434Sjeff /* 1381164936Sjulian * If the thread has been assigned it may be in the process of switching 1382123434Sjeff * to the new cpu. This is the case in sched_bind(). 1383123434Sjeff */ 1384139334Sjeff if (td == PCPU_GET(idlethread)) { 1385139334Sjeff TD_SET_CAN_RUN(td); 1386164936Sjulian } else if ((ts->ts_flags & TSF_ASSIGNED) == 0) { 1387139334Sjeff /* We are ending our run so make our slot available again */ 1388164936Sjulian tdq_load_rem(ksq, ts); 1389139334Sjeff if (TD_IS_RUNNING(td)) { 1390139334Sjeff /* 1391139334Sjeff * Don't allow the thread to migrate 1392139334Sjeff * from a preemption. 1393139334Sjeff */ 1394164936Sjulian ts->ts_flags |= TSF_HOLD; 1395139334Sjeff setrunqueue(td, (flags & SW_PREEMPT) ? 1396139334Sjeff SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1397139334Sjeff SRQ_OURSELF|SRQ_YIELDING); 1398164936Sjulian ts->ts_flags &= ~TSF_HOLD; 1399163709Sjb } 1400121146Sjeff } 1401136167Sjulian if (newtd != NULL) { 1402136170Sjulian /* 1403147068Sjeff * If we bring in a thread account for it as if it had been 1404147068Sjeff * added to the run queue and then chosen. 1405136170Sjulian */ 1406164936Sjulian newtd->td_sched->ts_flags |= TSF_DIDRUN; 1407164936Sjulian newtd->td_sched->ts_runq = ksq->ksq_curr; 1408136173Sjulian TD_SET_RUNNING(newtd); 1409164936Sjulian tdq_load_add(TDQ_SELF(), newtd->td_sched); 1410136167Sjulian } else 1411131473Sjhb newtd = choosethread(); 1412145256Sjkoshy if (td != newtd) { 1413145256Sjkoshy#ifdef HWPMC_HOOKS 1414145256Sjkoshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1415145256Sjkoshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1416145256Sjkoshy#endif 1417163709Sjb 1418121128Sjeff cpu_switch(td, newtd); 1419145256Sjkoshy#ifdef HWPMC_HOOKS 1420145256Sjkoshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1421145256Sjkoshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1422145256Sjkoshy#endif 1423145256Sjkoshy } 1424145256Sjkoshy 1425121128Sjeff sched_lock.mtx_lock = (uintptr_t)td; 1426109864Sjeff 1427113339Sjulian td->td_oncpu = PCPU_GET(cpuid); 1428109864Sjeff} 1429109864Sjeff 1430109864Sjeffvoid 1431130551Sjuliansched_nice(struct proc *p, int nice) 1432109864Sjeff{ 1433164936Sjulian struct td_sched *ts; 1434109864Sjeff struct thread *td; 1435164936Sjulian struct tdq *tdq; 1436109864Sjeff 1437130551Sjulian PROC_LOCK_ASSERT(p, MA_OWNED); 1438113873Sjhb mtx_assert(&sched_lock, MA_OWNED); 1439113357Sjeff /* 1440164936Sjulian * We need to adjust the nice counts for running threads. 1441113357Sjeff */ 1442163709Sjb FOREACH_THREAD_IN_PROC(p, td) { 1443163709Sjb if (td->td_pri_class == PRI_TIMESHARE) { 1444164936Sjulian ts = td->td_sched; 1445164936Sjulian if (ts->ts_runq == NULL) 1446163709Sjb continue; 1447164936Sjulian tdq = TDQ_CPU(ts->ts_cpu); 1448164936Sjulian tdq_nice_rem(tdq, p->p_nice); 1449164936Sjulian tdq_nice_add(tdq, nice); 1450113357Sjeff } 1451130551Sjulian } 1452130551Sjulian p->p_nice = nice; 1453163709Sjb FOREACH_THREAD_IN_PROC(p, td) { 1454163709Sjb sched_priority(td); 1455163709Sjb td->td_flags |= TDF_NEEDRESCHED; 1456130551Sjulian } 1457109864Sjeff} 1458109864Sjeff 1459109864Sjeffvoid 1460126326Sjhbsched_sleep(struct thread *td) 1461109864Sjeff{ 1462109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1463109864Sjeff 1464164936Sjulian td->td_sched->ts_slptime = ticks; 1465109864Sjeff} 1466109864Sjeff 1467109864Sjeffvoid 1468109864Sjeffsched_wakeup(struct thread *td) 1469109864Sjeff{ 1470109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1471109864Sjeff 1472109864Sjeff /* 1473164936Sjulian * Let the procs know how long we slept for. This is because process 1474164936Sjulian * interactivity behavior is modeled in the procs. 1475109864Sjeff */ 1476164936Sjulian if (td->td_sched->ts_slptime) { 1477113357Sjeff int hzticks; 1478109864Sjeff 1479164936Sjulian hzticks = (ticks - td->td_sched->ts_slptime) << 10; 1480121868Sjeff if (hzticks >= SCHED_SLP_RUN_MAX) { 1481163709Sjb td->td_sched->skg_slptime = SCHED_SLP_RUN_MAX; 1482163709Sjb td->td_sched->skg_runtime = 1; 1483121868Sjeff } else { 1484163709Sjb td->td_sched->skg_slptime += hzticks; 1485163709Sjb sched_interact_update(td); 1486121868Sjeff } 1487163709Sjb sched_priority(td); 1488164936Sjulian sched_slice(td->td_sched); 1489164936Sjulian td->td_sched->ts_slptime = 0; 1490109864Sjeff } 1491134586Sjulian setrunqueue(td, SRQ_BORING); 1492109864Sjeff} 1493109864Sjeff 1494109864Sjeff/* 1495109864Sjeff * Penalize the parent for creating a new child and initialize the child's 1496109864Sjeff * priority. 1497109864Sjeff */ 1498109864Sjeffvoid 1499163709Sjbsched_fork(struct thread *td, struct thread *child) 1500109864Sjeff{ 1501109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1502164936Sjulian sched_fork_thread(td, child); 1503164936Sjulian} 1504109864Sjeff 1505164936Sjulianvoid 1506164936Sjuliansched_fork_thread(struct thread *td, struct thread *child) 1507164936Sjulian{ 1508164936Sjulian struct td_sched *ts; 1509164936Sjulian struct td_sched *ts2; 1510164936Sjulian 1511163709Sjb child->td_sched->skg_slptime = td->td_sched->skg_slptime; 1512163709Sjb child->td_sched->skg_runtime = td->td_sched->skg_runtime; 1513163709Sjb child->td_user_pri = td->td_user_pri; 1514164091Smaxim child->td_base_user_pri = td->td_base_user_pri; 1515121868Sjeff sched_interact_fork(child); 1516163709Sjb td->td_sched->skg_runtime += tickincr; 1517163709Sjb sched_interact_update(td); 1518109864Sjeff 1519163709Sjb sched_newthread(child); 1520134791Sjulian 1521164936Sjulian ts = td->td_sched; 1522164936Sjulian ts2 = child->td_sched; 1523164936Sjulian ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. */ 1524164936Sjulian ts2->ts_cpu = ts->ts_cpu; 1525164936Sjulian ts2->ts_runq = NULL; 1526134791Sjulian 1527134791Sjulian /* Grab our parents cpu estimation information. */ 1528164936Sjulian ts2->ts_ticks = ts->ts_ticks; 1529164936Sjulian ts2->ts_ltick = ts->ts_ltick; 1530164936Sjulian ts2->ts_ftick = ts->ts_ftick; 1531113357Sjeff} 1532113357Sjeff 1533113357Sjeffvoid 1534163709Sjbsched_class(struct thread *td, int class) 1535113357Sjeff{ 1536164936Sjulian struct tdq *tdq; 1537164936Sjulian struct td_sched *ts; 1538121896Sjeff int nclass; 1539121896Sjeff int oclass; 1540113357Sjeff 1541113923Sjhb mtx_assert(&sched_lock, MA_OWNED); 1542163709Sjb if (td->td_pri_class == class) 1543113357Sjeff return; 1544113357Sjeff 1545121896Sjeff nclass = PRI_BASE(class); 1546163709Sjb oclass = PRI_BASE(td->td_pri_class); 1547164936Sjulian ts = td->td_sched; 1548164936Sjulian if (!((ts->ts_state != TSS_ONRUNQ && 1549164936Sjulian ts->ts_state != TSS_THREAD) || ts->ts_runq == NULL)) { 1550164936Sjulian tdq = TDQ_CPU(ts->ts_cpu); 1551113357Sjeff 1552121896Sjeff#ifdef SMP 1553164936Sjulian /* 1554164936Sjulian * On SMP if we're on the RUNQ we must adjust the transferable 1555164936Sjulian * count because could be changing to or from an interrupt 1556164936Sjulian * class. 1557164936Sjulian */ 1558164936Sjulian if (ts->ts_state == TSS_ONRUNQ) { 1559164936Sjulian if (THREAD_CAN_MIGRATE(ts)) { 1560164936Sjulian tdq->ksq_transferable--; 1561164936Sjulian tdq->ksq_group->ksg_transferable--; 1562164936Sjulian } 1563164936Sjulian if (THREAD_CAN_MIGRATE(ts)) { 1564164936Sjulian tdq->ksq_transferable++; 1565164936Sjulian tdq->ksq_group->ksg_transferable++; 1566164936Sjulian } 1567122744Sjeff } 1568164936Sjulian#endif 1569164936Sjulian if (oclass == PRI_TIMESHARE) { 1570164936Sjulian tdq->ksq_load_timeshare--; 1571164936Sjulian tdq_nice_rem(tdq, td->td_proc->p_nice); 1572122744Sjeff } 1573164936Sjulian if (nclass == PRI_TIMESHARE) { 1574164936Sjulian tdq->ksq_load_timeshare++; 1575164936Sjulian tdq_nice_add(tdq, td->td_proc->p_nice); 1576164936Sjulian } 1577109970Sjeff } 1578109970Sjeff 1579163709Sjb td->td_pri_class = class; 1580109864Sjeff} 1581109864Sjeff 1582109864Sjeff/* 1583109864Sjeff * Return some of the child's priority and interactivity to the parent. 1584109864Sjeff */ 1585109864Sjeffvoid 1586164939Sjuliansched_exit(struct proc *p, struct thread *child) 1587109864Sjeff{ 1588164939Sjulian 1589163709Sjb CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", 1590164939Sjulian child, child->td_proc->p_comm, child->td_priority); 1591113372Sjeff 1592164939Sjulian sched_exit_thread(FIRST_THREAD_IN_PROC(p), child); 1593113372Sjeff} 1594113372Sjeff 1595113372Sjeffvoid 1596164939Sjuliansched_exit_thread(struct thread *td, struct thread *child) 1597164936Sjulian{ 1598164939Sjulian CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1599164939Sjulian child, childproc->p_comm, child->td_priority); 1600164939Sjulian 1601164939Sjulian td->td_sched->skg_runtime += child->td_sched->skg_runtime; 1602164939Sjulian sched_interact_update(td); 1603164939Sjulian tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched); 1604164936Sjulian} 1605164936Sjulian 1606164936Sjulianvoid 1607164936Sjuliansched_userret(struct thread *td) 1608164936Sjulian{ 1609164936Sjulian /* 1610164936Sjulian * XXX we cheat slightly on the locking here to avoid locking in 1611164936Sjulian * the usual case. Setting td_priority here is essentially an 1612164936Sjulian * incomplete workaround for not setting it properly elsewhere. 1613164936Sjulian * Now that some interrupt handlers are threads, not setting it 1614164936Sjulian * properly elsewhere can clobber it in the window between setting 1615164936Sjulian * it here and returning to user mode, so don't waste time setting 1616164936Sjulian * it perfectly here. 1617164936Sjulian */ 1618164936Sjulian KASSERT((td->td_flags & TDF_BORROWING) == 0, 1619164936Sjulian ("thread with borrowed priority returning to userland")); 1620164936Sjulian if (td->td_priority != td->td_user_pri) { 1621164936Sjulian mtx_lock_spin(&sched_lock); 1622164936Sjulian td->td_priority = td->td_user_pri; 1623164936Sjulian td->td_base_pri = td->td_user_pri; 1624164936Sjulian mtx_unlock_spin(&sched_lock); 1625164936Sjulian } 1626164936Sjulian} 1627164936Sjulian 1628164936Sjulianvoid 1629121127Sjeffsched_clock(struct thread *td) 1630109864Sjeff{ 1631164936Sjulian struct tdq *tdq; 1632164936Sjulian struct td_sched *ts; 1633109864Sjeff 1634129982Sjeff mtx_assert(&sched_lock, MA_OWNED); 1635164936Sjulian tdq = TDQ_SELF(); 1636129982Sjeff#ifdef SMP 1637139334Sjeff if (ticks >= bal_tick) 1638129982Sjeff sched_balance(); 1639139334Sjeff if (ticks >= gbal_tick && balance_groups) 1640129982Sjeff sched_balance_groups(); 1641133427Sjeff /* 1642133427Sjeff * We could have been assigned a non real-time thread without an 1643133427Sjeff * IPI. 1644133427Sjeff */ 1645164936Sjulian if (tdq->ksq_assigned) 1646164936Sjulian tdq_assign(tdq); /* Potentially sets NEEDRESCHED */ 1647129982Sjeff#endif 1648164936Sjulian ts = td->td_sched; 1649109864Sjeff 1650110028Sjeff /* Adjust ticks for pctcpu */ 1651164936Sjulian ts->ts_ticks++; 1652164936Sjulian ts->ts_ltick = ticks; 1653112994Sjeff 1654109971Sjeff /* Go up to one second beyond our max and then trim back down */ 1655164936Sjulian if (ts->ts_ftick + SCHED_CPU_TICKS + hz < ts->ts_ltick) 1656164936Sjulian sched_pctcpu_update(ts); 1657109971Sjeff 1658114496Sjulian if (td->td_flags & TDF_IDLETD) 1659109864Sjeff return; 1660110028Sjeff /* 1661163709Sjb * We only do slicing code for TIMESHARE threads. 1662113357Sjeff */ 1663163709Sjb if (td->td_pri_class != PRI_TIMESHARE) 1664113357Sjeff return; 1665113357Sjeff /* 1666163709Sjb * We used a tick charge it to the thread so that we can compute our 1667113357Sjeff * interactivity. 1668109864Sjeff */ 1669163709Sjb td->td_sched->skg_runtime += tickincr; 1670163709Sjb sched_interact_update(td); 1671110645Sjeff 1672109864Sjeff /* 1673109864Sjeff * We used up one time slice. 1674109864Sjeff */ 1675164936Sjulian if (--ts->ts_slice > 0) 1676113357Sjeff return; 1677109864Sjeff /* 1678113357Sjeff * We're out of time, recompute priorities and requeue. 1679109864Sjeff */ 1680164936Sjulian tdq_load_rem(tdq, ts); 1681163709Sjb sched_priority(td); 1682164936Sjulian sched_slice(ts); 1683164936Sjulian if (SCHED_CURR(td, ts)) 1684164936Sjulian ts->ts_runq = tdq->ksq_curr; 1685113357Sjeff else 1686164936Sjulian ts->ts_runq = tdq->ksq_next; 1687164936Sjulian tdq_load_add(tdq, ts); 1688113357Sjeff td->td_flags |= TDF_NEEDRESCHED; 1689109864Sjeff} 1690109864Sjeff 1691109864Sjeffint 1692109864Sjeffsched_runnable(void) 1693109864Sjeff{ 1694164936Sjulian struct tdq *tdq; 1695115998Sjeff int load; 1696109864Sjeff 1697115998Sjeff load = 1; 1698115998Sjeff 1699164936Sjulian tdq = TDQ_SELF(); 1700121790Sjeff#ifdef SMP 1701164936Sjulian if (tdq->ksq_assigned) { 1702122094Sjeff mtx_lock_spin(&sched_lock); 1703164936Sjulian tdq_assign(tdq); 1704122094Sjeff mtx_unlock_spin(&sched_lock); 1705122094Sjeff } 1706121790Sjeff#endif 1707121605Sjeff if ((curthread->td_flags & TDF_IDLETD) != 0) { 1708164936Sjulian if (tdq->ksq_load > 0) 1709121605Sjeff goto out; 1710121605Sjeff } else 1711164936Sjulian if (tdq->ksq_load - 1 > 0) 1712121605Sjeff goto out; 1713115998Sjeff load = 0; 1714115998Sjeffout: 1715115998Sjeff return (load); 1716109864Sjeff} 1717109864Sjeff 1718164936Sjulianstruct td_sched * 1719109970Sjeffsched_choose(void) 1720109970Sjeff{ 1721164936Sjulian struct tdq *tdq; 1722164936Sjulian struct td_sched *ts; 1723109970Sjeff 1724115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 1725164936Sjulian tdq = TDQ_SELF(); 1726113357Sjeff#ifdef SMP 1727123433Sjeffrestart: 1728164936Sjulian if (tdq->ksq_assigned) 1729164936Sjulian tdq_assign(tdq); 1730113357Sjeff#endif 1731164936Sjulian ts = tdq_choose(tdq); 1732164936Sjulian if (ts) { 1733121790Sjeff#ifdef SMP 1734164936Sjulian if (ts->ts_thread->td_pri_class == PRI_IDLE) 1735164936Sjulian if (tdq_idled(tdq) == 0) 1736123433Sjeff goto restart; 1737121790Sjeff#endif 1738164936Sjulian tdq_runq_rem(tdq, ts); 1739164936Sjulian ts->ts_state = TSS_THREAD; 1740164936Sjulian ts->ts_flags &= ~TSF_PREEMPTED; 1741164936Sjulian return (ts); 1742109864Sjeff } 1743109970Sjeff#ifdef SMP 1744164936Sjulian if (tdq_idled(tdq) == 0) 1745123433Sjeff goto restart; 1746109970Sjeff#endif 1747113357Sjeff return (NULL); 1748109864Sjeff} 1749109864Sjeff 1750109864Sjeffvoid 1751134586Sjuliansched_add(struct thread *td, int flags) 1752109864Sjeff{ 1753164936Sjulian struct tdq *tdq; 1754164936Sjulian struct td_sched *ts; 1755139334Sjeff int preemptive; 1756133427Sjeff int canmigrate; 1757121790Sjeff int class; 1758109864Sjeff 1759139316Sjeff CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 1760139316Sjeff td, td->td_proc->p_comm, td->td_priority, curthread, 1761139316Sjeff curthread->td_proc->p_comm); 1762121790Sjeff mtx_assert(&sched_lock, MA_OWNED); 1763164936Sjulian ts = td->td_sched; 1764139334Sjeff canmigrate = 1; 1765139334Sjeff preemptive = !(flags & SRQ_YIELDING); 1766163709Sjb class = PRI_BASE(td->td_pri_class); 1767164936Sjulian tdq = TDQ_SELF(); 1768164936Sjulian ts->ts_flags &= ~TSF_INTERNAL; 1769139334Sjeff#ifdef SMP 1770164936Sjulian if (ts->ts_flags & TSF_ASSIGNED) { 1771164936Sjulian if (ts->ts_flags & TSF_REMOVED) 1772164936Sjulian ts->ts_flags &= ~TSF_REMOVED; 1773121790Sjeff return; 1774138802Sjeff } 1775164936Sjulian canmigrate = THREAD_CAN_MIGRATE(ts); 1776149278Sdavidxu /* 1777149278Sdavidxu * Don't migrate running threads here. Force the long term balancer 1778149278Sdavidxu * to do it. 1779149278Sdavidxu */ 1780164936Sjulian if (ts->ts_flags & TSF_HOLD) { 1781164936Sjulian ts->ts_flags &= ~TSF_HOLD; 1782149278Sdavidxu canmigrate = 0; 1783149278Sdavidxu } 1784139334Sjeff#endif 1785164936Sjulian KASSERT(ts->ts_state != TSS_ONRUNQ, 1786164936Sjulian ("sched_add: thread %p (%s) already in run queue", td, 1787163709Sjb td->td_proc->p_comm)); 1788163709Sjb KASSERT(td->td_proc->p_sflag & PS_INMEM, 1789110267Sjeff ("sched_add: process swapped out")); 1790164936Sjulian KASSERT(ts->ts_runq == NULL, 1791164936Sjulian ("sched_add: thread %p is still assigned to a run queue", td)); 1792148856Sdavidxu if (flags & SRQ_PREEMPTED) 1793164936Sjulian ts->ts_flags |= TSF_PREEMPTED; 1794121790Sjeff switch (class) { 1795112994Sjeff case PRI_ITHD: 1796112994Sjeff case PRI_REALTIME: 1797164936Sjulian ts->ts_runq = tdq->ksq_curr; 1798164936Sjulian ts->ts_slice = SCHED_SLICE_MAX; 1799139334Sjeff if (canmigrate) 1800164936Sjulian ts->ts_cpu = PCPU_GET(cpuid); 1801112994Sjeff break; 1802112994Sjeff case PRI_TIMESHARE: 1803164936Sjulian if (SCHED_CURR(td, ts)) 1804164936Sjulian ts->ts_runq = tdq->ksq_curr; 1805113387Sjeff else 1806164936Sjulian ts->ts_runq = tdq->ksq_next; 1807113357Sjeff break; 1808112994Sjeff case PRI_IDLE: 1809113357Sjeff /* 1810113357Sjeff * This is for priority prop. 1811113357Sjeff */ 1812164936Sjulian if (ts->ts_thread->td_priority < PRI_MIN_IDLE) 1813164936Sjulian ts->ts_runq = tdq->ksq_curr; 1814113357Sjeff else 1815164936Sjulian ts->ts_runq = &tdq->ksq_idle; 1816164936Sjulian ts->ts_slice = SCHED_SLICE_MIN; 1817112994Sjeff break; 1818113357Sjeff default: 1819121868Sjeff panic("Unknown pri class."); 1820113357Sjeff break; 1821112994Sjeff } 1822121790Sjeff#ifdef SMP 1823133427Sjeff /* 1824133427Sjeff * If this thread is pinned or bound, notify the target cpu. 1825133427Sjeff */ 1826164936Sjulian if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid) ) { 1827164936Sjulian ts->ts_runq = NULL; 1828164936Sjulian tdq_notify(ts, ts->ts_cpu); 1829123433Sjeff return; 1830123433Sjeff } 1831121790Sjeff /* 1832123685Sjeff * If we had been idle, clear our bit in the group and potentially 1833123685Sjeff * the global bitmap. If not, see if we should transfer this thread. 1834121790Sjeff */ 1835123433Sjeff if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 1836164936Sjulian (tdq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 1837121790Sjeff /* 1838123433Sjeff * Check to see if our group is unidling, and if so, remove it 1839123433Sjeff * from the global idle mask. 1840121790Sjeff */ 1841164936Sjulian if (tdq->ksq_group->ksg_idlemask == 1842164936Sjulian tdq->ksq_group->ksg_cpumask) 1843164936Sjulian atomic_clear_int(&tdq_idle, tdq->ksq_group->ksg_mask); 1844123433Sjeff /* 1845123433Sjeff * Now remove ourselves from the group specific idle mask. 1846123433Sjeff */ 1847164936Sjulian tdq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1848164936Sjulian } else if (canmigrate && tdq->ksq_load > 1 && class != PRI_ITHD) 1849164936Sjulian if (tdq_transfer(tdq, ts, class)) 1850123685Sjeff return; 1851164936Sjulian ts->ts_cpu = PCPU_GET(cpuid); 1852121790Sjeff#endif 1853133555Sjeff if (td->td_priority < curthread->td_priority && 1854164936Sjulian ts->ts_runq == tdq->ksq_curr) 1855133555Sjeff curthread->td_flags |= TDF_NEEDRESCHED; 1856131839Sjhb if (preemptive && maybe_preempt(td)) 1857131481Sjhb return; 1858164936Sjulian ts->ts_state = TSS_ONRUNQ; 1859109864Sjeff 1860164936Sjulian tdq_runq_add(tdq, ts, flags); 1861164936Sjulian tdq_load_add(tdq, ts); 1862109864Sjeff} 1863109864Sjeff 1864109864Sjeffvoid 1865121127Sjeffsched_rem(struct thread *td) 1866109864Sjeff{ 1867164936Sjulian struct tdq *tdq; 1868164936Sjulian struct td_sched *ts; 1869113357Sjeff 1870139316Sjeff CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 1871139316Sjeff td, td->td_proc->p_comm, td->td_priority, curthread, 1872139316Sjeff curthread->td_proc->p_comm); 1873139334Sjeff mtx_assert(&sched_lock, MA_OWNED); 1874164936Sjulian ts = td->td_sched; 1875164936Sjulian ts->ts_flags &= ~TSF_PREEMPTED; 1876164936Sjulian if (ts->ts_flags & TSF_ASSIGNED) { 1877164936Sjulian ts->ts_flags |= TSF_REMOVED; 1878121790Sjeff return; 1879138802Sjeff } 1880164936Sjulian KASSERT((ts->ts_state == TSS_ONRUNQ), 1881164936Sjulian ("sched_rem: thread not on run queue")); 1882109864Sjeff 1883164936Sjulian ts->ts_state = TSS_THREAD; 1884164936Sjulian tdq = TDQ_CPU(ts->ts_cpu); 1885164936Sjulian tdq_runq_rem(tdq, ts); 1886164936Sjulian tdq_load_rem(tdq, ts); 1887109864Sjeff} 1888109864Sjeff 1889109864Sjefffixpt_t 1890121127Sjeffsched_pctcpu(struct thread *td) 1891109864Sjeff{ 1892109864Sjeff fixpt_t pctcpu; 1893164936Sjulian struct td_sched *ts; 1894109864Sjeff 1895109864Sjeff pctcpu = 0; 1896164936Sjulian ts = td->td_sched; 1897164936Sjulian if (ts == NULL) 1898121290Sjeff return (0); 1899109864Sjeff 1900115998Sjeff mtx_lock_spin(&sched_lock); 1901164936Sjulian if (ts->ts_ticks) { 1902109864Sjeff int rtick; 1903109864Sjeff 1904116365Sjeff /* 1905116365Sjeff * Don't update more frequently than twice a second. Allowing 1906116365Sjeff * this causes the cpu usage to decay away too quickly due to 1907116365Sjeff * rounding errors. 1908116365Sjeff */ 1909164936Sjulian if (ts->ts_ftick + SCHED_CPU_TICKS < ts->ts_ltick || 1910164936Sjulian ts->ts_ltick < (ticks - (hz / 2))) 1911164936Sjulian sched_pctcpu_update(ts); 1912109864Sjeff /* How many rtick per second ? */ 1913164936Sjulian rtick = min(ts->ts_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 1914110226Sscottl pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 1915109864Sjeff } 1916109864Sjeff 1917164936Sjulian td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick; 1918113865Sjhb mtx_unlock_spin(&sched_lock); 1919109864Sjeff 1920109864Sjeff return (pctcpu); 1921109864Sjeff} 1922109864Sjeff 1923122038Sjeffvoid 1924122038Sjeffsched_bind(struct thread *td, int cpu) 1925122038Sjeff{ 1926164936Sjulian struct td_sched *ts; 1927122038Sjeff 1928122038Sjeff mtx_assert(&sched_lock, MA_OWNED); 1929164936Sjulian ts = td->td_sched; 1930164936Sjulian ts->ts_flags |= TSF_BOUND; 1931123433Sjeff#ifdef SMP 1932123433Sjeff if (PCPU_GET(cpuid) == cpu) 1933122038Sjeff return; 1934122038Sjeff /* sched_rem without the runq_remove */ 1935164936Sjulian ts->ts_state = TSS_THREAD; 1936164936Sjulian tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts); 1937164936Sjulian tdq_notify(ts, cpu); 1938122038Sjeff /* When we return from mi_switch we'll be on the correct cpu. */ 1939131527Sphk mi_switch(SW_VOL, NULL); 1940122038Sjeff#endif 1941122038Sjeff} 1942122038Sjeff 1943122038Sjeffvoid 1944122038Sjeffsched_unbind(struct thread *td) 1945122038Sjeff{ 1946122038Sjeff mtx_assert(&sched_lock, MA_OWNED); 1947164936Sjulian td->td_sched->ts_flags &= ~TSF_BOUND; 1948122038Sjeff} 1949122038Sjeff 1950109864Sjeffint 1951145256Sjkoshysched_is_bound(struct thread *td) 1952145256Sjkoshy{ 1953145256Sjkoshy mtx_assert(&sched_lock, MA_OWNED); 1954164936Sjulian return (td->td_sched->ts_flags & TSF_BOUND); 1955145256Sjkoshy} 1956145256Sjkoshy 1957159630Sdavidxuvoid 1958159630Sdavidxusched_relinquish(struct thread *td) 1959159630Sdavidxu{ 1960159630Sdavidxu mtx_lock_spin(&sched_lock); 1961163709Sjb if (td->td_pri_class == PRI_TIMESHARE) 1962159630Sdavidxu sched_prio(td, PRI_MAX_TIMESHARE); 1963159630Sdavidxu mi_switch(SW_VOL, NULL); 1964159630Sdavidxu mtx_unlock_spin(&sched_lock); 1965159630Sdavidxu} 1966159630Sdavidxu 1967145256Sjkoshyint 1968125289Sjeffsched_load(void) 1969125289Sjeff{ 1970125289Sjeff#ifdef SMP 1971125289Sjeff int total; 1972125289Sjeff int i; 1973125289Sjeff 1974125289Sjeff total = 0; 1975125289Sjeff for (i = 0; i <= ksg_maxid; i++) 1976164936Sjulian total += TDQ_GROUP(i)->ksg_load; 1977125289Sjeff return (total); 1978125289Sjeff#else 1979164936Sjulian return (TDQ_SELF()->ksq_sysload); 1980125289Sjeff#endif 1981125289Sjeff} 1982125289Sjeff 1983125289Sjeffint 1984109864Sjeffsched_sizeof_proc(void) 1985109864Sjeff{ 1986109864Sjeff return (sizeof(struct proc)); 1987109864Sjeff} 1988109864Sjeff 1989109864Sjeffint 1990109864Sjeffsched_sizeof_thread(void) 1991109864Sjeff{ 1992109864Sjeff return (sizeof(struct thread) + sizeof(struct td_sched)); 1993109864Sjeff} 1994159570Sdavidxu 1995159570Sdavidxuvoid 1996159570Sdavidxusched_tick(void) 1997159570Sdavidxu{ 1998159570Sdavidxu} 1999134791Sjulian#define KERN_SWITCH_INCLUDE 1 2000134791Sjulian#include "kern/kern_switch.c" 2001