sched_ule.c revision 146954
/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 146954 2005-06-04 09:23:28Z jeff $");

#include <opt_sched.h>

#define kse td_sched

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");

SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

/*
 * The following data structures are allocated within their parent structure
 * but are scheduler specific.
 */
/*
 * The schedulable entity that can be given a context to run.  A process may
 * have several of these.
 */
struct kse {
	TAILQ_ENTRY(kse) ke_procq;	/* (j/z) Run queue. */
	int		ke_flags;	/* (j) KEF_* flags. */
	struct thread	*ke_thread;	/* (*) Active associated thread. */
	fixpt_t		ke_pctcpu;	/* (j) %cpu during p_swtime. */
	char		ke_rqindex;	/* (j) Run queue index. */
	enum {
		KES_THREAD = 0x0,	/* slaved to thread state */
		KES_ONRUNQ
	} ke_state;			/* (j) thread sched specific status. */
	int		ke_slptime;
	int		ke_slice;
	struct runq	*ke_runq;
	u_char		ke_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ke_ltick;	/* Last tick that we were running on */
	int		ke_ftick;	/* First tick that we were running on */
	int		ke_ticks;	/* Tick count */

};
#define	td_kse			td_sched
#define	td_slptime		td_kse->ke_slptime
#define	ke_proc			ke_thread->td_proc
#define	ke_ksegrp		ke_thread->td_ksegrp
#define	ke_assign		ke_procq.tqe_next
/* flags kept in ke_flags */
#define	KEF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	KEF_BOUND	0x0002		/* Thread can not migrate. */
#define	KEF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	KEF_HOLD	0x0008		/* Thread is temporarily bound. */
#define	KEF_REMOVED	0x0010		/* Thread was removed while ASSIGNED */
#define	KEF_INTERNAL	0x0020		/* Thread added due to migration. */
#define	KEF_DIDRUN	0x02000		/* Thread actually ran. */
#define	KEF_EXIT	0x04000		/* Thread is being killed. */

struct kg_sched {
	struct thread	*skg_last_assigned; /* (j) Last thread assigned to */
					    /* the system scheduler */
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
	int	skg_avail_opennings;	/* (j) Num unfilled slots in group.*/
	int	skg_concurrency;	/* (j) Num threads requested in group.*/
};
#define	kg_last_assigned	kg_sched->skg_last_assigned
#define	kg_avail_opennings	kg_sched->skg_avail_opennings
#define	kg_concurrency		kg_sched->skg_concurrency
#define	kg_runtime		kg_sched->skg_runtime
#define	kg_slptime		kg_sched->skg_slptime

#define	SLOT_RELEASE(kg)	(kg)->kg_avail_opennings++
#define	SLOT_USE(kg)		(kg)->kg_avail_opennings--

static struct kse kse0;
static struct kg_sched kg_sched0;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower(better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)
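
/*
 * Worked example: with SCHED_INTERACT_MAX at 100, a ksegrp whose
 * interactivity score is below SCHED_INTERACT_THRESH (30) is treated as
 * interactive and kept on the current queue, while a score of 50 maps to
 * the middle of the timeshare priority range via SCHED_PRI_INTERACT()
 * before the nice value is added in sched_priority().
 */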

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determine the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_INTERACTIVE		(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/*
 * This macro determines whether or not the thread belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(kg)						\
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(kg, ke)						\
    ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */
struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int			ksq_transferable;
	LIST_ENTRY(kseq)	ksq_siblings;	/* Next in kseq group. */
	struct kseq_group	*ksq_group;	/* Our processor group. */
	volatile struct kse	*ksq_assigned;	/* assigned by another CPU. */
#else
	int		ksq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * kseq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct kseq_group {
	int	ksg_cpus;		/* Count of CPUs in this kseq group. */
	cpumask_t ksg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t ksg_idlemask;		/* Idle cpus in this group. */
	cpumask_t ksg_mask;		/* Bit mask for first cpu. */
	int	ksg_load;		/* Total load of this group. */
	int	ksg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, kseq)	ksg_members; /* Linked list of all members. */
};
#endif

/*
 * One kse queue per processor.
 */
#ifdef SMP
static cpumask_t kseq_idle;
static int ksg_maxid;
static struct kseq	kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#define	KSEQ_ID(x)	((x) - kseq_cpu)
#define	KSEQ_GROUP(x)	(&kseq_groups[(x)])
#else	/* !SMP */
static struct kseq	kseq_cpu;

#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void slot_fill(struct ksegrp *);
static struct kse *sched_choose(void);		/* XXX Should be thread * */
static void sched_slice(struct kse *);
static void sched_priority(struct ksegrp *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct ksegrp *);
static void sched_interact_update(struct ksegrp *);
static void sched_interact_fork(struct ksegrp *);
static void sched_pctcpu_update(struct kse *);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *);
static void kseq_setup(struct kseq *);
static void kseq_load_add(struct kseq *, struct kse *);
static void kseq_load_rem(struct kseq *, struct kse *);
static __inline void kseq_runq_add(struct kseq *, struct kse *, int);
static __inline void kseq_runq_rem(struct kseq *, struct kse *);
static void kseq_nice_add(struct kseq *, int);
static void kseq_nice_rem(struct kseq *, int);
void kseq_print(int cpu);
#ifdef SMP
static int kseq_transfer(struct kseq *, struct kse *, int);
static struct kse *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct kseq_group *);
static void sched_balance_pair(struct kseq *, struct kseq *);
static void kseq_move(struct kseq *, int);
static int kseq_idled(struct kseq *);
static void kseq_notify(struct kse *, int);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *, int);
#define	KSE_CAN_MIGRATE(ke)						\
    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
#endif

void
kseq_print(int cpu)
{
	struct kseq *kseq;
	int i;

	kseq = KSEQ_CPU(cpu);

	printf("kseq:\n");
	printf("\tload: %d\n", kseq->ksq_load);
	printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", kseq->ksq_transferable);
#endif
	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (kseq->ksq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke, int flags)
{
#ifdef SMP
	if (KSE_CAN_MIGRATE(ke)) {
		kseq->ksq_transferable++;
		kseq->ksq_group->ksg_transferable++;
		ke->ke_flags |= KEF_XFERABLE;
	}
#endif
	runq_add(ke->ke_runq, ke, flags);
}

static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (ke->ke_flags & KEF_XFERABLE) {
		kseq->ksq_transferable--;
		kseq->ksq_group->ksg_transferable--;
		ke->ke_flags &= ~KEF_XFERABLE;
	}
#endif
	runq_remove(ke->ke_runq, ke);
}

static void
kseq_load_add(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare++;
	kseq->ksq_load++;
	CTR1(KTR_SCHED, "load: %d", kseq->ksq_load);
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load++;
#else
		kseq->ksq_sysload++;
#endif
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_add(kseq, ke->ke_proc->p_nice);
}

static void
kseq_load_rem(struct kseq *kseq, struct kse *ke)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare--;
	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		kseq->ksq_group->ksg_load--;
#else
		kseq->ksq_sysload--;
#endif
	kseq->ksq_load--;
	CTR1(KTR_SCHED, "load: %d", kseq->ksq_load);
	ke->ke_runq = NULL;
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		kseq_nice_rem(kseq, ke->ke_proc->p_nice);
}

static void
kseq_nice_add(struct kseq *kseq, int nice)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	kseq->ksq_nice[nice + SCHED_PRI_NHALF]++;
	if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1)
		kseq->ksq_nicemin = nice;
}

static void
kseq_nice_rem(struct kseq *kseq, int nice)
{
	int n;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Normalize to zero. */
	n = nice + SCHED_PRI_NHALF;
	kseq->ksq_nice[n]--;
	KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count."));

	/*
	 * If this wasn't the smallest nice value or there are more in
	 * this bucket we can just return.  Otherwise we have to recalculate
	 * the smallest nice.
	 */
	if (nice != kseq->ksq_nicemin ||
	    kseq->ksq_nice[n] != 0 ||
	    kseq->ksq_load_timeshare == 0)
		return;

	for (; n < SCHED_PRI_NRESV; n++)
		if (kseq->ksq_nice[n]) {
			kseq->ksq_nicemin = n - SCHED_PRI_NHALF;
			return;
		}
}

#ifdef SMP
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 *
 */
static void
sched_balance(void)
{
	struct kseq_group *high;
	struct kseq_group *low;
	struct kseq_group *ksg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (ksg_maxid + 1);
	for (cnt = 0; cnt <= ksg_maxid; cnt++) {
		ksg = KSEQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || ksg->ksg_load > high->ksg_load)
		    && ksg->ksg_transferable)
			high = ksg;
		if (low == NULL || ksg->ksg_load < low->ksg_load)
			low = ksg;
		if (++i > ksg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->ksg_members),
		    LIST_FIRST(&low->ksg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= ksg_maxid; i++)
			sched_balance_group(KSEQ_GROUP(i));
}

static void
sched_balance_group(struct kseq_group *ksg)
{
	struct kseq *kseq;
	struct kseq *high;
	struct kseq *low;
	int load;

	if (ksg->ksg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
		load = kseq->ksq_load;
		if (high == NULL || load > high->ksq_load)
			high = kseq;
		if (low == NULL || load < low->ksq_load)
			low = kseq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct kseq *high, struct kseq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * kseq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->ksq_group == low->ksq_group) {
		transferable = high->ksq_transferable;
		high_load = high->ksq_load;
		low_load = low->ksq_load;
	} else {
		transferable = high->ksq_group->ksg_transferable;
		high_load = high->ksq_group->ksg_load;
		low_load = low->ksq_group->ksg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * kses we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		kseq_move(high, KSEQ_ID(low));
	return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
	struct kseq *kseq;
	struct kseq *to;
	struct kse *ke;

	kseq = from;
	to = KSEQ_CPU(cpu);
	ke = kseq_steal(kseq, 1);
	if (ke == NULL) {
		struct kseq_group *ksg;

		ksg = kseq->ksq_group;
		LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
			if (kseq == from || kseq->ksq_transferable == 0)
				continue;
			ke = kseq_steal(kseq, 1);
			break;
		}
		if (ke == NULL)
			panic("kseq_move: No KSEs available with a "
			    "transferable count of %d\n",
			    ksg->ksg_transferable);
	}
	if (kseq == to)
		return;
	ke->ke_state = KES_THREAD;
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
	kseq_notify(ke, cpu);
}

static int
kseq_idled(struct kseq *kseq)
{
	struct kseq_group *ksg;
	struct kseq *steal;
	struct kse *ke;

	ksg = kseq->ksq_group;
	/*
	 * If we're in a cpu group, try and steal kses from another cpu in
	 * the group before idling.
	 */
	if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
		LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) {
			if (steal == kseq || steal->ksq_transferable == 0)
				continue;
			ke = kseq_steal(steal, 0);
			if (ke == NULL)
				continue;
			ke->ke_state = KES_THREAD;
			kseq_runq_rem(steal, ke);
			kseq_load_rem(steal, ke);
			ke->ke_cpu = PCPU_GET(cpuid);
			ke->ke_flags |= KEF_INTERNAL | KEF_HOLD;
			sched_add(ke->ke_thread, SRQ_YIELDING);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a KSE bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	ksg->ksg_idlemask |= PCPU_GET(cpumask);
	if (ksg->ksg_idlemask != ksg->ksg_cpumask)
		return (1);
	atomic_set_int(&kseq_idle, ksg->ksg_mask);
	return (1);
}

static void
kseq_assign(struct kseq *kseq)
{
	struct kse *nke;
	struct kse *ke;

	do {
		*(volatile struct kse **)&ke = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
	for (; ke != NULL; ke = nke) {
		nke = ke->ke_assign;
		kseq->ksq_group->ksg_load--;
		kseq->ksq_load--;
		ke->ke_flags &= ~KEF_ASSIGNED;
		ke->ke_flags |= KEF_INTERNAL | KEF_HOLD;
		sched_add(ke->ke_thread, SRQ_YIELDING);
	}
}

static void
kseq_notify(struct kse *ke, int cpu)
{
	struct kseq *kseq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	kseq = KSEQ_CPU(cpu);
	/* XXX */
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq_idle & kseq->ksq_group->ksg_mask))
		atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
	kseq->ksq_group->ksg_load++;
	kseq->ksq_load++;
	ke->ke_cpu = cpu;
	ke->ke_flags |= KEF_ASSIGNED;
	prio = ke->ke_thread->td_priority;

	/*
	 * Place a KSE on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned;
	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ke->ke_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct kse *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct kse *ke;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ke, rqh, ke_procq) {
				if (KSE_CAN_MIGRATE(ke))
					return (ke);
			}
		}
	}
	return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq, int stealidle)
{
	struct kse *ke;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 */
	if ((ke = runq_steal(kseq->ksq_next)) != NULL)
		return (ke);
	if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
		return (ke);
	if (stealidle)
		return (runq_steal(&kseq->ksq_idle));
	return (NULL);
}

int
kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
{
	struct kseq_group *nksg;
	struct kseq_group *ksg;
	struct kseq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign kses has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = KSEQ_CPU(ke->ke_cpu);
	nksg = old->ksq_group;
	ksg = kseq->ksq_group;
	if (kseq_idle) {
		if (kseq_idle & nksg->ksg_mask) {
			cpu = ffs(nksg->ksg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "kseq_transfer: %p found old cpu %X "
				    "in idlemask.", ke, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(kseq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "kseq_transfer: %p found %X "
			    "in idlemask.", ke, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->ksq_load < kseq->ksq_load) {
		cpu = ke->ke_cpu + 1;
		CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X "
		    "load less than ours.", ke, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= ksg_maxid; idx++) {
		nksg = KSEQ_GROUP(idx);
		if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) {
			cpu = ffs(nksg->ksg_cpumask);
			CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less "
			    "than ours.", ke, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (ksg->ksg_idlemask) {
		cpu = ffs(ksg->ksg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in "
			    "group.", ke, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ke->ke_runq = NULL;
	kseq_notify(ke, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */

static struct kse *
kseq_choose(struct kseq *kseq)
{
	struct runq *swap;
	struct kse *ke;
	int nice;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ke = runq_choose(kseq->ksq_curr);
		if (ke == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = kseq->ksq_curr;
			kseq->ksq_curr = kseq->ksq_next;
			kseq->ksq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the kse is in a
		 * TIMESHARE kse group and its nice was too far out
		 * of the range that receives slices.
		 */
		nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH &&
		    ke->ke_proc->p_nice != 0)) {
			runq_remove(ke->ke_runq, ke);
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			runq_add(ke->ke_runq, ke, 0);
			continue;
		}
		return (ke);
	}

	return (runq_choose(&kseq->ksq_idle));
}

static void
kseq_setup(struct kseq *kseq)
{
	runq_init(&kseq->ksq_timeshare[0]);
	runq_init(&kseq->ksq_timeshare[1]);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_curr = &kseq->ksq_timeshare[0];
	kseq->ksq_next = &kseq->ksq_timeshare[1];
	kseq->ksq_load = 0;
	kseq->ksq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the kseqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct kseq *ksq;

		ksq = &kseq_cpu[i];
		ksq->ksq_assigned = NULL;
		kseq_setup(&kseq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct kseq_group *ksg;
		struct kseq *ksq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			ksq = &kseq_cpu[cpus];
			ksg = &kseq_groups[cpus];
			/*
			 * Setup a kseq group with one member.
			 */
			ksq->ksq_transferable = 0;
			ksq->ksq_group = ksg;
			ksg->ksg_cpus = 1;
			ksg->ksg_idlemask = 0;
			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			LIST_INIT(&ksg->ksg_members);
			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
			cpus++;
		}
		ksg_maxid = cpus - 1;
	} else {
		struct kseq_group *ksg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			ksg = &kseq_groups[i];
			/*
			 * Initialize the group.
			 */
			ksg->ksg_idlemask = 0;
			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			ksg->ksg_cpus = cg->cg_count;
			ksg->ksg_cpumask = cg->cg_mask;
			LIST_INIT(&ksg->ksg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (ksg->ksg_mask == 0)
						ksg->ksg_mask = 1 << j;
					kseq_cpu[j].ksq_transferable = 0;
					kseq_cpu[j].ksq_group = ksg;
					LIST_INSERT_HEAD(&ksg->ksg_members,
					    &kseq_cpu[j], ksq_siblings);
				}
			}
			if (ksg->ksg_cpus > 1)
				balance_groups = 1;
		}
		ksg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancers so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	kseq_setup(KSEQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	kseq_load_add(KSEQ_SELF(), &kse0);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
	pri += SCHED_PRI_BASE;
	pri += kg->kg_proc->p_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;
	kseq = KSEQ_CPU(ke->ke_cpu);

	if (ke->ke_thread->td_flags & TDF_BORROWING) {
		ke->ke_slice = SCHED_SLICE_MIN;
		return;
	}

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get a minimal slice so that we
	 * quickly notice if it abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the kseg's nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse distance from the last nice ksegrp.
	 *
	 * If the kse is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 ksegs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
	 */
	if (!SCHED_INTERACTIVE(kg)) {
		int nice;

		nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin);
		if (kseq->ksq_load_timeshare == 0 ||
		    kg->kg_proc->p_nice < kseq->ksq_nicemin)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else if (kg->kg_proc->p_nice == 0)
			ke->ke_slice = SCHED_SLICE_MIN;
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_INTERACTIVE;

	return;
}
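
/*
 * Illustrative numbers (assuming hz = 1000 and the usual PRIO_MIN/PRIO_MAX
 * of -20/20): sched_setup() picks slice_min = 10 and slice_max = 142 ticks,
 * so SCHED_SLICE_RANGE is 133 and SCHED_SLICE_NTHRESH is 19.  A kse at the
 * least nice value on the queue receives the full 142 ticks, one that is 10
 * nice values above it receives 142 - (10 * 133) / 19 = 72 ticks, and one
 * more than 19 above the minimum gets no slice at all unless its nice is 0.
 */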

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct ksegrp *kg)
{
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX]
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		kg->kg_runtime /= 2;
		kg->kg_slptime /= 2;
		return;
	}
	kg->kg_runtime = (kg->kg_runtime / 5) * 4;
	kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
	int ratio;
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		kg->kg_runtime /= ratio;
		kg->kg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int div;

	if (kg->kg_runtime > kg->kg_slptime) {
		div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
	} if (kg->kg_slptime > kg->kg_runtime) {
		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
		return (kg->kg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);

}
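
/*
 * For example (approximate): a ksegrp that has slept about three times as
 * long as it has run scores roughly 50 * runtime / slptime, i.e. about 16,
 * which is under SCHED_INTERACT_THRESH and therefore interactive.  One that
 * has run three times as long as it slept scores roughly
 * 50 + (50 - 50 * slptime / runtime), i.e. about 84, well above the
 * threshold, so it is not considered interactive.
 */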

/*
 * Very early in the boot some setup of scheduler-specific
 * parts of proc0 and of some scheduler resources needs to be done.
 * Called from:
 *  proc0_init()
 */
void
schedinit(void)
{
	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	ksegrp0.kg_sched = &kg_sched0;
	thread0.td_sched = &kse0;
	kse0.ke_thread = &thread0;
	kse0.ke_state = KES_THREAD;
	kg_sched0.skg_concurrency = 1;
	kg_sched0.skg_avail_opennings = 0; /* we are already running */
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

static void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
		/*
		 * Shift the tick count out so that the divide doesn't
		 * round away our results.
		 */
		ke->ke_ticks <<= 10;
		ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
			    SCHED_CPU_TICKS;
		ke->ke_ticks >>= 10;
	} else
		ke->ke_ticks = 0;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

void
sched_thread_priority(struct thread *td, u_char prio)
{
	struct kse *ke;

	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
	    curthread->td_proc->p_comm);
	ke = td->td_kse;
	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_priority == prio)
		return;
	if (TD_ON_RUNQ(td)) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  We still call adjustrunqueue below in case kse
		 * needs to fix things up.
		 */
		if (prio < td->td_priority && ke->ke_runq != NULL &&
		    (ke->ke_flags & KEF_ASSIGNED) == 0 &&
		    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
			runq_remove(ke->ke_runq, ke);
			ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
			runq_add(ke->ke_runq, ke, 0);
		}
		/*
		 * Hold this kse on this cpu so that sched_prio() doesn't
		 * cause excessive migration.  We only want migration to
		 * happen as the result of a wakeup.
		 */
		ke->ke_flags |= KEF_HOLD;
		adjustrunqueue(td, prio);
		ke->ke_flags &= ~KEF_HOLD;
	} else
		td->td_priority = prio;
}

/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

	td->td_flags |= TDF_BORROWING;
	sched_thread_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio, the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_ksegrp->kg_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_thread_priority(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}

void
sched_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/*
	 * If the thread is borrowing another thread's priority, don't
	 * ever lower the priority.
	 */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_thread_priority(td, prio);

	/*
	 * If the thread is on a turnstile, then let the turnstile update
	 * its state.
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct kseq *ksq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;
	ksq = KSEQ_SELF();

	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;

	/*
	 * If the KSE has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if (td == PCPU_GET(idlethread)) {
		TD_SET_CAN_RUN(td);
	} else if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
		/* We are ending our run so make our slot available again */
		SLOT_RELEASE(td->td_ksegrp);
		kseq_load_rem(ksq, ke);
		if (TD_IS_RUNNING(td)) {
			/*
			 * Don't allow the thread to migrate
			 * from a preemption.
			 */
			ke->ke_flags |= KEF_HOLD;
			setrunqueue(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
			ke->ke_flags &= ~KEF_HOLD;
		} else if ((td->td_proc->p_flag & P_HADTHREADS) &&
		    (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp))
			/*
			 * We will not be on the run queue.
			 * So we must be sleeping or similar.
			 * Don't use the slot if we will need it
			 * for newtd.
			 */
			slot_fill(td->td_ksegrp);
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread,
		 * then account for it as if it had been added to the
		 * run queue and then chosen.
		 */
		newtd->td_kse->ke_flags |= KEF_DIDRUN;
		newtd->td_kse->ke_runq = ksq->ksq_curr;
		TD_SET_RUNNING(newtd);
		kseq_load_add(KSEQ_SELF(), newtd->td_kse);
	} else
		newtd = choosethread();
	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
		cpu_switch(td, newtd);
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}

	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td;
	struct kseq *kseq;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running KSEs.
	 */
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		if (kg->kg_pri_class == PRI_TIMESHARE) {
			FOREACH_THREAD_IN_GROUP(kg, td) {
				ke = td->td_kse;
				if (ke->ke_runq == NULL)
					continue;
				kseq = KSEQ_CPU(ke->ke_cpu);
				kseq_nice_rem(kseq, p->p_nice);
				kseq_nice_add(kseq, nice);
			}
		}
	}
	p->p_nice = nice;
	FOREACH_KSEGRP_IN_PROC(p, kg) {
		sched_priority(kg);
		FOREACH_THREAD_IN_GROUP(kg, td)
			td->td_flags |= TDF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;
		int hzticks;

		kg = td->td_ksegrp;
		hzticks = (ticks - td->td_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			kg->kg_slptime = SCHED_SLP_RUN_MAX;
			kg->kg_runtime = 1;
		} else {
			kg->kg_slptime += hzticks;
			sched_interact_update(kg);
		}
		sched_priority(kg);
		sched_slice(td->td_kse);
		td->td_slptime = 0;
	}
	setrunqueue(td, SRQ_BORING);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *childtd)
{

	mtx_assert(&sched_lock, MA_OWNED);

	sched_fork_ksegrp(td, childtd->td_ksegrp);
	sched_fork_thread(td, childtd);
}

void
sched_fork_ksegrp(struct thread *td, struct ksegrp *child)
{
	struct ksegrp *kg = td->td_ksegrp;
	mtx_assert(&sched_lock, MA_OWNED);

	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
	child->kg_user_pri = kg->kg_user_pri;
	sched_interact_fork(child);
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct kse *ke;
	struct kse *ke2;

	sched_newthread(child);
	ke = td->td_kse;
	ke2 = child->td_kse;
	ke2->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
        ke2->ke_cpu = ke->ke_cpu;
        ke2->ke_runq = NULL;

        /* Grab our parent's cpu estimation information. */
        ke2->ke_ticks = ke->ke_ticks;
        ke2->ke_ltick = ke->ke_ltick;
        ke2->ke_ftick = ke->ke_ftick;
}

void
sched_class(struct ksegrp *kg, int class)
{
        struct kseq *kseq;
        struct kse *ke;
        struct thread *td;
        int nclass;
        int oclass;

        mtx_assert(&sched_lock, MA_OWNED);
        if (kg->kg_pri_class == class)
                return;

        nclass = PRI_BASE(class);
        oclass = PRI_BASE(kg->kg_pri_class);
        FOREACH_THREAD_IN_GROUP(kg, td) {
                ke = td->td_kse;
                if ((ke->ke_state != KES_ONRUNQ &&
                    ke->ke_state != KES_THREAD) || ke->ke_runq == NULL)
                        continue;
                kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
                /*
                 * On SMP, if we're on the run queue we must adjust the
                 * transferable count because we could be changing to or
                 * from an interrupt class.
                 */
                if (ke->ke_state == KES_ONRUNQ) {
                        if (KSE_CAN_MIGRATE(ke)) {
                                kseq->ksq_transferable--;
                                kseq->ksq_group->ksg_transferable--;
                        }
                        if (KSE_CAN_MIGRATE(ke)) {
                                kseq->ksq_transferable++;
                                kseq->ksq_group->ksg_transferable++;
                        }
                }
#endif
                if (oclass == PRI_TIMESHARE) {
                        kseq->ksq_load_timeshare--;
                        kseq_nice_rem(kseq, kg->kg_proc->p_nice);
                }
                if (nclass == PRI_TIMESHARE) {
                        kseq->ksq_load_timeshare++;
                        kseq_nice_add(kseq, kg->kg_proc->p_nice);
                }
        }

        kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *childtd)
{
        mtx_assert(&sched_lock, MA_OWNED);
        sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd);
        sched_exit_thread(NULL, childtd);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct thread *td)
{
        /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */
        kg->kg_runtime += td->td_ksegrp->kg_runtime;
        sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *childtd)
{
        CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
            childtd, childtd->td_proc->p_comm, childtd->td_priority);
        kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse);
}

void
sched_clock(struct thread *td)
{
        struct kseq *kseq;
        struct ksegrp *kg;
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);
        kseq = KSEQ_SELF();
#ifdef SMP
        if (ticks >= bal_tick)
                sched_balance();
        if (ticks >= gbal_tick && balance_groups)
                sched_balance_groups();
        /*
         * We could have been assigned a non-real-time thread without an
         * IPI.
         */
        if (kseq->ksq_assigned)
                kseq_assign(kseq);      /* Potentially sets NEEDRESCHED */
#endif
        /*
         * sched_setup() apparently happens prior to stathz being set.  We
         * need to resolve the timers earlier in the boot so we can avoid
         * calculating this here.
         */
        if (realstathz == 0) {
                realstathz = stathz ? stathz : hz;
                tickincr = hz / realstathz;
                /*
                 * XXX This does not work for values of stathz that are much
                 * larger than hz.
                 */
                if (tickincr == 0)
                        tickincr = 1;
        }

        ke = td->td_kse;
        kg = ke->ke_ksegrp;

        /* Adjust ticks for pctcpu */
        ke->ke_ticks++;
        ke->ke_ltick = ticks;

        /* Go up to one second beyond our max and then trim back down */
        if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
                sched_pctcpu_update(ke);

        if (td->td_flags & TDF_IDLETD)
                return;
        /*
         * We only run the slicing code for TIMESHARE ksegrps.
         */
        if (kg->kg_pri_class != PRI_TIMESHARE)
                return;
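        /*
         * kg_runtime, like kg_slptime, is kept in ticks scaled by 2^10, so
         * the charge below adds tickincr << 10 per stat clock tick.  For
         * example, with hz == 1000 and stathz == 128, tickincr == 7 and
         * each tick adds 7 << 10 == 7168 to kg_runtime.
         */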
        /*
         * We used a tick; charge it to the ksegrp so that we can compute
         * our interactivity.
         */
        kg->kg_runtime += tickincr << 10;
        sched_interact_update(kg);

        /*
         * We used up one time slice.
         */
        if (--ke->ke_slice > 0)
                return;
        /*
         * We're out of time, recompute priorities and requeue.
         */
        kseq_load_rem(kseq, ke);
        sched_priority(kg);
        sched_slice(ke);
        if (SCHED_CURR(kg, ke))
                ke->ke_runq = kseq->ksq_curr;
        else
                ke->ke_runq = kseq->ksq_next;
        kseq_load_add(kseq, ke);
        td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
        struct kseq *kseq;
        int load;

        load = 1;

        kseq = KSEQ_SELF();
#ifdef SMP
        if (kseq->ksq_assigned) {
                mtx_lock_spin(&sched_lock);
                kseq_assign(kseq);
                mtx_unlock_spin(&sched_lock);
        }
#endif
        if ((curthread->td_flags & TDF_IDLETD) != 0) {
                if (kseq->ksq_load > 0)
                        goto out;
        } else
                /* Discount the load contributed by the running thread. */
                if (kseq->ksq_load - 1 > 0)
                        goto out;
        load = 0;
out:
        return (load);
}

void
sched_userret(struct thread *td)
{
        struct ksegrp *kg;

        KASSERT((td->td_flags & TDF_BORROWING) == 0,
            ("thread with borrowed priority returning to userland"));
        kg = td->td_ksegrp;
        if (td->td_priority != kg->kg_user_pri) {
                mtx_lock_spin(&sched_lock);
                td->td_priority = kg->kg_user_pri;
                td->td_base_pri = kg->kg_user_pri;
                mtx_unlock_spin(&sched_lock);
        }
}

struct kse *
sched_choose(void)
{
        struct kseq *kseq;
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);
        kseq = KSEQ_SELF();
#ifdef SMP
restart:
        if (kseq->ksq_assigned)
                kseq_assign(kseq);
#endif
        ke = kseq_choose(kseq);
        if (ke) {
#ifdef SMP
                if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
                        if (kseq_idled(kseq) == 0)
                                goto restart;
#endif
                kseq_runq_rem(kseq, ke);
                ke->ke_state = KES_THREAD;
                return (ke);
        }
#ifdef SMP
        if (kseq_idled(kseq) == 0)
                goto restart;
#endif
        return (NULL);
}

void
sched_add(struct thread *td, int flags)
{
        struct kseq *kseq;
        struct ksegrp *kg;
        struct kse *ke;
        int preemptive;
        int canmigrate;
        int class;

        CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
            td, td->td_proc->p_comm, td->td_priority, curthread,
            curthread->td_proc->p_comm);
        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        kg = td->td_ksegrp;
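        /*
         * Work out whether the kse may migrate, whether the caller allows
         * preemption (SRQ_YIELDING not set), and its base scheduling class;
         * these decide which run queue it is placed on below and whether
         * another cpu must be notified.
         */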
        canmigrate = 1;
        preemptive = !(flags & SRQ_YIELDING);
        class = PRI_BASE(kg->kg_pri_class);
        kseq = KSEQ_SELF();
        if ((ke->ke_flags & KEF_INTERNAL) == 0)
                SLOT_USE(td->td_ksegrp);
        ke->ke_flags &= ~KEF_INTERNAL;
#ifdef SMP
        if (ke->ke_flags & KEF_ASSIGNED) {
                if (ke->ke_flags & KEF_REMOVED)
                        ke->ke_flags &= ~KEF_REMOVED;
                return;
        }
        canmigrate = KSE_CAN_MIGRATE(ke);
#endif
        KASSERT(ke->ke_state != KES_ONRUNQ,
            ("sched_add: kse %p (%s) already in run queue", ke,
            ke->ke_proc->p_comm));
        KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
            ("sched_add: process swapped out"));
        KASSERT(ke->ke_runq == NULL,
            ("sched_add: KSE %p is still assigned to a run queue", ke));
        switch (class) {
        case PRI_ITHD:
        case PRI_REALTIME:
                ke->ke_runq = kseq->ksq_curr;
                ke->ke_slice = SCHED_SLICE_MAX;
                if (canmigrate)
                        ke->ke_cpu = PCPU_GET(cpuid);
                break;
        case PRI_TIMESHARE:
                if (SCHED_CURR(kg, ke))
                        ke->ke_runq = kseq->ksq_curr;
                else
                        ke->ke_runq = kseq->ksq_next;
                break;
        case PRI_IDLE:
                /*
                 * This is for priority propagation.
                 */
                if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
                        ke->ke_runq = kseq->ksq_curr;
                else
                        ke->ke_runq = &kseq->ksq_idle;
                ke->ke_slice = SCHED_SLICE_MIN;
                break;
        default:
                panic("Unknown pri class.");
                break;
        }
#ifdef SMP
        /*
         * Don't migrate running threads here.  Force the long term balancer
         * to do it.
         */
        if (ke->ke_flags & KEF_HOLD) {
                ke->ke_flags &= ~KEF_HOLD;
                canmigrate = 0;
        }
        /*
         * If this thread is pinned or bound, notify the target cpu.
         */
        if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid)) {
                ke->ke_runq = NULL;
                kseq_notify(ke, ke->ke_cpu);
                return;
        }
        /*
         * If we had been idle, clear our bit in the group and potentially
         * the global bitmap.  If not, see if we should transfer this thread.
         */
        if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
            (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
                /*
                 * Check to see if our group is unidling, and if so, remove it
                 * from the global idle mask.
                 */
                if (kseq->ksq_group->ksg_idlemask ==
                    kseq->ksq_group->ksg_cpumask)
                        atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
                /*
                 * Now remove ourselves from the group specific idle mask.
                 */
                kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
        } else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD)
                if (kseq_transfer(kseq, ke, class))
                        return;
        ke->ke_cpu = PCPU_GET(cpuid);
#endif
        if (td->td_priority < curthread->td_priority &&
            ke->ke_runq == kseq->ksq_curr)
                curthread->td_flags |= TDF_NEEDRESCHED;
        if (preemptive && maybe_preempt(td))
                return;
        ke->ke_state = KES_ONRUNQ;

        kseq_runq_add(kseq, ke, flags);
        kseq_load_add(kseq, ke);
}

void
sched_rem(struct thread *td)
{
        struct kseq *kseq;
        struct kse *ke;

        CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
            td, td->td_proc->p_comm, td->td_priority, curthread,
            curthread->td_proc->p_comm);
        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        SLOT_RELEASE(td->td_ksegrp);
        if (ke->ke_flags & KEF_ASSIGNED) {
                ke->ke_flags |= KEF_REMOVED;
                return;
        }
        KASSERT((ke->ke_state == KES_ONRUNQ),
            ("sched_rem: KSE not on run queue"));

        ke->ke_state = KES_THREAD;
        kseq = KSEQ_CPU(ke->ke_cpu);
        kseq_runq_rem(kseq, ke);
        kseq_load_rem(kseq, ke);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
        fixpt_t pctcpu;
        struct kse *ke;

        pctcpu = 0;
        ke = td->td_kse;
        if (ke == NULL)
                return (0);

        mtx_lock_spin(&sched_lock);
        if (ke->ke_ticks) {
                int rtick;

                /*
                 * Don't update more frequently than twice a second.
                 * Updating more often causes the cpu usage to decay away
                 * too quickly due to rounding errors.
                 */
                if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick ||
                    ke->ke_ltick < (ticks - (hz / 2)))
                        sched_pctcpu_update(ke);
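                /*
                 * ke_ticks counts stat clock ticks charged to this kse over
                 * the pctcpu window, so rtick below approximates stat ticks
                 * per second; dividing by realstathz yields the fraction of
                 * one cpu in FSCALE fixed point:
                 * pctcpu == (FSCALE * rtick) / realstathz.
                 */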
                /* How many rticks per second? */
                rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
                pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
        }

        ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
        mtx_unlock_spin(&sched_lock);

        return (pctcpu);
}

void
sched_bind(struct thread *td, int cpu)
{
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        ke->ke_flags |= KEF_BOUND;
#ifdef SMP
        if (PCPU_GET(cpuid) == cpu)
                return;
        /* sched_rem without the runq_remove */
        ke->ke_state = KES_THREAD;
        kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
        kseq_notify(ke, cpu);
        /* When we return from mi_switch we'll be on the correct cpu. */
        mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread *td)
{
        mtx_assert(&sched_lock, MA_OWNED);
        td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_is_bound(struct thread *td)
{
        mtx_assert(&sched_lock, MA_OWNED);
        return (td->td_kse->ke_flags & KEF_BOUND);
}

int
sched_load(void)
{
#ifdef SMP
        int total;
        int i;

        total = 0;
        for (i = 0; i <= ksg_maxid; i++)
                total += KSEQ_GROUP(i)->ksg_load;
        return (total);
#else
        return (KSEQ_SELF()->ksq_sysload);
#endif
}

int
sched_sizeof_ksegrp(void)
{
        return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
        return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
        return (sizeof(struct thread) + sizeof(struct td_sched));
}
#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"