sched_ule.c revision 147565
1109864Sjeff/*- 2146955Sjeff * Copyright (c) 2002-2005, Jeffrey Roberson <jeff@freebsd.org> 3109864Sjeff * All rights reserved. 4109864Sjeff * 5109864Sjeff * Redistribution and use in source and binary forms, with or without 6109864Sjeff * modification, are permitted provided that the following conditions 7109864Sjeff * are met: 8109864Sjeff * 1. Redistributions of source code must retain the above copyright 9109864Sjeff * notice unmodified, this list of conditions, and the following 10109864Sjeff * disclaimer. 11109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright 12109864Sjeff * notice, this list of conditions and the following disclaimer in the 13109864Sjeff * documentation and/or other materials provided with the distribution. 14109864Sjeff * 15109864Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16109864Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17109864Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18109864Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19109864Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20109864Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21109864Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22109864Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23109864Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24109864Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25109864Sjeff */ 26109864Sjeff 27116182Sobrien#include <sys/cdefs.h> 28116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 147565 2005-06-24 00:16:57Z peter $"); 29116182Sobrien 30147565Speter#include "opt_hwpmc_hooks.h" 31147565Speter#include "opt_sched.h" 32134649Sscottl 33134791Sjulian#define kse td_sched 34134791Sjulian 35109864Sjeff#include <sys/param.h> 36109864Sjeff#include <sys/systm.h> 37131929Smarcel#include <sys/kdb.h> 38109864Sjeff#include <sys/kernel.h> 39109864Sjeff#include <sys/ktr.h> 40109864Sjeff#include <sys/lock.h> 41109864Sjeff#include <sys/mutex.h> 42109864Sjeff#include <sys/proc.h> 43112966Sjeff#include <sys/resource.h> 44122038Sjeff#include <sys/resourcevar.h> 45109864Sjeff#include <sys/sched.h> 46109864Sjeff#include <sys/smp.h> 47109864Sjeff#include <sys/sx.h> 48109864Sjeff#include <sys/sysctl.h> 49109864Sjeff#include <sys/sysproto.h> 50139453Sjhb#include <sys/turnstile.h> 51109864Sjeff#include <sys/vmmeter.h> 52109864Sjeff#ifdef KTRACE 53109864Sjeff#include <sys/uio.h> 54109864Sjeff#include <sys/ktrace.h> 55109864Sjeff#endif 56109864Sjeff 57145256Sjkoshy#ifdef HWPMC_HOOKS 58145256Sjkoshy#include <sys/pmckern.h> 59145256Sjkoshy#endif 60145256Sjkoshy 61109864Sjeff#include <machine/cpu.h> 62121790Sjeff#include <machine/smp.h> 63109864Sjeff 64109864Sjeff/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 65109864Sjeff/* XXX This is bogus compatability crap for ps */ 66109864Sjeffstatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 67109864SjeffSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 68109864Sjeff 69109864Sjeffstatic void sched_setup(void *dummy); 70109864SjeffSYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 71109864Sjeff 72132589Sscottlstatic SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 73113357Sjeff 
74132589SscottlSYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 75132589Sscottl "Scheduler name"); 76130881Sscottl 77113357Sjeffstatic int slice_min = 1; 78113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 79113357Sjeff 80116365Sjeffstatic int slice_max = 10; 81113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 82113357Sjeff 83111857Sjeffint realstathz; 84113357Sjeffint tickincr = 1; 85111857Sjeff 86109864Sjeff/* 87146954Sjeff * The following datastructures are allocated within their parent structure 88146954Sjeff * but are scheduler specific. 89134791Sjulian */ 90146954Sjeff/* 91146954Sjeff * The schedulable entity that can be given a context to run. A process may 92146954Sjeff * have several of these. 93146954Sjeff */ 94134791Sjulianstruct kse { 95134791Sjulian TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ 96134791Sjulian int ke_flags; /* (j) KEF_* flags. */ 97134791Sjulian struct thread *ke_thread; /* (*) Active associated thread. */ 98134791Sjulian fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ 99134791Sjulian char ke_rqindex; /* (j) Run queue index. */ 100134791Sjulian enum { 101134791Sjulian KES_THREAD = 0x0, /* slaved to thread state */ 102134791Sjulian KES_ONRUNQ 103134791Sjulian } ke_state; /* (j) thread sched specific status. */ 104134791Sjulian int ke_slptime; 105134791Sjulian int ke_slice; 106134791Sjulian struct runq *ke_runq; 107134791Sjulian u_char ke_cpu; /* CPU that we have affinity for. */ 108134791Sjulian /* The following variables are only used for pctcpu calculation */ 109134791Sjulian int ke_ltick; /* Last tick that we were running on */ 110134791Sjulian int ke_ftick; /* First tick that we were running on */ 111134791Sjulian int ke_ticks; /* Tick count */ 112134791Sjulian 113134791Sjulian}; 114146954Sjeff#define td_kse td_sched 115134791Sjulian#define td_slptime td_kse->ke_slptime 116134791Sjulian#define ke_proc ke_thread->td_proc 117134791Sjulian#define ke_ksegrp ke_thread->td_ksegrp 118146954Sjeff#define ke_assign ke_procq.tqe_next 119134791Sjulian/* flags kept in ke_flags */ 120139334Sjeff#define KEF_ASSIGNED 0x0001 /* Thread is being migrated. */ 121139334Sjeff#define KEF_BOUND 0x0002 /* Thread can not migrate. */ 122139334Sjeff#define KEF_XFERABLE 0x0004 /* Thread was added as transferable. */ 123139334Sjeff#define KEF_HOLD 0x0008 /* Thread is temporarily bound. */ 124139334Sjeff#define KEF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ 125146954Sjeff#define KEF_INTERNAL 0x0020 /* Thread added due to migration. */ 126146954Sjeff#define KEF_DIDRUN 0x02000 /* Thread actually ran. */ 127146954Sjeff#define KEF_EXIT 0x04000 /* Thread is being killed. */ 128121790Sjeff 129109864Sjeffstruct kg_sched { 130134791Sjulian struct thread *skg_last_assigned; /* (j) Last thread assigned to */ 131134791Sjulian /* the system scheduler */ 132110645Sjeff int skg_slptime; /* Number of ticks we vol. 
slept */ 133110645Sjeff int skg_runtime; /* Number of ticks we were running */ 134134791Sjulian int skg_avail_opennings; /* (j) Num unfilled slots in group.*/ 135134791Sjulian int skg_concurrency; /* (j) Num threads requested in group.*/ 136109864Sjeff}; 137134791Sjulian#define kg_last_assigned kg_sched->skg_last_assigned 138134791Sjulian#define kg_avail_opennings kg_sched->skg_avail_opennings 139134791Sjulian#define kg_concurrency kg_sched->skg_concurrency 140134791Sjulian#define kg_runtime kg_sched->skg_runtime 141134791Sjulian#define kg_slptime kg_sched->skg_slptime 142109864Sjeff 143146954Sjeff#define SLOT_RELEASE(kg) (kg)->kg_avail_opennings++ 144146954Sjeff#define SLOT_USE(kg) (kg)->kg_avail_opennings-- 145109864Sjeff 146134791Sjulianstatic struct kse kse0; 147134791Sjulianstatic struct kg_sched kg_sched0; 148109864Sjeff 149109864Sjeff/* 150116642Sjeff * The priority is primarily determined by the interactivity score. Thus, we 151116642Sjeff * give lower(better) priorities to kse groups that use less CPU. The nice 152116642Sjeff * value is then directly added to this to allow nice to have some effect 153116642Sjeff * on latency. 154111857Sjeff * 155111857Sjeff * PRI_RANGE: Total priority range for timeshare threads. 156116642Sjeff * PRI_NRESV: Number of nice values. 157111857Sjeff * PRI_BASE: The start of the dynamic range. 158109864Sjeff */ 159111857Sjeff#define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 160121869Sjeff#define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 161121869Sjeff#define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 162116642Sjeff#define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 163113357Sjeff#define SCHED_PRI_INTERACT(score) \ 164116642Sjeff ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 165109864Sjeff 166109864Sjeff/* 167111857Sjeff * These determine the interactivity of a process. 168109864Sjeff * 169110645Sjeff * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 170110645Sjeff * before throttling back. 171121868Sjeff * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 172116365Sjeff * INTERACT_MAX: Maximum interactivity value. Smaller is better. 173111857Sjeff * INTERACT_THRESH: Threshhold for placement on the current runq. 174109864Sjeff */ 175121126Sjeff#define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 176121868Sjeff#define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 177116365Sjeff#define SCHED_INTERACT_MAX (100) 178116365Sjeff#define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 179121126Sjeff#define SCHED_INTERACT_THRESH (30) 180111857Sjeff 181109864Sjeff/* 182109864Sjeff * These parameters and macros determine the size of the time slice that is 183109864Sjeff * granted to each thread. 184109864Sjeff * 185109864Sjeff * SLICE_MIN: Minimum time slice granted, in units of ticks. 186109864Sjeff * SLICE_MAX: Maximum time slice granted. 187109864Sjeff * SLICE_RANGE: Range of available time slices scaled by hz. 188112966Sjeff * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 189112966Sjeff * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 190121871Sjeff * SLICE_NTHRESH: The nice cutoff point for slice assignment. 
191109864Sjeff */ 192113357Sjeff#define SCHED_SLICE_MIN (slice_min) 193113357Sjeff#define SCHED_SLICE_MAX (slice_max) 194125299Sjeff#define SCHED_SLICE_INTERACTIVE (slice_max) 195121871Sjeff#define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 196111857Sjeff#define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 197109864Sjeff#define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 198112966Sjeff#define SCHED_SLICE_NICE(nice) \ 199121871Sjeff (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 200109864Sjeff 201109864Sjeff/* 202134791Sjulian * This macro determines whether or not the thread belongs on the current or 203109864Sjeff * next run queue. 204109864Sjeff */ 205113357Sjeff#define SCHED_INTERACTIVE(kg) \ 206113357Sjeff (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 207113417Sjeff#define SCHED_CURR(kg, ke) \ 208139453Sjhb ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg)) 209109864Sjeff 210109864Sjeff/* 211109864Sjeff * Cpu percentage computation macros and defines. 212109864Sjeff * 213109864Sjeff * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 214109864Sjeff * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 215109864Sjeff */ 216109864Sjeff 217112971Sjeff#define SCHED_CPU_TIME 10 218109864Sjeff#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 219109864Sjeff 220109864Sjeff/* 221113357Sjeff * kseq - per processor runqs and statistics. 222109864Sjeff */ 223109864Sjeffstruct kseq { 224113357Sjeff struct runq ksq_idle; /* Queue of IDLE threads. */ 225113357Sjeff struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 226113357Sjeff struct runq *ksq_next; /* Next timeshare queue. */ 227113357Sjeff struct runq *ksq_curr; /* Current queue. */ 228121896Sjeff int ksq_load_timeshare; /* Load for timeshare. */ 229113357Sjeff int ksq_load; /* Aggregate load. */ 230121869Sjeff short ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */ 231113357Sjeff short ksq_nicemin; /* Least nice. */ 232110267Sjeff#ifdef SMP 233123433Sjeff int ksq_transferable; 234123433Sjeff LIST_ENTRY(kseq) ksq_siblings; /* Next in kseq group. */ 235123433Sjeff struct kseq_group *ksq_group; /* Our processor group. */ 236123433Sjeff volatile struct kse *ksq_assigned; /* assigned by another CPU. */ 237125289Sjeff#else 238125289Sjeff int ksq_sysload; /* For loadavg, !ITHD load. */ 239110267Sjeff#endif 240109864Sjeff}; 241109864Sjeff 242123433Sjeff#ifdef SMP 243109864Sjeff/* 244123433Sjeff * kseq groups are groups of processors which can cheaply share threads. When 245123433Sjeff * one processor in the group goes idle it will check the runqs of the other 246123433Sjeff * processors in its group prior to halting and waiting for an interrupt. 247123433Sjeff * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 248123433Sjeff * In a numa environment we'd want an idle bitmap per group and a two tiered 249123433Sjeff * load balancer. 250123433Sjeff */ 251123433Sjeffstruct kseq_group { 252123433Sjeff int ksg_cpus; /* Count of CPUs in this kseq group. */ 253127498Smarcel cpumask_t ksg_cpumask; /* Mask of cpus in this group. */ 254127498Smarcel cpumask_t ksg_idlemask; /* Idle cpus in this group. */ 255127498Smarcel cpumask_t ksg_mask; /* Bit mask for first cpu. */ 256123487Sjeff int ksg_load; /* Total load of this group. */ 257123433Sjeff int ksg_transferable; /* Transferable load of this group. */ 258123433Sjeff LIST_HEAD(, kseq) ksg_members; /* Linked list of all members. 
*/ 259123433Sjeff}; 260123433Sjeff#endif 261123433Sjeff 262123433Sjeff/* 263109864Sjeff * One kse queue per processor. 264109864Sjeff */ 265110028Sjeff#ifdef SMP 266127498Smarcelstatic cpumask_t kseq_idle; 267123487Sjeffstatic int ksg_maxid; 268121790Sjeffstatic struct kseq kseq_cpu[MAXCPU]; 269123433Sjeffstatic struct kseq_group kseq_groups[MAXCPU]; 270129982Sjeffstatic int bal_tick; 271129982Sjeffstatic int gbal_tick; 272139334Sjeffstatic int balance_groups; 273129982Sjeff 274123433Sjeff#define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 275123433Sjeff#define KSEQ_CPU(x) (&kseq_cpu[(x)]) 276123487Sjeff#define KSEQ_ID(x) ((x) - kseq_cpu) 277123487Sjeff#define KSEQ_GROUP(x) (&kseq_groups[(x)]) 278123433Sjeff#else /* !SMP */ 279121790Sjeffstatic struct kseq kseq_cpu; 280129982Sjeff 281110028Sjeff#define KSEQ_SELF() (&kseq_cpu) 282110028Sjeff#define KSEQ_CPU(x) (&kseq_cpu) 283110028Sjeff#endif 284109864Sjeff 285146954Sjeffstatic void slot_fill(struct ksegrp *); 286134791Sjulianstatic struct kse *sched_choose(void); /* XXX Should be thread * */ 287146954Sjeffstatic void sched_slice(struct kse *); 288146954Sjeffstatic void sched_priority(struct ksegrp *); 289146954Sjeffstatic void sched_thread_priority(struct thread *, u_char); 290146954Sjeffstatic int sched_interact_score(struct ksegrp *); 291146954Sjeffstatic void sched_interact_update(struct ksegrp *); 292146954Sjeffstatic void sched_interact_fork(struct ksegrp *); 293146954Sjeffstatic void sched_pctcpu_update(struct kse *); 294109864Sjeff 295110267Sjeff/* Operations on per processor queues */ 296146954Sjeffstatic struct kse * kseq_choose(struct kseq *); 297146954Sjeffstatic void kseq_setup(struct kseq *); 298146954Sjeffstatic void kseq_load_add(struct kseq *, struct kse *); 299146954Sjeffstatic void kseq_load_rem(struct kseq *, struct kse *); 300146954Sjeffstatic __inline void kseq_runq_add(struct kseq *, struct kse *, int); 301146954Sjeffstatic __inline void kseq_runq_rem(struct kseq *, struct kse *); 302146954Sjeffstatic void kseq_nice_add(struct kseq *, int); 303146954Sjeffstatic void kseq_nice_rem(struct kseq *, int); 304113660Sjeffvoid kseq_print(int cpu); 305110267Sjeff#ifdef SMP 306146954Sjeffstatic int kseq_transfer(struct kseq *, struct kse *, int); 307146954Sjeffstatic struct kse *runq_steal(struct runq *); 308129982Sjeffstatic void sched_balance(void); 309129982Sjeffstatic void sched_balance_groups(void); 310146954Sjeffstatic void sched_balance_group(struct kseq_group *); 311146954Sjeffstatic void sched_balance_pair(struct kseq *, struct kseq *); 312146954Sjeffstatic void kseq_move(struct kseq *, int); 313146954Sjeffstatic int kseq_idled(struct kseq *); 314146954Sjeffstatic void kseq_notify(struct kse *, int); 315121790Sjeffstatic void kseq_assign(struct kseq *); 316146954Sjeffstatic struct kse *kseq_steal(struct kseq *, int); 317139334Sjeff#define KSE_CAN_MIGRATE(ke) \ 318135076Sscottl ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 319121790Sjeff#endif 320110028Sjeff 321113357Sjeffvoid 322113660Sjeffkseq_print(int cpu) 323110267Sjeff{ 324113660Sjeff struct kseq *kseq; 325113357Sjeff int i; 326112994Sjeff 327113660Sjeff kseq = KSEQ_CPU(cpu); 328112994Sjeff 329113357Sjeff printf("kseq:\n"); 330113357Sjeff printf("\tload: %d\n", kseq->ksq_load); 331122744Sjeff printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 332121896Sjeff#ifdef SMP 333123433Sjeff printf("\tload transferable: %d\n", kseq->ksq_transferable); 334121896Sjeff#endif 335113357Sjeff printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 
336113357Sjeff printf("\tnice counts:\n"); 337121869Sjeff for (i = 0; i < SCHED_PRI_NRESV; i++) 338113357Sjeff if (kseq->ksq_nice[i]) 339113357Sjeff printf("\t\t%d = %d\n", 340113357Sjeff i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 341113357Sjeff} 342112994Sjeff 343122744Sjeffstatic __inline void 344139334Sjeffkseq_runq_add(struct kseq *kseq, struct kse *ke, int flags) 345122744Sjeff{ 346122744Sjeff#ifdef SMP 347139334Sjeff if (KSE_CAN_MIGRATE(ke)) { 348123433Sjeff kseq->ksq_transferable++; 349123433Sjeff kseq->ksq_group->ksg_transferable++; 350133427Sjeff ke->ke_flags |= KEF_XFERABLE; 351123433Sjeff } 352122744Sjeff#endif 353139334Sjeff runq_add(ke->ke_runq, ke, flags); 354122744Sjeff} 355122744Sjeff 356122744Sjeffstatic __inline void 357122744Sjeffkseq_runq_rem(struct kseq *kseq, struct kse *ke) 358122744Sjeff{ 359122744Sjeff#ifdef SMP 360133427Sjeff if (ke->ke_flags & KEF_XFERABLE) { 361123433Sjeff kseq->ksq_transferable--; 362123433Sjeff kseq->ksq_group->ksg_transferable--; 363133427Sjeff ke->ke_flags &= ~KEF_XFERABLE; 364123433Sjeff } 365122744Sjeff#endif 366122744Sjeff runq_remove(ke->ke_runq, ke); 367122744Sjeff} 368122744Sjeff 369113357Sjeffstatic void 370122744Sjeffkseq_load_add(struct kseq *kseq, struct kse *ke) 371113357Sjeff{ 372121896Sjeff int class; 373115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 374121896Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 375121896Sjeff if (class == PRI_TIMESHARE) 376121896Sjeff kseq->ksq_load_timeshare++; 377113357Sjeff kseq->ksq_load++; 378139316Sjeff CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 379128563Sobrien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 380123487Sjeff#ifdef SMP 381123487Sjeff kseq->ksq_group->ksg_load++; 382125289Sjeff#else 383125289Sjeff kseq->ksq_sysload++; 384123487Sjeff#endif 385113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 386130551Sjulian kseq_nice_add(kseq, ke->ke_proc->p_nice); 387110267Sjeff} 388113357Sjeff 389112994Sjeffstatic void 390122744Sjeffkseq_load_rem(struct kseq *kseq, struct kse *ke) 391110267Sjeff{ 392121896Sjeff int class; 393115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 394121896Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 395121896Sjeff if (class == PRI_TIMESHARE) 396121896Sjeff kseq->ksq_load_timeshare--; 397128563Sobrien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 398123487Sjeff#ifdef SMP 399123487Sjeff kseq->ksq_group->ksg_load--; 400125289Sjeff#else 401125289Sjeff kseq->ksq_sysload--; 402123487Sjeff#endif 403113357Sjeff kseq->ksq_load--; 404139316Sjeff CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 405113357Sjeff ke->ke_runq = NULL; 406113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 407130551Sjulian kseq_nice_rem(kseq, ke->ke_proc->p_nice); 408110267Sjeff} 409110267Sjeff 410113357Sjeffstatic void 411113357Sjeffkseq_nice_add(struct kseq *kseq, int nice) 412110267Sjeff{ 413115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 414113357Sjeff /* Normalize to zero. */ 415113357Sjeff kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 416121896Sjeff if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 417113357Sjeff kseq->ksq_nicemin = nice; 418110267Sjeff} 419110267Sjeff 420113357Sjeffstatic void 421113357Sjeffkseq_nice_rem(struct kseq *kseq, int nice) 422110267Sjeff{ 423113357Sjeff int n; 424113357Sjeff 425115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 426113357Sjeff /* Normalize to zero. 
*/ 427113357Sjeff n = nice + SCHED_PRI_NHALF; 428113357Sjeff kseq->ksq_nice[n]--; 429113357Sjeff KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 430113357Sjeff 431113357Sjeff /* 432113357Sjeff * If this wasn't the smallest nice value or there are more in 433113357Sjeff * this bucket we can just return. Otherwise we have to recalculate 434113357Sjeff * the smallest nice. 435113357Sjeff */ 436113357Sjeff if (nice != kseq->ksq_nicemin || 437113357Sjeff kseq->ksq_nice[n] != 0 || 438121896Sjeff kseq->ksq_load_timeshare == 0) 439113357Sjeff return; 440113357Sjeff 441121869Sjeff for (; n < SCHED_PRI_NRESV; n++) 442113357Sjeff if (kseq->ksq_nice[n]) { 443113357Sjeff kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 444113357Sjeff return; 445113357Sjeff } 446110267Sjeff} 447110267Sjeff 448113357Sjeff#ifdef SMP 449116069Sjeff/* 450122744Sjeff * sched_balance is a simple CPU load balancing algorithm. It operates by 451116069Sjeff * finding the least loaded and most loaded cpu and equalizing their load 452116069Sjeff * by migrating some processes. 453116069Sjeff * 454116069Sjeff * Dealing only with two CPUs at a time has two advantages. Firstly, most 455116069Sjeff * installations will only have 2 cpus. Secondly, load balancing too much at 456116069Sjeff * once can have an unpleasant effect on the system. The scheduler rarely has 457116069Sjeff * enough information to make perfect decisions. So this algorithm chooses 458116069Sjeff * algorithm simplicity and more gradual effects on load in larger systems. 459116069Sjeff * 460116069Sjeff * It could be improved by considering the priorities and slices assigned to 461116069Sjeff * each task prior to balancing them. There are many pathological cases with 462116069Sjeff * any approach and so the semi random algorithm below may work as well as any. 463116069Sjeff * 464116069Sjeff */ 465121790Sjeffstatic void 466129982Sjeffsched_balance(void) 467116069Sjeff{ 468123487Sjeff struct kseq_group *high; 469123487Sjeff struct kseq_group *low; 470123487Sjeff struct kseq_group *ksg; 471123487Sjeff int cnt; 472123487Sjeff int i; 473123487Sjeff 474139334Sjeff bal_tick = ticks + (random() % (hz * 2)); 475123487Sjeff if (smp_started == 0) 476139334Sjeff return; 477123487Sjeff low = high = NULL; 478123487Sjeff i = random() % (ksg_maxid + 1); 479123487Sjeff for (cnt = 0; cnt <= ksg_maxid; cnt++) { 480123487Sjeff ksg = KSEQ_GROUP(i); 481123487Sjeff /* 482123487Sjeff * Find the CPU with the highest load that has some 483123487Sjeff * threads to transfer. 
484123487Sjeff */ 485123487Sjeff if ((high == NULL || ksg->ksg_load > high->ksg_load) 486123487Sjeff && ksg->ksg_transferable) 487123487Sjeff high = ksg; 488123487Sjeff if (low == NULL || ksg->ksg_load < low->ksg_load) 489123487Sjeff low = ksg; 490123487Sjeff if (++i > ksg_maxid) 491123487Sjeff i = 0; 492123487Sjeff } 493123487Sjeff if (low != NULL && high != NULL && high != low) 494123487Sjeff sched_balance_pair(LIST_FIRST(&high->ksg_members), 495123487Sjeff LIST_FIRST(&low->ksg_members)); 496123487Sjeff} 497123487Sjeff 498123487Sjeffstatic void 499129982Sjeffsched_balance_groups(void) 500123487Sjeff{ 501123487Sjeff int i; 502123487Sjeff 503139334Sjeff gbal_tick = ticks + (random() % (hz * 2)); 504129982Sjeff mtx_assert(&sched_lock, MA_OWNED); 505123487Sjeff if (smp_started) 506123487Sjeff for (i = 0; i <= ksg_maxid; i++) 507123487Sjeff sched_balance_group(KSEQ_GROUP(i)); 508123487Sjeff} 509123487Sjeff 510123487Sjeffstatic void 511123487Sjeffsched_balance_group(struct kseq_group *ksg) 512123487Sjeff{ 513116069Sjeff struct kseq *kseq; 514123487Sjeff struct kseq *high; 515123487Sjeff struct kseq *low; 516123487Sjeff int load; 517123487Sjeff 518123487Sjeff if (ksg->ksg_transferable == 0) 519123487Sjeff return; 520123487Sjeff low = NULL; 521123487Sjeff high = NULL; 522123487Sjeff LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 523123487Sjeff load = kseq->ksq_load; 524123487Sjeff if (high == NULL || load > high->ksq_load) 525123487Sjeff high = kseq; 526123487Sjeff if (low == NULL || load < low->ksq_load) 527123487Sjeff low = kseq; 528123487Sjeff } 529123487Sjeff if (high != NULL && low != NULL && high != low) 530123487Sjeff sched_balance_pair(high, low); 531123487Sjeff} 532123487Sjeff 533123487Sjeffstatic void 534123487Sjeffsched_balance_pair(struct kseq *high, struct kseq *low) 535123487Sjeff{ 536123433Sjeff int transferable; 537116069Sjeff int high_load; 538116069Sjeff int low_load; 539116069Sjeff int move; 540116069Sjeff int diff; 541116069Sjeff int i; 542116069Sjeff 543116069Sjeff /* 544123433Sjeff * If we're transfering within a group we have to use this specific 545123433Sjeff * kseq's transferable count, otherwise we can steal from other members 546123433Sjeff * of the group. 547123433Sjeff */ 548123487Sjeff if (high->ksq_group == low->ksq_group) { 549123487Sjeff transferable = high->ksq_transferable; 550123487Sjeff high_load = high->ksq_load; 551123487Sjeff low_load = low->ksq_load; 552123487Sjeff } else { 553123487Sjeff transferable = high->ksq_group->ksg_transferable; 554123487Sjeff high_load = high->ksq_group->ksg_load; 555123487Sjeff low_load = low->ksq_group->ksg_load; 556123487Sjeff } 557123433Sjeff if (transferable == 0) 558123487Sjeff return; 559123433Sjeff /* 560122744Sjeff * Determine what the imbalance is and then adjust that to how many 561123433Sjeff * kses we actually have to give up (transferable). 
562122744Sjeff */ 563123487Sjeff diff = high_load - low_load; 564116069Sjeff move = diff / 2; 565116069Sjeff if (diff & 0x1) 566116069Sjeff move++; 567123433Sjeff move = min(move, transferable); 568116069Sjeff for (i = 0; i < move; i++) 569123487Sjeff kseq_move(high, KSEQ_ID(low)); 570116069Sjeff return; 571116069Sjeff} 572116069Sjeff 573121790Sjeffstatic void 574116069Sjeffkseq_move(struct kseq *from, int cpu) 575116069Sjeff{ 576123433Sjeff struct kseq *kseq; 577123433Sjeff struct kseq *to; 578116069Sjeff struct kse *ke; 579116069Sjeff 580123433Sjeff kseq = from; 581123433Sjeff to = KSEQ_CPU(cpu); 582123433Sjeff ke = kseq_steal(kseq, 1); 583123433Sjeff if (ke == NULL) { 584123433Sjeff struct kseq_group *ksg; 585123433Sjeff 586123433Sjeff ksg = kseq->ksq_group; 587123433Sjeff LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 588123433Sjeff if (kseq == from || kseq->ksq_transferable == 0) 589123433Sjeff continue; 590123433Sjeff ke = kseq_steal(kseq, 1); 591123433Sjeff break; 592123433Sjeff } 593123433Sjeff if (ke == NULL) 594123433Sjeff panic("kseq_move: No KSEs available with a " 595123433Sjeff "transferable count of %d\n", 596123433Sjeff ksg->ksg_transferable); 597123433Sjeff } 598123433Sjeff if (kseq == to) 599123433Sjeff return; 600116069Sjeff ke->ke_state = KES_THREAD; 601123433Sjeff kseq_runq_rem(kseq, ke); 602123433Sjeff kseq_load_rem(kseq, ke); 603121923Sjeff kseq_notify(ke, cpu); 604116069Sjeff} 605110267Sjeff 606123433Sjeffstatic int 607123433Sjeffkseq_idled(struct kseq *kseq) 608121790Sjeff{ 609123433Sjeff struct kseq_group *ksg; 610123433Sjeff struct kseq *steal; 611123433Sjeff struct kse *ke; 612123433Sjeff 613123433Sjeff ksg = kseq->ksq_group; 614123433Sjeff /* 615123433Sjeff * If we're in a cpu group, try and steal kses from another cpu in 616123433Sjeff * the group before idling. 617123433Sjeff */ 618123433Sjeff if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 619123433Sjeff LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 620123433Sjeff if (steal == kseq || steal->ksq_transferable == 0) 621123433Sjeff continue; 622123433Sjeff ke = kseq_steal(steal, 0); 623123433Sjeff if (ke == NULL) 624123433Sjeff continue; 625123433Sjeff ke->ke_state = KES_THREAD; 626123433Sjeff kseq_runq_rem(steal, ke); 627123433Sjeff kseq_load_rem(steal, ke); 628123433Sjeff ke->ke_cpu = PCPU_GET(cpuid); 629139334Sjeff ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 630139334Sjeff sched_add(ke->ke_thread, SRQ_YIELDING); 631123433Sjeff return (0); 632123433Sjeff } 633123433Sjeff } 634123433Sjeff /* 635123433Sjeff * We only set the idled bit when all of the cpus in the group are 636123433Sjeff * idle. Otherwise we could get into a situation where a KSE bounces 637123433Sjeff * back and forth between two idle cores on seperate physical CPUs. 
638123433Sjeff */ 639123433Sjeff ksg->ksg_idlemask |= PCPU_GET(cpumask); 640123433Sjeff if (ksg->ksg_idlemask != ksg->ksg_cpumask) 641123433Sjeff return (1); 642123433Sjeff atomic_set_int(&kseq_idle, ksg->ksg_mask); 643123433Sjeff return (1); 644121790Sjeff} 645121790Sjeff 646121790Sjeffstatic void 647121790Sjeffkseq_assign(struct kseq *kseq) 648121790Sjeff{ 649121790Sjeff struct kse *nke; 650121790Sjeff struct kse *ke; 651121790Sjeff 652121790Sjeff do { 653132776Skan *(volatile struct kse **)&ke = kseq->ksq_assigned; 654121790Sjeff } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 655121790Sjeff for (; ke != NULL; ke = nke) { 656121790Sjeff nke = ke->ke_assign; 657139334Sjeff kseq->ksq_group->ksg_load--; 658139334Sjeff kseq->ksq_load--; 659121790Sjeff ke->ke_flags &= ~KEF_ASSIGNED; 660139334Sjeff ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 661139334Sjeff sched_add(ke->ke_thread, SRQ_YIELDING); 662121790Sjeff } 663121790Sjeff} 664121790Sjeff 665121790Sjeffstatic void 666121790Sjeffkseq_notify(struct kse *ke, int cpu) 667121790Sjeff{ 668121790Sjeff struct kseq *kseq; 669121790Sjeff struct thread *td; 670121790Sjeff struct pcpu *pcpu; 671139334Sjeff int class; 672133427Sjeff int prio; 673121790Sjeff 674139334Sjeff kseq = KSEQ_CPU(cpu); 675139334Sjeff /* XXX */ 676139334Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 677139334Sjeff if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 678139334Sjeff (kseq_idle & kseq->ksq_group->ksg_mask)) 679139334Sjeff atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 680139334Sjeff kseq->ksq_group->ksg_load++; 681139334Sjeff kseq->ksq_load++; 682123529Sjeff ke->ke_cpu = cpu; 683121790Sjeff ke->ke_flags |= KEF_ASSIGNED; 684133427Sjeff prio = ke->ke_thread->td_priority; 685121790Sjeff 686121790Sjeff /* 687121790Sjeff * Place a KSE on another cpu's queue and force a resched. 688121790Sjeff */ 689121790Sjeff do { 690132776Skan *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; 691121790Sjeff } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 692133427Sjeff /* 693133427Sjeff * Without sched_lock we could lose a race where we set NEEDRESCHED 694133427Sjeff * on a thread that is switched out before the IPI is delivered. This 695133427Sjeff * would lead us to miss the resched. This will be a problem once 696133427Sjeff * sched_lock is pushed down. 
697133427Sjeff */ 698121790Sjeff pcpu = pcpu_find(cpu); 699121790Sjeff td = pcpu->pc_curthread; 700121790Sjeff if (ke->ke_thread->td_priority < td->td_priority || 701121790Sjeff td == pcpu->pc_idlethread) { 702121790Sjeff td->td_flags |= TDF_NEEDRESCHED; 703121790Sjeff ipi_selected(1 << cpu, IPI_AST); 704121790Sjeff } 705121790Sjeff} 706121790Sjeff 707121790Sjeffstatic struct kse * 708121790Sjeffrunq_steal(struct runq *rq) 709121790Sjeff{ 710121790Sjeff struct rqhead *rqh; 711121790Sjeff struct rqbits *rqb; 712121790Sjeff struct kse *ke; 713121790Sjeff int word; 714121790Sjeff int bit; 715121790Sjeff 716121790Sjeff mtx_assert(&sched_lock, MA_OWNED); 717121790Sjeff rqb = &rq->rq_status; 718121790Sjeff for (word = 0; word < RQB_LEN; word++) { 719121790Sjeff if (rqb->rqb_bits[word] == 0) 720121790Sjeff continue; 721121790Sjeff for (bit = 0; bit < RQB_BPW; bit++) { 722123231Speter if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 723121790Sjeff continue; 724121790Sjeff rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 725121790Sjeff TAILQ_FOREACH(ke, rqh, ke_procq) { 726139334Sjeff if (KSE_CAN_MIGRATE(ke)) 727121790Sjeff return (ke); 728121790Sjeff } 729121790Sjeff } 730121790Sjeff } 731121790Sjeff return (NULL); 732121790Sjeff} 733121790Sjeff 734121790Sjeffstatic struct kse * 735123433Sjeffkseq_steal(struct kseq *kseq, int stealidle) 736121790Sjeff{ 737121790Sjeff struct kse *ke; 738121790Sjeff 739123433Sjeff /* 740123433Sjeff * Steal from next first to try to get a non-interactive task that 741123433Sjeff * may not have run for a while. 742123433Sjeff */ 743123433Sjeff if ((ke = runq_steal(kseq->ksq_next)) != NULL) 744123433Sjeff return (ke); 745121790Sjeff if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 746121790Sjeff return (ke); 747123433Sjeff if (stealidle) 748123433Sjeff return (runq_steal(&kseq->ksq_idle)); 749123433Sjeff return (NULL); 750121790Sjeff} 751123433Sjeff 752123433Sjeffint 753123433Sjeffkseq_transfer(struct kseq *kseq, struct kse *ke, int class) 754123433Sjeff{ 755139334Sjeff struct kseq_group *nksg; 756123433Sjeff struct kseq_group *ksg; 757139334Sjeff struct kseq *old; 758123433Sjeff int cpu; 759139334Sjeff int idx; 760123433Sjeff 761123685Sjeff if (smp_started == 0) 762123685Sjeff return (0); 763123433Sjeff cpu = 0; 764123433Sjeff /* 765133427Sjeff * If our load exceeds a certain threshold we should attempt to 766133427Sjeff * reassign this thread. The first candidate is the cpu that 767133427Sjeff * originally ran the thread. If it is idle, assign it there, 768133427Sjeff * otherwise, pick an idle cpu. 769133427Sjeff * 770133427Sjeff * The threshold at which we start to reassign kses has a large impact 771123685Sjeff * on the overall performance of the system. Tuned too high and 772123685Sjeff * some CPUs may idle. Too low and there will be excess migration 773128055Scognet * and context switches. 774123685Sjeff */ 775139334Sjeff old = KSEQ_CPU(ke->ke_cpu); 776139334Sjeff nksg = old->ksq_group; 777133427Sjeff ksg = kseq->ksq_group; 778139334Sjeff if (kseq_idle) { 779139334Sjeff if (kseq_idle & nksg->ksg_mask) { 780139334Sjeff cpu = ffs(nksg->ksg_idlemask); 781139334Sjeff if (cpu) { 782139334Sjeff CTR2(KTR_SCHED, 783139334Sjeff "kseq_transfer: %p found old cpu %X " 784139334Sjeff "in idlemask.", ke, cpu); 785133427Sjeff goto migrate; 786139334Sjeff } 787133427Sjeff } 788123433Sjeff /* 789123433Sjeff * Multiple cpus could find this bit simultaneously 790123433Sjeff * but the race shouldn't be terrible. 
791123433Sjeff */ 792123433Sjeff cpu = ffs(kseq_idle); 793139334Sjeff if (cpu) { 794139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p found %X " 795139334Sjeff "in idlemask.", ke, cpu); 796133427Sjeff goto migrate; 797139334Sjeff } 798123433Sjeff } 799139334Sjeff idx = 0; 800139334Sjeff#if 0 801139334Sjeff if (old->ksq_load < kseq->ksq_load) { 802139334Sjeff cpu = ke->ke_cpu + 1; 803139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X " 804139334Sjeff "load less than ours.", ke, cpu); 805139334Sjeff goto migrate; 806139334Sjeff } 807123433Sjeff /* 808139334Sjeff * No new CPU was found, look for one with less load. 809139334Sjeff */ 810139334Sjeff for (idx = 0; idx <= ksg_maxid; idx++) { 811139334Sjeff nksg = KSEQ_GROUP(idx); 812139334Sjeff if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) { 813139334Sjeff cpu = ffs(nksg->ksg_cpumask); 814139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less " 815139334Sjeff "than ours.", ke, cpu); 816139334Sjeff goto migrate; 817139334Sjeff } 818139334Sjeff } 819139334Sjeff#endif 820139334Sjeff /* 821123433Sjeff * If another cpu in this group has idled, assign a thread over 822123433Sjeff * to them after checking to see if there are idled groups. 823123433Sjeff */ 824133427Sjeff if (ksg->ksg_idlemask) { 825123433Sjeff cpu = ffs(ksg->ksg_idlemask); 826139334Sjeff if (cpu) { 827139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in " 828139334Sjeff "group.", ke, cpu); 829133427Sjeff goto migrate; 830139334Sjeff } 831123433Sjeff } 832133427Sjeff return (0); 833133427Sjeffmigrate: 834133427Sjeff /* 835123433Sjeff * Now that we've found an idle CPU, migrate the thread. 836123433Sjeff */ 837133427Sjeff cpu--; 838133427Sjeff ke->ke_runq = NULL; 839133427Sjeff kseq_notify(ke, cpu); 840133427Sjeff 841133427Sjeff return (1); 842123433Sjeff} 843123433Sjeff 844121790Sjeff#endif /* SMP */ 845121790Sjeff 846117326Sjeff/* 847121790Sjeff * Pick the highest priority task we have and return it. 848117326Sjeff */ 849117326Sjeff 850121790Sjeffstatic struct kse * 851121790Sjeffkseq_choose(struct kseq *kseq) 852110267Sjeff{ 853137067Sjeff struct runq *swap; 854110267Sjeff struct kse *ke; 855137067Sjeff int nice; 856110267Sjeff 857115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 858113357Sjeff swap = NULL; 859112994Sjeff 860113357Sjeff for (;;) { 861113357Sjeff ke = runq_choose(kseq->ksq_curr); 862113357Sjeff if (ke == NULL) { 863113357Sjeff /* 864131473Sjhb * We already swapped once and didn't get anywhere. 865113357Sjeff */ 866113357Sjeff if (swap) 867113357Sjeff break; 868113357Sjeff swap = kseq->ksq_curr; 869113357Sjeff kseq->ksq_curr = kseq->ksq_next; 870113357Sjeff kseq->ksq_next = swap; 871113357Sjeff continue; 872113357Sjeff } 873113357Sjeff /* 874113357Sjeff * If we encounter a slice of 0 the kse is in a 875113357Sjeff * TIMESHARE kse group and its nice was too far out 876113357Sjeff * of the range that receives slices. 
877113357Sjeff */ 878137067Sjeff nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin); 879138842Sjeff if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 880138842Sjeff ke->ke_proc->p_nice != 0)) { 881113357Sjeff runq_remove(ke->ke_runq, ke); 882113357Sjeff sched_slice(ke); 883113357Sjeff ke->ke_runq = kseq->ksq_next; 884136170Sjulian runq_add(ke->ke_runq, ke, 0); 885113357Sjeff continue; 886113357Sjeff } 887113357Sjeff return (ke); 888110267Sjeff } 889110267Sjeff 890113357Sjeff return (runq_choose(&kseq->ksq_idle)); 891110267Sjeff} 892110267Sjeff 893109864Sjeffstatic void 894110028Sjeffkseq_setup(struct kseq *kseq) 895110028Sjeff{ 896113357Sjeff runq_init(&kseq->ksq_timeshare[0]); 897113357Sjeff runq_init(&kseq->ksq_timeshare[1]); 898112994Sjeff runq_init(&kseq->ksq_idle); 899113357Sjeff kseq->ksq_curr = &kseq->ksq_timeshare[0]; 900113357Sjeff kseq->ksq_next = &kseq->ksq_timeshare[1]; 901113660Sjeff kseq->ksq_load = 0; 902121896Sjeff kseq->ksq_load_timeshare = 0; 903110028Sjeff} 904110028Sjeff 905110028Sjeffstatic void 906109864Sjeffsched_setup(void *dummy) 907109864Sjeff{ 908117313Sjeff#ifdef SMP 909109864Sjeff int i; 910117313Sjeff#endif 911109864Sjeff 912116946Sjeff slice_min = (hz/100); /* 10ms */ 913116946Sjeff slice_max = (hz/7); /* ~140ms */ 914111857Sjeff 915117237Sjeff#ifdef SMP 916123487Sjeff balance_groups = 0; 917123433Sjeff /* 918123433Sjeff * Initialize the kseqs. 919123433Sjeff */ 920123433Sjeff for (i = 0; i < MAXCPU; i++) { 921123433Sjeff struct kseq *ksq; 922123433Sjeff 923123433Sjeff ksq = &kseq_cpu[i]; 924123433Sjeff ksq->ksq_assigned = NULL; 925123433Sjeff kseq_setup(&kseq_cpu[i]); 926123433Sjeff } 927117237Sjeff if (smp_topology == NULL) { 928123433Sjeff struct kseq_group *ksg; 929123433Sjeff struct kseq *ksq; 930139334Sjeff int cpus; 931123433Sjeff 932139334Sjeff for (cpus = 0, i = 0; i < MAXCPU; i++) { 933139334Sjeff if (CPU_ABSENT(i)) 934139334Sjeff continue; 935139334Sjeff ksq = &kseq_cpu[cpus]; 936139334Sjeff ksg = &kseq_groups[cpus]; 937123433Sjeff /* 938129982Sjeff * Setup a kseq group with one member. 939123433Sjeff */ 940123433Sjeff ksq->ksq_transferable = 0; 941123433Sjeff ksq->ksq_group = ksg; 942123433Sjeff ksg->ksg_cpus = 1; 943123433Sjeff ksg->ksg_idlemask = 0; 944123433Sjeff ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 945123487Sjeff ksg->ksg_load = 0; 946123433Sjeff ksg->ksg_transferable = 0; 947123433Sjeff LIST_INIT(&ksg->ksg_members); 948123433Sjeff LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 949139334Sjeff cpus++; 950117237Sjeff } 951139334Sjeff ksg_maxid = cpus - 1; 952117237Sjeff } else { 953123433Sjeff struct kseq_group *ksg; 954123433Sjeff struct cpu_group *cg; 955117237Sjeff int j; 956113357Sjeff 957117237Sjeff for (i = 0; i < smp_topology->ct_count; i++) { 958117237Sjeff cg = &smp_topology->ct_group[i]; 959123433Sjeff ksg = &kseq_groups[i]; 960123433Sjeff /* 961123433Sjeff * Initialize the group. 962123433Sjeff */ 963123433Sjeff ksg->ksg_idlemask = 0; 964123487Sjeff ksg->ksg_load = 0; 965123433Sjeff ksg->ksg_transferable = 0; 966123433Sjeff ksg->ksg_cpus = cg->cg_count; 967123433Sjeff ksg->ksg_cpumask = cg->cg_mask; 968123433Sjeff LIST_INIT(&ksg->ksg_members); 969123433Sjeff /* 970123433Sjeff * Find all of the group members and add them. 
971123433Sjeff */ 972123433Sjeff for (j = 0; j < MAXCPU; j++) { 973123433Sjeff if ((cg->cg_mask & (1 << j)) != 0) { 974123433Sjeff if (ksg->ksg_mask == 0) 975123433Sjeff ksg->ksg_mask = 1 << j; 976123433Sjeff kseq_cpu[j].ksq_transferable = 0; 977123433Sjeff kseq_cpu[j].ksq_group = ksg; 978123433Sjeff LIST_INSERT_HEAD(&ksg->ksg_members, 979123433Sjeff &kseq_cpu[j], ksq_siblings); 980123433Sjeff } 981123433Sjeff } 982123487Sjeff if (ksg->ksg_cpus > 1) 983123487Sjeff balance_groups = 1; 984117237Sjeff } 985123487Sjeff ksg_maxid = smp_topology->ct_count - 1; 986117237Sjeff } 987123487Sjeff /* 988123487Sjeff * Stagger the group and global load balancer so they do not 989123487Sjeff * interfere with each other. 990123487Sjeff */ 991129982Sjeff bal_tick = ticks + hz; 992123487Sjeff if (balance_groups) 993129982Sjeff gbal_tick = ticks + (hz / 2); 994117237Sjeff#else 995117237Sjeff kseq_setup(KSEQ_SELF()); 996116069Sjeff#endif 997117237Sjeff mtx_lock_spin(&sched_lock); 998122744Sjeff kseq_load_add(KSEQ_SELF(), &kse0); 999117237Sjeff mtx_unlock_spin(&sched_lock); 1000109864Sjeff} 1001109864Sjeff 1002109864Sjeff/* 1003109864Sjeff * Scale the scheduling priority according to the "interactivity" of this 1004109864Sjeff * process. 1005109864Sjeff */ 1006113357Sjeffstatic void 1007109864Sjeffsched_priority(struct ksegrp *kg) 1008109864Sjeff{ 1009109864Sjeff int pri; 1010109864Sjeff 1011109864Sjeff if (kg->kg_pri_class != PRI_TIMESHARE) 1012113357Sjeff return; 1013109864Sjeff 1014113357Sjeff pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 1015111857Sjeff pri += SCHED_PRI_BASE; 1016130551Sjulian pri += kg->kg_proc->p_nice; 1017109864Sjeff 1018109864Sjeff if (pri > PRI_MAX_TIMESHARE) 1019109864Sjeff pri = PRI_MAX_TIMESHARE; 1020109864Sjeff else if (pri < PRI_MIN_TIMESHARE) 1021109864Sjeff pri = PRI_MIN_TIMESHARE; 1022109864Sjeff 1023109864Sjeff kg->kg_user_pri = pri; 1024109864Sjeff 1025113357Sjeff return; 1026109864Sjeff} 1027109864Sjeff 1028109864Sjeff/* 1029112966Sjeff * Calculate a time slice based on the properties of the kseg and the runq 1030112994Sjeff * that we're on. This is only for PRI_TIMESHARE ksegrps. 1031109864Sjeff */ 1032112966Sjeffstatic void 1033112966Sjeffsched_slice(struct kse *ke) 1034109864Sjeff{ 1035113357Sjeff struct kseq *kseq; 1036112966Sjeff struct ksegrp *kg; 1037109864Sjeff 1038112966Sjeff kg = ke->ke_ksegrp; 1039113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 1040109864Sjeff 1041139453Sjhb if (ke->ke_thread->td_flags & TDF_BORROWING) { 1042138842Sjeff ke->ke_slice = SCHED_SLICE_MIN; 1043138842Sjeff return; 1044138842Sjeff } 1045138842Sjeff 1046112966Sjeff /* 1047112966Sjeff * Rationale: 1048133427Sjeff * KSEs in interactive ksegs get a minimal slice so that we 1049112966Sjeff * quickly notice if it abuses its advantage. 1050112966Sjeff * 1051112966Sjeff * KSEs in non-interactive ksegs are assigned a slice that is 1052112966Sjeff * based on the ksegs nice value relative to the least nice kseg 1053112966Sjeff * on the run queue for this cpu. 1054112966Sjeff * 1055112966Sjeff * If the KSE is less nice than all others it gets the maximum 1056112966Sjeff * slice and other KSEs will adjust their slice relative to 1057112966Sjeff * this when they first expire. 1058112966Sjeff * 1059112966Sjeff * There is 20 point window that starts relative to the least 1060112966Sjeff * nice kse on the run queue. Slice size is determined by 1061112966Sjeff * the kse distance from the last nice ksegrp. 
1062112966Sjeff * 1063121871Sjeff * If the kse is outside of the window it will get no slice 1064121871Sjeff * and will be reevaluated each time it is selected on the 1065121871Sjeff * run queue. The exception to this is nice 0 ksegs when 1066121871Sjeff * a nice -20 is running. They are always granted a minimum 1067121871Sjeff * slice. 1068112966Sjeff */ 1069113357Sjeff if (!SCHED_INTERACTIVE(kg)) { 1070112966Sjeff int nice; 1071112966Sjeff 1072130551Sjulian nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin); 1073121896Sjeff if (kseq->ksq_load_timeshare == 0 || 1074130551Sjulian kg->kg_proc->p_nice < kseq->ksq_nicemin) 1075112966Sjeff ke->ke_slice = SCHED_SLICE_MAX; 1076121871Sjeff else if (nice <= SCHED_SLICE_NTHRESH) 1077112966Sjeff ke->ke_slice = SCHED_SLICE_NICE(nice); 1078130551Sjulian else if (kg->kg_proc->p_nice == 0) 1079121871Sjeff ke->ke_slice = SCHED_SLICE_MIN; 1080112966Sjeff else 1081112966Sjeff ke->ke_slice = 0; 1082112966Sjeff } else 1083123684Sjeff ke->ke_slice = SCHED_SLICE_INTERACTIVE; 1084112966Sjeff 1085112966Sjeff return; 1086109864Sjeff} 1087109864Sjeff 1088121868Sjeff/* 1089121868Sjeff * This routine enforces a maximum limit on the amount of scheduling history 1090121868Sjeff * kept. It is called after either the slptime or runtime is adjusted. 1091121868Sjeff * This routine will not operate correctly when slp or run times have been 1092121868Sjeff * adjusted to more than double their maximum. 1093121868Sjeff */ 1094116463Sjeffstatic void 1095116463Sjeffsched_interact_update(struct ksegrp *kg) 1096116463Sjeff{ 1097121868Sjeff int sum; 1098121605Sjeff 1099121868Sjeff sum = kg->kg_runtime + kg->kg_slptime; 1100121868Sjeff if (sum < SCHED_SLP_RUN_MAX) 1101121868Sjeff return; 1102121868Sjeff /* 1103121868Sjeff * If we have exceeded by more than 1/5th then the algorithm below 1104121868Sjeff * will not bring us back into range. Dividing by two here forces 1105133427Sjeff * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1106121868Sjeff */ 1107127850Sjeff if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1108121868Sjeff kg->kg_runtime /= 2; 1109121868Sjeff kg->kg_slptime /= 2; 1110121868Sjeff return; 1111116463Sjeff } 1112121868Sjeff kg->kg_runtime = (kg->kg_runtime / 5) * 4; 1113121868Sjeff kg->kg_slptime = (kg->kg_slptime / 5) * 4; 1114116463Sjeff} 1115116463Sjeff 1116121868Sjeffstatic void 1117121868Sjeffsched_interact_fork(struct ksegrp *kg) 1118121868Sjeff{ 1119121868Sjeff int ratio; 1120121868Sjeff int sum; 1121121868Sjeff 1122121868Sjeff sum = kg->kg_runtime + kg->kg_slptime; 1123121868Sjeff if (sum > SCHED_SLP_RUN_FORK) { 1124121868Sjeff ratio = sum / SCHED_SLP_RUN_FORK; 1125121868Sjeff kg->kg_runtime /= ratio; 1126121868Sjeff kg->kg_slptime /= ratio; 1127121868Sjeff } 1128121868Sjeff} 1129121868Sjeff 1130111857Sjeffstatic int 1131111857Sjeffsched_interact_score(struct ksegrp *kg) 1132111857Sjeff{ 1133116365Sjeff int div; 1134111857Sjeff 1135111857Sjeff if (kg->kg_runtime > kg->kg_slptime) { 1136116365Sjeff div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 1137116365Sjeff return (SCHED_INTERACT_HALF + 1138116365Sjeff (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 1139116365Sjeff } if (kg->kg_slptime > kg->kg_runtime) { 1140116365Sjeff div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 1141116365Sjeff return (kg->kg_runtime / div); 1142111857Sjeff } 1143111857Sjeff 1144116365Sjeff /* 1145116365Sjeff * This can happen if slptime and runtime are 0. 
1146116365Sjeff */ 1147116365Sjeff return (0); 1148111857Sjeff 1149111857Sjeff} 1150111857Sjeff 1151113357Sjeff/* 1152134791Sjulian * Very early in the boot some setup of scheduler-specific 1153134791Sjulian * parts of proc0 and of soem scheduler resources needs to be done. 1154134791Sjulian * Called from: 1155134791Sjulian * proc0_init() 1156134791Sjulian */ 1157134791Sjulianvoid 1158134791Sjulianschedinit(void) 1159134791Sjulian{ 1160134791Sjulian /* 1161134791Sjulian * Set up the scheduler specific parts of proc0. 1162134791Sjulian */ 1163136167Sjulian proc0.p_sched = NULL; /* XXX */ 1164134791Sjulian ksegrp0.kg_sched = &kg_sched0; 1165136167Sjulian thread0.td_sched = &kse0; 1166134791Sjulian kse0.ke_thread = &thread0; 1167134791Sjulian kse0.ke_state = KES_THREAD; 1168134791Sjulian kg_sched0.skg_concurrency = 1; 1169134791Sjulian kg_sched0.skg_avail_opennings = 0; /* we are already running */ 1170134791Sjulian} 1171134791Sjulian 1172134791Sjulian/* 1173113357Sjeff * This is only somewhat accurate since given many processes of the same 1174113357Sjeff * priority they will switch when their slices run out, which will be 1175113357Sjeff * at most SCHED_SLICE_MAX. 1176113357Sjeff */ 1177109864Sjeffint 1178109864Sjeffsched_rr_interval(void) 1179109864Sjeff{ 1180109864Sjeff return (SCHED_SLICE_MAX); 1181109864Sjeff} 1182109864Sjeff 1183121790Sjeffstatic void 1184109864Sjeffsched_pctcpu_update(struct kse *ke) 1185109864Sjeff{ 1186109864Sjeff /* 1187109864Sjeff * Adjust counters and watermark for pctcpu calc. 1188116365Sjeff */ 1189120272Sjeff if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1190120272Sjeff /* 1191120272Sjeff * Shift the tick count out so that the divide doesn't 1192120272Sjeff * round away our results. 1193120272Sjeff */ 1194120272Sjeff ke->ke_ticks <<= 10; 1195120272Sjeff ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 1196120272Sjeff SCHED_CPU_TICKS; 1197120272Sjeff ke->ke_ticks >>= 10; 1198120272Sjeff } else 1199120272Sjeff ke->ke_ticks = 0; 1200109864Sjeff ke->ke_ltick = ticks; 1201109864Sjeff ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 1202109864Sjeff} 1203109864Sjeff 1204109864Sjeffvoid 1205139453Sjhbsched_thread_priority(struct thread *td, u_char prio) 1206109864Sjeff{ 1207121605Sjeff struct kse *ke; 1208109864Sjeff 1209139316Sjeff CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 1210139316Sjeff td, td->td_proc->p_comm, td->td_priority, prio, curthread, 1211139316Sjeff curthread->td_proc->p_comm); 1212121605Sjeff ke = td->td_kse; 1213109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1214139453Sjhb if (td->td_priority == prio) 1215139453Sjhb return; 1216109864Sjeff if (TD_ON_RUNQ(td)) { 1217121605Sjeff /* 1218121605Sjeff * If the priority has been elevated due to priority 1219121605Sjeff * propagation, we may have to move ourselves to a new 1220121605Sjeff * queue. We still call adjustrunqueue below in case kse 1221121605Sjeff * needs to fix things up. 1222121605Sjeff */ 1223138842Sjeff if (prio < td->td_priority && ke->ke_runq != NULL && 1224121872Sjeff (ke->ke_flags & KEF_ASSIGNED) == 0 && 1225121790Sjeff ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 1226121605Sjeff runq_remove(ke->ke_runq, ke); 1227121605Sjeff ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 1228136170Sjulian runq_add(ke->ke_runq, ke, 0); 1229121605Sjeff } 1230133555Sjeff /* 1231133555Sjeff * Hold this kse on this cpu so that sched_prio() doesn't 1232133555Sjeff * cause excessive migration. We only want migration to 1233133555Sjeff * happen as the result of a wakeup. 
1234133555Sjeff */ 1235133555Sjeff ke->ke_flags |= KEF_HOLD; 1236119488Sdavidxu adjustrunqueue(td, prio); 1237139334Sjeff ke->ke_flags &= ~KEF_HOLD; 1238121605Sjeff } else 1239119488Sdavidxu td->td_priority = prio; 1240109864Sjeff} 1241109864Sjeff 1242139453Sjhb/* 1243139453Sjhb * Update a thread's priority when it is lent another thread's 1244139453Sjhb * priority. 1245139453Sjhb */ 1246109864Sjeffvoid 1247139453Sjhbsched_lend_prio(struct thread *td, u_char prio) 1248139453Sjhb{ 1249139453Sjhb 1250139453Sjhb td->td_flags |= TDF_BORROWING; 1251139453Sjhb sched_thread_priority(td, prio); 1252139453Sjhb} 1253139453Sjhb 1254139453Sjhb/* 1255139453Sjhb * Restore a thread's priority when priority propagation is 1256139453Sjhb * over. The prio argument is the minimum priority the thread 1257139453Sjhb * needs to have to satisfy other possible priority lending 1258139453Sjhb * requests. If the thread's regular priority is less 1259139453Sjhb * important than prio, the thread will keep a priority boost 1260139453Sjhb * of prio. 1261139453Sjhb */ 1262139453Sjhbvoid 1263139453Sjhbsched_unlend_prio(struct thread *td, u_char prio) 1264139453Sjhb{ 1265139453Sjhb u_char base_pri; 1266139453Sjhb 1267139453Sjhb if (td->td_base_pri >= PRI_MIN_TIMESHARE && 1268139453Sjhb td->td_base_pri <= PRI_MAX_TIMESHARE) 1269139453Sjhb base_pri = td->td_ksegrp->kg_user_pri; 1270139453Sjhb else 1271139453Sjhb base_pri = td->td_base_pri; 1272139453Sjhb if (prio >= base_pri) { 1273139455Sjhb td->td_flags &= ~TDF_BORROWING; 1274139453Sjhb sched_thread_priority(td, base_pri); 1275139453Sjhb } else 1276139453Sjhb sched_lend_prio(td, prio); 1277139453Sjhb} 1278139453Sjhb 1279139453Sjhbvoid 1280139453Sjhbsched_prio(struct thread *td, u_char prio) 1281139453Sjhb{ 1282139453Sjhb u_char oldprio; 1283139453Sjhb 1284139453Sjhb /* First, update the base priority. */ 1285139453Sjhb td->td_base_pri = prio; 1286139453Sjhb 1287139453Sjhb /* 1288139455Sjhb * If the thread is borrowing another thread's priority, don't 1289139453Sjhb * ever lower the priority. 1290139453Sjhb */ 1291139453Sjhb if (td->td_flags & TDF_BORROWING && td->td_priority < prio) 1292139453Sjhb return; 1293139453Sjhb 1294139453Sjhb /* Change the real priority. */ 1295139453Sjhb oldprio = td->td_priority; 1296139453Sjhb sched_thread_priority(td, prio); 1297139453Sjhb 1298139453Sjhb /* 1299139453Sjhb * If the thread is on a turnstile, then let the turnstile update 1300139453Sjhb * its state. 1301139453Sjhb */ 1302139453Sjhb if (TD_ON_LOCK(td) && oldprio != prio) 1303139453Sjhb turnstile_adjust(td, oldprio); 1304139453Sjhb} 1305139455Sjhb 1306139453Sjhbvoid 1307135051Sjuliansched_switch(struct thread *td, struct thread *newtd, int flags) 1308109864Sjeff{ 1309139334Sjeff struct kseq *ksq; 1310109864Sjeff struct kse *ke; 1311109864Sjeff 1312109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1313109864Sjeff 1314109864Sjeff ke = td->td_kse; 1315139334Sjeff ksq = KSEQ_SELF(); 1316109864Sjeff 1317133555Sjeff td->td_lastcpu = td->td_oncpu; 1318113339Sjulian td->td_oncpu = NOCPU; 1319132266Sjhb td->td_flags &= ~TDF_NEEDRESCHED; 1320144777Sups td->td_owepreempt = 0; 1321109864Sjeff 1322123434Sjeff /* 1323123434Sjeff * If the KSE has been assigned it may be in the process of switching 1324123434Sjeff * to the new cpu. This is the case in sched_bind(). 
1325123434Sjeff */ 1326139334Sjeff if (td == PCPU_GET(idlethread)) { 1327139334Sjeff TD_SET_CAN_RUN(td); 1328139334Sjeff } else if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 1329139334Sjeff /* We are ending our run so make our slot available again */ 1330139334Sjeff SLOT_RELEASE(td->td_ksegrp); 1331139334Sjeff kseq_load_rem(ksq, ke); 1332139334Sjeff if (TD_IS_RUNNING(td)) { 1333139334Sjeff /* 1334139334Sjeff * Don't allow the thread to migrate 1335139334Sjeff * from a preemption. 1336139334Sjeff */ 1337139334Sjeff ke->ke_flags |= KEF_HOLD; 1338139334Sjeff setrunqueue(td, (flags & SW_PREEMPT) ? 1339139334Sjeff SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1340139334Sjeff SRQ_OURSELF|SRQ_YIELDING); 1341139334Sjeff ke->ke_flags &= ~KEF_HOLD; 1342139334Sjeff } else if ((td->td_proc->p_flag & P_HADTHREADS) && 1343139334Sjeff (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp)) 1344139334Sjeff /* 1345139334Sjeff * We will not be on the run queue. 1346139334Sjeff * So we must be sleeping or similar. 1347139334Sjeff * Don't use the slot if we will need it 1348139334Sjeff * for newtd. 1349139334Sjeff */ 1350139334Sjeff slot_fill(td->td_ksegrp); 1351121146Sjeff } 1352136167Sjulian if (newtd != NULL) { 1353136170Sjulian /* 1354147068Sjeff * If we bring in a thread account for it as if it had been 1355147068Sjeff * added to the run queue and then chosen. 1356136170Sjulian */ 1357136169Sjulian newtd->td_kse->ke_flags |= KEF_DIDRUN; 1358139334Sjeff newtd->td_kse->ke_runq = ksq->ksq_curr; 1359136173Sjulian TD_SET_RUNNING(newtd); 1360133427Sjeff kseq_load_add(KSEQ_SELF(), newtd->td_kse); 1361147068Sjeff /* 1362147068Sjeff * XXX When we preempt, we've already consumed a slot because 1363147068Sjeff * we got here through sched_add(). However, newtd can come 1364147068Sjeff * from thread_switchout() which can't SLOT_USE() because 1365147068Sjeff * the SLOT code is scheduler dependent. We must use the 1366147068Sjeff * slot here otherwise. 1367147068Sjeff */ 1368147068Sjeff if ((flags & SW_PREEMPT) == 0) 1369147068Sjeff SLOT_USE(newtd->td_ksegrp); 1370136167Sjulian } else 1371131473Sjhb newtd = choosethread(); 1372145256Sjkoshy if (td != newtd) { 1373145256Sjkoshy#ifdef HWPMC_HOOKS 1374145256Sjkoshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1375145256Sjkoshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); 1376145256Sjkoshy#endif 1377121128Sjeff cpu_switch(td, newtd); 1378145256Sjkoshy#ifdef HWPMC_HOOKS 1379145256Sjkoshy if (PMC_PROC_IS_USING_PMCS(td->td_proc)) 1380145256Sjkoshy PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); 1381145256Sjkoshy#endif 1382145256Sjkoshy } 1383145256Sjkoshy 1384121128Sjeff sched_lock.mtx_lock = (uintptr_t)td; 1385109864Sjeff 1386113339Sjulian td->td_oncpu = PCPU_GET(cpuid); 1387109864Sjeff} 1388109864Sjeff 1389109864Sjeffvoid 1390130551Sjuliansched_nice(struct proc *p, int nice) 1391109864Sjeff{ 1392130551Sjulian struct ksegrp *kg; 1393113357Sjeff struct kse *ke; 1394109864Sjeff struct thread *td; 1395113357Sjeff struct kseq *kseq; 1396109864Sjeff 1397130551Sjulian PROC_LOCK_ASSERT(p, MA_OWNED); 1398113873Sjhb mtx_assert(&sched_lock, MA_OWNED); 1399113357Sjeff /* 1400113357Sjeff * We need to adjust the nice counts for running KSEs. 
1401113357Sjeff */ 1402130551Sjulian FOREACH_KSEGRP_IN_PROC(p, kg) { 1403130551Sjulian if (kg->kg_pri_class == PRI_TIMESHARE) { 1404134791Sjulian FOREACH_THREAD_IN_GROUP(kg, td) { 1405134791Sjulian ke = td->td_kse; 1406130551Sjulian if (ke->ke_runq == NULL) 1407130551Sjulian continue; 1408130551Sjulian kseq = KSEQ_CPU(ke->ke_cpu); 1409130551Sjulian kseq_nice_rem(kseq, p->p_nice); 1410130551Sjulian kseq_nice_add(kseq, nice); 1411130551Sjulian } 1412113357Sjeff } 1413130551Sjulian } 1414130551Sjulian p->p_nice = nice; 1415130551Sjulian FOREACH_KSEGRP_IN_PROC(p, kg) { 1416130551Sjulian sched_priority(kg); 1417130551Sjulian FOREACH_THREAD_IN_GROUP(kg, td) 1418130551Sjulian td->td_flags |= TDF_NEEDRESCHED; 1419130551Sjulian } 1420109864Sjeff} 1421109864Sjeff 1422109864Sjeffvoid 1423126326Sjhbsched_sleep(struct thread *td) 1424109864Sjeff{ 1425109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1426109864Sjeff 1427109864Sjeff td->td_slptime = ticks; 1428109864Sjeff} 1429109864Sjeff 1430109864Sjeffvoid 1431109864Sjeffsched_wakeup(struct thread *td) 1432109864Sjeff{ 1433109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1434109864Sjeff 1435109864Sjeff /* 1436109864Sjeff * Let the kseg know how long we slept for. This is because process 1437109864Sjeff * interactivity behavior is modeled in the kseg. 1438109864Sjeff */ 1439111788Sjeff if (td->td_slptime) { 1440111788Sjeff struct ksegrp *kg; 1441113357Sjeff int hzticks; 1442109864Sjeff 1443111788Sjeff kg = td->td_ksegrp; 1444121868Sjeff hzticks = (ticks - td->td_slptime) << 10; 1445121868Sjeff if (hzticks >= SCHED_SLP_RUN_MAX) { 1446121868Sjeff kg->kg_slptime = SCHED_SLP_RUN_MAX; 1447121868Sjeff kg->kg_runtime = 1; 1448121868Sjeff } else { 1449121868Sjeff kg->kg_slptime += hzticks; 1450121868Sjeff sched_interact_update(kg); 1451121868Sjeff } 1452111788Sjeff sched_priority(kg); 1453134791Sjulian sched_slice(td->td_kse); 1454111788Sjeff td->td_slptime = 0; 1455109864Sjeff } 1456134586Sjulian setrunqueue(td, SRQ_BORING); 1457109864Sjeff} 1458109864Sjeff 1459109864Sjeff/* 1460109864Sjeff * Penalize the parent for creating a new child and initialize the child's 1461109864Sjeff * priority. 1462109864Sjeff */ 1463109864Sjeffvoid 1464134791Sjuliansched_fork(struct thread *td, struct thread *childtd) 1465109864Sjeff{ 1466109864Sjeff 1467109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1468109864Sjeff 1469134791Sjulian sched_fork_ksegrp(td, childtd->td_ksegrp); 1470134791Sjulian sched_fork_thread(td, childtd); 1471113357Sjeff} 1472113357Sjeff 1473113357Sjeffvoid 1474132372Sjuliansched_fork_ksegrp(struct thread *td, struct ksegrp *child) 1475113357Sjeff{ 1476132372Sjulian struct ksegrp *kg = td->td_ksegrp; 1477134791Sjulian mtx_assert(&sched_lock, MA_OWNED); 1478116365Sjeff 1479121868Sjeff child->kg_slptime = kg->kg_slptime; 1480121868Sjeff child->kg_runtime = kg->kg_runtime; 1481121868Sjeff child->kg_user_pri = kg->kg_user_pri; 1482121868Sjeff sched_interact_fork(child); 1483116463Sjeff kg->kg_runtime += tickincr << 10; 1484116463Sjeff sched_interact_update(kg); 1485113357Sjeff} 1486109864Sjeff 1487113357Sjeffvoid 1488113357Sjeffsched_fork_thread(struct thread *td, struct thread *child) 1489113357Sjeff{ 1490134791Sjulian struct kse *ke; 1491134791Sjulian struct kse *ke2; 1492134791Sjulian 1493134791Sjulian sched_newthread(child); 1494134791Sjulian ke = td->td_kse; 1495134791Sjulian ke2 = child->td_kse; 1496134791Sjulian ke2->ke_slice = 1; /* Attempt to quickly learn interactivity. 
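 * (a slice of a single tick expires on the child's first sched_clock()
 * tick, forcing an early sched_priority()/sched_slice() recomputation
 * once the child has accumulated some history)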
*/ 1497134791Sjulian ke2->ke_cpu = ke->ke_cpu; 1498134791Sjulian ke2->ke_runq = NULL; 1499134791Sjulian 1500134791Sjulian /* Grab our parent's cpu estimation information. */ 1501134791Sjulian ke2->ke_ticks = ke->ke_ticks; 1502134791Sjulian ke2->ke_ltick = ke->ke_ltick; 1503134791Sjulian ke2->ke_ftick = ke->ke_ftick; 1504113357Sjeff} 1505113357Sjeff 1506113357Sjeffvoid 1507113357Sjeffsched_class(struct ksegrp *kg, int class) 1508113357Sjeff{ 1509113357Sjeff struct kseq *kseq; 1510113357Sjeff struct kse *ke; 1511134791Sjulian struct thread *td; 1512121896Sjeff int nclass; 1513121896Sjeff int oclass; 1514113357Sjeff 1515113923Sjhb mtx_assert(&sched_lock, MA_OWNED); 1516113357Sjeff if (kg->kg_pri_class == class) 1517113357Sjeff return; 1518113357Sjeff 1519121896Sjeff nclass = PRI_BASE(class); 1520121896Sjeff oclass = PRI_BASE(kg->kg_pri_class); 1521134791Sjulian FOREACH_THREAD_IN_GROUP(kg, td) { 1522134791Sjulian ke = td->td_kse; 1523141292Sjeff if ((ke->ke_state != KES_ONRUNQ && 1524141292Sjeff ke->ke_state != KES_THREAD) || ke->ke_runq == NULL) 1525113357Sjeff continue; 1526113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 1527113357Sjeff 1528121896Sjeff#ifdef SMP 1529122744Sjeff /* 1530122744Sjeff * On SMP if we're on the RUNQ we must adjust the transferable 1531122744Sjeff * count because we could be changing to or from an interrupt 1532122744Sjeff * class. XXX Both KSE_CAN_MIGRATE() tests below are identical, so the two adjustments currently cancel out. 1533122744Sjeff */ 1534122744Sjeff if (ke->ke_state == KES_ONRUNQ) { 1535139334Sjeff if (KSE_CAN_MIGRATE(ke)) { 1536123433Sjeff kseq->ksq_transferable--; 1537123433Sjeff kseq->ksq_group->ksg_transferable--; 1538123433Sjeff } 1539139334Sjeff if (KSE_CAN_MIGRATE(ke)) { 1540123433Sjeff kseq->ksq_transferable++; 1541123433Sjeff kseq->ksq_group->ksg_transferable++; 1542123433Sjeff } 1543122744Sjeff } 1544121896Sjeff#endif 1545122744Sjeff if (oclass == PRI_TIMESHARE) { 1546121896Sjeff kseq->ksq_load_timeshare--; 1547130551Sjulian kseq_nice_rem(kseq, kg->kg_proc->p_nice); 1548122744Sjeff } 1549122744Sjeff if (nclass == PRI_TIMESHARE) { 1550121896Sjeff kseq->ksq_load_timeshare++; 1551130551Sjulian kseq_nice_add(kseq, kg->kg_proc->p_nice); 1552122744Sjeff } 1553109970Sjeff } 1554109970Sjeff 1555113357Sjeff kg->kg_pri_class = class; 1556109864Sjeff} 1557109864Sjeff 1558109864Sjeff/* 1559109864Sjeff * Return some of the child's priority and interactivity to the parent.
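 * The child's accumulated kg_runtime is folded back into the parent's
 * ksegrp and re-clamped by sched_interact_update(), so cpu time spent
 * in short-lived children still counts against the parent.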
1560109864Sjeff */ 1561109864Sjeffvoid 1562134791Sjuliansched_exit(struct proc *p, struct thread *childtd) 1563109864Sjeff{ 1564109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1565134791Sjulian sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd); 1566139316Sjeff sched_exit_thread(NULL, childtd); 1567109864Sjeff} 1568109864Sjeff 1569109864Sjeffvoid 1570132372Sjuliansched_exit_ksegrp(struct ksegrp *kg, struct thread *td) 1571113372Sjeff{ 1572132372Sjulian /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ 1573132372Sjulian kg->kg_runtime += td->td_ksegrp->kg_runtime; 1574116463Sjeff sched_interact_update(kg); 1575113372Sjeff} 1576113372Sjeff 1577113372Sjeffvoid 1578134791Sjuliansched_exit_thread(struct thread *td, struct thread *childtd) 1579113372Sjeff{ 1580139316Sjeff CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", 1581139316Sjeff childtd, childtd->td_proc->p_comm, childtd->td_priority); 1582134791Sjulian kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse); 1583113372Sjeff} 1584113372Sjeff 1585113372Sjeffvoid 1586121127Sjeffsched_clock(struct thread *td) 1587109864Sjeff{ 1588113357Sjeff struct kseq *kseq; 1589113357Sjeff struct ksegrp *kg; 1590121127Sjeff struct kse *ke; 1591109864Sjeff 1592129982Sjeff mtx_assert(&sched_lock, MA_OWNED); 1593133427Sjeff kseq = KSEQ_SELF(); 1594129982Sjeff#ifdef SMP 1595139334Sjeff if (ticks >= bal_tick) 1596129982Sjeff sched_balance(); 1597139334Sjeff if (ticks >= gbal_tick && balance_groups) 1598129982Sjeff sched_balance_groups(); 1599133427Sjeff /* 1600133427Sjeff * We could have been assigned a non real-time thread without an 1601133427Sjeff * IPI. 1602133427Sjeff */ 1603133427Sjeff if (kseq->ksq_assigned) 1604133427Sjeff kseq_assign(kseq); /* Potentially sets NEEDRESCHED */ 1605129982Sjeff#endif 1606113357Sjeff /* 1607113357Sjeff * sched_setup() apparently happens prior to stathz being set. We 1608113357Sjeff * need to resolve the timers earlier in the boot so we can avoid 1609113357Sjeff * calculating this here. 1610113357Sjeff */ 1611113357Sjeff if (realstathz == 0) { 1612113357Sjeff realstathz = stathz ? stathz : hz; 1613113357Sjeff tickincr = hz / realstathz; 1614113357Sjeff /* 1615113357Sjeff * XXX This does not work for values of stathz that are much 1616113357Sjeff * larger than hz. 1617113357Sjeff */ 1618113357Sjeff if (tickincr == 0) 1619113357Sjeff tickincr = 1; 1620113357Sjeff } 1621109864Sjeff 1622121127Sjeff ke = td->td_kse; 1623113357Sjeff kg = ke->ke_ksegrp; 1624109864Sjeff 1625110028Sjeff /* Adjust ticks for pctcpu */ 1626111793Sjeff ke->ke_ticks++; 1627109971Sjeff ke->ke_ltick = ticks; 1628112994Sjeff 1629109971Sjeff /* Go up to one second beyond our max and then trim back down */ 1630109971Sjeff if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1631109971Sjeff sched_pctcpu_update(ke); 1632109971Sjeff 1633114496Sjulian if (td->td_flags & TDF_IDLETD) 1634109864Sjeff return; 1635110028Sjeff /* 1636113357Sjeff * We only run the slicing code for TIMESHARE ksegrps. 1637113357Sjeff */ 1638113357Sjeff if (kg->kg_pri_class != PRI_TIMESHARE) 1639113357Sjeff return; 1640113357Sjeff /* 1641110645Sjeff * We used a tick; charge it to the ksegrp so that we can compute our 1642113357Sjeff * interactivity. 1643109864Sjeff */ 1644113357Sjeff kg->kg_runtime += tickincr << 10; 1645116463Sjeff sched_interact_update(kg); 1646110645Sjeff 1647109864Sjeff /* 1648109864Sjeff * We used up one tick of our time slice.
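 * If the slice is not yet exhausted there is nothing more to do.
 * Otherwise the kse is removed from the load accounting, its priority
 * and slice are recomputed, and it is requeued on ksq_curr or ksq_next
 * as SCHED_CURR() dictates.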
1649109864Sjeff */ 1650122847Sjeff if (--ke->ke_slice > 0) 1651113357Sjeff return; 1652109864Sjeff /* 1653113357Sjeff * We're out of time, recompute priorities and requeue. 1654109864Sjeff */ 1655122744Sjeff kseq_load_rem(kseq, ke); 1656113357Sjeff sched_priority(kg); 1657113357Sjeff sched_slice(ke); 1658113357Sjeff if (SCHED_CURR(kg, ke)) 1659113357Sjeff ke->ke_runq = kseq->ksq_curr; 1660113357Sjeff else 1661113357Sjeff ke->ke_runq = kseq->ksq_next; 1662122744Sjeff kseq_load_add(kseq, ke); 1663113357Sjeff td->td_flags |= TDF_NEEDRESCHED; 1664109864Sjeff} 1665109864Sjeff 1666109864Sjeffint 1667109864Sjeffsched_runnable(void) 1668109864Sjeff{ 1669109864Sjeff struct kseq *kseq; 1670115998Sjeff int load; 1671109864Sjeff 1672115998Sjeff load = 1; 1673115998Sjeff 1674110028Sjeff kseq = KSEQ_SELF(); 1675121790Sjeff#ifdef SMP 1676122094Sjeff if (kseq->ksq_assigned) { 1677122094Sjeff mtx_lock_spin(&sched_lock); 1678121790Sjeff kseq_assign(kseq); 1679122094Sjeff mtx_unlock_spin(&sched_lock); 1680122094Sjeff } 1681121790Sjeff#endif 1682121605Sjeff if ((curthread->td_flags & TDF_IDLETD) != 0) { 1683121605Sjeff if (kseq->ksq_load > 0) 1684121605Sjeff goto out; 1685121605Sjeff } else 1686121605Sjeff if (kseq->ksq_load - 1 > 0) 1687121605Sjeff goto out; 1688115998Sjeff load = 0; 1689115998Sjeffout: 1690115998Sjeff return (load); 1691109864Sjeff} 1692109864Sjeff 1693109864Sjeffvoid 1694109864Sjeffsched_userret(struct thread *td) 1695109864Sjeff{ 1696109864Sjeff struct ksegrp *kg; 1697121605Sjeff 1698139453Sjhb KASSERT((td->td_flags & TDF_BORROWING) == 0, 1699139453Sjhb ("thread with borrowed priority returning to userland")); 1700139453Sjhb kg = td->td_ksegrp; 1701139453Sjhb if (td->td_priority != kg->kg_user_pri) { 1702109864Sjeff mtx_lock_spin(&sched_lock); 1703109864Sjeff td->td_priority = kg->kg_user_pri; 1704139453Sjhb td->td_base_pri = kg->kg_user_pri; 1705109864Sjeff mtx_unlock_spin(&sched_lock); 1706109864Sjeff } 1707109864Sjeff} 1708109864Sjeff 1709109864Sjeffstruct kse * 1710109970Sjeffsched_choose(void) 1711109970Sjeff{ 1712110028Sjeff struct kseq *kseq; 1713109970Sjeff struct kse *ke; 1714109970Sjeff 1715115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 1716121790Sjeff kseq = KSEQ_SELF(); 1717113357Sjeff#ifdef SMP 1718123433Sjeffrestart: 1719121790Sjeff if (kseq->ksq_assigned) 1720121790Sjeff kseq_assign(kseq); 1721113357Sjeff#endif 1722121790Sjeff ke = kseq_choose(kseq); 1723109864Sjeff if (ke) { 1724121790Sjeff#ifdef SMP 1725121790Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 1726123433Sjeff if (kseq_idled(kseq) == 0) 1727123433Sjeff goto restart; 1728121790Sjeff#endif 1729122744Sjeff kseq_runq_rem(kseq, ke); 1730109864Sjeff ke->ke_state = KES_THREAD; 1731113357Sjeff return (ke); 1732109864Sjeff } 1733109970Sjeff#ifdef SMP 1734123433Sjeff if (kseq_idled(kseq) == 0) 1735123433Sjeff goto restart; 1736109970Sjeff#endif 1737113357Sjeff return (NULL); 1738109864Sjeff} 1739109864Sjeff 1740109864Sjeffvoid 1741134586Sjuliansched_add(struct thread *td, int flags) 1742109864Sjeff{ 1743110267Sjeff struct kseq *kseq; 1744113357Sjeff struct ksegrp *kg; 1745121127Sjeff struct kse *ke; 1746139334Sjeff int preemptive; 1747133427Sjeff int canmigrate; 1748121790Sjeff int class; 1749109864Sjeff 1750139316Sjeff CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", 1751139316Sjeff td, td->td_proc->p_comm, td->td_priority, curthread, 1752139316Sjeff curthread->td_proc->p_comm); 1753121790Sjeff mtx_assert(&sched_lock, MA_OWNED); 1754121127Sjeff ke = td->td_kse; 1755121127Sjeff kg = td->td_ksegrp; 
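	/*
	 * Gather the state the placement decision below depends on: whether
	 * this add may preempt (SRQ_YIELDING was not passed), the thread's
	 * base scheduling class, and this cpu's kseq.  SLOT_USE() charges a
	 * ksegrp slot unless the kse was added internally for migration
	 * (KEF_INTERNAL).
	 */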
1756139334Sjeff canmigrate = 1; 1757139334Sjeff preemptive = !(flags & SRQ_YIELDING); 1758139334Sjeff class = PRI_BASE(kg->kg_pri_class); 1759139334Sjeff kseq = KSEQ_SELF(); 1760139334Sjeff if ((ke->ke_flags & KEF_INTERNAL) == 0) 1761139334Sjeff SLOT_USE(td->td_ksegrp); 1762139334Sjeff ke->ke_flags &= ~KEF_INTERNAL; 1763139334Sjeff#ifdef SMP 1764138802Sjeff if (ke->ke_flags & KEF_ASSIGNED) { 1765139334Sjeff if (ke->ke_flags & KEF_REMOVED) 1766138802Sjeff ke->ke_flags &= ~KEF_REMOVED; 1767121790Sjeff return; 1768138802Sjeff } 1769139334Sjeff canmigrate = KSE_CAN_MIGRATE(ke); 1770139334Sjeff#endif 1771109864Sjeff KASSERT(ke->ke_state != KES_ONRUNQ, 1772110267Sjeff ("sched_add: kse %p (%s) already in run queue", ke, 1773109864Sjeff ke->ke_proc->p_comm)); 1774109864Sjeff KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 1775110267Sjeff ("sched_add: process swapped out")); 1776113387Sjeff KASSERT(ke->ke_runq == NULL, 1777113387Sjeff ("sched_add: KSE %p is still assigned to a run queue", ke)); 1778121790Sjeff switch (class) { 1779112994Sjeff case PRI_ITHD: 1780112994Sjeff case PRI_REALTIME: 1781113357Sjeff ke->ke_runq = kseq->ksq_curr; 1782113357Sjeff ke->ke_slice = SCHED_SLICE_MAX; 1783139334Sjeff if (canmigrate) 1784139334Sjeff ke->ke_cpu = PCPU_GET(cpuid); 1785112994Sjeff break; 1786112994Sjeff case PRI_TIMESHARE: 1787113387Sjeff if (SCHED_CURR(kg, ke)) 1788113387Sjeff ke->ke_runq = kseq->ksq_curr; 1789113387Sjeff else 1790113387Sjeff ke->ke_runq = kseq->ksq_next; 1791113357Sjeff break; 1792112994Sjeff case PRI_IDLE: 1793113357Sjeff /* 1794113357Sjeff * This is for priority prop. 1795113357Sjeff */ 1796121605Sjeff if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 1797113357Sjeff ke->ke_runq = kseq->ksq_curr; 1798113357Sjeff else 1799113357Sjeff ke->ke_runq = &kseq->ksq_idle; 1800113357Sjeff ke->ke_slice = SCHED_SLICE_MIN; 1801112994Sjeff break; 1802113357Sjeff default: 1803121868Sjeff panic("Unknown pri class."); 1804113357Sjeff break; 1805112994Sjeff } 1806121790Sjeff#ifdef SMP 1807133427Sjeff /* 1808133427Sjeff * Don't migrate running threads here. Force the long term balancer 1809133427Sjeff * to do it. 1810133427Sjeff */ 1811133555Sjeff if (ke->ke_flags & KEF_HOLD) { 1812133555Sjeff ke->ke_flags &= ~KEF_HOLD; 1813133427Sjeff canmigrate = 0; 1814133555Sjeff } 1815133427Sjeff /* 1816133427Sjeff * If this thread is pinned or bound, notify the target cpu. 1817133427Sjeff */ 1818133427Sjeff if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) { 1819123529Sjeff ke->ke_runq = NULL; 1820123433Sjeff kseq_notify(ke, ke->ke_cpu); 1821123433Sjeff return; 1822123433Sjeff } 1823121790Sjeff /* 1824123685Sjeff * If we had been idle, clear our bit in the group and potentially 1825123685Sjeff * the global bitmap. If not, see if we should transfer this thread. 1826121790Sjeff */ 1827123433Sjeff if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 1828123433Sjeff (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) { 1829121790Sjeff /* 1830123433Sjeff * Check to see if our group is unidling, and if so, remove it 1831123433Sjeff * from the global idle mask. 1832121790Sjeff */ 1833123433Sjeff if (kseq->ksq_group->ksg_idlemask == 1834123433Sjeff kseq->ksq_group->ksg_cpumask) 1835123433Sjeff atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 1836123433Sjeff /* 1837123433Sjeff * Now remove ourselves from the group specific idle mask. 
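 * (the global kseq_idle bit was cleared above only when every cpu in
 * our group had been idle; here we clear just this cpu's bit in the
 * group's idle mask)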
1838123433Sjeff */ 1839123433Sjeff kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask); 1840139334Sjeff } else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD) 1841123685Sjeff if (kseq_transfer(kseq, ke, class)) 1842123685Sjeff return; 1843133427Sjeff ke->ke_cpu = PCPU_GET(cpuid); 1844121790Sjeff#endif 1845133555Sjeff if (td->td_priority < curthread->td_priority && 1846133555Sjeff ke->ke_runq == kseq->ksq_curr) 1847133555Sjeff curthread->td_flags |= TDF_NEEDRESCHED; 1848131839Sjhb if (preemptive && maybe_preempt(td)) 1849131481Sjhb return; 1850109864Sjeff ke->ke_state = KES_ONRUNQ; 1851109864Sjeff 1852139334Sjeff kseq_runq_add(kseq, ke, flags); 1853122744Sjeff kseq_load_add(kseq, ke); 1854109864Sjeff} 1855109864Sjeff 1856109864Sjeffvoid 1857121127Sjeffsched_rem(struct thread *td) 1858109864Sjeff{ 1859113357Sjeff struct kseq *kseq; 1860121127Sjeff struct kse *ke; 1861113357Sjeff 1862139316Sjeff CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", 1863139316Sjeff td, td->td_proc->p_comm, td->td_priority, curthread, 1864139316Sjeff curthread->td_proc->p_comm); 1865139334Sjeff mtx_assert(&sched_lock, MA_OWNED); 1866139334Sjeff ke = td->td_kse; 1867139334Sjeff SLOT_RELEASE(td->td_ksegrp); 1868138802Sjeff if (ke->ke_flags & KEF_ASSIGNED) { 1869138802Sjeff ke->ke_flags |= KEF_REMOVED; 1870121790Sjeff return; 1871138802Sjeff } 1872124958Sjeff KASSERT((ke->ke_state == KES_ONRUNQ), 1873124958Sjeff ("sched_rem: KSE not on run queue")); 1874109864Sjeff 1875109864Sjeff ke->ke_state = KES_THREAD; 1876113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 1877122744Sjeff kseq_runq_rem(kseq, ke); 1878122744Sjeff kseq_load_rem(kseq, ke); 1879109864Sjeff} 1880109864Sjeff 1881109864Sjefffixpt_t 1882121127Sjeffsched_pctcpu(struct thread *td) 1883109864Sjeff{ 1884109864Sjeff fixpt_t pctcpu; 1885121127Sjeff struct kse *ke; 1886109864Sjeff 1887109864Sjeff pctcpu = 0; 1888121127Sjeff ke = td->td_kse; 1889121290Sjeff if (ke == NULL) 1890121290Sjeff return (0); 1891109864Sjeff 1892115998Sjeff mtx_lock_spin(&sched_lock); 1893109864Sjeff if (ke->ke_ticks) { 1894109864Sjeff int rtick; 1895109864Sjeff 1896116365Sjeff /* 1897116365Sjeff * Don't update more frequently than twice a second. Allowing 1898116365Sjeff * this causes the cpu usage to decay away too quickly due to 1899116365Sjeff * rounding errors. 1900116365Sjeff */ 1901123435Sjeff if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick || 1902123435Sjeff ke->ke_ltick < (ticks - (hz / 2))) 1903116365Sjeff sched_pctcpu_update(ke); 1904109864Sjeff /* How many rtick per second ? 
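 * ke_ticks counts the stat clock ticks we accumulated over the pctcpu
 * sampling window, so rtick approximates the number of stat ticks per
 * second this kse actually ran; dividing by realstathz below expresses
 * that as a fixed-point fraction, e.g. rtick == 64 with realstathz ==
 * 128 works out to 0.5, i.e. 50% cpu.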
*/ 1905116365Sjeff rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 1906110226Sscottl pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 1907109864Sjeff } 1908109864Sjeff 1909109864Sjeff ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1910113865Sjhb mtx_unlock_spin(&sched_lock); 1911109864Sjeff 1912109864Sjeff return (pctcpu); 1913109864Sjeff} 1914109864Sjeff 1915122038Sjeffvoid 1916122038Sjeffsched_bind(struct thread *td, int cpu) 1917122038Sjeff{ 1918122038Sjeff struct kse *ke; 1919122038Sjeff 1920122038Sjeff mtx_assert(&sched_lock, MA_OWNED); 1921122038Sjeff ke = td->td_kse; 1922122038Sjeff ke->ke_flags |= KEF_BOUND; 1923123433Sjeff#ifdef SMP 1924123433Sjeff if (PCPU_GET(cpuid) == cpu) 1925122038Sjeff return; 1926122038Sjeff /* sched_rem without the runq_remove */ 1927122038Sjeff ke->ke_state = KES_THREAD; 1928122744Sjeff kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 1929122038Sjeff kseq_notify(ke, cpu); 1930122038Sjeff /* When we return from mi_switch we'll be on the correct cpu. */ 1931131527Sphk mi_switch(SW_VOL, NULL); 1932122038Sjeff#endif 1933122038Sjeff} 1934122038Sjeff 1935122038Sjeffvoid 1936122038Sjeffsched_unbind(struct thread *td) 1937122038Sjeff{ 1938122038Sjeff mtx_assert(&sched_lock, MA_OWNED); 1939122038Sjeff td->td_kse->ke_flags &= ~KEF_BOUND; 1940122038Sjeff} 1941122038Sjeff 1942109864Sjeffint 1943145256Sjkoshysched_is_bound(struct thread *td) 1944145256Sjkoshy{ 1945145256Sjkoshy mtx_assert(&sched_lock, MA_OWNED); 1946145256Sjkoshy return (td->td_kse->ke_flags & KEF_BOUND); 1947145256Sjkoshy} 1948145256Sjkoshy 1949145256Sjkoshyint 1950125289Sjeffsched_load(void) 1951125289Sjeff{ 1952125289Sjeff#ifdef SMP 1953125289Sjeff int total; 1954125289Sjeff int i; 1955125289Sjeff 1956125289Sjeff total = 0; 1957125289Sjeff for (i = 0; i <= ksg_maxid; i++) 1958125289Sjeff total += KSEQ_GROUP(i)->ksg_load; 1959125289Sjeff return (total); 1960125289Sjeff#else 1961125289Sjeff return (KSEQ_SELF()->ksq_sysload); 1962125289Sjeff#endif 1963125289Sjeff} 1964125289Sjeff 1965125289Sjeffint 1966109864Sjeffsched_sizeof_ksegrp(void) 1967109864Sjeff{ 1968109864Sjeff return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 1969109864Sjeff} 1970109864Sjeff 1971109864Sjeffint 1972109864Sjeffsched_sizeof_proc(void) 1973109864Sjeff{ 1974109864Sjeff return (sizeof(struct proc)); 1975109864Sjeff} 1976109864Sjeff 1977109864Sjeffint 1978109864Sjeffsched_sizeof_thread(void) 1979109864Sjeff{ 1980109864Sjeff return (sizeof(struct thread) + sizeof(struct td_sched)); 1981109864Sjeff} 1982134791Sjulian#define KERN_SWITCH_INCLUDE 1 1983134791Sjulian#include "kern/kern_switch.c" 1984
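#if 0
/*
 * Illustrative sketch only, never compiled: one way a kernel thread
 * could use the affinity interfaces above to run a section of code on
 * a specific cpu.  example_run_on_cpu() is not part of this file; it
 * relies only on sched_bind(), sched_is_bound() and sched_unbind() as
 * defined above and on the usual sched_lock protocol.  sched_bind()
 * may mi_switch() to reach the target cpu, so no locks other than
 * sched_lock should be held across the call.
 */
static void
example_run_on_cpu(int cpu)
{
	struct thread *td;

	td = curthread;
	mtx_lock_spin(&sched_lock);
	sched_bind(td, cpu);			/* may switch cpus */
	KASSERT(sched_is_bound(td), ("example: thread not bound"));
	mtx_unlock_spin(&sched_lock);

	/* ... work that must execute on 'cpu' goes here ... */

	mtx_lock_spin(&sched_lock);
	sched_unbind(td);
	mtx_unlock_spin(&sched_lock);
}
#endif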