sched_ule.c revision 123231
/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 123231 2003-12-07 09:57:51Z peter $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#define	KTR_ULE	KTR_NFS

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)
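
Illustration only (not part of the scheduler source): the tunables declared just below hang off the kern.sched sysctl node, so a user-space program could inspect them with sysctlbyname(3) as in this minimal sketch. Whether the kern.sched.slice_min/slice_max knobs exist on a given machine depends on the kernel actually being built with this scheduler.

/*
 * Hypothetical user-space sketch: read the ULE slice tunables declared
 * below.  The knob names follow directly from the SYSCTL_INT() lines.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int slice;
	size_t len;

	len = sizeof(slice);
	if (sysctlbyname("kern.sched.slice_min", &slice, &len, NULL, 0) == 0)
		printf("slice_min: %d ticks\n", slice);
	len = sizeof(slice);
	if (sysctlbyname("kern.sched.slice_max", &slice, &len, NULL, 0) == 0)
		printf("slice_max: %d ticks\n", slice);
	return (0);
}
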
static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED");

static int sched_strict;
SYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, "");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

#ifdef SMP
/* Callout to handle load balancing SMP systems. */
static struct callout kseq_lb_callout;
#endif

/*
 * These datastructures are allocated within their parent datastructure but
 * are scheduler specific.
 */

struct ke_sched {
	int		ske_slice;
	struct runq	*ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int		ske_ltick;	/* Last tick that we were running on */
	int		ske_ftick;	/* First tick that we were running on */
	int		ske_ticks;	/* Tick count */
	/* CPU that we have affinity for. */
	u_char		ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu
#define	ke_assign	ke_procq.tqe_next

#define	KEF_ASSIGNED	KEF_SCHED0	/* KSE is being migrated. */
#define	KEF_BOUND	KEF_SCHED1	/* KSE can not migrate. */

struct kg_sched {
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
};
#define	td_slptime	td_sched->std_slptime

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower(better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)
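
For illustration only, here is a small user-space sketch of how these macros combine into a timeshare user priority: the interactivity score is scaled into the timeshare range, the nice value is added on top, and the result is clamped, mirroring sched_priority() further down in this file.  The PRI_MIN_TIMESHARE/PRI_MAX_TIMESHARE values below are stand-in assumptions; the real constants come from <sys/priority.h>.

/*
 * Minimal sketch of the priority calculation.  Constant values are
 * assumptions made for the example, not taken from the kernel headers.
 */
#include <stdio.h>

#define	PRI_MIN_TIMESHARE	160	/* assumed stand-in value */
#define	PRI_MAX_TIMESHARE	223	/* assumed stand-in value */
#define	SCHED_INTERACT_MAX	100

#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

static int
example_priority(int score, int nice)
{
	int pri;

	pri = SCHED_PRI_INTERACT(score) + SCHED_PRI_BASE + nice;
	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;
	return (pri);
}

int
main(void)
{
	/* An interactive group (low score) beats a CPU hog (high score). */
	printf("score  5, nice  0: %d\n", example_priority(5, 0));
	printf("score 80, nice  0: %d\n", example_priority(80, 0));
	printf("score 80, nice 20: %d\n", example_priority(80, 20));
	return (0);
}
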
/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determine the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN		(slice_min)
#define	SCHED_SLICE_MAX		(slice_max)
#define	SCHED_SLICE_NTHRESH	(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(kg)						\
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(kg, ke)						\
    (ke->ke_thread->td_priority != kg->kg_user_pri ||			\
    SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)
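
As a rough illustration of the SCHED_SLICE_* macros above (again, not kernel code): a kse group's slice shrinks linearly as its nice value moves away from the least-nice group on its run queue, until it falls off the window entirely.  The slice_min/slice_max values in this sketch assume hz = 1000 and the defaults sched_setup() would pick; SCHED_PRI_NHALF is assumed to be 20.

/*
 * Stand-alone sketch of SCHED_SLICE_NICE().  All constants here are
 * assumptions for the example.
 */
#include <stdio.h>

static int slice_min = 10;		/* hz / 100, assuming hz = 1000 */
static int slice_max = 140;		/* roughly hz / 7 */

#define	SCHED_PRI_NHALF		20	/* assumed: half of 41 nice values */
#define	SCHED_SLICE_MIN		(slice_min)
#define	SCHED_SLICE_MAX		(slice_max)
#define	SCHED_SLICE_NTHRESH	(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

int
main(void)
{
	int nice;

	/*
	 * "nice" here is already relative to the least nice kse group on
	 * the run queue, the same way sched_slice() computes it.
	 */
	for (nice = 0; nice <= SCHED_SLICE_NTHRESH; nice++)
		printf("relative nice %2d -> slice %3d ticks\n",
		    nice, SCHED_SLICE_NICE(nice));
	return (0);
}
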

/*
 * kseq - per processor runqs and statistics.
 */

#define	KSEQ_NCLASS	(PRI_IDLE + 1)	/* Number of run classes. */

struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int		ksq_load_transferable;	/* kses that may be migrated. */
	int		ksq_idled;
	int		ksq_cpus;	/* Count of CPUs in this kseq. */
	volatile struct kse *ksq_assigned;	/* assigned by another CPU. */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
static int kseq_idle;
static struct kseq	kseq_cpu[MAXCPU];
static struct kseq	*kseq_idmap[MAXCPU];
#define	KSEQ_SELF()	(kseq_idmap[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(kseq_idmap[(x)])
#else
static struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_load_add(struct kseq *kseq, struct kse *ke);
static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void *arg);
static void kseq_move(struct kseq *from, int cpu);
static __inline void kseq_setidle(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq);
#define	KSE_CAN_MIGRATE(ke, class)					\
    ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 &&		\
    ((ke)->ke_flags & KEF_BOUND) == 0)
#endif

void
kseq_print(int cpu)
{
	struct kseq *kseq;
	int i;

	kseq = KSEQ_CPU(cpu);

	printf("kseq:\n");
	printf("\tload: %d\n", kseq->ksq_load);
	printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", kseq->ksq_load_transferable);
#endif
	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (kseq->ksq_nice[i])
			printf("\t\t%d = %d\n",
			    i - SCHED_PRI_NHALF, kseq->ksq_nice[i]);
}

static __inline void
kseq_runq_add(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
	if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
298122744Sjeff kseq->ksq_load_transferable++; 299122744Sjeff#endif 300122744Sjeff runq_add(ke->ke_runq, ke); 301122744Sjeff} 302122744Sjeff 303122744Sjeffstatic __inline void 304122744Sjeffkseq_runq_rem(struct kseq *kseq, struct kse *ke) 305122744Sjeff{ 306122744Sjeff#ifdef SMP 307122744Sjeff if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) 308122744Sjeff kseq->ksq_load_transferable--; 309122744Sjeff#endif 310122744Sjeff runq_remove(ke->ke_runq, ke); 311122744Sjeff} 312122744Sjeff 313113357Sjeffstatic void 314122744Sjeffkseq_load_add(struct kseq *kseq, struct kse *ke) 315113357Sjeff{ 316121896Sjeff int class; 317115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 318121896Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 319121896Sjeff if (class == PRI_TIMESHARE) 320121896Sjeff kseq->ksq_load_timeshare++; 321113357Sjeff kseq->ksq_load++; 322113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 323122744Sjeff CTR6(KTR_ULE, 324122744Sjeff "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))", 325122744Sjeff ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority, 326122744Sjeff ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin); 327113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 328113357Sjeff kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice); 329110267Sjeff} 330113357Sjeff 331112994Sjeffstatic void 332122744Sjeffkseq_load_rem(struct kseq *kseq, struct kse *ke) 333110267Sjeff{ 334121896Sjeff int class; 335115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 336121896Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 337121896Sjeff if (class == PRI_TIMESHARE) 338121896Sjeff kseq->ksq_load_timeshare--; 339113357Sjeff kseq->ksq_load--; 340113357Sjeff ke->ke_runq = NULL; 341113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 342113357Sjeff kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice); 343110267Sjeff} 344110267Sjeff 345113357Sjeffstatic void 346113357Sjeffkseq_nice_add(struct kseq *kseq, int nice) 347110267Sjeff{ 348115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 349113357Sjeff /* Normalize to zero. */ 350113357Sjeff kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 351121896Sjeff if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 352113357Sjeff kseq->ksq_nicemin = nice; 353110267Sjeff} 354110267Sjeff 355113357Sjeffstatic void 356113357Sjeffkseq_nice_rem(struct kseq *kseq, int nice) 357110267Sjeff{ 358113357Sjeff int n; 359113357Sjeff 360115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 361113357Sjeff /* Normalize to zero. */ 362113357Sjeff n = nice + SCHED_PRI_NHALF; 363113357Sjeff kseq->ksq_nice[n]--; 364113357Sjeff KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 365113357Sjeff 366113357Sjeff /* 367113357Sjeff * If this wasn't the smallest nice value or there are more in 368113357Sjeff * this bucket we can just return. Otherwise we have to recalculate 369113357Sjeff * the smallest nice. 370113357Sjeff */ 371113357Sjeff if (nice != kseq->ksq_nicemin || 372113357Sjeff kseq->ksq_nice[n] != 0 || 373121896Sjeff kseq->ksq_load_timeshare == 0) 374113357Sjeff return; 375113357Sjeff 376121869Sjeff for (; n < SCHED_PRI_NRESV; n++) 377113357Sjeff if (kseq->ksq_nice[n]) { 378113357Sjeff kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 379113357Sjeff return; 380113357Sjeff } 381110267Sjeff} 382110267Sjeff 383113357Sjeff#ifdef SMP 384116069Sjeff/* 385122744Sjeff * sched_balance is a simple CPU load balancing algorithm. It operates by 386116069Sjeff * finding the least loaded and most loaded cpu and equalizing their load 387116069Sjeff * by migrating some processes. 
388116069Sjeff * 389116069Sjeff * Dealing only with two CPUs at a time has two advantages. Firstly, most 390116069Sjeff * installations will only have 2 cpus. Secondly, load balancing too much at 391116069Sjeff * once can have an unpleasant effect on the system. The scheduler rarely has 392116069Sjeff * enough information to make perfect decisions. So this algorithm chooses 393116069Sjeff * algorithm simplicity and more gradual effects on load in larger systems. 394116069Sjeff * 395116069Sjeff * It could be improved by considering the priorities and slices assigned to 396116069Sjeff * each task prior to balancing them. There are many pathological cases with 397116069Sjeff * any approach and so the semi random algorithm below may work as well as any. 398116069Sjeff * 399116069Sjeff */ 400121790Sjeffstatic void 401122744Sjeffsched_balance(void *arg) 402116069Sjeff{ 403116069Sjeff struct kseq *kseq; 404116069Sjeff int high_load; 405116069Sjeff int low_load; 406116069Sjeff int high_cpu; 407116069Sjeff int low_cpu; 408116069Sjeff int move; 409116069Sjeff int diff; 410116069Sjeff int i; 411116069Sjeff 412116069Sjeff high_cpu = 0; 413116069Sjeff low_cpu = 0; 414116069Sjeff high_load = 0; 415116069Sjeff low_load = -1; 416116069Sjeff 417116069Sjeff mtx_lock_spin(&sched_lock); 418116962Sjeff if (smp_started == 0) 419116962Sjeff goto out; 420116962Sjeff 421123126Sjhb for (i = 0; i <= mp_maxid; i++) { 422116970Sjeff if (CPU_ABSENT(i) || (i & stopped_cpus) != 0) 423116069Sjeff continue; 424116069Sjeff kseq = KSEQ_CPU(i); 425122744Sjeff if (kseq->ksq_load_transferable > high_load) { 426122744Sjeff high_load = kseq->ksq_load_transferable; 427116069Sjeff high_cpu = i; 428116069Sjeff } 429116069Sjeff if (low_load == -1 || kseq->ksq_load < low_load) { 430116069Sjeff low_load = kseq->ksq_load; 431116069Sjeff low_cpu = i; 432116069Sjeff } 433116069Sjeff } 434117237Sjeff kseq = KSEQ_CPU(high_cpu); 435116069Sjeff /* 436116069Sjeff * Nothing to do. 437116069Sjeff */ 438122744Sjeff if (high_load == 0 || low_load >= kseq->ksq_load) 439116069Sjeff goto out; 440122744Sjeff /* 441122744Sjeff * Determine what the imbalance is and then adjust that to how many 442122744Sjeff * kses we actually have to give up (load_transferable). 
443122744Sjeff */ 444122744Sjeff diff = kseq->ksq_load - low_load; 445116069Sjeff move = diff / 2; 446116069Sjeff if (diff & 0x1) 447116069Sjeff move++; 448122744Sjeff move = min(move, high_load); 449116069Sjeff for (i = 0; i < move; i++) 450117237Sjeff kseq_move(kseq, low_cpu); 451116069Sjeffout: 452116069Sjeff mtx_unlock_spin(&sched_lock); 453122744Sjeff callout_reset(&kseq_lb_callout, hz, sched_balance, NULL); 454116069Sjeff 455116069Sjeff return; 456116069Sjeff} 457116069Sjeff 458121790Sjeffstatic void 459116069Sjeffkseq_move(struct kseq *from, int cpu) 460116069Sjeff{ 461116069Sjeff struct kse *ke; 462116069Sjeff 463121790Sjeff ke = kseq_steal(from); 464116069Sjeff ke->ke_state = KES_THREAD; 465122744Sjeff kseq_runq_rem(from, ke); 466122744Sjeff kseq_load_rem(from, ke); 467116069Sjeff 468116069Sjeff ke->ke_cpu = cpu; 469121923Sjeff kseq_notify(ke, cpu); 470116069Sjeff} 471110267Sjeff 472121923Sjeffstatic __inline void 473121923Sjeffkseq_setidle(struct kseq *kseq) 474121790Sjeff{ 475121923Sjeff if (kseq->ksq_idled) 476121923Sjeff return; 477121923Sjeff kseq->ksq_idled = 1; 478121923Sjeff atomic_set_int(&kseq_idle, PCPU_GET(cpumask)); 479121923Sjeff return; 480121790Sjeff} 481121790Sjeff 482121790Sjeffstatic void 483121790Sjeffkseq_assign(struct kseq *kseq) 484121790Sjeff{ 485121790Sjeff struct kse *nke; 486121790Sjeff struct kse *ke; 487121790Sjeff 488121790Sjeff do { 489122848Sjeff (volatile struct kse *)ke = kseq->ksq_assigned; 490121790Sjeff } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 491121790Sjeff for (; ke != NULL; ke = nke) { 492121790Sjeff nke = ke->ke_assign; 493121790Sjeff ke->ke_flags &= ~KEF_ASSIGNED; 494121790Sjeff sched_add(ke->ke_thread); 495121790Sjeff } 496121790Sjeff} 497121790Sjeff 498121790Sjeffstatic void 499121790Sjeffkseq_notify(struct kse *ke, int cpu) 500121790Sjeff{ 501121790Sjeff struct kseq *kseq; 502121790Sjeff struct thread *td; 503121790Sjeff struct pcpu *pcpu; 504121790Sjeff 505121790Sjeff ke->ke_flags |= KEF_ASSIGNED; 506121790Sjeff 507121790Sjeff kseq = KSEQ_CPU(cpu); 508121790Sjeff 509121790Sjeff /* 510121790Sjeff * Place a KSE on another cpu's queue and force a resched. 
511121790Sjeff */ 512121790Sjeff do { 513122848Sjeff (volatile struct kse *)ke->ke_assign = kseq->ksq_assigned; 514121790Sjeff } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 515121790Sjeff pcpu = pcpu_find(cpu); 516121790Sjeff td = pcpu->pc_curthread; 517121790Sjeff if (ke->ke_thread->td_priority < td->td_priority || 518121790Sjeff td == pcpu->pc_idlethread) { 519121790Sjeff td->td_flags |= TDF_NEEDRESCHED; 520121790Sjeff ipi_selected(1 << cpu, IPI_AST); 521121790Sjeff } 522121790Sjeff} 523121790Sjeff 524121790Sjeffstatic struct kse * 525121790Sjeffrunq_steal(struct runq *rq) 526121790Sjeff{ 527121790Sjeff struct rqhead *rqh; 528121790Sjeff struct rqbits *rqb; 529121790Sjeff struct kse *ke; 530121790Sjeff int word; 531121790Sjeff int bit; 532121790Sjeff 533121790Sjeff mtx_assert(&sched_lock, MA_OWNED); 534121790Sjeff rqb = &rq->rq_status; 535121790Sjeff for (word = 0; word < RQB_LEN; word++) { 536121790Sjeff if (rqb->rqb_bits[word] == 0) 537121790Sjeff continue; 538121790Sjeff for (bit = 0; bit < RQB_BPW; bit++) { 539123231Speter if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 540121790Sjeff continue; 541121790Sjeff rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 542121790Sjeff TAILQ_FOREACH(ke, rqh, ke_procq) { 543121896Sjeff if (KSE_CAN_MIGRATE(ke, 544121896Sjeff PRI_BASE(ke->ke_ksegrp->kg_pri_class))) 545121790Sjeff return (ke); 546121790Sjeff } 547121790Sjeff } 548121790Sjeff } 549121790Sjeff return (NULL); 550121790Sjeff} 551121790Sjeff 552121790Sjeffstatic struct kse * 553121790Sjeffkseq_steal(struct kseq *kseq) 554121790Sjeff{ 555121790Sjeff struct kse *ke; 556121790Sjeff 557121790Sjeff if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 558121790Sjeff return (ke); 559121790Sjeff if ((ke = runq_steal(kseq->ksq_next)) != NULL) 560121790Sjeff return (ke); 561121790Sjeff return (runq_steal(&kseq->ksq_idle)); 562121790Sjeff} 563121790Sjeff#endif /* SMP */ 564121790Sjeff 565117326Sjeff/* 566121790Sjeff * Pick the highest priority task we have and return it. 567117326Sjeff */ 568117326Sjeff 569121790Sjeffstatic struct kse * 570121790Sjeffkseq_choose(struct kseq *kseq) 571110267Sjeff{ 572110267Sjeff struct kse *ke; 573110267Sjeff struct runq *swap; 574110267Sjeff 575115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 576113357Sjeff swap = NULL; 577112994Sjeff 578113357Sjeff for (;;) { 579113357Sjeff ke = runq_choose(kseq->ksq_curr); 580113357Sjeff if (ke == NULL) { 581113357Sjeff /* 582113357Sjeff * We already swaped once and didn't get anywhere. 583113357Sjeff */ 584113357Sjeff if (swap) 585113357Sjeff break; 586113357Sjeff swap = kseq->ksq_curr; 587113357Sjeff kseq->ksq_curr = kseq->ksq_next; 588113357Sjeff kseq->ksq_next = swap; 589113357Sjeff continue; 590113357Sjeff } 591113357Sjeff /* 592113357Sjeff * If we encounter a slice of 0 the kse is in a 593113357Sjeff * TIMESHARE kse group and its nice was too far out 594113357Sjeff * of the range that receives slices. 
595113357Sjeff */ 596121790Sjeff if (ke->ke_slice == 0) { 597113357Sjeff runq_remove(ke->ke_runq, ke); 598113357Sjeff sched_slice(ke); 599113357Sjeff ke->ke_runq = kseq->ksq_next; 600113357Sjeff runq_add(ke->ke_runq, ke); 601113357Sjeff continue; 602113357Sjeff } 603113357Sjeff return (ke); 604110267Sjeff } 605110267Sjeff 606113357Sjeff return (runq_choose(&kseq->ksq_idle)); 607110267Sjeff} 608110267Sjeff 609109864Sjeffstatic void 610110028Sjeffkseq_setup(struct kseq *kseq) 611110028Sjeff{ 612113357Sjeff runq_init(&kseq->ksq_timeshare[0]); 613113357Sjeff runq_init(&kseq->ksq_timeshare[1]); 614112994Sjeff runq_init(&kseq->ksq_idle); 615113357Sjeff kseq->ksq_curr = &kseq->ksq_timeshare[0]; 616113357Sjeff kseq->ksq_next = &kseq->ksq_timeshare[1]; 617113660Sjeff kseq->ksq_load = 0; 618121896Sjeff kseq->ksq_load_timeshare = 0; 619110267Sjeff#ifdef SMP 620121896Sjeff kseq->ksq_load_transferable = 0; 621121923Sjeff kseq->ksq_idled = 0; 622121790Sjeff kseq->ksq_assigned = NULL; 623110267Sjeff#endif 624110028Sjeff} 625110028Sjeff 626110028Sjeffstatic void 627109864Sjeffsched_setup(void *dummy) 628109864Sjeff{ 629117313Sjeff#ifdef SMP 630109864Sjeff int i; 631117313Sjeff#endif 632109864Sjeff 633116946Sjeff slice_min = (hz/100); /* 10ms */ 634116946Sjeff slice_max = (hz/7); /* ~140ms */ 635111857Sjeff 636117237Sjeff#ifdef SMP 637109864Sjeff /* init kseqs */ 638117237Sjeff /* Create the idmap. */ 639117237Sjeff#ifdef ULE_HTT_EXPERIMENTAL 640117237Sjeff if (smp_topology == NULL) { 641117237Sjeff#else 642117237Sjeff if (1) { 643117237Sjeff#endif 644117237Sjeff for (i = 0; i < MAXCPU; i++) { 645117237Sjeff kseq_setup(&kseq_cpu[i]); 646117237Sjeff kseq_idmap[i] = &kseq_cpu[i]; 647117237Sjeff kseq_cpu[i].ksq_cpus = 1; 648117237Sjeff } 649117237Sjeff } else { 650117237Sjeff int j; 651113357Sjeff 652117237Sjeff for (i = 0; i < smp_topology->ct_count; i++) { 653117237Sjeff struct cpu_group *cg; 654117237Sjeff 655117237Sjeff cg = &smp_topology->ct_group[i]; 656117237Sjeff kseq_setup(&kseq_cpu[i]); 657117237Sjeff 658117237Sjeff for (j = 0; j < MAXCPU; j++) 659117237Sjeff if ((cg->cg_mask & (1 << j)) != 0) 660117237Sjeff kseq_idmap[j] = &kseq_cpu[i]; 661117237Sjeff kseq_cpu[i].ksq_cpus = cg->cg_count; 662117237Sjeff } 663117237Sjeff } 664119137Ssam callout_init(&kseq_lb_callout, CALLOUT_MPSAFE); 665122744Sjeff sched_balance(NULL); 666117237Sjeff#else 667117237Sjeff kseq_setup(KSEQ_SELF()); 668116069Sjeff#endif 669117237Sjeff mtx_lock_spin(&sched_lock); 670122744Sjeff kseq_load_add(KSEQ_SELF(), &kse0); 671117237Sjeff mtx_unlock_spin(&sched_lock); 672109864Sjeff} 673109864Sjeff 674109864Sjeff/* 675109864Sjeff * Scale the scheduling priority according to the "interactivity" of this 676109864Sjeff * process. 
677109864Sjeff */ 678113357Sjeffstatic void 679109864Sjeffsched_priority(struct ksegrp *kg) 680109864Sjeff{ 681109864Sjeff int pri; 682109864Sjeff 683109864Sjeff if (kg->kg_pri_class != PRI_TIMESHARE) 684113357Sjeff return; 685109864Sjeff 686113357Sjeff pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 687111857Sjeff pri += SCHED_PRI_BASE; 688109864Sjeff pri += kg->kg_nice; 689109864Sjeff 690109864Sjeff if (pri > PRI_MAX_TIMESHARE) 691109864Sjeff pri = PRI_MAX_TIMESHARE; 692109864Sjeff else if (pri < PRI_MIN_TIMESHARE) 693109864Sjeff pri = PRI_MIN_TIMESHARE; 694109864Sjeff 695109864Sjeff kg->kg_user_pri = pri; 696109864Sjeff 697113357Sjeff return; 698109864Sjeff} 699109864Sjeff 700109864Sjeff/* 701112966Sjeff * Calculate a time slice based on the properties of the kseg and the runq 702112994Sjeff * that we're on. This is only for PRI_TIMESHARE ksegrps. 703109864Sjeff */ 704112966Sjeffstatic void 705112966Sjeffsched_slice(struct kse *ke) 706109864Sjeff{ 707113357Sjeff struct kseq *kseq; 708112966Sjeff struct ksegrp *kg; 709109864Sjeff 710112966Sjeff kg = ke->ke_ksegrp; 711113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 712109864Sjeff 713112966Sjeff /* 714112966Sjeff * Rationale: 715112966Sjeff * KSEs in interactive ksegs get the minimum slice so that we 716112966Sjeff * quickly notice if it abuses its advantage. 717112966Sjeff * 718112966Sjeff * KSEs in non-interactive ksegs are assigned a slice that is 719112966Sjeff * based on the ksegs nice value relative to the least nice kseg 720112966Sjeff * on the run queue for this cpu. 721112966Sjeff * 722112966Sjeff * If the KSE is less nice than all others it gets the maximum 723112966Sjeff * slice and other KSEs will adjust their slice relative to 724112966Sjeff * this when they first expire. 725112966Sjeff * 726112966Sjeff * There is 20 point window that starts relative to the least 727112966Sjeff * nice kse on the run queue. Slice size is determined by 728112966Sjeff * the kse distance from the last nice ksegrp. 729112966Sjeff * 730121871Sjeff * If the kse is outside of the window it will get no slice 731121871Sjeff * and will be reevaluated each time it is selected on the 732121871Sjeff * run queue. The exception to this is nice 0 ksegs when 733121871Sjeff * a nice -20 is running. They are always granted a minimum 734121871Sjeff * slice. 735112966Sjeff */ 736113357Sjeff if (!SCHED_INTERACTIVE(kg)) { 737112966Sjeff int nice; 738112966Sjeff 739113357Sjeff nice = kg->kg_nice + (0 - kseq->ksq_nicemin); 740121896Sjeff if (kseq->ksq_load_timeshare == 0 || 741113357Sjeff kg->kg_nice < kseq->ksq_nicemin) 742112966Sjeff ke->ke_slice = SCHED_SLICE_MAX; 743121871Sjeff else if (nice <= SCHED_SLICE_NTHRESH) 744112966Sjeff ke->ke_slice = SCHED_SLICE_NICE(nice); 745121871Sjeff else if (kg->kg_nice == 0) 746121871Sjeff ke->ke_slice = SCHED_SLICE_MIN; 747112966Sjeff else 748112966Sjeff ke->ke_slice = 0; 749112966Sjeff } else 750112966Sjeff ke->ke_slice = SCHED_SLICE_MIN; 751112966Sjeff 752113357Sjeff CTR6(KTR_ULE, 753113357Sjeff "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)", 754113357Sjeff ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin, 755121896Sjeff kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg)); 756113357Sjeff 757112966Sjeff return; 758109864Sjeff} 759109864Sjeff 760121868Sjeff/* 761121868Sjeff * This routine enforces a maximum limit on the amount of scheduling history 762121868Sjeff * kept. It is called after either the slptime or runtime is adjusted. 
763121868Sjeff * This routine will not operate correctly when slp or run times have been 764121868Sjeff * adjusted to more than double their maximum. 765121868Sjeff */ 766116463Sjeffstatic void 767116463Sjeffsched_interact_update(struct ksegrp *kg) 768116463Sjeff{ 769121868Sjeff int sum; 770121605Sjeff 771121868Sjeff sum = kg->kg_runtime + kg->kg_slptime; 772121868Sjeff if (sum < SCHED_SLP_RUN_MAX) 773121868Sjeff return; 774121868Sjeff /* 775121868Sjeff * If we have exceeded by more than 1/5th then the algorithm below 776121868Sjeff * will not bring us back into range. Dividing by two here forces 777121868Sjeff * us into the range of [3/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 778121868Sjeff */ 779121868Sjeff if (sum > (SCHED_INTERACT_MAX / 5) * 6) { 780121868Sjeff kg->kg_runtime /= 2; 781121868Sjeff kg->kg_slptime /= 2; 782121868Sjeff return; 783116463Sjeff } 784121868Sjeff kg->kg_runtime = (kg->kg_runtime / 5) * 4; 785121868Sjeff kg->kg_slptime = (kg->kg_slptime / 5) * 4; 786116463Sjeff} 787116463Sjeff 788121868Sjeffstatic void 789121868Sjeffsched_interact_fork(struct ksegrp *kg) 790121868Sjeff{ 791121868Sjeff int ratio; 792121868Sjeff int sum; 793121868Sjeff 794121868Sjeff sum = kg->kg_runtime + kg->kg_slptime; 795121868Sjeff if (sum > SCHED_SLP_RUN_FORK) { 796121868Sjeff ratio = sum / SCHED_SLP_RUN_FORK; 797121868Sjeff kg->kg_runtime /= ratio; 798121868Sjeff kg->kg_slptime /= ratio; 799121868Sjeff } 800121868Sjeff} 801121868Sjeff 802111857Sjeffstatic int 803111857Sjeffsched_interact_score(struct ksegrp *kg) 804111857Sjeff{ 805116365Sjeff int div; 806111857Sjeff 807111857Sjeff if (kg->kg_runtime > kg->kg_slptime) { 808116365Sjeff div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 809116365Sjeff return (SCHED_INTERACT_HALF + 810116365Sjeff (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 811116365Sjeff } if (kg->kg_slptime > kg->kg_runtime) { 812116365Sjeff div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 813116365Sjeff return (kg->kg_runtime / div); 814111857Sjeff } 815111857Sjeff 816116365Sjeff /* 817116365Sjeff * This can happen if slptime and runtime are 0. 818116365Sjeff */ 819116365Sjeff return (0); 820111857Sjeff 821111857Sjeff} 822111857Sjeff 823113357Sjeff/* 824113357Sjeff * This is only somewhat accurate since given many processes of the same 825113357Sjeff * priority they will switch when their slices run out, which will be 826113357Sjeff * at most SCHED_SLICE_MAX. 827113357Sjeff */ 828109864Sjeffint 829109864Sjeffsched_rr_interval(void) 830109864Sjeff{ 831109864Sjeff return (SCHED_SLICE_MAX); 832109864Sjeff} 833109864Sjeff 834121790Sjeffstatic void 835109864Sjeffsched_pctcpu_update(struct kse *ke) 836109864Sjeff{ 837109864Sjeff /* 838109864Sjeff * Adjust counters and watermark for pctcpu calc. 839116365Sjeff */ 840120272Sjeff if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 841120272Sjeff /* 842120272Sjeff * Shift the tick count out so that the divide doesn't 843120272Sjeff * round away our results. 
844120272Sjeff */ 845120272Sjeff ke->ke_ticks <<= 10; 846120272Sjeff ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 847120272Sjeff SCHED_CPU_TICKS; 848120272Sjeff ke->ke_ticks >>= 10; 849120272Sjeff } else 850120272Sjeff ke->ke_ticks = 0; 851109864Sjeff ke->ke_ltick = ticks; 852109864Sjeff ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 853109864Sjeff} 854109864Sjeff 855109864Sjeffvoid 856109864Sjeffsched_prio(struct thread *td, u_char prio) 857109864Sjeff{ 858121605Sjeff struct kse *ke; 859109864Sjeff 860121605Sjeff ke = td->td_kse; 861109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 862109864Sjeff if (TD_ON_RUNQ(td)) { 863121605Sjeff /* 864121605Sjeff * If the priority has been elevated due to priority 865121605Sjeff * propagation, we may have to move ourselves to a new 866121605Sjeff * queue. We still call adjustrunqueue below in case kse 867121605Sjeff * needs to fix things up. 868121605Sjeff */ 869121872Sjeff if (prio < td->td_priority && ke && 870121872Sjeff (ke->ke_flags & KEF_ASSIGNED) == 0 && 871121790Sjeff ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 872121605Sjeff runq_remove(ke->ke_runq, ke); 873121605Sjeff ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 874121605Sjeff runq_add(ke->ke_runq, ke); 875121605Sjeff } 876119488Sdavidxu adjustrunqueue(td, prio); 877121605Sjeff } else 878119488Sdavidxu td->td_priority = prio; 879109864Sjeff} 880109864Sjeff 881109864Sjeffvoid 882121128Sjeffsched_switch(struct thread *td) 883109864Sjeff{ 884121128Sjeff struct thread *newtd; 885109864Sjeff struct kse *ke; 886109864Sjeff 887109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 888109864Sjeff 889109864Sjeff ke = td->td_kse; 890109864Sjeff 891109864Sjeff td->td_last_kse = ke; 892113339Sjulian td->td_lastcpu = td->td_oncpu; 893113339Sjulian td->td_oncpu = NOCPU; 894111032Sjulian td->td_flags &= ~TDF_NEEDRESCHED; 895109864Sjeff 896109864Sjeff if (TD_IS_RUNNING(td)) { 897119488Sdavidxu if (td->td_proc->p_flag & P_SA) { 898122744Sjeff kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 899119488Sdavidxu setrunqueue(td); 900119488Sdavidxu } else { 901119488Sdavidxu /* 902121605Sjeff * This queue is always correct except for idle threads 903121605Sjeff * which have a higher priority due to priority 904121605Sjeff * propagation. 905119488Sdavidxu */ 906121605Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) { 907121605Sjeff if (td->td_priority < PRI_MIN_IDLE) 908121605Sjeff ke->ke_runq = KSEQ_SELF()->ksq_curr; 909121605Sjeff else 910121605Sjeff ke->ke_runq = &KSEQ_SELF()->ksq_idle; 911121605Sjeff } 912122744Sjeff kseq_runq_add(KSEQ_SELF(), ke); 913119488Sdavidxu } 914121146Sjeff } else { 915121146Sjeff if (ke->ke_runq) 916122744Sjeff kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 917121146Sjeff /* 918121146Sjeff * We will not be on the run queue. So we must be 919121146Sjeff * sleeping or similar. 
920121146Sjeff */ 921121146Sjeff if (td->td_proc->p_flag & P_SA) 922121146Sjeff kse_reassign(ke); 923121146Sjeff } 924121128Sjeff newtd = choosethread(); 925121128Sjeff if (td != newtd) 926121128Sjeff cpu_switch(td, newtd); 927121128Sjeff sched_lock.mtx_lock = (uintptr_t)td; 928109864Sjeff 929113339Sjulian td->td_oncpu = PCPU_GET(cpuid); 930109864Sjeff} 931109864Sjeff 932109864Sjeffvoid 933109864Sjeffsched_nice(struct ksegrp *kg, int nice) 934109864Sjeff{ 935113357Sjeff struct kse *ke; 936109864Sjeff struct thread *td; 937113357Sjeff struct kseq *kseq; 938109864Sjeff 939113873Sjhb PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED); 940113873Sjhb mtx_assert(&sched_lock, MA_OWNED); 941113357Sjeff /* 942113357Sjeff * We need to adjust the nice counts for running KSEs. 943113357Sjeff */ 944113357Sjeff if (kg->kg_pri_class == PRI_TIMESHARE) 945113357Sjeff FOREACH_KSE_IN_GROUP(kg, ke) { 946116500Sjeff if (ke->ke_runq == NULL) 947113357Sjeff continue; 948113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 949113357Sjeff kseq_nice_rem(kseq, kg->kg_nice); 950113357Sjeff kseq_nice_add(kseq, nice); 951113357Sjeff } 952109864Sjeff kg->kg_nice = nice; 953109864Sjeff sched_priority(kg); 954113357Sjeff FOREACH_THREAD_IN_GROUP(kg, td) 955111032Sjulian td->td_flags |= TDF_NEEDRESCHED; 956109864Sjeff} 957109864Sjeff 958109864Sjeffvoid 959109864Sjeffsched_sleep(struct thread *td, u_char prio) 960109864Sjeff{ 961109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 962109864Sjeff 963109864Sjeff td->td_slptime = ticks; 964109864Sjeff td->td_priority = prio; 965109864Sjeff 966113357Sjeff CTR2(KTR_ULE, "sleep kse %p (tick: %d)", 967113357Sjeff td->td_kse, td->td_slptime); 968109864Sjeff} 969109864Sjeff 970109864Sjeffvoid 971109864Sjeffsched_wakeup(struct thread *td) 972109864Sjeff{ 973109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 974109864Sjeff 975109864Sjeff /* 976109864Sjeff * Let the kseg know how long we slept for. This is because process 977109864Sjeff * interactivity behavior is modeled in the kseg. 978109864Sjeff */ 979111788Sjeff if (td->td_slptime) { 980111788Sjeff struct ksegrp *kg; 981113357Sjeff int hzticks; 982109864Sjeff 983111788Sjeff kg = td->td_ksegrp; 984121868Sjeff hzticks = (ticks - td->td_slptime) << 10; 985121868Sjeff if (hzticks >= SCHED_SLP_RUN_MAX) { 986121868Sjeff kg->kg_slptime = SCHED_SLP_RUN_MAX; 987121868Sjeff kg->kg_runtime = 1; 988121868Sjeff } else { 989121868Sjeff kg->kg_slptime += hzticks; 990121868Sjeff sched_interact_update(kg); 991121868Sjeff } 992111788Sjeff sched_priority(kg); 993116463Sjeff if (td->td_kse) 994116463Sjeff sched_slice(td->td_kse); 995113357Sjeff CTR2(KTR_ULE, "wakeup kse %p (%d ticks)", 996113357Sjeff td->td_kse, hzticks); 997111788Sjeff td->td_slptime = 0; 998109864Sjeff } 999109864Sjeff setrunqueue(td); 1000109864Sjeff} 1001109864Sjeff 1002109864Sjeff/* 1003109864Sjeff * Penalize the parent for creating a new child and initialize the child's 1004109864Sjeff * priority. 
1005109864Sjeff */ 1006109864Sjeffvoid 1007113357Sjeffsched_fork(struct proc *p, struct proc *p1) 1008109864Sjeff{ 1009109864Sjeff 1010109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1011109864Sjeff 1012113357Sjeff sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); 1013113357Sjeff sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); 1014113357Sjeff sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); 1015113357Sjeff} 1016113357Sjeff 1017113357Sjeffvoid 1018113357Sjeffsched_fork_kse(struct kse *ke, struct kse *child) 1019113357Sjeff{ 1020113923Sjhb 1021116365Sjeff child->ke_slice = 1; /* Attempt to quickly learn interactivity. */ 1022122847Sjeff child->ke_cpu = ke->ke_cpu; 1023113357Sjeff child->ke_runq = NULL; 1024113357Sjeff 1025121051Sjeff /* Grab our parents cpu estimation information. */ 1026121051Sjeff child->ke_ticks = ke->ke_ticks; 1027121051Sjeff child->ke_ltick = ke->ke_ltick; 1028121051Sjeff child->ke_ftick = ke->ke_ftick; 1029113357Sjeff} 1030113357Sjeff 1031113357Sjeffvoid 1032113357Sjeffsched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child) 1033113357Sjeff{ 1034113923Sjhb PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED); 1035116365Sjeff 1036121868Sjeff child->kg_slptime = kg->kg_slptime; 1037121868Sjeff child->kg_runtime = kg->kg_runtime; 1038121868Sjeff child->kg_user_pri = kg->kg_user_pri; 1039121868Sjeff child->kg_nice = kg->kg_nice; 1040121868Sjeff sched_interact_fork(child); 1041116463Sjeff kg->kg_runtime += tickincr << 10; 1042116463Sjeff sched_interact_update(kg); 1043113357Sjeff 1044121868Sjeff CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)", 1045121868Sjeff kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime, 1046121868Sjeff child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime); 1047113357Sjeff} 1048109864Sjeff 1049113357Sjeffvoid 1050113357Sjeffsched_fork_thread(struct thread *td, struct thread *child) 1051113357Sjeff{ 1052113357Sjeff} 1053113357Sjeff 1054113357Sjeffvoid 1055113357Sjeffsched_class(struct ksegrp *kg, int class) 1056113357Sjeff{ 1057113357Sjeff struct kseq *kseq; 1058113357Sjeff struct kse *ke; 1059121896Sjeff int nclass; 1060121896Sjeff int oclass; 1061113357Sjeff 1062113923Sjhb mtx_assert(&sched_lock, MA_OWNED); 1063113357Sjeff if (kg->kg_pri_class == class) 1064113357Sjeff return; 1065113357Sjeff 1066121896Sjeff nclass = PRI_BASE(class); 1067121896Sjeff oclass = PRI_BASE(kg->kg_pri_class); 1068113357Sjeff FOREACH_KSE_IN_GROUP(kg, ke) { 1069113357Sjeff if (ke->ke_state != KES_ONRUNQ && 1070113357Sjeff ke->ke_state != KES_THREAD) 1071113357Sjeff continue; 1072113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 1073113357Sjeff 1074121896Sjeff#ifdef SMP 1075122744Sjeff /* 1076122744Sjeff * On SMP if we're on the RUNQ we must adjust the transferable 1077122744Sjeff * count because could be changing to or from an interrupt 1078122744Sjeff * class. 
1079122744Sjeff */ 1080122744Sjeff if (ke->ke_state == KES_ONRUNQ) { 1081122744Sjeff if (KSE_CAN_MIGRATE(ke, oclass)) 1082122744Sjeff kseq->ksq_load_transferable--; 1083122744Sjeff if (KSE_CAN_MIGRATE(ke, nclass)) 1084122744Sjeff kseq->ksq_load_transferable++; 1085122744Sjeff } 1086121896Sjeff#endif 1087122744Sjeff if (oclass == PRI_TIMESHARE) { 1088121896Sjeff kseq->ksq_load_timeshare--; 1089122744Sjeff kseq_nice_rem(kseq, kg->kg_nice); 1090122744Sjeff } 1091122744Sjeff if (nclass == PRI_TIMESHARE) { 1092121896Sjeff kseq->ksq_load_timeshare++; 1093113357Sjeff kseq_nice_add(kseq, kg->kg_nice); 1094122744Sjeff } 1095109970Sjeff } 1096109970Sjeff 1097113357Sjeff kg->kg_pri_class = class; 1098109864Sjeff} 1099109864Sjeff 1100109864Sjeff/* 1101109864Sjeff * Return some of the child's priority and interactivity to the parent. 1102109864Sjeff */ 1103109864Sjeffvoid 1104113357Sjeffsched_exit(struct proc *p, struct proc *child) 1105109864Sjeff{ 1106109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1107113372Sjeff sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child)); 1108116365Sjeff sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child)); 1109109864Sjeff} 1110109864Sjeff 1111109864Sjeffvoid 1112113372Sjeffsched_exit_kse(struct kse *ke, struct kse *child) 1113113372Sjeff{ 1114122744Sjeff kseq_load_rem(KSEQ_CPU(child->ke_cpu), child); 1115113372Sjeff} 1116113372Sjeff 1117113372Sjeffvoid 1118113372Sjeffsched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child) 1119113372Sjeff{ 1120116463Sjeff /* kg->kg_slptime += child->kg_slptime; */ 1121116365Sjeff kg->kg_runtime += child->kg_runtime; 1122116463Sjeff sched_interact_update(kg); 1123113372Sjeff} 1124113372Sjeff 1125113372Sjeffvoid 1126113372Sjeffsched_exit_thread(struct thread *td, struct thread *child) 1127113372Sjeff{ 1128113372Sjeff} 1129113372Sjeff 1130113372Sjeffvoid 1131121127Sjeffsched_clock(struct thread *td) 1132109864Sjeff{ 1133113357Sjeff struct kseq *kseq; 1134113357Sjeff struct ksegrp *kg; 1135121127Sjeff struct kse *ke; 1136109864Sjeff 1137113357Sjeff /* 1138113357Sjeff * sched_setup() apparently happens prior to stathz being set. We 1139113357Sjeff * need to resolve the timers earlier in the boot so we can avoid 1140113357Sjeff * calculating this here. 1141113357Sjeff */ 1142113357Sjeff if (realstathz == 0) { 1143113357Sjeff realstathz = stathz ? stathz : hz; 1144113357Sjeff tickincr = hz / realstathz; 1145113357Sjeff /* 1146113357Sjeff * XXX This does not work for values of stathz that are much 1147113357Sjeff * larger than hz. 
1148113357Sjeff */ 1149113357Sjeff if (tickincr == 0) 1150113357Sjeff tickincr = 1; 1151113357Sjeff } 1152109864Sjeff 1153121127Sjeff ke = td->td_kse; 1154113357Sjeff kg = ke->ke_ksegrp; 1155109864Sjeff 1156110028Sjeff mtx_assert(&sched_lock, MA_OWNED); 1157110028Sjeff KASSERT((td != NULL), ("schedclock: null thread pointer")); 1158110028Sjeff 1159110028Sjeff /* Adjust ticks for pctcpu */ 1160111793Sjeff ke->ke_ticks++; 1161109971Sjeff ke->ke_ltick = ticks; 1162112994Sjeff 1163109971Sjeff /* Go up to one second beyond our max and then trim back down */ 1164109971Sjeff if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 1165109971Sjeff sched_pctcpu_update(ke); 1166109971Sjeff 1167114496Sjulian if (td->td_flags & TDF_IDLETD) 1168109864Sjeff return; 1169110028Sjeff 1170113357Sjeff CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)", 1171113357Sjeff ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10); 1172110028Sjeff /* 1173113357Sjeff * We only do slicing code for TIMESHARE ksegrps. 1174113357Sjeff */ 1175113357Sjeff if (kg->kg_pri_class != PRI_TIMESHARE) 1176113357Sjeff return; 1177113357Sjeff /* 1178110645Sjeff * We used a tick charge it to the ksegrp so that we can compute our 1179113357Sjeff * interactivity. 1180109864Sjeff */ 1181113357Sjeff kg->kg_runtime += tickincr << 10; 1182116463Sjeff sched_interact_update(kg); 1183110645Sjeff 1184109864Sjeff /* 1185109864Sjeff * We used up one time slice. 1186109864Sjeff */ 1187122847Sjeff if (--ke->ke_slice > 0) 1188113357Sjeff return; 1189109864Sjeff /* 1190113357Sjeff * We're out of time, recompute priorities and requeue. 1191109864Sjeff */ 1192122847Sjeff kseq = KSEQ_SELF(); 1193122744Sjeff kseq_load_rem(kseq, ke); 1194113357Sjeff sched_priority(kg); 1195113357Sjeff sched_slice(ke); 1196113357Sjeff if (SCHED_CURR(kg, ke)) 1197113357Sjeff ke->ke_runq = kseq->ksq_curr; 1198113357Sjeff else 1199113357Sjeff ke->ke_runq = kseq->ksq_next; 1200122744Sjeff kseq_load_add(kseq, ke); 1201113357Sjeff td->td_flags |= TDF_NEEDRESCHED; 1202109864Sjeff} 1203109864Sjeff 1204109864Sjeffint 1205109864Sjeffsched_runnable(void) 1206109864Sjeff{ 1207109864Sjeff struct kseq *kseq; 1208115998Sjeff int load; 1209109864Sjeff 1210115998Sjeff load = 1; 1211115998Sjeff 1212110028Sjeff kseq = KSEQ_SELF(); 1213121790Sjeff#ifdef SMP 1214122094Sjeff if (kseq->ksq_assigned) { 1215122094Sjeff mtx_lock_spin(&sched_lock); 1216121790Sjeff kseq_assign(kseq); 1217122094Sjeff mtx_unlock_spin(&sched_lock); 1218122094Sjeff } 1219121790Sjeff#endif 1220121605Sjeff if ((curthread->td_flags & TDF_IDLETD) != 0) { 1221121605Sjeff if (kseq->ksq_load > 0) 1222121605Sjeff goto out; 1223121605Sjeff } else 1224121605Sjeff if (kseq->ksq_load - 1 > 0) 1225121605Sjeff goto out; 1226115998Sjeff load = 0; 1227115998Sjeffout: 1228115998Sjeff return (load); 1229109864Sjeff} 1230109864Sjeff 1231109864Sjeffvoid 1232109864Sjeffsched_userret(struct thread *td) 1233109864Sjeff{ 1234109864Sjeff struct ksegrp *kg; 1235121605Sjeff 1236121605Sjeff kg = td->td_ksegrp; 1237109864Sjeff 1238109864Sjeff if (td->td_priority != kg->kg_user_pri) { 1239109864Sjeff mtx_lock_spin(&sched_lock); 1240109864Sjeff td->td_priority = kg->kg_user_pri; 1241109864Sjeff mtx_unlock_spin(&sched_lock); 1242109864Sjeff } 1243109864Sjeff} 1244109864Sjeff 1245109864Sjeffstruct kse * 1246109970Sjeffsched_choose(void) 1247109970Sjeff{ 1248110028Sjeff struct kseq *kseq; 1249109970Sjeff struct kse *ke; 1250109970Sjeff 1251115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 1252121790Sjeff kseq = KSEQ_SELF(); 
1253113357Sjeff#ifdef SMP 1254121790Sjeff if (kseq->ksq_assigned) 1255121790Sjeff kseq_assign(kseq); 1256113357Sjeff#endif 1257121790Sjeff ke = kseq_choose(kseq); 1258109864Sjeff if (ke) { 1259121790Sjeff#ifdef SMP 1260121790Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) 1261121923Sjeff kseq_setidle(kseq); 1262121790Sjeff#endif 1263122744Sjeff kseq_runq_rem(kseq, ke); 1264109864Sjeff ke->ke_state = KES_THREAD; 1265112966Sjeff 1266113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) { 1267113357Sjeff CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)", 1268113357Sjeff ke, ke->ke_runq, ke->ke_slice, 1269113357Sjeff ke->ke_thread->td_priority); 1270113357Sjeff } 1271113357Sjeff return (ke); 1272109864Sjeff } 1273109970Sjeff#ifdef SMP 1274121923Sjeff kseq_setidle(kseq); 1275109970Sjeff#endif 1276113357Sjeff return (NULL); 1277109864Sjeff} 1278109864Sjeff 1279109864Sjeffvoid 1280121127Sjeffsched_add(struct thread *td) 1281109864Sjeff{ 1282110267Sjeff struct kseq *kseq; 1283113357Sjeff struct ksegrp *kg; 1284121127Sjeff struct kse *ke; 1285121790Sjeff int class; 1286109864Sjeff 1287121790Sjeff mtx_assert(&sched_lock, MA_OWNED); 1288121127Sjeff ke = td->td_kse; 1289121127Sjeff kg = td->td_ksegrp; 1290121790Sjeff if (ke->ke_flags & KEF_ASSIGNED) 1291121790Sjeff return; 1292121790Sjeff kseq = KSEQ_SELF(); 1293110267Sjeff KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE")); 1294109864Sjeff KASSERT((ke->ke_thread->td_kse != NULL), 1295110267Sjeff ("sched_add: No KSE on thread")); 1296109864Sjeff KASSERT(ke->ke_state != KES_ONRUNQ, 1297110267Sjeff ("sched_add: kse %p (%s) already in run queue", ke, 1298109864Sjeff ke->ke_proc->p_comm)); 1299109864Sjeff KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 1300110267Sjeff ("sched_add: process swapped out")); 1301113387Sjeff KASSERT(ke->ke_runq == NULL, 1302113387Sjeff ("sched_add: KSE %p is still assigned to a run queue", ke)); 1303109864Sjeff 1304121790Sjeff class = PRI_BASE(kg->kg_pri_class); 1305121790Sjeff switch (class) { 1306112994Sjeff case PRI_ITHD: 1307112994Sjeff case PRI_REALTIME: 1308113357Sjeff ke->ke_runq = kseq->ksq_curr; 1309113357Sjeff ke->ke_slice = SCHED_SLICE_MAX; 1310113660Sjeff ke->ke_cpu = PCPU_GET(cpuid); 1311112994Sjeff break; 1312112994Sjeff case PRI_TIMESHARE: 1313121790Sjeff#ifdef SMP 1314121790Sjeff if (ke->ke_cpu != PCPU_GET(cpuid)) { 1315121790Sjeff kseq_notify(ke, ke->ke_cpu); 1316121790Sjeff return; 1317121790Sjeff } 1318121790Sjeff#endif 1319113387Sjeff if (SCHED_CURR(kg, ke)) 1320113387Sjeff ke->ke_runq = kseq->ksq_curr; 1321113387Sjeff else 1322113387Sjeff ke->ke_runq = kseq->ksq_next; 1323113357Sjeff break; 1324112994Sjeff case PRI_IDLE: 1325121790Sjeff#ifdef SMP 1326121790Sjeff if (ke->ke_cpu != PCPU_GET(cpuid)) { 1327121790Sjeff kseq_notify(ke, ke->ke_cpu); 1328121790Sjeff return; 1329121790Sjeff } 1330121790Sjeff#endif 1331113357Sjeff /* 1332113357Sjeff * This is for priority prop. 1333113357Sjeff */ 1334121605Sjeff if (ke->ke_thread->td_priority < PRI_MIN_IDLE) 1335113357Sjeff ke->ke_runq = kseq->ksq_curr; 1336113357Sjeff else 1337113357Sjeff ke->ke_runq = &kseq->ksq_idle; 1338113357Sjeff ke->ke_slice = SCHED_SLICE_MIN; 1339112994Sjeff break; 1340113357Sjeff default: 1341121868Sjeff panic("Unknown pri class."); 1342113357Sjeff break; 1343112994Sjeff } 1344121790Sjeff#ifdef SMP 1345121790Sjeff /* 1346122744Sjeff * If there are any idle processors, give them our extra load. 
The 1347122744Sjeff * threshold at which we start to reassign kses has a large impact 1348122744Sjeff * on the overall performance of the system. Tuned too high and 1349122744Sjeff * some CPUs may idle. Too low and there will be excess migration 1350122744Sjeff * and context swiches. 1351121790Sjeff */ 1352122744Sjeff if (kseq->ksq_load_transferable > kseq->ksq_cpus && 1353122744Sjeff KSE_CAN_MIGRATE(ke, class) && kseq_idle) { 1354121790Sjeff int cpu; 1355109864Sjeff 1356121790Sjeff /* 1357121790Sjeff * Multiple cpus could find this bit simultaneously but the 1358121790Sjeff * race shouldn't be terrible. 1359121790Sjeff */ 1360121790Sjeff cpu = ffs(kseq_idle); 1361121790Sjeff if (cpu) { 1362121790Sjeff cpu--; 1363121790Sjeff atomic_clear_int(&kseq_idle, 1 << cpu); 1364121790Sjeff ke->ke_cpu = cpu; 1365121790Sjeff ke->ke_runq = NULL; 1366121790Sjeff kseq_notify(ke, cpu); 1367121790Sjeff return; 1368121790Sjeff } 1369121790Sjeff } 1370121923Sjeff if (kseq->ksq_idled && 1371121923Sjeff (class == PRI_TIMESHARE || class == PRI_REALTIME)) { 1372121790Sjeff atomic_clear_int(&kseq_idle, PCPU_GET(cpumask)); 1373121923Sjeff kseq->ksq_idled = 0; 1374121923Sjeff } 1375121790Sjeff#endif 1376121790Sjeff if (td->td_priority < curthread->td_priority) 1377121790Sjeff curthread->td_flags |= TDF_NEEDRESCHED; 1378121790Sjeff 1379109864Sjeff ke->ke_ksegrp->kg_runq_kses++; 1380109864Sjeff ke->ke_state = KES_ONRUNQ; 1381109864Sjeff 1382122744Sjeff kseq_runq_add(kseq, ke); 1383122744Sjeff kseq_load_add(kseq, ke); 1384109864Sjeff} 1385109864Sjeff 1386109864Sjeffvoid 1387121127Sjeffsched_rem(struct thread *td) 1388109864Sjeff{ 1389113357Sjeff struct kseq *kseq; 1390121127Sjeff struct kse *ke; 1391113357Sjeff 1392121127Sjeff ke = td->td_kse; 1393121790Sjeff /* 1394121790Sjeff * It is safe to just return here because sched_rem() is only ever 1395121790Sjeff * used in places where we're immediately going to add the 1396121790Sjeff * kse back on again. In that case it'll be added with the correct 1397121790Sjeff * thread and priority when the caller drops the sched_lock. 1398121790Sjeff */ 1399121790Sjeff if (ke->ke_flags & KEF_ASSIGNED) 1400121790Sjeff return; 1401109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1402113387Sjeff KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); 1403109864Sjeff 1404109864Sjeff ke->ke_state = KES_THREAD; 1405109864Sjeff ke->ke_ksegrp->kg_runq_kses--; 1406113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 1407122744Sjeff kseq_runq_rem(kseq, ke); 1408122744Sjeff kseq_load_rem(kseq, ke); 1409109864Sjeff} 1410109864Sjeff 1411109864Sjefffixpt_t 1412121127Sjeffsched_pctcpu(struct thread *td) 1413109864Sjeff{ 1414109864Sjeff fixpt_t pctcpu; 1415121127Sjeff struct kse *ke; 1416109864Sjeff 1417109864Sjeff pctcpu = 0; 1418121127Sjeff ke = td->td_kse; 1419121290Sjeff if (ke == NULL) 1420121290Sjeff return (0); 1421109864Sjeff 1422115998Sjeff mtx_lock_spin(&sched_lock); 1423109864Sjeff if (ke->ke_ticks) { 1424109864Sjeff int rtick; 1425109864Sjeff 1426116365Sjeff /* 1427116365Sjeff * Don't update more frequently than twice a second. Allowing 1428116365Sjeff * this causes the cpu usage to decay away too quickly due to 1429116365Sjeff * rounding errors. 1430116365Sjeff */ 1431116365Sjeff if (ke->ke_ltick < (ticks - (hz / 2))) 1432116365Sjeff sched_pctcpu_update(ke); 1433109864Sjeff /* How many rtick per second ? 
*/ 1434116365Sjeff rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS); 1435110226Sscottl pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 1436109864Sjeff } 1437109864Sjeff 1438109864Sjeff ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 1439113865Sjhb mtx_unlock_spin(&sched_lock); 1440109864Sjeff 1441109864Sjeff return (pctcpu); 1442109864Sjeff} 1443109864Sjeff 1444122038Sjeffvoid 1445122038Sjeffsched_bind(struct thread *td, int cpu) 1446122038Sjeff{ 1447122038Sjeff struct kse *ke; 1448122038Sjeff 1449122038Sjeff mtx_assert(&sched_lock, MA_OWNED); 1450122038Sjeff ke = td->td_kse; 1451122038Sjeff#ifndef SMP 1452122038Sjeff ke->ke_flags |= KEF_BOUND; 1453122038Sjeff#else 1454122038Sjeff if (PCPU_GET(cpuid) == cpu) { 1455122038Sjeff ke->ke_flags |= KEF_BOUND; 1456122038Sjeff return; 1457122038Sjeff } 1458122038Sjeff /* sched_rem without the runq_remove */ 1459122038Sjeff ke->ke_state = KES_THREAD; 1460122038Sjeff ke->ke_ksegrp->kg_runq_kses--; 1461122744Sjeff kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke); 1462122038Sjeff ke->ke_cpu = cpu; 1463122038Sjeff kseq_notify(ke, cpu); 1464122038Sjeff /* When we return from mi_switch we'll be on the correct cpu. */ 1465122038Sjeff td->td_proc->p_stats->p_ru.ru_nvcsw++; 1466122038Sjeff mi_switch(); 1467122038Sjeff#endif 1468122038Sjeff} 1469122038Sjeff 1470122038Sjeffvoid 1471122038Sjeffsched_unbind(struct thread *td) 1472122038Sjeff{ 1473122038Sjeff mtx_assert(&sched_lock, MA_OWNED); 1474122038Sjeff td->td_kse->ke_flags &= ~KEF_BOUND; 1475122038Sjeff} 1476122038Sjeff 1477109864Sjeffint 1478109864Sjeffsched_sizeof_kse(void) 1479109864Sjeff{ 1480109864Sjeff return (sizeof(struct kse) + sizeof(struct ke_sched)); 1481109864Sjeff} 1482109864Sjeff 1483109864Sjeffint 1484109864Sjeffsched_sizeof_ksegrp(void) 1485109864Sjeff{ 1486109864Sjeff return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 1487109864Sjeff} 1488109864Sjeff 1489109864Sjeffint 1490109864Sjeffsched_sizeof_proc(void) 1491109864Sjeff{ 1492109864Sjeff return (sizeof(struct proc)); 1493109864Sjeff} 1494109864Sjeff 1495109864Sjeffint 1496109864Sjeffsched_sizeof_thread(void) 1497109864Sjeff{ 1498109864Sjeff return (sizeof(struct thread) + sizeof(struct td_sched)); 1499109864Sjeff} 1500