sched_ule.c (FreeBSD ULE scheduler, head/sys/kern), revision 139334
1109864Sjeff/*- 2113357Sjeff * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org> 3109864Sjeff * All rights reserved. 4109864Sjeff * 5109864Sjeff * Redistribution and use in source and binary forms, with or without 6109864Sjeff * modification, are permitted provided that the following conditions 7109864Sjeff * are met: 8109864Sjeff * 1. Redistributions of source code must retain the above copyright 9109864Sjeff * notice unmodified, this list of conditions, and the following 10109864Sjeff * disclaimer. 11109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright 12109864Sjeff * notice, this list of conditions and the following disclaimer in the 13109864Sjeff * documentation and/or other materials provided with the distribution. 14109864Sjeff * 15109864Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16109864Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17109864Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18109864Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19109864Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20109864Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21109864Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22109864Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23109864Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24109864Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25109864Sjeff */ 26109864Sjeff 27116182Sobrien#include <sys/cdefs.h> 28116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 139334 2004-12-26 22:56:08Z jeff $"); 29116182Sobrien 30134649Sscottl#include <opt_sched.h> 31134649Sscottl 32134791Sjulian#define kse td_sched 33134791Sjulian 34109864Sjeff#include <sys/param.h> 35109864Sjeff#include <sys/systm.h> 36131929Smarcel#include <sys/kdb.h> 37109864Sjeff#include <sys/kernel.h> 38109864Sjeff#include <sys/ktr.h> 39109864Sjeff#include <sys/lock.h> 40109864Sjeff#include <sys/mutex.h> 41109864Sjeff#include <sys/proc.h> 42112966Sjeff#include <sys/resource.h> 43122038Sjeff#include <sys/resourcevar.h> 44109864Sjeff#include <sys/sched.h> 45109864Sjeff#include <sys/smp.h> 46109864Sjeff#include <sys/sx.h> 47109864Sjeff#include <sys/sysctl.h> 48109864Sjeff#include <sys/sysproto.h> 49109864Sjeff#include <sys/vmmeter.h> 50109864Sjeff#ifdef KTRACE 51109864Sjeff#include <sys/uio.h> 52109864Sjeff#include <sys/ktrace.h> 53109864Sjeff#endif 54109864Sjeff 55109864Sjeff#include <machine/cpu.h> 56121790Sjeff#include <machine/smp.h> 57109864Sjeff 58109864Sjeff/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 59109864Sjeff/* XXX This is bogus compatability crap for ps */ 60109864Sjeffstatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 61109864SjeffSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 62109864Sjeff 63109864Sjeffstatic void sched_setup(void *dummy); 64109864SjeffSYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 65109864Sjeff 66132589Sscottlstatic SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler"); 67113357Sjeff 68132589SscottlSYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0, 69132589Sscottl "Scheduler name"); 70130881Sscottl 71113357Sjeffstatic int slice_min = 1; 72113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_min, 
CTLFLAG_RW, &slice_min, 0, ""); 73113357Sjeff 74116365Sjeffstatic int slice_max = 10; 75113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 76113357Sjeff 77111857Sjeffint realstathz; 78113357Sjeffint tickincr = 1; 79111857Sjeff 80109864Sjeff/* 81134791Sjulian * The schedulable entity that can be given a context to run. 82134791Sjulian * A process may have several of these. Probably one per processor 83134791Sjulian * but posibly a few more. In this universe they are grouped 84134791Sjulian * with a KSEG that contains the priority and niceness 85134791Sjulian * for the group. 86134791Sjulian */ 87134791Sjulianstruct kse { 88134791Sjulian TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ 89134791Sjulian int ke_flags; /* (j) KEF_* flags. */ 90134791Sjulian struct thread *ke_thread; /* (*) Active associated thread. */ 91134791Sjulian fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ 92134791Sjulian char ke_rqindex; /* (j) Run queue index. */ 93134791Sjulian enum { 94134791Sjulian KES_THREAD = 0x0, /* slaved to thread state */ 95134791Sjulian KES_ONRUNQ 96134791Sjulian } ke_state; /* (j) thread sched specific status. */ 97134791Sjulian int ke_slptime; 98134791Sjulian int ke_slice; 99134791Sjulian struct runq *ke_runq; 100134791Sjulian u_char ke_cpu; /* CPU that we have affinity for. */ 101134791Sjulian /* The following variables are only used for pctcpu calculation */ 102134791Sjulian int ke_ltick; /* Last tick that we were running on */ 103134791Sjulian int ke_ftick; /* First tick that we were running on */ 104134791Sjulian int ke_ticks; /* Tick count */ 105134791Sjulian 106134791Sjulian}; 107134791Sjulian 108134791Sjulian 109134791Sjulian#define td_kse td_sched 110134791Sjulian#define td_slptime td_kse->ke_slptime 111134791Sjulian#define ke_proc ke_thread->td_proc 112134791Sjulian#define ke_ksegrp ke_thread->td_ksegrp 113134791Sjulian 114134791Sjulian/* flags kept in ke_flags */ 115134791Sjulian#define KEF_SCHED0 0x00001 /* For scheduler-specific use. */ 116134791Sjulian#define KEF_SCHED1 0x00002 /* For scheduler-specific use. */ 117134791Sjulian#define KEF_SCHED2 0x00004 /* For scheduler-specific use. */ 118134791Sjulian#define KEF_SCHED3 0x00008 /* For scheduler-specific use. */ 119138842Sjeff#define KEF_SCHED4 0x00010 120138842Sjeff#define KEF_SCHED5 0x00020 121134791Sjulian#define KEF_DIDRUN 0x02000 /* Thread actually ran. */ 122134791Sjulian#define KEF_EXIT 0x04000 /* Thread is being killed. */ 123134791Sjulian 124134791Sjulian/* 125109864Sjeff * These datastructures are allocated within their parent datastructure but 126109864Sjeff * are scheduler specific. 127109864Sjeff */ 128109864Sjeff 129121790Sjeff#define ke_assign ke_procq.tqe_next 130109864Sjeff 131139334Sjeff#define KEF_ASSIGNED 0x0001 /* Thread is being migrated. */ 132139334Sjeff#define KEF_BOUND 0x0002 /* Thread can not migrate. */ 133139334Sjeff#define KEF_XFERABLE 0x0004 /* Thread was added as transferable. */ 134139334Sjeff#define KEF_HOLD 0x0008 /* Thread is temporarily bound. */ 135139334Sjeff#define KEF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ 136139334Sjeff#define KEF_PRIOELEV 0x0020 /* Thread has had its prio elevated. */ 137139334Sjeff#define KEF_INTERNAL 0x0040 138121790Sjeff 139109864Sjeffstruct kg_sched { 140134791Sjulian struct thread *skg_last_assigned; /* (j) Last thread assigned to */ 141134791Sjulian /* the system scheduler */ 142110645Sjeff int skg_slptime; /* Number of ticks we vol. 
slept */ 143110645Sjeff int skg_runtime; /* Number of ticks we were running */ 144134791Sjulian int skg_avail_opennings; /* (j) Num unfilled slots in group.*/ 145134791Sjulian int skg_concurrency; /* (j) Num threads requested in group.*/ 146109864Sjeff}; 147134791Sjulian#define kg_last_assigned kg_sched->skg_last_assigned 148134791Sjulian#define kg_avail_opennings kg_sched->skg_avail_opennings 149134791Sjulian#define kg_concurrency kg_sched->skg_concurrency 150134791Sjulian#define kg_runtime kg_sched->skg_runtime 151134791Sjulian#define kg_slptime kg_sched->skg_slptime 152109864Sjeff 153136167Sjulian#define SLOT_RELEASE(kg) \ 154136167Sjuliando { \ 155136167Sjulian kg->kg_avail_opennings++; \ 156136167Sjulian CTR3(KTR_RUNQ, "kg %p(%d) Slot released (->%d)", \ 157136167Sjulian kg, \ 158136167Sjulian kg->kg_concurrency, \ 159136167Sjulian kg->kg_avail_opennings); \ 160136167Sjulian /*KASSERT((kg->kg_avail_opennings <= kg->kg_concurrency), \ 161136167Sjulian ("slots out of whack")); */ \ 162136167Sjulian} while (0) 163109864Sjeff 164136167Sjulian#define SLOT_USE(kg) \ 165136167Sjuliando { \ 166136167Sjulian kg->kg_avail_opennings--; \ 167136167Sjulian CTR3(KTR_RUNQ, "kg %p(%d) Slot used (->%d)", \ 168136167Sjulian kg, \ 169136167Sjulian kg->kg_concurrency, \ 170136167Sjulian kg->kg_avail_opennings); \ 171136167Sjulian /*KASSERT((kg->kg_avail_opennings >= 0), \ 172136167Sjulian ("slots out of whack"));*/ \ 173136167Sjulian} while (0) 174136167Sjulian 175134791Sjulianstatic struct kse kse0; 176134791Sjulianstatic struct kg_sched kg_sched0; 177109864Sjeff 178109864Sjeff/* 179116642Sjeff * The priority is primarily determined by the interactivity score. Thus, we 180116642Sjeff * give lower(better) priorities to kse groups that use less CPU. The nice 181116642Sjeff * value is then directly added to this to allow nice to have some effect 182116642Sjeff * on latency. 183111857Sjeff * 184111857Sjeff * PRI_RANGE: Total priority range for timeshare threads. 185116642Sjeff * PRI_NRESV: Number of nice values. 186111857Sjeff * PRI_BASE: The start of the dynamic range. 187109864Sjeff */ 188111857Sjeff#define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 189121869Sjeff#define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) + 1) 190121869Sjeff#define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2) 191116642Sjeff#define SCHED_PRI_BASE (PRI_MIN_TIMESHARE) 192113357Sjeff#define SCHED_PRI_INTERACT(score) \ 193116642Sjeff ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX) 194109864Sjeff 195109864Sjeff/* 196111857Sjeff * These determine the interactivity of a process. 197109864Sjeff * 198110645Sjeff * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 199110645Sjeff * before throttling back. 200121868Sjeff * SLP_RUN_FORK: Maximum slp+run time to inherit at fork time. 201116365Sjeff * INTERACT_MAX: Maximum interactivity value. Smaller is better. 202111857Sjeff * INTERACT_THRESH: Threshhold for placement on the current runq. 203109864Sjeff */ 204121126Sjeff#define SCHED_SLP_RUN_MAX ((hz * 5) << 10) 205121868Sjeff#define SCHED_SLP_RUN_FORK ((hz / 2) << 10) 206116365Sjeff#define SCHED_INTERACT_MAX (100) 207116365Sjeff#define SCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) 208121126Sjeff#define SCHED_INTERACT_THRESH (30) 209111857Sjeff 210109864Sjeff/* 211109864Sjeff * These parameters and macros determine the size of the time slice that is 212109864Sjeff * granted to each thread. 213109864Sjeff * 214109864Sjeff * SLICE_MIN: Minimum time slice granted, in units of ticks. 
215109864Sjeff * SLICE_MAX: Maximum time slice granted. 216109864Sjeff * SLICE_RANGE: Range of available time slices scaled by hz. 217112966Sjeff * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 218112966Sjeff * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 219121871Sjeff * SLICE_NTHRESH: The nice cutoff point for slice assignment. 220109864Sjeff */ 221113357Sjeff#define SCHED_SLICE_MIN (slice_min) 222113357Sjeff#define SCHED_SLICE_MAX (slice_max) 223125299Sjeff#define SCHED_SLICE_INTERACTIVE (slice_max) 224121871Sjeff#define SCHED_SLICE_NTHRESH (SCHED_PRI_NHALF - 1) 225111857Sjeff#define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 226109864Sjeff#define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 227112966Sjeff#define SCHED_SLICE_NICE(nice) \ 228121871Sjeff (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH)) 229109864Sjeff 230109864Sjeff/* 231134791Sjulian * This macro determines whether or not the thread belongs on the current or 232109864Sjeff * next run queue. 233109864Sjeff */ 234113357Sjeff#define SCHED_INTERACTIVE(kg) \ 235113357Sjeff (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 236113417Sjeff#define SCHED_CURR(kg, ke) \ 237138842Sjeff ((ke->ke_flags & KEF_PRIOELEV) || SCHED_INTERACTIVE(kg)) 238109864Sjeff 239109864Sjeff/* 240109864Sjeff * Cpu percentage computation macros and defines. 241109864Sjeff * 242109864Sjeff * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 243109864Sjeff * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 244109864Sjeff */ 245109864Sjeff 246112971Sjeff#define SCHED_CPU_TIME 10 247109864Sjeff#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 248109864Sjeff 249109864Sjeff/* 250113357Sjeff * kseq - per processor runqs and statistics. 251109864Sjeff */ 252109864Sjeffstruct kseq { 253113357Sjeff struct runq ksq_idle; /* Queue of IDLE threads. */ 254113357Sjeff struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 255113357Sjeff struct runq *ksq_next; /* Next timeshare queue. */ 256113357Sjeff struct runq *ksq_curr; /* Current queue. */ 257121896Sjeff int ksq_load_timeshare; /* Load for timeshare. */ 258113357Sjeff int ksq_load; /* Aggregate load. */ 259121869Sjeff short ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */ 260113357Sjeff short ksq_nicemin; /* Least nice. */ 261110267Sjeff#ifdef SMP 262123433Sjeff int ksq_transferable; 263123433Sjeff LIST_ENTRY(kseq) ksq_siblings; /* Next in kseq group. */ 264123433Sjeff struct kseq_group *ksq_group; /* Our processor group. */ 265123433Sjeff volatile struct kse *ksq_assigned; /* assigned by another CPU. */ 266125289Sjeff#else 267125289Sjeff int ksq_sysload; /* For loadavg, !ITHD load. */ 268110267Sjeff#endif 269109864Sjeff}; 270109864Sjeff 271123433Sjeff#ifdef SMP 272109864Sjeff/* 273123433Sjeff * kseq groups are groups of processors which can cheaply share threads. When 274123433Sjeff * one processor in the group goes idle it will check the runqs of the other 275123433Sjeff * processors in its group prior to halting and waiting for an interrupt. 276123433Sjeff * These groups are suitable for SMT (Symetric Multi-Threading) and not NUMA. 277123433Sjeff * In a numa environment we'd want an idle bitmap per group and a two tiered 278123433Sjeff * load balancer. 279123433Sjeff */ 280123433Sjeffstruct kseq_group { 281123433Sjeff int ksg_cpus; /* Count of CPUs in this kseq group. */ 282127498Smarcel cpumask_t ksg_cpumask; /* Mask of cpus in this group. 
*/ 283127498Smarcel cpumask_t ksg_idlemask; /* Idle cpus in this group. */ 284127498Smarcel cpumask_t ksg_mask; /* Bit mask for first cpu. */ 285123487Sjeff int ksg_load; /* Total load of this group. */ 286123433Sjeff int ksg_transferable; /* Transferable load of this group. */ 287123433Sjeff LIST_HEAD(, kseq) ksg_members; /* Linked list of all members. */ 288123433Sjeff}; 289123433Sjeff#endif 290123433Sjeff 291123433Sjeff/* 292109864Sjeff * One kse queue per processor. 293109864Sjeff */ 294110028Sjeff#ifdef SMP 295127498Smarcelstatic cpumask_t kseq_idle; 296123487Sjeffstatic int ksg_maxid; 297121790Sjeffstatic struct kseq kseq_cpu[MAXCPU]; 298123433Sjeffstatic struct kseq_group kseq_groups[MAXCPU]; 299129982Sjeffstatic int bal_tick; 300129982Sjeffstatic int gbal_tick; 301139334Sjeffstatic int balance_groups; 302129982Sjeff 303123433Sjeff#define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 304123433Sjeff#define KSEQ_CPU(x) (&kseq_cpu[(x)]) 305123487Sjeff#define KSEQ_ID(x) ((x) - kseq_cpu) 306123487Sjeff#define KSEQ_GROUP(x) (&kseq_groups[(x)]) 307123433Sjeff#else /* !SMP */ 308121790Sjeffstatic struct kseq kseq_cpu; 309129982Sjeff 310110028Sjeff#define KSEQ_SELF() (&kseq_cpu) 311110028Sjeff#define KSEQ_CPU(x) (&kseq_cpu) 312110028Sjeff#endif 313109864Sjeff 314134791Sjulianstatic void slot_fill(struct ksegrp *kg); 315134791Sjulianstatic struct kse *sched_choose(void); /* XXX Should be thread * */ 316112966Sjeffstatic void sched_slice(struct kse *ke); 317113357Sjeffstatic void sched_priority(struct ksegrp *kg); 318111857Sjeffstatic int sched_interact_score(struct ksegrp *kg); 319116463Sjeffstatic void sched_interact_update(struct ksegrp *kg); 320121868Sjeffstatic void sched_interact_fork(struct ksegrp *kg); 321121790Sjeffstatic void sched_pctcpu_update(struct kse *ke); 322109864Sjeff 323110267Sjeff/* Operations on per processor queues */ 324121790Sjeffstatic struct kse * kseq_choose(struct kseq *kseq); 325110028Sjeffstatic void kseq_setup(struct kseq *kseq); 326122744Sjeffstatic void kseq_load_add(struct kseq *kseq, struct kse *ke); 327122744Sjeffstatic void kseq_load_rem(struct kseq *kseq, struct kse *ke); 328139334Sjeffstatic __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke, int); 329122744Sjeffstatic __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke); 330113357Sjeffstatic void kseq_nice_add(struct kseq *kseq, int nice); 331113357Sjeffstatic void kseq_nice_rem(struct kseq *kseq, int nice); 332113660Sjeffvoid kseq_print(int cpu); 333110267Sjeff#ifdef SMP 334123433Sjeffstatic int kseq_transfer(struct kseq *ksq, struct kse *ke, int class); 335121790Sjeffstatic struct kse *runq_steal(struct runq *rq); 336129982Sjeffstatic void sched_balance(void); 337129982Sjeffstatic void sched_balance_groups(void); 338123487Sjeffstatic void sched_balance_group(struct kseq_group *ksg); 339123487Sjeffstatic void sched_balance_pair(struct kseq *high, struct kseq *low); 340121790Sjeffstatic void kseq_move(struct kseq *from, int cpu); 341123433Sjeffstatic int kseq_idled(struct kseq *kseq); 342121790Sjeffstatic void kseq_notify(struct kse *ke, int cpu); 343121790Sjeffstatic void kseq_assign(struct kseq *); 344123433Sjeffstatic struct kse *kseq_steal(struct kseq *kseq, int stealidle); 345139334Sjeff#define KSE_CAN_MIGRATE(ke) \ 346135076Sscottl ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) 347121790Sjeff#endif 348110028Sjeff 349113357Sjeffvoid 350113660Sjeffkseq_print(int cpu) 351110267Sjeff{ 352113660Sjeff struct kseq *kseq; 353113357Sjeff int i; 354112994Sjeff 
355113660Sjeff kseq = KSEQ_CPU(cpu); 356112994Sjeff 357113357Sjeff printf("kseq:\n"); 358113357Sjeff printf("\tload: %d\n", kseq->ksq_load); 359122744Sjeff printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare); 360121896Sjeff#ifdef SMP 361123433Sjeff printf("\tload transferable: %d\n", kseq->ksq_transferable); 362121896Sjeff#endif 363113357Sjeff printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 364113357Sjeff printf("\tnice counts:\n"); 365121869Sjeff for (i = 0; i < SCHED_PRI_NRESV; i++) 366113357Sjeff if (kseq->ksq_nice[i]) 367113357Sjeff printf("\t\t%d = %d\n", 368113357Sjeff i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 369113357Sjeff} 370112994Sjeff 371122744Sjeffstatic __inline void 372139334Sjeffkseq_runq_add(struct kseq *kseq, struct kse *ke, int flags) 373122744Sjeff{ 374122744Sjeff#ifdef SMP 375139334Sjeff if (KSE_CAN_MIGRATE(ke)) { 376123433Sjeff kseq->ksq_transferable++; 377123433Sjeff kseq->ksq_group->ksg_transferable++; 378133427Sjeff ke->ke_flags |= KEF_XFERABLE; 379123433Sjeff } 380122744Sjeff#endif 381139334Sjeff runq_add(ke->ke_runq, ke, flags); 382122744Sjeff} 383122744Sjeff 384122744Sjeffstatic __inline void 385122744Sjeffkseq_runq_rem(struct kseq *kseq, struct kse *ke) 386122744Sjeff{ 387122744Sjeff#ifdef SMP 388133427Sjeff if (ke->ke_flags & KEF_XFERABLE) { 389123433Sjeff kseq->ksq_transferable--; 390123433Sjeff kseq->ksq_group->ksg_transferable--; 391133427Sjeff ke->ke_flags &= ~KEF_XFERABLE; 392123433Sjeff } 393122744Sjeff#endif 394122744Sjeff runq_remove(ke->ke_runq, ke); 395122744Sjeff} 396122744Sjeff 397113357Sjeffstatic void 398122744Sjeffkseq_load_add(struct kseq *kseq, struct kse *ke) 399113357Sjeff{ 400121896Sjeff int class; 401115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 402121896Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 403121896Sjeff if (class == PRI_TIMESHARE) 404121896Sjeff kseq->ksq_load_timeshare++; 405113357Sjeff kseq->ksq_load++; 406139316Sjeff CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 407128563Sobrien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 408123487Sjeff#ifdef SMP 409123487Sjeff kseq->ksq_group->ksg_load++; 410125289Sjeff#else 411125289Sjeff kseq->ksq_sysload++; 412123487Sjeff#endif 413113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 414130551Sjulian kseq_nice_add(kseq, ke->ke_proc->p_nice); 415110267Sjeff} 416113357Sjeff 417112994Sjeffstatic void 418122744Sjeffkseq_load_rem(struct kseq *kseq, struct kse *ke) 419110267Sjeff{ 420121896Sjeff int class; 421115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 422121896Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 423121896Sjeff if (class == PRI_TIMESHARE) 424121896Sjeff kseq->ksq_load_timeshare--; 425128563Sobrien if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) 426123487Sjeff#ifdef SMP 427123487Sjeff kseq->ksq_group->ksg_load--; 428125289Sjeff#else 429125289Sjeff kseq->ksq_sysload--; 430123487Sjeff#endif 431113357Sjeff kseq->ksq_load--; 432139316Sjeff CTR1(KTR_SCHED, "load: %d", kseq->ksq_load); 433113357Sjeff ke->ke_runq = NULL; 434113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 435130551Sjulian kseq_nice_rem(kseq, ke->ke_proc->p_nice); 436110267Sjeff} 437110267Sjeff 438113357Sjeffstatic void 439113357Sjeffkseq_nice_add(struct kseq *kseq, int nice) 440110267Sjeff{ 441115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 442113357Sjeff /* Normalize to zero. 
*/ 443113357Sjeff kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 444121896Sjeff if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 445113357Sjeff kseq->ksq_nicemin = nice; 446110267Sjeff} 447110267Sjeff 448113357Sjeffstatic void 449113357Sjeffkseq_nice_rem(struct kseq *kseq, int nice) 450110267Sjeff{ 451113357Sjeff int n; 452113357Sjeff 453115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 454113357Sjeff /* Normalize to zero. */ 455113357Sjeff n = nice + SCHED_PRI_NHALF; 456113357Sjeff kseq->ksq_nice[n]--; 457113357Sjeff KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 458113357Sjeff 459113357Sjeff /* 460113357Sjeff * If this wasn't the smallest nice value or there are more in 461113357Sjeff * this bucket we can just return. Otherwise we have to recalculate 462113357Sjeff * the smallest nice. 463113357Sjeff */ 464113357Sjeff if (nice != kseq->ksq_nicemin || 465113357Sjeff kseq->ksq_nice[n] != 0 || 466121896Sjeff kseq->ksq_load_timeshare == 0) 467113357Sjeff return; 468113357Sjeff 469121869Sjeff for (; n < SCHED_PRI_NRESV; n++) 470113357Sjeff if (kseq->ksq_nice[n]) { 471113357Sjeff kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 472113357Sjeff return; 473113357Sjeff } 474110267Sjeff} 475110267Sjeff 476113357Sjeff#ifdef SMP 477116069Sjeff/* 478122744Sjeff * sched_balance is a simple CPU load balancing algorithm. It operates by 479116069Sjeff * finding the least loaded and most loaded cpu and equalizing their load 480116069Sjeff * by migrating some processes. 481116069Sjeff * 482116069Sjeff * Dealing only with two CPUs at a time has two advantages. Firstly, most 483116069Sjeff * installations will only have 2 cpus. Secondly, load balancing too much at 484116069Sjeff * once can have an unpleasant effect on the system. The scheduler rarely has 485116069Sjeff * enough information to make perfect decisions. So this algorithm chooses 486116069Sjeff * algorithm simplicity and more gradual effects on load in larger systems. 487116069Sjeff * 488116069Sjeff * It could be improved by considering the priorities and slices assigned to 489116069Sjeff * each task prior to balancing them. There are many pathological cases with 490116069Sjeff * any approach and so the semi random algorithm below may work as well as any. 491116069Sjeff * 492116069Sjeff */ 493121790Sjeffstatic void 494129982Sjeffsched_balance(void) 495116069Sjeff{ 496123487Sjeff struct kseq_group *high; 497123487Sjeff struct kseq_group *low; 498123487Sjeff struct kseq_group *ksg; 499123487Sjeff int cnt; 500123487Sjeff int i; 501123487Sjeff 502139334Sjeff bal_tick = ticks + (random() % (hz * 2)); 503123487Sjeff if (smp_started == 0) 504139334Sjeff return; 505123487Sjeff low = high = NULL; 506123487Sjeff i = random() % (ksg_maxid + 1); 507123487Sjeff for (cnt = 0; cnt <= ksg_maxid; cnt++) { 508123487Sjeff ksg = KSEQ_GROUP(i); 509123487Sjeff /* 510123487Sjeff * Find the CPU with the highest load that has some 511123487Sjeff * threads to transfer. 
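kseq_nice_add() and kseq_nice_rem() above keep a small histogram of nice values per run queue so the current minimum is always cheap to find. A stand-alone sketch of the same bookkeeping follows; load counting is folded into the helpers here, whereas the kernel tracks ksq_load_timeshare separately in kseq_load_add()/kseq_load_rem().

#include <assert.h>
#include <stdio.h>

#define NHALF   20      /* SCHED_PRI_NHALF with PRIO_MIN/PRIO_MAX = -20/20 */
#define NRESV   41      /* SCHED_PRI_NRESV */

struct nice_hist {
    short   buckets[NRESV];     /* one bucket per nice level */
    short   nicemin;            /* cached least nice value */
    int     load;               /* timeshare threads tracked */
};

static void
nice_add(struct nice_hist *h, int nice)
{
    h->load++;
    h->buckets[nice + NHALF]++;
    /* The first entry, or a new minimum, moves the cached min. */
    if (nice < h->nicemin || h->load == 1)
        h->nicemin = nice;
}

static void
nice_rem(struct nice_hist *h, int nice)
{
    int n = nice + NHALF;

    h->load--;
    h->buckets[n]--;
    assert(h->buckets[n] >= 0);
    /* Only rescan when the last thread at the minimum goes away. */
    if (nice != h->nicemin || h->buckets[n] != 0 || h->load == 0)
        return;
    for (; n < NRESV; n++)
        if (h->buckets[n]) {
            h->nicemin = n - NHALF;
            return;
        }
}

int
main(void)
{
    struct nice_hist h = { .nicemin = 0 };

    nice_add(&h, 5);
    nice_add(&h, -10);
    nice_add(&h, 0);
    printf("min after adds: %d\n", h.nicemin);          /* -10 */
    nice_rem(&h, -10);
    printf("min after removing -10: %d\n", h.nicemin);  /* 0 */
    return (0);
}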
512123487Sjeff */ 513123487Sjeff if ((high == NULL || ksg->ksg_load > high->ksg_load) 514123487Sjeff && ksg->ksg_transferable) 515123487Sjeff high = ksg; 516123487Sjeff if (low == NULL || ksg->ksg_load < low->ksg_load) 517123487Sjeff low = ksg; 518123487Sjeff if (++i > ksg_maxid) 519123487Sjeff i = 0; 520123487Sjeff } 521123487Sjeff if (low != NULL && high != NULL && high != low) 522123487Sjeff sched_balance_pair(LIST_FIRST(&high->ksg_members), 523123487Sjeff LIST_FIRST(&low->ksg_members)); 524123487Sjeff} 525123487Sjeff 526123487Sjeffstatic void 527129982Sjeffsched_balance_groups(void) 528123487Sjeff{ 529123487Sjeff int i; 530123487Sjeff 531139334Sjeff gbal_tick = ticks + (random() % (hz * 2)); 532129982Sjeff mtx_assert(&sched_lock, MA_OWNED); 533123487Sjeff if (smp_started) 534123487Sjeff for (i = 0; i <= ksg_maxid; i++) 535123487Sjeff sched_balance_group(KSEQ_GROUP(i)); 536123487Sjeff} 537123487Sjeff 538123487Sjeffstatic void 539123487Sjeffsched_balance_group(struct kseq_group *ksg) 540123487Sjeff{ 541116069Sjeff struct kseq *kseq; 542123487Sjeff struct kseq *high; 543123487Sjeff struct kseq *low; 544123487Sjeff int load; 545123487Sjeff 546123487Sjeff if (ksg->ksg_transferable == 0) 547123487Sjeff return; 548123487Sjeff low = NULL; 549123487Sjeff high = NULL; 550123487Sjeff LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 551123487Sjeff load = kseq->ksq_load; 552123487Sjeff if (high == NULL || load > high->ksq_load) 553123487Sjeff high = kseq; 554123487Sjeff if (low == NULL || load < low->ksq_load) 555123487Sjeff low = kseq; 556123487Sjeff } 557123487Sjeff if (high != NULL && low != NULL && high != low) 558123487Sjeff sched_balance_pair(high, low); 559123487Sjeff} 560123487Sjeff 561123487Sjeffstatic void 562123487Sjeffsched_balance_pair(struct kseq *high, struct kseq *low) 563123487Sjeff{ 564123433Sjeff int transferable; 565116069Sjeff int high_load; 566116069Sjeff int low_load; 567116069Sjeff int move; 568116069Sjeff int diff; 569116069Sjeff int i; 570116069Sjeff 571116069Sjeff /* 572123433Sjeff * If we're transfering within a group we have to use this specific 573123433Sjeff * kseq's transferable count, otherwise we can steal from other members 574123433Sjeff * of the group. 575123433Sjeff */ 576123487Sjeff if (high->ksq_group == low->ksq_group) { 577123487Sjeff transferable = high->ksq_transferable; 578123487Sjeff high_load = high->ksq_load; 579123487Sjeff low_load = low->ksq_load; 580123487Sjeff } else { 581123487Sjeff transferable = high->ksq_group->ksg_transferable; 582123487Sjeff high_load = high->ksq_group->ksg_load; 583123487Sjeff low_load = low->ksq_group->ksg_load; 584123487Sjeff } 585123433Sjeff if (transferable == 0) 586123487Sjeff return; 587123433Sjeff /* 588122744Sjeff * Determine what the imbalance is and then adjust that to how many 589123433Sjeff * kses we actually have to give up (transferable). 
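sched_balance() above only ever compares whole kseq groups, and it starts its scan at a random group so that ties are not always resolved in favour of group 0. A rough stand-alone restatement of that selection step, with plain integers standing in for ksg_load and ksg_transferable:

#include <stdio.h>
#include <stdlib.h>

struct group {
    int load;           /* ksg_load */
    int transferable;   /* ksg_transferable */
};

static void
pick_high_low(struct group *g, int ngroups, int *highp, int *lowp)
{
    int high = -1, low = -1;
    int i, cnt;

    i = rand() % ngroups;               /* random starting point */
    for (cnt = 0; cnt < ngroups; cnt++) {
        /* Highest load that actually has something to give away. */
        if ((high == -1 || g[i].load > g[high].load) && g[i].transferable)
            high = i;
        if (low == -1 || g[i].load < g[low].load)
            low = i;
        if (++i >= ngroups)
            i = 0;
    }
    *highp = high;
    *lowp = low;
}

int
main(void)
{
    struct group g[4] = { {3, 2}, {1, 0}, {5, 4}, {0, 0} };
    int high, low;

    pick_high_low(g, 4, &high, &low);
    printf("steal from group %d, give to group %d\n", high, low);
    return (0);
}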
590122744Sjeff */ 591123487Sjeff diff = high_load - low_load; 592116069Sjeff move = diff / 2; 593116069Sjeff if (diff & 0x1) 594116069Sjeff move++; 595123433Sjeff move = min(move, transferable); 596116069Sjeff for (i = 0; i < move; i++) 597123487Sjeff kseq_move(high, KSEQ_ID(low)); 598116069Sjeff return; 599116069Sjeff} 600116069Sjeff 601121790Sjeffstatic void 602116069Sjeffkseq_move(struct kseq *from, int cpu) 603116069Sjeff{ 604123433Sjeff struct kseq *kseq; 605123433Sjeff struct kseq *to; 606116069Sjeff struct kse *ke; 607116069Sjeff 608123433Sjeff kseq = from; 609123433Sjeff to = KSEQ_CPU(cpu); 610123433Sjeff ke = kseq_steal(kseq, 1); 611123433Sjeff if (ke == NULL) { 612123433Sjeff struct kseq_group *ksg; 613123433Sjeff 614123433Sjeff ksg = kseq->ksq_group; 615123433Sjeff LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) { 616123433Sjeff if (kseq == from || kseq->ksq_transferable == 0) 617123433Sjeff continue; 618123433Sjeff ke = kseq_steal(kseq, 1); 619123433Sjeff break; 620123433Sjeff } 621123433Sjeff if (ke == NULL) 622123433Sjeff panic("kseq_move: No KSEs available with a " 623123433Sjeff "transferable count of %d\n", 624123433Sjeff ksg->ksg_transferable); 625123433Sjeff } 626123433Sjeff if (kseq == to) 627123433Sjeff return; 628116069Sjeff ke->ke_state = KES_THREAD; 629123433Sjeff kseq_runq_rem(kseq, ke); 630123433Sjeff kseq_load_rem(kseq, ke); 631121923Sjeff kseq_notify(ke, cpu); 632116069Sjeff} 633110267Sjeff 634123433Sjeffstatic int 635123433Sjeffkseq_idled(struct kseq *kseq) 636121790Sjeff{ 637123433Sjeff struct kseq_group *ksg; 638123433Sjeff struct kseq *steal; 639123433Sjeff struct kse *ke; 640123433Sjeff 641123433Sjeff ksg = kseq->ksq_group; 642123433Sjeff /* 643123433Sjeff * If we're in a cpu group, try and steal kses from another cpu in 644123433Sjeff * the group before idling. 645123433Sjeff */ 646123433Sjeff if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) { 647123433Sjeff LIST_FOREACH(steal, &ksg->ksg_members, ksq_siblings) { 648123433Sjeff if (steal == kseq || steal->ksq_transferable == 0) 649123433Sjeff continue; 650123433Sjeff ke = kseq_steal(steal, 0); 651123433Sjeff if (ke == NULL) 652123433Sjeff continue; 653123433Sjeff ke->ke_state = KES_THREAD; 654123433Sjeff kseq_runq_rem(steal, ke); 655123433Sjeff kseq_load_rem(steal, ke); 656123433Sjeff ke->ke_cpu = PCPU_GET(cpuid); 657139334Sjeff ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 658139334Sjeff sched_add(ke->ke_thread, SRQ_YIELDING); 659123433Sjeff return (0); 660123433Sjeff } 661123433Sjeff } 662123433Sjeff /* 663123433Sjeff * We only set the idled bit when all of the cpus in the group are 664123433Sjeff * idle. Otherwise we could get into a situation where a KSE bounces 665123433Sjeff * back and forth between two idle cores on seperate physical CPUs. 
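The heart of sched_balance_pair() above is the transfer count: half the load difference, rounded up, but never more than the transferable count. Restated on its own:

#include <stdio.h>

static int
balance_count(int high_load, int low_load, int transferable)
{
    int diff = high_load - low_load;
    int move = diff / 2;

    if (diff & 0x1)             /* round the odd tick toward the mover */
        move++;
    if (move > transferable)
        move = transferable;
    return (move);
}

int
main(void)
{
    printf("%d\n", balance_count(7, 2, 4));   /* diff 5 -> move 3 */
    printf("%d\n", balance_count(7, 2, 1));   /* capped by transferable */
    return (0);
}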
666123433Sjeff */ 667123433Sjeff ksg->ksg_idlemask |= PCPU_GET(cpumask); 668123433Sjeff if (ksg->ksg_idlemask != ksg->ksg_cpumask) 669123433Sjeff return (1); 670123433Sjeff atomic_set_int(&kseq_idle, ksg->ksg_mask); 671123433Sjeff return (1); 672121790Sjeff} 673121790Sjeff 674121790Sjeffstatic void 675121790Sjeffkseq_assign(struct kseq *kseq) 676121790Sjeff{ 677121790Sjeff struct kse *nke; 678121790Sjeff struct kse *ke; 679121790Sjeff 680121790Sjeff do { 681132776Skan *(volatile struct kse **)&ke = kseq->ksq_assigned; 682121790Sjeff } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL)); 683121790Sjeff for (; ke != NULL; ke = nke) { 684121790Sjeff nke = ke->ke_assign; 685139334Sjeff kseq->ksq_group->ksg_load--; 686139334Sjeff kseq->ksq_load--; 687121790Sjeff ke->ke_flags &= ~KEF_ASSIGNED; 688139334Sjeff ke->ke_flags |= KEF_INTERNAL | KEF_HOLD; 689139334Sjeff sched_add(ke->ke_thread, SRQ_YIELDING); 690121790Sjeff } 691121790Sjeff} 692121790Sjeff 693121790Sjeffstatic void 694121790Sjeffkseq_notify(struct kse *ke, int cpu) 695121790Sjeff{ 696121790Sjeff struct kseq *kseq; 697121790Sjeff struct thread *td; 698121790Sjeff struct pcpu *pcpu; 699139334Sjeff int class; 700133427Sjeff int prio; 701121790Sjeff 702139334Sjeff kseq = KSEQ_CPU(cpu); 703139334Sjeff /* XXX */ 704139334Sjeff class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 705139334Sjeff if ((class == PRI_TIMESHARE || class == PRI_REALTIME) && 706139334Sjeff (kseq_idle & kseq->ksq_group->ksg_mask)) 707139334Sjeff atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask); 708139334Sjeff kseq->ksq_group->ksg_load++; 709139334Sjeff kseq->ksq_load++; 710123529Sjeff ke->ke_cpu = cpu; 711121790Sjeff ke->ke_flags |= KEF_ASSIGNED; 712133427Sjeff prio = ke->ke_thread->td_priority; 713121790Sjeff 714121790Sjeff /* 715121790Sjeff * Place a KSE on another cpu's queue and force a resched. 716121790Sjeff */ 717121790Sjeff do { 718132776Skan *(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned; 719121790Sjeff } while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke)); 720133427Sjeff /* 721133427Sjeff * Without sched_lock we could lose a race where we set NEEDRESCHED 722133427Sjeff * on a thread that is switched out before the IPI is delivered. This 723133427Sjeff * would lead us to miss the resched. This will be a problem once 724133427Sjeff * sched_lock is pushed down. 
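A sketch of the two-level idle bitmap that kseq_idled() above maintains: a CPU marks itself idle in its group's ksg_idlemask, and only when every sibling is idle does the group's bit reach the global kseq_idle mask. The kernel does that last step with atomic_set_int(); plain assignments and unsigned ints standing in for cpumask_t are used here.

#include <stdio.h>

struct group {
    unsigned cpumask;       /* all members (ksg_cpumask) */
    unsigned idlemask;      /* currently idle members (ksg_idlemask) */
    unsigned mask;          /* this group's bit in the global mask (ksg_mask) */
};

static unsigned global_idle;    /* kseq_idle */

static void
cpu_goes_idle(struct group *g, unsigned cpu_bit)
{
    g->idlemask |= cpu_bit;
    if (g->idlemask != g->cpumask)
        return;                 /* a sibling is still busy */
    global_idle |= g->mask;     /* whole group is now idle */
}

int
main(void)
{
    /* Two-way SMT group: cpus 0 and 1, group bit = cpu 0's bit. */
    struct group g = { .cpumask = 0x3, .idlemask = 0, .mask = 0x1 };

    cpu_goes_idle(&g, 0x1);
    printf("global idle mask: %#x\n", global_idle);     /* still 0 */
    cpu_goes_idle(&g, 0x2);
    printf("global idle mask: %#x\n", global_idle);     /* now 0x1 */
    return (0);
}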
725133427Sjeff */ 726121790Sjeff pcpu = pcpu_find(cpu); 727121790Sjeff td = pcpu->pc_curthread; 728121790Sjeff if (ke->ke_thread->td_priority < td->td_priority || 729121790Sjeff td == pcpu->pc_idlethread) { 730121790Sjeff td->td_flags |= TDF_NEEDRESCHED; 731121790Sjeff ipi_selected(1 << cpu, IPI_AST); 732121790Sjeff } 733121790Sjeff} 734121790Sjeff 735121790Sjeffstatic struct kse * 736121790Sjeffrunq_steal(struct runq *rq) 737121790Sjeff{ 738121790Sjeff struct rqhead *rqh; 739121790Sjeff struct rqbits *rqb; 740121790Sjeff struct kse *ke; 741121790Sjeff int word; 742121790Sjeff int bit; 743121790Sjeff 744121790Sjeff mtx_assert(&sched_lock, MA_OWNED); 745121790Sjeff rqb = &rq->rq_status; 746121790Sjeff for (word = 0; word < RQB_LEN; word++) { 747121790Sjeff if (rqb->rqb_bits[word] == 0) 748121790Sjeff continue; 749121790Sjeff for (bit = 0; bit < RQB_BPW; bit++) { 750123231Speter if ((rqb->rqb_bits[word] & (1ul << bit)) == 0) 751121790Sjeff continue; 752121790Sjeff rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)]; 753121790Sjeff TAILQ_FOREACH(ke, rqh, ke_procq) { 754139334Sjeff if (KSE_CAN_MIGRATE(ke)) 755121790Sjeff return (ke); 756121790Sjeff } 757121790Sjeff } 758121790Sjeff } 759121790Sjeff return (NULL); 760121790Sjeff} 761121790Sjeff 762121790Sjeffstatic struct kse * 763123433Sjeffkseq_steal(struct kseq *kseq, int stealidle) 764121790Sjeff{ 765121790Sjeff struct kse *ke; 766121790Sjeff 767123433Sjeff /* 768123433Sjeff * Steal from next first to try to get a non-interactive task that 769123433Sjeff * may not have run for a while. 770123433Sjeff */ 771123433Sjeff if ((ke = runq_steal(kseq->ksq_next)) != NULL) 772123433Sjeff return (ke); 773121790Sjeff if ((ke = runq_steal(kseq->ksq_curr)) != NULL) 774121790Sjeff return (ke); 775123433Sjeff if (stealidle) 776123433Sjeff return (runq_steal(&kseq->ksq_idle)); 777123433Sjeff return (NULL); 778121790Sjeff} 779123433Sjeff 780123433Sjeffint 781123433Sjeffkseq_transfer(struct kseq *kseq, struct kse *ke, int class) 782123433Sjeff{ 783139334Sjeff struct kseq_group *nksg; 784123433Sjeff struct kseq_group *ksg; 785139334Sjeff struct kseq *old; 786123433Sjeff int cpu; 787139334Sjeff int idx; 788123433Sjeff 789123685Sjeff if (smp_started == 0) 790123685Sjeff return (0); 791123433Sjeff cpu = 0; 792123433Sjeff /* 793133427Sjeff * If our load exceeds a certain threshold we should attempt to 794133427Sjeff * reassign this thread. The first candidate is the cpu that 795133427Sjeff * originally ran the thread. If it is idle, assign it there, 796133427Sjeff * otherwise, pick an idle cpu. 797133427Sjeff * 798133427Sjeff * The threshold at which we start to reassign kses has a large impact 799123685Sjeff * on the overall performance of the system. Tuned too high and 800123685Sjeff * some CPUs may idle. Too low and there will be excess migration 801128055Scognet * and context switches. 802123685Sjeff */ 803139334Sjeff old = KSEQ_CPU(ke->ke_cpu); 804139334Sjeff nksg = old->ksq_group; 805133427Sjeff ksg = kseq->ksq_group; 806139334Sjeff if (kseq_idle) { 807139334Sjeff if (kseq_idle & nksg->ksg_mask) { 808139334Sjeff cpu = ffs(nksg->ksg_idlemask); 809139334Sjeff if (cpu) { 810139334Sjeff CTR2(KTR_SCHED, 811139334Sjeff "kseq_transfer: %p found old cpu %X " 812139334Sjeff "in idlemask.", ke, cpu); 813133427Sjeff goto migrate; 814139334Sjeff } 815133427Sjeff } 816123433Sjeff /* 817123433Sjeff * Multiple cpus could find this bit simultaneously 818123433Sjeff * but the race shouldn't be terrible. 
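ksq_assigned above is a lock-free "push one, take all" list: kseq_notify() pushes a kse from a remote CPU with a compare-and-swap loop, and kseq_assign() detaches the whole chain in one swap before walking it. A stand-alone sketch using C11 atomics in place of atomic_cmpset_ptr():

#include <stdatomic.h>
#include <stdio.h>

struct item {
    int          id;
    struct item *next;      /* plays the role of ke_assign */
};

static _Atomic(struct item *) assigned;

/* kseq_notify(): push one item, retrying if another CPU races us. */
static void
push(struct item *it)
{
    struct item *head;

    do {
        head = atomic_load(&assigned);
        it->next = head;
    } while (!atomic_compare_exchange_weak(&assigned, &head, it));
}

/* kseq_assign(): atomically take the whole chain, then process it. */
static void
drain(void)
{
    struct item *head, *next;

    do {
        head = atomic_load(&assigned);
    } while (!atomic_compare_exchange_weak(&assigned, &head, NULL));
    for (; head != NULL; head = next) {
        next = head->next;
        printf("assigned item %d\n", head->id);
    }
}

int
main(void)
{
    struct item a = { 1, NULL }, b = { 2, NULL }, c = { 3, NULL };

    push(&a);
    push(&b);
    push(&c);
    drain();    /* prints 3, 2, 1: LIFO order, like the kernel's list */
    return (0);
}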
819123433Sjeff */ 820123433Sjeff cpu = ffs(kseq_idle); 821139334Sjeff if (cpu) { 822139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p found %X " 823139334Sjeff "in idlemask.", ke, cpu); 824133427Sjeff goto migrate; 825139334Sjeff } 826123433Sjeff } 827139334Sjeff idx = 0; 828139334Sjeff#if 0 829139334Sjeff if (old->ksq_load < kseq->ksq_load) { 830139334Sjeff cpu = ke->ke_cpu + 1; 831139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p old cpu %X " 832139334Sjeff "load less than ours.", ke, cpu); 833139334Sjeff goto migrate; 834139334Sjeff } 835123433Sjeff /* 836139334Sjeff * No new CPU was found, look for one with less load. 837139334Sjeff */ 838139334Sjeff for (idx = 0; idx <= ksg_maxid; idx++) { 839139334Sjeff nksg = KSEQ_GROUP(idx); 840139334Sjeff if (nksg->ksg_load /*+ (nksg->ksg_cpus * 2)*/ < ksg->ksg_load) { 841139334Sjeff cpu = ffs(nksg->ksg_cpumask); 842139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X load less " 843139334Sjeff "than ours.", ke, cpu); 844139334Sjeff goto migrate; 845139334Sjeff } 846139334Sjeff } 847139334Sjeff#endif 848139334Sjeff /* 849123433Sjeff * If another cpu in this group has idled, assign a thread over 850123433Sjeff * to them after checking to see if there are idled groups. 851123433Sjeff */ 852133427Sjeff if (ksg->ksg_idlemask) { 853123433Sjeff cpu = ffs(ksg->ksg_idlemask); 854139334Sjeff if (cpu) { 855139334Sjeff CTR2(KTR_SCHED, "kseq_transfer: %p cpu %X idle in " 856139334Sjeff "group.", ke, cpu); 857133427Sjeff goto migrate; 858139334Sjeff } 859123433Sjeff } 860133427Sjeff return (0); 861133427Sjeffmigrate: 862133427Sjeff /* 863123433Sjeff * Now that we've found an idle CPU, migrate the thread. 864123433Sjeff */ 865133427Sjeff cpu--; 866133427Sjeff ke->ke_runq = NULL; 867133427Sjeff kseq_notify(ke, cpu); 868133427Sjeff 869133427Sjeff return (1); 870123433Sjeff} 871123433Sjeff 872121790Sjeff#endif /* SMP */ 873121790Sjeff 874117326Sjeff/* 875121790Sjeff * Pick the highest priority task we have and return it. 876117326Sjeff */ 877117326Sjeff 878121790Sjeffstatic struct kse * 879121790Sjeffkseq_choose(struct kseq *kseq) 880110267Sjeff{ 881137067Sjeff struct runq *swap; 882110267Sjeff struct kse *ke; 883137067Sjeff int nice; 884110267Sjeff 885115998Sjeff mtx_assert(&sched_lock, MA_OWNED); 886113357Sjeff swap = NULL; 887112994Sjeff 888113357Sjeff for (;;) { 889113357Sjeff ke = runq_choose(kseq->ksq_curr); 890113357Sjeff if (ke == NULL) { 891113357Sjeff /* 892131473Sjhb * We already swapped once and didn't get anywhere. 893113357Sjeff */ 894113357Sjeff if (swap) 895113357Sjeff break; 896113357Sjeff swap = kseq->ksq_curr; 897113357Sjeff kseq->ksq_curr = kseq->ksq_next; 898113357Sjeff kseq->ksq_next = swap; 899113357Sjeff continue; 900113357Sjeff } 901113357Sjeff /* 902113357Sjeff * If we encounter a slice of 0 the kse is in a 903113357Sjeff * TIMESHARE kse group and its nice was too far out 904113357Sjeff * of the range that receives slices. 
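kseq_transfer() above picks a destination with ffs() on the various idle masks; ffs() returns the 1-based index of the lowest set bit and 0 for an empty mask, which is why the successful paths fall through to a "cpu--" before calling kseq_notify(). A tiny demonstration:

#include <stdio.h>
#include <strings.h>        /* ffs() */

static int
pick_idle_cpu(unsigned idlemask)
{
    int cpu = ffs(idlemask);

    if (cpu == 0)
        return (-1);        /* nobody idle, the caller keeps the thread */
    return (cpu - 1);       /* convert to a 0-based CPU id */
}

int
main(void)
{
    printf("%d\n", pick_idle_cpu(0x0));   /* -1 */
    printf("%d\n", pick_idle_cpu(0x8));   /*  3 */
    printf("%d\n", pick_idle_cpu(0x6));   /*  1 */
    return (0);
}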
905113357Sjeff */ 906137067Sjeff nice = ke->ke_proc->p_nice + (0 - kseq->ksq_nicemin); 907138842Sjeff if (ke->ke_slice == 0 || (nice > SCHED_SLICE_NTHRESH && 908138842Sjeff ke->ke_proc->p_nice != 0)) { 909113357Sjeff runq_remove(ke->ke_runq, ke); 910113357Sjeff sched_slice(ke); 911113357Sjeff ke->ke_runq = kseq->ksq_next; 912136170Sjulian runq_add(ke->ke_runq, ke, 0); 913113357Sjeff continue; 914113357Sjeff } 915113357Sjeff return (ke); 916110267Sjeff } 917110267Sjeff 918113357Sjeff return (runq_choose(&kseq->ksq_idle)); 919110267Sjeff} 920110267Sjeff 921109864Sjeffstatic void 922110028Sjeffkseq_setup(struct kseq *kseq) 923110028Sjeff{ 924113357Sjeff runq_init(&kseq->ksq_timeshare[0]); 925113357Sjeff runq_init(&kseq->ksq_timeshare[1]); 926112994Sjeff runq_init(&kseq->ksq_idle); 927113357Sjeff kseq->ksq_curr = &kseq->ksq_timeshare[0]; 928113357Sjeff kseq->ksq_next = &kseq->ksq_timeshare[1]; 929113660Sjeff kseq->ksq_load = 0; 930121896Sjeff kseq->ksq_load_timeshare = 0; 931110028Sjeff} 932110028Sjeff 933110028Sjeffstatic void 934109864Sjeffsched_setup(void *dummy) 935109864Sjeff{ 936117313Sjeff#ifdef SMP 937109864Sjeff int i; 938117313Sjeff#endif 939109864Sjeff 940116946Sjeff slice_min = (hz/100); /* 10ms */ 941116946Sjeff slice_max = (hz/7); /* ~140ms */ 942111857Sjeff 943117237Sjeff#ifdef SMP 944123487Sjeff balance_groups = 0; 945123433Sjeff /* 946123433Sjeff * Initialize the kseqs. 947123433Sjeff */ 948123433Sjeff for (i = 0; i < MAXCPU; i++) { 949123433Sjeff struct kseq *ksq; 950123433Sjeff 951123433Sjeff ksq = &kseq_cpu[i]; 952123433Sjeff ksq->ksq_assigned = NULL; 953123433Sjeff kseq_setup(&kseq_cpu[i]); 954123433Sjeff } 955117237Sjeff if (smp_topology == NULL) { 956123433Sjeff struct kseq_group *ksg; 957123433Sjeff struct kseq *ksq; 958139334Sjeff int cpus; 959123433Sjeff 960139334Sjeff for (cpus = 0, i = 0; i < MAXCPU; i++) { 961139334Sjeff if (CPU_ABSENT(i)) 962139334Sjeff continue; 963139334Sjeff ksq = &kseq_cpu[cpus]; 964139334Sjeff ksg = &kseq_groups[cpus]; 965123433Sjeff /* 966129982Sjeff * Setup a kseq group with one member. 967123433Sjeff */ 968123433Sjeff ksq->ksq_transferable = 0; 969123433Sjeff ksq->ksq_group = ksg; 970123433Sjeff ksg->ksg_cpus = 1; 971123433Sjeff ksg->ksg_idlemask = 0; 972123433Sjeff ksg->ksg_cpumask = ksg->ksg_mask = 1 << i; 973123487Sjeff ksg->ksg_load = 0; 974123433Sjeff ksg->ksg_transferable = 0; 975123433Sjeff LIST_INIT(&ksg->ksg_members); 976123433Sjeff LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings); 977139334Sjeff cpus++; 978117237Sjeff } 979139334Sjeff ksg_maxid = cpus - 1; 980117237Sjeff } else { 981123433Sjeff struct kseq_group *ksg; 982123433Sjeff struct cpu_group *cg; 983117237Sjeff int j; 984113357Sjeff 985117237Sjeff for (i = 0; i < smp_topology->ct_count; i++) { 986117237Sjeff cg = &smp_topology->ct_group[i]; 987123433Sjeff ksg = &kseq_groups[i]; 988123433Sjeff /* 989123433Sjeff * Initialize the group. 990123433Sjeff */ 991123433Sjeff ksg->ksg_idlemask = 0; 992123487Sjeff ksg->ksg_load = 0; 993123433Sjeff ksg->ksg_transferable = 0; 994123433Sjeff ksg->ksg_cpus = cg->cg_count; 995123433Sjeff ksg->ksg_cpumask = cg->cg_mask; 996123433Sjeff LIST_INIT(&ksg->ksg_members); 997123433Sjeff /* 998123433Sjeff * Find all of the group members and add them. 
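kseq_choose() above re-slices a thread whose slice has reached zero, and the slice it gets back depends only on how far its nice value sits above the least nice thread on the queue (SCHED_SLICE_NICE() earlier in the file, applied by sched_slice() below). A stand-alone sketch of that window, assuming hz = 1000 so that slice_min = 10 and slice_max = 142 ticks as sched_setup() above installs, and omitting the ksq_load_timeshare == 0 shortcut:

#include <stdio.h>

static int slice_min = 10;          /* hz/100 with hz = 1000 (assumed) */
static int slice_max = 142;         /* hz/7 */

#define SCHED_PRI_NHALF         20  /* with PRIO_MIN/PRIO_MAX = -20/20 */
#define SCHED_SLICE_MIN         (slice_min)
#define SCHED_SLICE_MAX         (slice_max)
#define SCHED_SLICE_NTHRESH     (SCHED_PRI_NHALF - 1)
#define SCHED_SLICE_RANGE       (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_SLICE_NICE(nice) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))

/* The non-interactive branch of sched_slice(). */
static int
slice_for(int p_nice, int nicemin)
{
    int nice = p_nice - nicemin;    /* distance from the least nice thread */

    if (p_nice < nicemin)
        return (SCHED_SLICE_MAX);
    if (nice <= SCHED_SLICE_NTHRESH)
        return (SCHED_SLICE_NICE(nice));
    if (p_nice == 0)
        return (SCHED_SLICE_MIN);
    return (0);                     /* outside the window: no slice at all */
}

int
main(void)
{
    int nice;

    /* Least nice thread currently queued is at nice -5. */
    for (nice = -5; nice <= 20; nice += 5)
        printf("nice %3d -> slice %3d ticks\n", nice, slice_for(nice, -5));
    return (0);
}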
999123433Sjeff */ 1000123433Sjeff for (j = 0; j < MAXCPU; j++) { 1001123433Sjeff if ((cg->cg_mask & (1 << j)) != 0) { 1002123433Sjeff if (ksg->ksg_mask == 0) 1003123433Sjeff ksg->ksg_mask = 1 << j; 1004123433Sjeff kseq_cpu[j].ksq_transferable = 0; 1005123433Sjeff kseq_cpu[j].ksq_group = ksg; 1006123433Sjeff LIST_INSERT_HEAD(&ksg->ksg_members, 1007123433Sjeff &kseq_cpu[j], ksq_siblings); 1008123433Sjeff } 1009123433Sjeff } 1010123487Sjeff if (ksg->ksg_cpus > 1) 1011123487Sjeff balance_groups = 1; 1012117237Sjeff } 1013123487Sjeff ksg_maxid = smp_topology->ct_count - 1; 1014117237Sjeff } 1015123487Sjeff /* 1016123487Sjeff * Stagger the group and global load balancer so they do not 1017123487Sjeff * interfere with each other. 1018123487Sjeff */ 1019129982Sjeff bal_tick = ticks + hz; 1020123487Sjeff if (balance_groups) 1021129982Sjeff gbal_tick = ticks + (hz / 2); 1022117237Sjeff#else 1023117237Sjeff kseq_setup(KSEQ_SELF()); 1024116069Sjeff#endif 1025117237Sjeff mtx_lock_spin(&sched_lock); 1026122744Sjeff kseq_load_add(KSEQ_SELF(), &kse0); 1027117237Sjeff mtx_unlock_spin(&sched_lock); 1028109864Sjeff} 1029109864Sjeff 1030109864Sjeff/* 1031109864Sjeff * Scale the scheduling priority according to the "interactivity" of this 1032109864Sjeff * process. 1033109864Sjeff */ 1034113357Sjeffstatic void 1035109864Sjeffsched_priority(struct ksegrp *kg) 1036109864Sjeff{ 1037109864Sjeff int pri; 1038109864Sjeff 1039109864Sjeff if (kg->kg_pri_class != PRI_TIMESHARE) 1040113357Sjeff return; 1041109864Sjeff 1042113357Sjeff pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 1043111857Sjeff pri += SCHED_PRI_BASE; 1044130551Sjulian pri += kg->kg_proc->p_nice; 1045109864Sjeff 1046109864Sjeff if (pri > PRI_MAX_TIMESHARE) 1047109864Sjeff pri = PRI_MAX_TIMESHARE; 1048109864Sjeff else if (pri < PRI_MIN_TIMESHARE) 1049109864Sjeff pri = PRI_MIN_TIMESHARE; 1050109864Sjeff 1051109864Sjeff kg->kg_user_pri = pri; 1052109864Sjeff 1053113357Sjeff return; 1054109864Sjeff} 1055109864Sjeff 1056109864Sjeff/* 1057112966Sjeff * Calculate a time slice based on the properties of the kseg and the runq 1058112994Sjeff * that we're on. This is only for PRI_TIMESHARE ksegrps. 1059109864Sjeff */ 1060112966Sjeffstatic void 1061112966Sjeffsched_slice(struct kse *ke) 1062109864Sjeff{ 1063113357Sjeff struct kseq *kseq; 1064112966Sjeff struct ksegrp *kg; 1065109864Sjeff 1066112966Sjeff kg = ke->ke_ksegrp; 1067113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 1068109864Sjeff 1069138842Sjeff if (ke->ke_flags & KEF_PRIOELEV) { 1070138842Sjeff ke->ke_slice = SCHED_SLICE_MIN; 1071138842Sjeff return; 1072138842Sjeff } 1073138842Sjeff 1074112966Sjeff /* 1075112966Sjeff * Rationale: 1076133427Sjeff * KSEs in interactive ksegs get a minimal slice so that we 1077112966Sjeff * quickly notice if it abuses its advantage. 1078112966Sjeff * 1079112966Sjeff * KSEs in non-interactive ksegs are assigned a slice that is 1080112966Sjeff * based on the ksegs nice value relative to the least nice kseg 1081112966Sjeff * on the run queue for this cpu. 1082112966Sjeff * 1083112966Sjeff * If the KSE is less nice than all others it gets the maximum 1084112966Sjeff * slice and other KSEs will adjust their slice relative to 1085112966Sjeff * this when they first expire. 1086112966Sjeff * 1087112966Sjeff * There is 20 point window that starts relative to the least 1088112966Sjeff * nice kse on the run queue. Slice size is determined by 1089112966Sjeff * the kse distance from the last nice ksegrp. 
1090112966Sjeff * 1091121871Sjeff * If the kse is outside of the window it will get no slice 1092121871Sjeff * and will be reevaluated each time it is selected on the 1093121871Sjeff * run queue. The exception to this is nice 0 ksegs when 1094121871Sjeff * a nice -20 is running. They are always granted a minimum 1095121871Sjeff * slice. 1096112966Sjeff */ 1097113357Sjeff if (!SCHED_INTERACTIVE(kg)) { 1098112966Sjeff int nice; 1099112966Sjeff 1100130551Sjulian nice = kg->kg_proc->p_nice + (0 - kseq->ksq_nicemin); 1101121896Sjeff if (kseq->ksq_load_timeshare == 0 || 1102130551Sjulian kg->kg_proc->p_nice < kseq->ksq_nicemin) 1103112966Sjeff ke->ke_slice = SCHED_SLICE_MAX; 1104121871Sjeff else if (nice <= SCHED_SLICE_NTHRESH) 1105112966Sjeff ke->ke_slice = SCHED_SLICE_NICE(nice); 1106130551Sjulian else if (kg->kg_proc->p_nice == 0) 1107121871Sjeff ke->ke_slice = SCHED_SLICE_MIN; 1108112966Sjeff else 1109112966Sjeff ke->ke_slice = 0; 1110112966Sjeff } else 1111123684Sjeff ke->ke_slice = SCHED_SLICE_INTERACTIVE; 1112112966Sjeff 1113112966Sjeff return; 1114109864Sjeff} 1115109864Sjeff 1116121868Sjeff/* 1117121868Sjeff * This routine enforces a maximum limit on the amount of scheduling history 1118121868Sjeff * kept. It is called after either the slptime or runtime is adjusted. 1119121868Sjeff * This routine will not operate correctly when slp or run times have been 1120121868Sjeff * adjusted to more than double their maximum. 1121121868Sjeff */ 1122116463Sjeffstatic void 1123116463Sjeffsched_interact_update(struct ksegrp *kg) 1124116463Sjeff{ 1125121868Sjeff int sum; 1126121605Sjeff 1127121868Sjeff sum = kg->kg_runtime + kg->kg_slptime; 1128121868Sjeff if (sum < SCHED_SLP_RUN_MAX) 1129121868Sjeff return; 1130121868Sjeff /* 1131121868Sjeff * If we have exceeded by more than 1/5th then the algorithm below 1132121868Sjeff * will not bring us back into range. Dividing by two here forces 1133133427Sjeff * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX] 1134121868Sjeff */ 1135127850Sjeff if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) { 1136121868Sjeff kg->kg_runtime /= 2; 1137121868Sjeff kg->kg_slptime /= 2; 1138121868Sjeff return; 1139116463Sjeff } 1140121868Sjeff kg->kg_runtime = (kg->kg_runtime / 5) * 4; 1141121868Sjeff kg->kg_slptime = (kg->kg_slptime / 5) * 4; 1142116463Sjeff} 1143116463Sjeff 1144121868Sjeffstatic void 1145121868Sjeffsched_interact_fork(struct ksegrp *kg) 1146121868Sjeff{ 1147121868Sjeff int ratio; 1148121868Sjeff int sum; 1149121868Sjeff 1150121868Sjeff sum = kg->kg_runtime + kg->kg_slptime; 1151121868Sjeff if (sum > SCHED_SLP_RUN_FORK) { 1152121868Sjeff ratio = sum / SCHED_SLP_RUN_FORK; 1153121868Sjeff kg->kg_runtime /= ratio; 1154121868Sjeff kg->kg_slptime /= ratio; 1155121868Sjeff } 1156121868Sjeff} 1157121868Sjeff 1158111857Sjeffstatic int 1159111857Sjeffsched_interact_score(struct ksegrp *kg) 1160111857Sjeff{ 1161116365Sjeff int div; 1162111857Sjeff 1163111857Sjeff if (kg->kg_runtime > kg->kg_slptime) { 1164116365Sjeff div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF); 1165116365Sjeff return (SCHED_INTERACT_HALF + 1166116365Sjeff (SCHED_INTERACT_HALF - (kg->kg_slptime / div))); 1167116365Sjeff } if (kg->kg_slptime > kg->kg_runtime) { 1168116365Sjeff div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF); 1169116365Sjeff return (kg->kg_runtime / div); 1170111857Sjeff } 1171111857Sjeff 1172116365Sjeff /* 1173116365Sjeff * This can happen if slptime and runtime are 0. 
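sched_interact_score() and sched_interact_update() above are the core of ULE's interactivity heuristic: the score compares sleep time against run time on a 0..100 scale (lower is more interactive, 30 is the current-queue threshold), and the update keeps the two history counters from growing without bound. A stand-alone sketch in the same "ticks << 10" units, with hz assumed to be 1000 only to give SCHED_SLP_RUN_MAX a concrete value:

#include <stdio.h>

#define HZ                      1000            /* assumed */
#define SCHED_SLP_RUN_MAX       ((HZ * 5) << 10)
#define SCHED_INTERACT_MAX      100
#define SCHED_INTERACT_HALF     (SCHED_INTERACT_MAX / 2)
#define SCHED_INTERACT_THRESH   30

static int
interact_score(int runtime, int slptime)
{
    int div;

    if (runtime > slptime) {
        /* Mostly running: the score lands in the upper half (worse). */
        div = runtime / SCHED_INTERACT_HALF;
        if (div < 1)
            div = 1;
        return (SCHED_INTERACT_HALF +
            (SCHED_INTERACT_HALF - slptime / div));
    }
    if (slptime > runtime) {
        /* Mostly sleeping: the score lands in the lower half (better). */
        div = slptime / SCHED_INTERACT_HALF;
        if (div < 1)
            div = 1;
        return (runtime / div);
    }
    return (0);     /* both zero: brand new history */
}

/* Keep runtime + slptime bounded so old behaviour decays away. */
static void
interact_update(int *runtime, int *slptime)
{
    int sum = *runtime + *slptime;

    if (sum < SCHED_SLP_RUN_MAX)
        return;
    if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
        *runtime /= 2;
        *slptime /= 2;
        return;
    }
    *runtime = (*runtime / 5) * 4;
    *slptime = (*slptime / 5) * 4;
}

int
main(void)
{
    int run = 1000 << 10, slp = 4000 << 10;

    /* Sleeping four times as much as running is comfortably interactive. */
    printf("score %d (thresh %d)\n",
        interact_score(run, slp), SCHED_INTERACT_THRESH);
    /* A pure CPU hog scores near the maximum. */
    printf("score %d\n", interact_score(4000 << 10, 10 << 10));
    interact_update(&run, &slp);
    printf("history after squeeze: run %d slp %d\n", run >> 10, slp >> 10);
    return (0);
}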
1174116365Sjeff */ 1175116365Sjeff return (0); 1176111857Sjeff 1177111857Sjeff} 1178111857Sjeff 1179113357Sjeff/* 1180134791Sjulian * Very early in the boot some setup of scheduler-specific 1181134791Sjulian * parts of proc0 and of soem scheduler resources needs to be done. 1182134791Sjulian * Called from: 1183134791Sjulian * proc0_init() 1184134791Sjulian */ 1185134791Sjulianvoid 1186134791Sjulianschedinit(void) 1187134791Sjulian{ 1188134791Sjulian /* 1189134791Sjulian * Set up the scheduler specific parts of proc0. 1190134791Sjulian */ 1191136167Sjulian proc0.p_sched = NULL; /* XXX */ 1192134791Sjulian ksegrp0.kg_sched = &kg_sched0; 1193136167Sjulian thread0.td_sched = &kse0; 1194134791Sjulian kse0.ke_thread = &thread0; 1195134791Sjulian kse0.ke_state = KES_THREAD; 1196134791Sjulian kg_sched0.skg_concurrency = 1; 1197134791Sjulian kg_sched0.skg_avail_opennings = 0; /* we are already running */ 1198134791Sjulian} 1199134791Sjulian 1200134791Sjulian/* 1201113357Sjeff * This is only somewhat accurate since given many processes of the same 1202113357Sjeff * priority they will switch when their slices run out, which will be 1203113357Sjeff * at most SCHED_SLICE_MAX. 1204113357Sjeff */ 1205109864Sjeffint 1206109864Sjeffsched_rr_interval(void) 1207109864Sjeff{ 1208109864Sjeff return (SCHED_SLICE_MAX); 1209109864Sjeff} 1210109864Sjeff 1211121790Sjeffstatic void 1212109864Sjeffsched_pctcpu_update(struct kse *ke) 1213109864Sjeff{ 1214109864Sjeff /* 1215109864Sjeff * Adjust counters and watermark for pctcpu calc. 1216116365Sjeff */ 1217120272Sjeff if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) { 1218120272Sjeff /* 1219120272Sjeff * Shift the tick count out so that the divide doesn't 1220120272Sjeff * round away our results. 1221120272Sjeff */ 1222120272Sjeff ke->ke_ticks <<= 10; 1223120272Sjeff ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) * 1224120272Sjeff SCHED_CPU_TICKS; 1225120272Sjeff ke->ke_ticks >>= 10; 1226120272Sjeff } else 1227120272Sjeff ke->ke_ticks = 0; 1228109864Sjeff ke->ke_ltick = ticks; 1229109864Sjeff ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 1230109864Sjeff} 1231109864Sjeff 1232109864Sjeffvoid 1233109864Sjeffsched_prio(struct thread *td, u_char prio) 1234109864Sjeff{ 1235121605Sjeff struct kse *ke; 1236109864Sjeff 1237139316Sjeff CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", 1238139316Sjeff td, td->td_proc->p_comm, td->td_priority, prio, curthread, 1239139316Sjeff curthread->td_proc->p_comm); 1240121605Sjeff ke = td->td_kse; 1241109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1242109864Sjeff if (TD_ON_RUNQ(td)) { 1243121605Sjeff /* 1244121605Sjeff * If the priority has been elevated due to priority 1245121605Sjeff * propagation, we may have to move ourselves to a new 1246121605Sjeff * queue. We still call adjustrunqueue below in case kse 1247121605Sjeff * needs to fix things up. 1248121605Sjeff */ 1249138842Sjeff if (prio < td->td_priority && ke->ke_runq != NULL && 1250121872Sjeff (ke->ke_flags & KEF_ASSIGNED) == 0 && 1251121790Sjeff ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) { 1252121605Sjeff runq_remove(ke->ke_runq, ke); 1253121605Sjeff ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; 1254136170Sjulian runq_add(ke->ke_runq, ke, 0); 1255121605Sjeff } 1256138842Sjeff if (prio < td->td_priority) 1257138842Sjeff ke->ke_flags |= KEF_PRIOELEV; 1258133555Sjeff /* 1259133555Sjeff * Hold this kse on this cpu so that sched_prio() doesn't 1260133555Sjeff * cause excessive migration. 
We only want migration to 1261133555Sjeff * happen as the result of a wakeup. 1262133555Sjeff */ 1263133555Sjeff ke->ke_flags |= KEF_HOLD; 1264119488Sdavidxu adjustrunqueue(td, prio); 1265139334Sjeff ke->ke_flags &= ~KEF_HOLD; 1266121605Sjeff } else 1267119488Sdavidxu td->td_priority = prio; 1268109864Sjeff} 1269109864Sjeff 1270109864Sjeffvoid 1271135051Sjuliansched_switch(struct thread *td, struct thread *newtd, int flags) 1272109864Sjeff{ 1273139334Sjeff struct kseq *ksq; 1274109864Sjeff struct kse *ke; 1275109864Sjeff 1276109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1277109864Sjeff 1278109864Sjeff ke = td->td_kse; 1279139334Sjeff ksq = KSEQ_SELF(); 1280109864Sjeff 1281133555Sjeff td->td_lastcpu = td->td_oncpu; 1282113339Sjulian td->td_oncpu = NOCPU; 1283132266Sjhb td->td_flags &= ~TDF_NEEDRESCHED; 1284132266Sjhb td->td_pflags &= ~TDP_OWEPREEMPT; 1285109864Sjeff 1286123434Sjeff /* 1287123434Sjeff * If the KSE has been assigned it may be in the process of switching 1288123434Sjeff * to the new cpu. This is the case in sched_bind(). 1289123434Sjeff */ 1290139334Sjeff if (td == PCPU_GET(idlethread)) { 1291139334Sjeff TD_SET_CAN_RUN(td); 1292139334Sjeff } else if ((ke->ke_flags & KEF_ASSIGNED) == 0) { 1293139334Sjeff /* We are ending our run so make our slot available again */ 1294139334Sjeff SLOT_RELEASE(td->td_ksegrp); 1295139334Sjeff if (ke->ke_runq == NULL) 1296139334Sjeff panic("Thread not on runq."); 1297139334Sjeff kseq_load_rem(ksq, ke); 1298139334Sjeff if (TD_IS_RUNNING(td)) { 1299139334Sjeff /* 1300139334Sjeff * Don't allow the thread to migrate 1301139334Sjeff * from a preemption. 1302139334Sjeff */ 1303139334Sjeff ke->ke_flags |= KEF_HOLD; 1304139334Sjeff setrunqueue(td, (flags & SW_PREEMPT) ? 1305139334Sjeff SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : 1306139334Sjeff SRQ_OURSELF|SRQ_YIELDING); 1307139334Sjeff ke->ke_flags &= ~KEF_HOLD; 1308139334Sjeff } else if ((td->td_proc->p_flag & P_HADTHREADS) && 1309139334Sjeff (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp)) 1310139334Sjeff /* 1311139334Sjeff * We will not be on the run queue. 1312139334Sjeff * So we must be sleeping or similar. 1313139334Sjeff * Don't use the slot if we will need it 1314139334Sjeff * for newtd. 1315139334Sjeff */ 1316139334Sjeff slot_fill(td->td_ksegrp); 1317121146Sjeff } 1318136167Sjulian if (newtd != NULL) { 1319136170Sjulian /* 1320136170Sjulian * If we bring in a thread, 1321136170Sjulian * then account for it as if it had been added to the 1322136170Sjulian * run queue and then chosen. 1323136170Sjulian */ 1324136169Sjulian newtd->td_kse->ke_flags |= KEF_DIDRUN; 1325139334Sjeff newtd->td_kse->ke_runq = ksq->ksq_curr; 1326136167Sjulian SLOT_USE(newtd->td_ksegrp); 1327136173Sjulian TD_SET_RUNNING(newtd); 1328133427Sjeff kseq_load_add(KSEQ_SELF(), newtd->td_kse); 1329136167Sjulian } else 1330131473Sjhb newtd = choosethread(); 1331121128Sjeff if (td != newtd) 1332121128Sjeff cpu_switch(td, newtd); 1333121128Sjeff sched_lock.mtx_lock = (uintptr_t)td; 1334109864Sjeff 1335113339Sjulian td->td_oncpu = PCPU_GET(cpuid); 1336109864Sjeff} 1337109864Sjeff 1338109864Sjeffvoid 1339130551Sjuliansched_nice(struct proc *p, int nice) 1340109864Sjeff{ 1341130551Sjulian struct ksegrp *kg; 1342113357Sjeff struct kse *ke; 1343109864Sjeff struct thread *td; 1344113357Sjeff struct kseq *kseq; 1345109864Sjeff 1346130551Sjulian PROC_LOCK_ASSERT(p, MA_OWNED); 1347113873Sjhb mtx_assert(&sched_lock, MA_OWNED); 1348113357Sjeff /* 1349113357Sjeff * We need to adjust the nice counts for running KSEs. 
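A stand-alone sketch of the %cpu window that sched_pctcpu_update() above maintains: ke_ticks is rescaled to what it would have been over exactly one SCHED_CPU_TICKS window, and the temporary << 10 keeps the integer division from rounding the result away. hz = 1000 is assumed here so the window is 10000 ticks:

#include <stdio.h>

#define HZ          1000            /* assumed */
#define CPU_TICKS   (HZ * 10)       /* SCHED_CPU_TICKS with SCHED_CPU_TIME 10 */

struct cpu_hist {
    int ticks;      /* ticks actually charged (ke_ticks) */
    int ftick;      /* first tick of the window (ke_ftick) */
    int ltick;      /* last tick we ran (ke_ltick) */
};

static void
pctcpu_update(struct cpu_hist *h, int now)
{
    if (h->ltick > now - CPU_TICKS) {
        /* Rescale the charge to a full CPU_TICKS window. */
        h->ticks <<= 10;
        h->ticks = (h->ticks / (now - h->ftick)) * CPU_TICKS;
        h->ticks >>= 10;
    } else
        h->ticks = 0;   /* idle for a whole window: forget everything */
    h->ltick = now;
    h->ftick = h->ltick - CPU_TICKS;
}

int
main(void)
{
    /* Ran 2500 of the last 5000 ticks: rescales to 5000 of a 10000 window. */
    struct cpu_hist h = { .ticks = 2500, .ftick = 20000, .ltick = 25000 };

    pctcpu_update(&h, 25000);
    printf("ticks over a %d tick window: %d\n", CPU_TICKS, h.ticks);
    return (0);
}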
1350113357Sjeff */ 1351130551Sjulian FOREACH_KSEGRP_IN_PROC(p, kg) { 1352130551Sjulian if (kg->kg_pri_class == PRI_TIMESHARE) { 1353134791Sjulian FOREACH_THREAD_IN_GROUP(kg, td) { 1354134791Sjulian ke = td->td_kse; 1355130551Sjulian if (ke->ke_runq == NULL) 1356130551Sjulian continue; 1357130551Sjulian kseq = KSEQ_CPU(ke->ke_cpu); 1358130551Sjulian kseq_nice_rem(kseq, p->p_nice); 1359130551Sjulian kseq_nice_add(kseq, nice); 1360130551Sjulian } 1361113357Sjeff } 1362130551Sjulian } 1363130551Sjulian p->p_nice = nice; 1364130551Sjulian FOREACH_KSEGRP_IN_PROC(p, kg) { 1365130551Sjulian sched_priority(kg); 1366130551Sjulian FOREACH_THREAD_IN_GROUP(kg, td) 1367130551Sjulian td->td_flags |= TDF_NEEDRESCHED; 1368130551Sjulian } 1369109864Sjeff} 1370109864Sjeff 1371109864Sjeffvoid 1372126326Sjhbsched_sleep(struct thread *td) 1373109864Sjeff{ 1374109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1375109864Sjeff 1376109864Sjeff td->td_slptime = ticks; 1377126326Sjhb td->td_base_pri = td->td_priority; 1378109864Sjeff} 1379109864Sjeff 1380109864Sjeffvoid 1381109864Sjeffsched_wakeup(struct thread *td) 1382109864Sjeff{ 1383109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1384109864Sjeff 1385109864Sjeff /* 1386109864Sjeff * Let the kseg know how long we slept for. This is because process 1387109864Sjeff * interactivity behavior is modeled in the kseg. 1388109864Sjeff */ 1389111788Sjeff if (td->td_slptime) { 1390111788Sjeff struct ksegrp *kg; 1391113357Sjeff int hzticks; 1392109864Sjeff 1393111788Sjeff kg = td->td_ksegrp; 1394121868Sjeff hzticks = (ticks - td->td_slptime) << 10; 1395121868Sjeff if (hzticks >= SCHED_SLP_RUN_MAX) { 1396121868Sjeff kg->kg_slptime = SCHED_SLP_RUN_MAX; 1397121868Sjeff kg->kg_runtime = 1; 1398121868Sjeff } else { 1399121868Sjeff kg->kg_slptime += hzticks; 1400121868Sjeff sched_interact_update(kg); 1401121868Sjeff } 1402111788Sjeff sched_priority(kg); 1403134791Sjulian sched_slice(td->td_kse); 1404111788Sjeff td->td_slptime = 0; 1405109864Sjeff } 1406134586Sjulian setrunqueue(td, SRQ_BORING); 1407109864Sjeff} 1408109864Sjeff 1409109864Sjeff/* 1410109864Sjeff * Penalize the parent for creating a new child and initialize the child's 1411109864Sjeff * priority. 1412109864Sjeff */ 1413109864Sjeffvoid 1414134791Sjuliansched_fork(struct thread *td, struct thread *childtd) 1415109864Sjeff{ 1416109864Sjeff 1417109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 1418109864Sjeff 1419134791Sjulian sched_fork_ksegrp(td, childtd->td_ksegrp); 1420134791Sjulian sched_fork_thread(td, childtd); 1421113357Sjeff} 1422113357Sjeff 1423113357Sjeffvoid 1424132372Sjuliansched_fork_ksegrp(struct thread *td, struct ksegrp *child) 1425113357Sjeff{ 1426132372Sjulian struct ksegrp *kg = td->td_ksegrp; 1427134791Sjulian mtx_assert(&sched_lock, MA_OWNED); 1428116365Sjeff 1429121868Sjeff child->kg_slptime = kg->kg_slptime; 1430121868Sjeff child->kg_runtime = kg->kg_runtime; 1431121868Sjeff child->kg_user_pri = kg->kg_user_pri; 1432121868Sjeff sched_interact_fork(child); 1433116463Sjeff kg->kg_runtime += tickincr << 10; 1434116463Sjeff sched_interact_update(kg); 1435113357Sjeff} 1436109864Sjeff 1437113357Sjeffvoid 1438113357Sjeffsched_fork_thread(struct thread *td, struct thread *child) 1439113357Sjeff{ 1440134791Sjulian struct kse *ke; 1441134791Sjulian struct kse *ke2; 1442134791Sjulian 1443134791Sjulian sched_newthread(child); 1444134791Sjulian ke = td->td_kse; 1445134791Sjulian ke2 = child->td_kse; 1446134791Sjulian ke2->ke_slice = 1; /* Attempt to quickly learn interactivity. 
void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct kse *ke;
	struct kse *ke2;

	sched_newthread(child);
	ke = td->td_kse;
	ke2 = child->td_kse;
	ke2->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	ke2->ke_cpu = ke->ke_cpu;
	ke2->ke_runq = NULL;

	/* Grab our parent's cpu estimation information. */
	ke2->ke_ticks = ke->ke_ticks;
	ke2->ke_ltick = ke->ke_ltick;
	ke2->ke_ftick = ke->ke_ftick;
}

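/*
 * Move a ksegrp into a new scheduling class.  Each runnable or running
 * thread in the group has its per-cpu accounting corrected for the class
 * change: the SMP transferable counts and, for the timeshare class, the
 * timeshare load and nice counts.
 */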
void
sched_class(struct ksegrp *kg, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	struct thread *td;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (kg->kg_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(kg->kg_pri_class);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		ke = td->td_kse;
		if (ke->ke_state != KES_ONRUNQ &&
		    ke->ke_state != KES_THREAD)
			continue;
		kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because we could be changing to or from an interrupt
		 * class.
		 */
		if (ke->ke_state == KES_ONRUNQ) {
			/*
			 * XXX Both tests below check the same condition, so
			 * the decrement and increment cancel each other out.
			 */
			if (KSE_CAN_MIGRATE(ke)) {
				kseq->ksq_transferable--;
				kseq->ksq_group->ksg_transferable--;
			}
			if (KSE_CAN_MIGRATE(ke)) {
				kseq->ksq_transferable++;
				kseq->ksq_group->ksg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare--;
			kseq_nice_rem(kseq, kg->kg_proc->p_nice);
		}
		if (nclass == PRI_TIMESHARE) {
			kseq->ksq_load_timeshare++;
			kseq_nice_add(kseq, kg->kg_proc->p_nice);
		}
	}

	kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *childtd)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), childtd);
	sched_exit_thread(NULL, childtd);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct thread *td)
{
	/* kg->kg_slptime += td->td_ksegrp->kg_slptime; */
	kg->kg_runtime += td->td_ksegrp->kg_runtime;
	sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *childtd)
{
	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
	    childtd, childtd->td_proc->p_comm, childtd->td_priority);
	kseq_load_rem(KSEQ_CPU(childtd->td_kse->ke_cpu), childtd->td_kse);
}

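/*
 * Scheduler clock tick handler.  On SMP this is where the periodic load
 * balancers run and where pending kseq assignments are picked up.  Each
 * call advances the pctcpu sample counters and, for timeshare ksegrps,
 * charges the ksegrp tickincr ticks of run time (hz / realstathz, e.g.
 * 1000 / 128 = 7) before consuming one unit of the slice; once the slice
 * is used up the priority and slice are recomputed and a reschedule is
 * requested.
 */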
void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
	if (ticks >= bal_tick)
		sched_balance();
	if (ticks >= gbal_tick && balance_groups)
		sched_balance_groups();
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (kseq->ksq_assigned)
		kseq_assign(kseq);	/* Potentially sets NEEDRESCHED */
#endif
	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	ke = td->td_kse;
	kg = ke->ke_ksegrp;

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute our
	 * interactivity.
	 */
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	/*
	 * We used up one time slice.
	 */
	if (--ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	kseq_load_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_load_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct kseq *kseq;
	int load;

	load = 1;

	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned) {
		mtx_lock_spin(&sched_lock);
		kseq_assign(kseq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (kseq->ksq_load > 0)
			goto out;
	} else
		if (kseq->ksq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;
	struct kse *ke;

	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (td->td_priority != kg->kg_user_pri ||
	    ke->ke_flags & KEF_PRIOELEV) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		if (ke->ke_flags & KEF_PRIOELEV) {
			ke->ke_flags &= ~KEF_PRIOELEV;
			sched_slice(ke);
			if (ke->ke_slice == 0)
				mi_switch(SW_INVOL, NULL);
		}
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
restart:
	if (kseq->ksq_assigned)
		kseq_assign(kseq);
#endif
	ke = kseq_choose(kseq);
	if (ke) {
#ifdef SMP
		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
			if (kseq_idled(kseq) == 0)
				goto restart;
#endif
		kseq_runq_rem(kseq, ke);
		ke->ke_state = KES_THREAD;
		return (ke);
	}
#ifdef SMP
	if (kseq_idled(kseq) == 0)
		goto restart;
#endif
	return (NULL);
}

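/*
 * Place a thread on a run queue.  The run queue (current, next or idle)
 * and the initial slice are chosen from the priority class.  On SMP this
 * is also where pinned or bound threads are forwarded to their target
 * cpu, where this cpu's idle bits are cleared, and where a newly runnable
 * thread may be pushed to a less loaded cpu via kseq_transfer().  Finally
 * the add may preempt curthread or request a reschedule.
 */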
void
sched_add(struct thread *td, int flags)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;
	int preemptive;
	int canmigrate;
	int class;

	if (td == NULL) {
		mtx_unlock_spin(&sched_lock);
		panic("wtf");
	}
	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	kg = td->td_ksegrp;
	canmigrate = 1;
	preemptive = !(flags & SRQ_YIELDING);
	class = PRI_BASE(kg->kg_pri_class);
	kseq = KSEQ_SELF();
	if ((ke->ke_flags & KEF_INTERNAL) == 0)
		SLOT_USE(td->td_ksegrp);
	ke->ke_flags &= ~KEF_INTERNAL;
#ifdef SMP
	if (ke->ke_flags & KEF_ASSIGNED) {
		if (ke->ke_flags & KEF_REMOVED)
			ke->ke_flags &= ~KEF_REMOVED;
		return;
	}
	canmigrate = KSE_CAN_MIGRATE(ke);
#endif
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ke->ke_runq == NULL,
	    ("sched_add: KSE %p is still assigned to a run queue", ke));
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		if (canmigrate)
			ke->ke_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
		if (SCHED_CURR(kg, ke))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
		break;
	case PRI_IDLE:
		/*
		 * This is for priority propagation.
		 */
		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	/*
	 * Don't migrate running threads here.  Force the long term balancer
	 * to do it.
	 */
	if (ke->ke_flags & KEF_HOLD) {
		ke->ke_flags &= ~KEF_HOLD;
		canmigrate = 0;
	}
	/*
	 * If this thread is pinned or bound, notify the target cpu.
	 */
	if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid)) {
		ke->ke_runq = NULL;
		kseq_notify(ke, ke->ke_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
	    (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (kseq->ksq_group->ksg_idlemask ==
		    kseq->ksq_group->ksg_cpumask)
			atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
	} else if (canmigrate && kseq->ksq_load > 1 && class != PRI_ITHD)
		if (kseq_transfer(kseq, ke, class))
			return;
	ke->ke_cpu = PCPU_GET(cpuid);
#endif
	if (td->td_priority < curthread->td_priority &&
	    ke->ke_runq == kseq->ksq_curr)
		curthread->td_flags |= TDF_NEEDRESCHED;
	if (preemptive && maybe_preempt(td))
		return;
	ke->ke_state = KES_ONRUNQ;

	kseq_runq_add(kseq, ke, flags);
	kseq_load_add(kseq, ke);
}

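/*
 * Take a thread off its run queue.  The ksegrp's slot is released first;
 * if the KSE has been assigned to another cpu it is only marked as
 * removed, otherwise it is pulled from the queue and the cpu's load
 * accounting is updated.
 */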
void
sched_rem(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	SLOT_RELEASE(td->td_ksegrp);
	if (ke->ke_flags & KEF_ASSIGNED) {
		ke->ke_flags |= KEF_REMOVED;
		return;
	}
	KASSERT((ke->ke_state == KES_ONRUNQ),
	    ("sched_rem: KSE not on run queue"));

	ke->ke_state = KES_THREAD;
	kseq = KSEQ_CPU(ke->ke_cpu);
	kseq_runq_rem(kseq, ke);
	kseq_load_rem(kseq, ke);
}

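/*
 * Report a thread's recent cpu usage as a fixed-point fraction of one
 * cpu.  The estimate is refreshed at most twice a second and is derived
 * from the ticks charged over the sampling window; for example, a thread
 * that averaged 64 run ticks per second against a realstathz of 128
 * reports FSCALE / 2, i.e. 50%.
 */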
fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct kse *ke;

	pctcpu = 0;
	ke = td->td_kse;
	if (ke == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ke->ke_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick ||
		    ke->ke_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ke);
		/* How many rticks per second? */
		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}

void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	ke->ke_flags |= KEF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ke->ke_state = KES_THREAD;
	kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
	kseq_notify(ke, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= ksg_maxid; i++)
		total += KSEQ_GROUP(i)->ksg_load;
	return (total);
#else
	return (KSEQ_SELF()->ksq_sysload);
#endif
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}
#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"