/*-
 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 165827 2007-01-06 08:44:13Z jeff $");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	enum {
		TSS_THREAD,
		TSS_ONRUNQ
	} ts_state;			/* (j) thread sched specific status. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */

	/* originally from kg_sched */
	int		skg_slptime;	/* Number of ticks we vol. slept */
	int		skg_runtime;	/* Number of ticks we were running */
};
#define	ts_assign	ts_procq.tqe_next
/* flags kept in ts_flags */
#define	TSF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	TSF_BOUND	0x0002		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	TSF_REMOVED	0x0008		/* Thread was removed while ASSIGNED */
#define	TSF_DIDRUN	0x2000		/* Thread actually ran. */

static struct td_sched td_sched0;
/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
 * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
 * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
 * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
 * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
 * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
 */
#define	SCHED_TICK_SECS		10
#define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
#define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
#define	SCHED_TICK_SHIFT	10
#define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
#define	SCHED_TICK_TOTAL(ts)	((ts)->ts_ltick - (ts)->ts_ftick)

/*
 * These macros determine priorities for non-interactive threads.  They are
 * assigned a priority based on their recent cpu utilization as expressed
 * by the ratio of ticks to the tick total.  NHALF priorities at the start
 * and end of the MIN to MAX timeshare range are only reachable with negative
 * or positive nice respectively.
 *
 * PRI_RANGE:	Priority range for utilization dependent priorities.
 * PRI_NRESV:	Number of nice values.
 * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
 * PRI_NICE:	Determines the part of the priority inherited from nice.
 */
#define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_MIN		(PRI_MIN_TIMESHARE + SCHED_PRI_NHALF)
#define	SCHED_PRI_MAX		(PRI_MAX_TIMESHARE - SCHED_PRI_NHALF)
#define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define	SCHED_PRI_TICKS(ts)						\
    (SCHED_TICK_HZ((ts)) /						\
    (roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define	SCHED_PRI_NICE(nice)	(nice)
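
/*
 * Illustrative example of the macros above (not part of the original
 * source; assumes hz = 1000 and the stock 40-nice-value ranges of this
 * era, giving SCHED_PRI_RANGE = 24):  SCHED_TICK_TARG is 10000, so cpu
 * usage is averaged over roughly the last ten seconds.  A thread that
 * ran 5 of those 10 seconds has SCHED_TICK_HZ() of about 5000 and
 * SCHED_TICK_TOTAL() of about 10000, so SCHED_PRI_TICKS() yields
 * 5000 / (roundup(10000, 24) / 24) = 5000 / 417 = 11, placing it near
 * the middle of the utilization-dependent priority range.
 */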
/*
 * These determine the interactivity of a process.  Interactivity differs
 * from cpu utilization in that it expresses the voluntary time slept vs
 * time ran while cpu utilization includes all time not running.  This more
 * accurately models the intent of the thread.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * tickincr:		Converts a stathz tick into a hz domain scaled by
 *			the shift factor.  Without the shift the error rate
 *			due to rounding would be unacceptably high.
 * realstathz:		stathz is sometimes 0 and run off of hz.
 * sched_slice:		Runtime of each thread before rescheduling.
 */
static int sched_interact = SCHED_INTERACT_THRESH;
static int realstathz;
static int tickincr;
static int sched_slice;
static int sched_rebalance = 1;
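
/*
 * Illustrative numbers for the interactivity window (not part of the
 * original source; assumes hz = 1000):  SCHED_SLP_RUN_MAX is
 * (5 * 1000) << 10, i.e. five seconds of combined voluntary sleep and
 * run time kept in the << SCHED_TICK_SHIFT fixed-point domain, and a
 * forked child inherits at most half a second (SCHED_SLP_RUN_FORK) of
 * that history.  Interactivity scores run from 0 (most interactive) to
 * 100, and only threads scoring below 30 are treated as interactive.
 */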
/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;		/* Queue of IDLE threads. */
	struct runq	tdq_timeshare;		/* timeshare run queue. */
	struct runq	tdq_realtime;		/* real-time run queue. */
	int		tdq_idx;		/* Current insert index. */
	int		tdq_ridx;		/* Current removal index. */
	int		tdq_load;		/* Aggregate load. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;		/* Next in tdq group. */
	struct tdq_group *tdq_group;		/* Our processor group. */
	volatile struct td_sched *tdq_assigned;	/* assigned by another CPU. */
#else
	int		tdq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a NUMA environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int	tdg_cpus;		/* Count of CPUs in this tdq group. */
	cpumask_t tdg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t tdg_idlemask;		/* Idle cpus in this group. */
	cpumask_t tdg_mask;		/* Bit mask for first cpu. */
	int	tdg_load;		/* Total load of this group. */
	int	tdg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};
#endif
/*
 * One thread queue per processor.
 */
#ifdef SMP
static cpumask_t tdq_idle;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static struct td_sched *sched_choose(void);	/* XXX Should be thread * */
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);
static inline void sched_pin_td(struct thread *td);
static inline void sched_unpin_td(struct thread *td);

/* Operations on per processor queues */
static struct td_sched * tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
#ifdef SMP
static int tdq_transfer(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void sched_smp_tick(void);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *, int);
static void tdq_assign(struct tdq *);
static struct td_sched *tdq_steal(struct tdq *, int);

#define	THREAD_CAN_MIGRATE(td)						\
    ((td)->td_pinned == 0 && (td)->td_pri_class != PRI_ITHD)
#endif

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static inline void
sched_pin_td(struct thread *td)
{
	td->td_pinned++;
}

static inline void
sched_unpin_td(struct thread *td)
{
	td->td_pinned--;
}
static void
runq_print(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;
	int j;
	int i;

	for (i = 0; i < RQB_LEN; i++) {
		printf("\t\trunq bits %d 0x%zx\n",
		    i, rq->rq_status.rqb_bits[i]);
		for (j = 0; j < RQB_BPW; j++)
			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
				pri = j + (i << RQB_L2BPW);
				rqh = &rq->rq_queues[pri];
				TAILQ_FOREACH(ts, rqh, ts_procq) {
					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
					    ts->ts_thread, ts->ts_thread->td_proc->p_comm, ts->ts_thread->td_priority, ts->ts_rqindex, pri);
				}
			}
	}
}

void
tdq_print(int cpu)
{
	struct tdq *tdq;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload:           %d\n", tdq->tdq_load);
	printf("\ttimeshare idx:  %d\n", tdq->tdq_idx);
	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
	printf("\trealtime runq:\n");
	runq_print(&tdq->tdq_realtime);
	printf("\ttimeshare runq:\n");
	runq_print(&tdq->tdq_timeshare);
	printf("\tidle runq:\n");
	runq_print(&tdq->tdq_idle);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
}

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		int pri;

		pri = ts->ts_thread->td_priority;
		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
		    ("Invalid priority %d on timeshare runq", pri));
		/*
		 * This queue contains only priorities between MIN and MAX
		 * realtime.  Use the whole queue to represent these values.
		 */
#define	TS_RQ_PPQ	(((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS)
		if ((flags & SRQ_BORROWING) == 0) {
			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
			pri = (pri + tdq->tdq_idx) % RQ_NQS;
			/*
			 * This effectively shortens the queue by one so we
			 * can have a one slot difference between idx and
			 * ridx while we wait for threads to drain.
			 */
			if (tdq->tdq_ridx != tdq->tdq_idx &&
			    pri == tdq->tdq_ridx)
				pri = (pri - 1) % RQ_NQS;
		} else
			pri = tdq->tdq_ridx;
		runq_add_pri(ts->ts_runq, ts, pri, flags);
	} else
		runq_add(ts->ts_runq, ts, flags);
}
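
/*
 * Worked example of the circular timeshare queue logic above
 * (illustrative, not from the original source; assumes RQ_NQS == 64 and
 * a 64-priority timeshare range, so TS_RQ_PPQ == 1):  a thread at
 * priority PRI_MIN_TIMESHARE + 10 with tdq_idx == 60 hashes to queue
 * (10 + 60) % 64 == 6, i.e. the insertion wraps around behind the
 * current removal index, so the thread only runs once every queue
 * between ridx and its slot has drained.  SRQ_BORROWING inserts go
 * directly at tdq_ridx so priority-boosted threads run next.
 */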
static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		if (tdq->tdq_idx != tdq->tdq_ridx)
			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
		else
			runq_remove_idx(ts->ts_runq, ts, NULL);
		/*
		 * For timeshare threads we update the priority here so
		 * the priority reflects the time we've been sleeping.
		 */
		ts->ts_ltick = ticks;
		sched_pctcpu_update(ts);
		sched_priority(ts->ts_thread);
	} else
		runq_remove(ts->ts_runq, ts);
}

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	tdq->tdq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
}

#ifdef SMP
static void
sched_smp_tick(void)
{
	struct tdq *tdq;

	tdq = TDQ_SELF();
	if (sched_rebalance) {
		if (ticks >= bal_tick)
			sched_balance();
		if (ticks >= gbal_tick && balance_groups)
			sched_balance_groups();
	}
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (tdq->tdq_assigned)
		tdq_assign(tdq);	/* Potentially sets NEEDRESCHED */
}
/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 */
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}
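
/*
 * Note on the randomized interval (illustrative, not from the original
 * source):  each balancer pass re-arms itself at
 * ticks + (random() % (hz * 2)), so with hz = 1000 the next rebalance
 * falls at a random point within the following two seconds.  The jitter
 * keeps the balancer from synchronizing with other periodic activity,
 * and the random starting group index keeps it from always favoring
 * low-numbered groups when loads tie.
 */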
static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}
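
/*
 * Worked example of the imbalance math above (illustrative, not from
 * the original source):  with high_load = 7 and low_load = 2, diff = 5
 * and move = diff / 2 + 1 = 3 because the difference is odd, leaving
 * loads of 4 and 5 after the transfer.  If only two threads on the busy
 * queue are transferable, move is clamped to 2 instead.
 */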
static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	ts->ts_state = TSS_THREAD;
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
	tdq_notify(ts, cpu);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try and steal threads from another cpu in
	 * the group before idling.
	 */
	if (tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts == NULL)
				continue;
			ts->ts_state = TSS_THREAD;
			tdq_runq_rem(steal, ts);
			tdq_load_rem(steal, ts);
			ts->ts_cpu = PCPU_GET(cpuid);
			sched_pin_td(ts->ts_thread);
			sched_add(ts->ts_thread, SRQ_YIELDING);
			sched_unpin_td(ts->ts_thread);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread
	 * bounces back and forth between two idle cores on separate physical
	 * CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask != tdg->tdg_cpumask)
		return (1);
	atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
}

static void
tdq_assign(struct tdq *tdq)
{
	struct td_sched *nts;
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts, (uintptr_t)NULL));
	for (; ts != NULL; ts = nts) {
		nts = ts->ts_assign;
		tdq->tdq_group->tdg_load--;
		tdq->tdq_load--;
		ts->ts_flags &= ~TSF_ASSIGNED;
		if (ts->ts_flags & TSF_REMOVED) {
			ts->ts_flags &= ~TSF_REMOVED;
			continue;
		}
		sched_pin_td(ts->ts_thread);
		sched_add(ts->ts_thread, SRQ_YIELDING);
		sched_unpin_td(ts->ts_thread);
	}
}
static void
tdq_notify(struct td_sched *ts, int cpu)
{
	struct tdq *tdq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	tdq = TDQ_CPU(cpu);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if ((class != PRI_IDLE && class != PRI_ITHD)
	    && (tdq_idle & tdq->tdq_group->tdg_mask))
		atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
	tdq->tdq_group->tdg_load++;
	tdq->tdq_load++;
	ts->ts_cpu = cpu;
	ts->ts_flags |= TSF_ASSIGNED;
	prio = ts->ts_thread->td_priority;

	/*
	 * Place a thread on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
	    (uintptr_t)ts->ts_assign, (uintptr_t)ts));
	/* Only ipi for realtime/ithd priorities */
	if (ts->ts_thread->td_priority > PRI_MIN_KERN)
		return;
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ts->ts_thread->td_priority < td->td_priority) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}
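
/*
 * Note on the tdq_assigned handoff (illustrative, not from the original
 * source):  tdq_notify() pushes a td_sched onto the remote queue's
 * singly-linked tdq_assigned list with a compare-and-swap loop, linking
 * through ts_assign (an alias for the run-queue tailq next pointer),
 * and tdq_assign() later detaches the whole list by swapping the head
 * to NULL.  Producers simply retry on CAS failure, so the cross-CPU
 * handoff itself needs no lock.
 */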
static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts->ts_thread))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 * XXX Need to effect steal order for timeshare threads.
	 */
	if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL)
		return (ts);
	if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class)
{
	struct tdq_group *ntdg;
	struct tdq_group *tdg;
	struct tdq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = TDQ_CPU(ts->ts_cpu);
	ntdg = old->tdq_group;
	tdg = tdq->tdq_group;
	if (tdq_idle) {
		if (tdq_idle & ntdg->tdg_mask) {
			cpu = ffs(ntdg->tdg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "tdq_transfer: %p found old cpu %X "
				    "in idlemask.", ts, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(tdq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p found %X "
			    "in idlemask.", ts, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->tdq_load < tdq->tdq_load) {
		cpu = ts->ts_cpu + 1;
		CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X "
		    "load less than ours.", ts, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= tdg_maxid; idx++) {
		ntdg = TDQ_GROUP(idx);
		if (ntdg->tdg_load /*+ (ntdg->tdg_cpus * 2)*/ < tdg->tdg_load) {
			cpu = ffs(ntdg->tdg_cpumask);
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less "
			    "than ours.", ts, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (tdg->tdg_idlemask) {
		cpu = ffs(tdg->tdg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in "
			    "group.", ts, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ts->ts_runq = NULL;
	tdq_notify(ts, cpu);

	return (1);
}

#endif	/* SMP */
/*
 * Pick the highest priority task we have and return it.
 */

static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = runq_choose(&tdq->tdq_realtime);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME,
		    ("tdq_choose: Invalid priority on realtime queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}
	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE &&
		    ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
		    ("tdq_choose: Invalid priority on timeshare queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	ts = runq_choose(&tdq->tdq_idle);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
		    ("tdq_choose: Invalid priority on idle queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	return (NULL);
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_realtime);
	runq_init(&tdq->tdq_timeshare);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_load = 0;
}
static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, we set realstathz to a dummy value
	 * in case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	sched_slice = (realstathz/7);	/* 140ms */
	tickincr = 1 << SCHED_TICK_SHIFT;

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *tdq;

		tdq = &tdq_cpu[i];
		tdq->tdq_assigned = NULL;
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *tdg;
		struct tdq *tdq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			tdq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			tdq->tdq_transferable = 0;
			tdq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	sched_slice = (realstathz/7);	/* ~140ms */

	/*
	 * tickincr is shifted out by 10 to avoid rounding errors due to
	 * hz not being evenly divisible by stathz on all platforms.
	 */
	tickincr = (hz << SCHED_TICK_SHIFT) / realstathz;
	/*
	 * This does not work for values of stathz that are more than
	 * 1 << SCHED_TICK_SHIFT * hz.  In practice this does not happen.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}
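
/*
 * Worked numbers for the tick conversion above (illustrative, not from
 * the original source; assumes hz = 1000 and stathz = 128):
 * sched_slice = 128 / 7 = 18 stathz ticks, roughly 140ms, and
 * tickincr = (1000 << 10) / 128 = 8000, i.e. each stathz tick adds 8000
 * to ts_ticks, which SCHED_TICK_HZ() later shifts back down by 10 bits
 * to recover whole hz ticks with little rounding error.
 */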
/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int score;
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * If the score is interactive we place the thread in the realtime
	 * queue with a priority that is less than kernel and interrupt
	 * priorities.  These threads are not subject to nice restrictions.
	 *
	 * Scores greater than this are placed on the normal realtime queue
	 * where the priority is partially decided by the most recent cpu
	 * utilization and the rest is decided by nice value.
	 */
	score = sched_interact_score(td);
	if (score < sched_interact) {
		pri = PRI_MIN_REALTIME;
		pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact)
		    * score;
		KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME,
		    ("sched_priority: invalid interactive priority %d", pri));
	} else {
		pri = SCHED_PRI_MIN;
		if (td->td_sched->ts_ticks)
			pri += SCHED_PRI_TICKS(td->td_sched);
		pri += SCHED_PRI_NICE(td->td_proc->p_nice);
		if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) {
			static int once = 1;
			if (once) {
				printf("sched_priority: invalid priority %d",
				    pri);
				printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n",
				    td->td_proc->p_nice,
				    td->td_sched->ts_ticks,
				    td->td_sched->ts_ftick,
				    td->td_sched->ts_ltick,
				    SCHED_PRI_TICKS(td->td_sched));
				once = 0;
			}
			pri = min(max(pri, PRI_MIN_TIMESHARE),
			    PRI_MAX_TIMESHARE);
		}
	}
	sched_user_prio(td, pri);

	return;
}
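
/*
 * Worked example of the mapping above (illustrative, not from the
 * original source):  with the default sched_interact of 30, a score of
 * s places an interactive thread at PRI_MIN_REALTIME +
 * ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / 30) * s, so score 0 lands at
 * the top of the realtime range and score 29 near its bottom.  A
 * non-interactive thread that used half the cpu at nice 0 lands near
 * SCHED_PRI_MIN + 11 (see the SCHED_PRI_TICKS example above), and each
 * nice step then adds or subtracts one priority.
 */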
/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 */
static void
sched_interact_update(struct thread *td)
{
	struct td_sched *ts;
	int sum;

	ts = td->td_sched;
	sum = ts->skg_runtime + ts->skg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * This only happens from two places:
	 * 1) We have added an unusual amount of run time from fork_exit.
	 * 2) We have added an unusual amount of sleep time from sched_sleep().
	 */
	if (sum > SCHED_SLP_RUN_MAX * 2) {
		if (ts->skg_runtime > ts->skg_slptime) {
			ts->skg_runtime = SCHED_SLP_RUN_MAX;
			ts->skg_slptime = 1;
		} else {
			ts->skg_slptime = SCHED_SLP_RUN_MAX;
			ts->skg_runtime = 1;
		}
		return;
	}
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX]
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		ts->skg_runtime /= 2;
		ts->skg_slptime /= 2;
		return;
	}
	ts->skg_runtime = (ts->skg_runtime / 5) * 4;
	ts->skg_slptime = (ts->skg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct thread *td)
{
	int ratio;
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		td->td_sched->skg_runtime /= ratio;
		td->td_sched->skg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct thread *td)
{
	int div;

	if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) {
		div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div)));
	}
	if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) {
		div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF);
		return (td->td_sched->skg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);
}
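
/*
 * Worked example of the score above (illustrative, not from the
 * original source):  a thread that voluntarily slept 8 seconds for
 * every 2 seconds it ran has skg_slptime > skg_runtime, so
 * div = slptime / 50 and the score is runtime / div =
 * (2 * 50) / 8 = 12, comfortably under SCHED_INTERACT_THRESH (30) and
 * therefore interactive.  Reversing the ratio (8s run, 2s sleep) lands
 * in the first branch and scores 50 + (50 - 12) = 88: a cpu hog.
 */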
/*
 * Called from proc0_init() to bootstrap the scheduler.
 */
void
schedinit(void)
{

	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	thread0.td_sched = &td_sched0;
	td_sched0.ts_ltick = ticks;
	td_sched0.ts_ftick = ticks;
	td_sched0.ts_thread = &thread0;
	td_sched0.ts_state = TSS_THREAD;
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most sched_slice stathz ticks.
 */
int
sched_rr_interval(void)
{

	/* Convert sched_slice to hz */
	return (hz/(realstathz/sched_slice));
}

static void
sched_pctcpu_update(struct td_sched *ts)
{

	if (ts->ts_ticks == 0)
		return;
	if (ticks - (hz / 10) < ts->ts_ltick &&
	    SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX)
		return;
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ts->ts_ltick > ticks - SCHED_TICK_TARG)
		ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) *
			    SCHED_TICK_TARG;
	else
		ts->ts_ticks = 0;
	ts->ts_ltick = ticks;
	ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG;
}
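
/*
 * Worked example of the decay above (illustrative, not from the
 * original source; assumes hz = 1000):  a thread first seen 20000 ticks
 * ago (ts_ftick) that last ran within the 10000-tick target window has
 * its ts_ticks rescaled by ts_ticks / 20000 * 10000, i.e. roughly
 * halved, and the window is slid forward so ts_ftick sits exactly
 * SCHED_TICK_TARG behind the current tick.  A thread idle for longer
 * than the whole window simply has its tick count zeroed.
 */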
static void
sched_thread_priority(struct thread *td, u_char prio)
{
	struct td_sched *ts;

	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
	    curthread->td_proc->p_comm);
	ts = td->td_sched;
	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_priority == prio)
		return;

	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  This could be optimized to not re-add in some
		 * cases.
		 *
		 * Hold this td_sched on this cpu so that sched_prio() doesn't
		 * cause excessive migration.  We only want migration to
		 * happen as the result of a wakeup.
		 */
		sched_pin_td(td);
		sched_rem(td);
		td->td_priority = prio;
		sched_add(td, SRQ_BORROWING);
		sched_unpin_td(td);
	} else
		td->td_priority = prio;
}

/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

	td->td_flags |= TDF_BORROWING;
	sched_thread_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio, the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_thread_priority(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}

void
sched_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/*
	 * If the thread is borrowing another thread's priority, don't
	 * ever lower the priority.
	 */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_thread_priority(td, prio);

	/*
	 * If the thread is on a turnstile, then let the turnstile update
	 * its state.
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
sched_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_base_user_pri = prio;
	if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
		return;
	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_lend_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_flags |= TDF_UBORROWING;

	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	base_pri = td->td_base_user_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_UBORROWING;
		sched_user_prio(td, base_pri);
	} else
		sched_lend_user_prio(td, prio);
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	tdq = TDQ_SELF();
	ts = td->td_sched;
	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;
	/*
	 * If the thread has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if (td == PCPU_GET(idlethread)) {
		TD_SET_CAN_RUN(td);
	} else if ((ts->ts_flags & TSF_ASSIGNED) == 0) {
		/* We are ending our run so make our slot available again */
		tdq_load_rem(tdq, ts);
		if (TD_IS_RUNNING(td)) {
			/*
			 * Don't allow the thread to migrate
			 * from a preemption.
			 */
			sched_pin_td(td);
			setrunqueue(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
			sched_unpin_td(td);
		}
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread account for it as if it had been
		 * added to the run queue and then chosen.
		 */
		newtd->td_sched->ts_flags |= TSF_DIDRUN;
		TD_SET_RUNNING(newtd);
		tdq_load_add(TDQ_SELF(), newtd->td_sched);
	} else
		newtd = choosethread();
	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif

		cpu_switch(td, newtd);
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}
	sched_lock.mtx_lock = (uintptr_t)td;
	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);

	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		sched_priority(td);
		sched_prio(td, td->td_base_user_pri);
	}
}

void
sched_sleep(struct thread *td)
{

	mtx_assert(&sched_lock, MA_OWNED);

	td->td_sched->ts_slptime = ticks;
}

void
sched_wakeup(struct thread *td)
{
	int slptime;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * If we slept for more than a tick update our interactivity and
	 * priority.
	 */
	slptime = td->td_sched->ts_slptime;
	td->td_sched->ts_slptime = 0;
	if (slptime && slptime != ticks) {
		int hzticks;

		hzticks = (ticks - slptime) << SCHED_TICK_SHIFT;
		td->td_sched->skg_slptime += hzticks;
		sched_interact_update(td);
		sched_pctcpu_update(td->td_sched);
		sched_priority(td);
	}
	setrunqueue(td, SRQ_BORING);
}
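
/*
 * Worked example of the sleep credit above (illustrative, not from the
 * original source; assumes hz = 1000):  ts_slptime records the tick at
 * which the thread went to sleep, so a thread woken 250 ticks later
 * adds 250 << 10 to skg_slptime, the same fixed-point domain the
 * run-time accounting uses.  sched_interact_update() then clips the
 * combined history and sched_priority() recomputes the thread's slot
 * before it goes back on a run queue.
 */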
/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_fork_thread(td, child);
	/*
	 * Penalize the parent and child for forking.
	 */
	sched_interact_fork(child);
	sched_priority(child);
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);
	sched_priority(td);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct td_sched *ts;
	struct td_sched *ts2;

	/*
	 * Initialize child.
	 */
	sched_newthread(child);
	ts = td->td_sched;
	ts2 = child->td_sched;
	ts2->ts_cpu = ts->ts_cpu;
	ts2->ts_runq = NULL;
	/*
	 * Grab our parent's cpu estimation information and priority.
	 */
	ts2->ts_ticks = ts->ts_ticks;
	ts2->ts_ltick = ts->ts_ltick;
	ts2->ts_ftick = ts->ts_ftick;
	child->td_user_pri = td->td_user_pri;
	child->td_base_user_pri = td->td_base_user_pri;
	/*
	 * And update interactivity score.
	 */
	ts2->skg_slptime = ts->skg_slptime;
	ts2->skg_runtime = ts->skg_runtime;
	ts2->ts_slice = 1;	/* Attempt to quickly learn interactivity. */
}

void
sched_class(struct thread *td, int class)
{

	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_pri_class == class)
		return;

#ifdef SMP
	/*
	 * On SMP, if we're on the RUNQ we must adjust the transferable
	 * count because we could be changing to or from an interrupt
	 * class.
	 */
	if (td->td_sched->ts_state == TSS_ONRUNQ) {
		struct tdq *tdq;

		tdq = TDQ_CPU(td->td_sched->ts_cpu);
		if (THREAD_CAN_MIGRATE(td)) {
			tdq->tdq_transferable--;
			tdq->tdq_group->tdg_transferable--;
		}
		td->td_pri_class = class;
		if (THREAD_CAN_MIGRATE(td)) {
			tdq->tdq_transferable++;
			tdq->tdq_group->tdg_transferable++;
		}
	}
#endif
	td->td_pri_class = class;
}
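/*
 * Illustrative sketch (never compiled; assumes td is not pinned): moving
 * a runnable thread from the timeshare class to the interrupt class.
 * THREAD_CAN_MIGRATE() holds before the change but not after it, so the
 * transferable counts are decremented once and not re-incremented.
 */
#if 0
	/* td is TSS_ONRUNQ with td_pri_class == PRI_TIMESHARE. */
	sched_class(td, PRI_ITHD);
	/* tdq_transferable and tdg_transferable each dropped by one. */
#endif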
/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *child)
{
	struct thread *td;

	CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	td = FIRST_THREAD_IN_PROC(p);
	sched_exit_thread(td, child);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{

	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched);
#ifdef KSE
	/*
	 * KSE forks and exits so often that this penalty causes short-lived
	 * threads to always be non-interactive.  This causes mozilla to
	 * crawl under load.
	 */
	if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc)
		return;
#endif
	/*
	 * Give the child's runtime to the parent without returning the
	 * sleep time as a penalty to the parent.  This causes shells that
	 * launch expensive things to mark their children as expensive.
	 */
	td->td_sched->skg_runtime += child->td_sched->skg_runtime;
	sched_interact_update(td);
	sched_priority(td);
}

void
sched_userret(struct thread *td)
{
	/*
	 * XXX we cheat slightly on the locking here to avoid locking in
	 * the usual case.  Setting td_priority here is essentially an
	 * incomplete workaround for not setting it properly elsewhere.
	 * Now that some interrupt handlers are threads, not setting it
	 * properly elsewhere can clobber it in the window between setting
	 * it here and returning to user mode, so don't waste time setting
	 * it perfectly here.
	 */
	KASSERT((td->td_flags & TDF_BORROWING) == 0,
	    ("thread with borrowed priority returning to userland"));
	if (td->td_priority != td->td_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = td->td_user_pri;
		td->td_base_pri = td->td_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}
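/*
 * Illustrative sketch (never compiled; tick amounts are hypothetical):
 * the accounting done by sched_exit_thread() above.  A mostly idle shell
 * that forks an expensive child absorbs the child's runtime but none of
 * its sleep time, so the parent's interactivity score worsens.
 */
#if 0
	/* parent skg_runtime small, child skg_runtime large */
	td->td_sched->skg_runtime += child->td_sched->skg_runtime;
	sched_interact_update(td);	/* rescales if over the history limit */
	sched_priority(td);	/* parent may lose its interactive standing */
#endif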
void
sched_clock(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef SMP
	sched_smp_tick();
#endif
	tdq = TDQ_SELF();
	/*
	 * Advance the insert index once for each tick to ensure that all
	 * threads get a chance to run.
	 */
	if (tdq->tdq_idx == tdq->tdq_ridx) {
		tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS;
		if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx]))
			tdq->tdq_ridx = tdq->tdq_idx;
	}
	/* Adjust ticks for pctcpu. */
	ts = td->td_sched;
	ts->ts_ticks += tickincr;
	ts->ts_ltick = ticks;
	/*
	 * Update if we've exceeded our desired tick threshold by over one
	 * second.
	 */
	if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick)
		sched_pctcpu_update(ts);
	/*
	 * We only handle slicing for TIMESHARE threads.
	 */
	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the thread so that we can compute our
	 * interactivity.
	 */
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);
	/*
	 * We used up one time slice.
	 */
	if (--ts->ts_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	sched_priority(td);
	tdq_load_rem(tdq, ts);
	ts->ts_slice = sched_slice;
	tdq_load_add(tdq, ts);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct tdq *tdq;
	int load;

	load = 1;

	tdq = TDQ_SELF();
#ifdef SMP
	if (tdq->tdq_assigned) {
		mtx_lock_spin(&sched_lock);
		tdq_assign(tdq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (tdq->tdq_load > 0)
			goto out;
	} else
		if (tdq->tdq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

struct td_sched *
sched_choose(void)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
#ifdef SMP
restart:
	if (tdq->tdq_assigned)
		tdq_assign(tdq);
#endif
	ts = tdq_choose(tdq);
	if (ts) {
#ifdef SMP
		if (ts->ts_thread->td_priority > PRI_MIN_IDLE)
			if (tdq_idled(tdq) == 0)
				goto restart;
#endif
		tdq_runq_rem(tdq, ts);
		ts->ts_state = TSS_THREAD;
		return (ts);
	}
#ifdef SMP
	if (tdq_idled(tdq) == 0)
		goto restart;
#endif
	return (NULL);
}
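/*
 * Illustrative walk-through (hypothetical state; assumes RQ_NQS == 64):
 * the timeshare run queue behaves as a calendar.  Threads are inserted
 * relative to tdq_idx and removed from tdq_ridx, and sched_clock() above
 * advances tdq_idx one bucket per tick.  With tdq_idx == tdq_ridx == 63
 * and bucket 63 empty, one tick leaves tdq_idx = (63 + 1) % 64 = 0 and
 * drags tdq_ridx along to 0, so even the lowest-priority timeshare
 * thread is reached after at most RQ_NQS advances.
 */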
void
sched_add(struct thread *td, int flags)
{
	struct tdq *tdq;
	struct td_sched *ts;
	int preemptive;
	int canmigrate;
	int class;

	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
	ts = td->td_sched;
	class = PRI_BASE(td->td_pri_class);
	preemptive = !(flags & SRQ_YIELDING);
	canmigrate = 1;
#ifdef SMP
	if (ts->ts_flags & TSF_ASSIGNED) {
		if (ts->ts_flags & TSF_REMOVED)
			ts->ts_flags &= ~TSF_REMOVED;
		return;
	}
	canmigrate = THREAD_CAN_MIGRATE(td);
#endif
	KASSERT(ts->ts_state != TSS_ONRUNQ,
	    ("sched_add: thread %p (%s) already in run queue", td,
	    td->td_proc->p_comm));
	KASSERT(td->td_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ts->ts_runq == NULL,
	    ("sched_add: thread %p is still assigned to a run queue", td));
	/*
	 * Set the slice and pick the run queue.
	 */
	if (ts->ts_slice == 0)
		ts->ts_slice = sched_slice;
	if (class == PRI_TIMESHARE)
		sched_priority(td);
	if (td->td_priority <= PRI_MAX_REALTIME) {
		ts->ts_runq = &tdq->tdq_realtime;
		/*
		 * If the thread is not artificially pinned and it's in
		 * the realtime queue, we directly dispatch it on this cpu
		 * for minimum latency.  Interrupt handlers may also have
		 * to complete on the cpu that dispatched them.
		 */
		if (td->td_pinned == 0 && class == PRI_ITHD)
			ts->ts_cpu = PCPU_GET(cpuid);
	} else if (td->td_priority <= PRI_MAX_TIMESHARE)
		ts->ts_runq = &tdq->tdq_timeshare;
	else
		ts->ts_runq = &tdq->tdq_idle;

#ifdef SMP
	/*
	 * If this thread is pinned or bound, notify the target cpu.
	 */
	if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid)) {
		ts->ts_runq = NULL;
		tdq_notify(ts, ts->ts_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class != PRI_IDLE && class != PRI_ITHD) &&
	    (tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (tdq->tdq_group->tdg_idlemask ==
		    tdq->tdq_group->tdg_cpumask)
			atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		tdq->tdq_group->tdg_idlemask &= ~PCPU_GET(cpumask);
	} else if (canmigrate && tdq->tdq_load > 1)
		if (tdq_transfer(tdq, ts, class))
			return;
	ts->ts_cpu = PCPU_GET(cpuid);
#endif
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
	if (preemptive && maybe_preempt(td))
		return;
	ts->ts_state = TSS_ONRUNQ;

	tdq_runq_add(tdq, ts, flags);
	tdq_load_add(tdq, ts);
}
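/*
 * Illustrative sketch (never compiled): how callers steer the preemption
 * behaviour above.  SRQ_YIELDING clears `preemptive', so maybe_preempt()
 * is skipped when the caller is about to give up the cpu anyway.
 */
#if 0
	setrunqueue(td, SRQ_BORING);		/* may preempt curthread */
	setrunqueue(td, SRQ_OURSELF | SRQ_YIELDING);	/* never preempts */
#endif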
void
sched_rem(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	if (ts->ts_flags & TSF_ASSIGNED) {
		ts->ts_flags |= TSF_REMOVED;
		return;
	}
	KASSERT((ts->ts_state == TSS_ONRUNQ),
	    ("sched_rem: thread not on run queue"));

	ts->ts_state = TSS_THREAD;
	tdq = TDQ_CPU(ts->ts_cpu);
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct td_sched *ts;

	pctcpu = 0;
	ts = td->td_sched;
	if (ts == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ts->ts_ticks) {
		int rtick;

		sched_pctcpu_update(ts);
		/* How many rticks per second? */
		rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz);
		pctcpu = (FSCALE * ((FSCALE * rtick) / hz)) >> FSHIFT;
	}
	td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}
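/*
 * Illustrative arithmetic (hypothetical values): for a thread that was
 * runnable for the entire averaging window, SCHED_TICK_HZ(ts) works out
 * to roughly hz * SCHED_TICK_SECS, so with hz = 1000:
 *
 *	rtick  = min((1000 * 10) / 10, 1000) = 1000
 *	pctcpu = (FSCALE * ((FSCALE * 1000) / 1000)) >> FSHIFT = FSCALE
 *
 * which userland (ps, top) interprets as 100% cpu.
 */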
void
sched_bind(struct thread *td, int cpu)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	KASSERT((ts->ts_flags & TSF_BOUND) == 0,
	    ("sched_bind: thread %p already bound.", td));
	ts->ts_flags |= TSF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ts->ts_state = TSS_THREAD;
	tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts);
	tdq_notify(ts, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
	sched_pin();
#endif
}

void
sched_unbind(struct thread *td)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	KASSERT(ts->ts_flags & TSF_BOUND,
	    ("sched_unbind: thread %p not bound.", td));
	ts->ts_flags &= ~TSF_BOUND;
#ifdef SMP
	sched_unpin();
#endif
}

int
sched_is_bound(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	return (td->td_sched->ts_flags & TSF_BOUND);
}

void
sched_relinquish(struct thread *td)
{
	mtx_lock_spin(&sched_lock);
	if (td->td_pri_class == PRI_TIMESHARE)
		sched_prio(td, PRI_MAX_TIMESHARE);
	mi_switch(SW_VOL, NULL);
	mtx_unlock_spin(&sched_lock);
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= tdg_maxid; i++)
		total += TDQ_GROUP(i)->tdg_load;
	return (total);
#else
	return (TDQ_SELF()->tdq_sysload);
#endif
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}

void
sched_tick(void)
{
}

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");
SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &sched_rebalance, 0, "");

/* ps compat */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"
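/*
 * Illustrative userland sketch (not part of this file): the knobs
 * exported above can be inspected from a standalone program via
 * sysctlbyname(3), e.g. kern.sched.slice.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int slice;
	size_t len = sizeof(slice);

	if (sysctlbyname("kern.sched.slice", &slice, &len, NULL, 0) == 0)
		printf("kern.sched.slice: %d\n", slice);
	return (0);
}
#endif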