sched_ule.c revision 165796
/*-
 * Copyright (c) 2002-2007, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 165796 2007-01-05 08:50:38Z jeff $");

#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

/*
 * Thread scheduler specific section.
 */
struct td_sched {
	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
	int		ts_flags;	/* (j) TSF_* flags. */
	struct thread	*ts_thread;	/* (*) Active associated thread. */
	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
	u_char		ts_rqindex;	/* (j) Run queue index. */
	enum {
		TSS_THREAD,
		TSS_ONRUNQ
	} ts_state;			/* (j) thread sched specific status. */
	int		ts_slptime;
	int		ts_slice;
	struct runq	*ts_runq;
	u_char		ts_cpu;		/* CPU that we have affinity for. */
	/* The following variables are only used for pctcpu calculation */
	int		ts_ltick;	/* Last tick that we were running on */
	int		ts_ftick;	/* First tick that we were running on */
	int		ts_ticks;	/* Tick count */

	/* originally from kg_sched */
	int		skg_slptime;	/* Number of ticks we vol. slept */
	int		skg_runtime;	/* Number of ticks we were running */
};
#define	ts_assign	ts_procq.tqe_next
/* flags kept in ts_flags */
#define	TSF_ASSIGNED	0x0001		/* Thread is being migrated. */
#define	TSF_BOUND	0x0002		/* Thread can not migrate. */
#define	TSF_XFERABLE	0x0004		/* Thread was added as transferable. */
#define	TSF_HOLD	0x0008		/* Thread is temporarily bound. */
#define	TSF_REMOVED	0x0010		/* Thread was removed while ASSIGNED */
#define	TSF_INTERNAL	0x0020		/* Thread added due to migration. */
#define	TSF_DIDRUN	0x2000		/* Thread actually ran. */
#define	TSF_EXIT	0x4000		/* Thread is being killed. */

static struct td_sched td_sched0;

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_TICK_SECS:	Number of seconds to average the cpu usage across.
 * SCHED_TICK_TARG:	Number of hz ticks to average the cpu usage across.
 * SCHED_TICK_MAX:	Maximum number of ticks before scaling back.
 * SCHED_TICK_SHIFT:	Shift factor to avoid rounding away results.
 * SCHED_TICK_HZ:	Compute the number of hz ticks for a given ticks count.
 * SCHED_TICK_TOTAL:	Gives the amount of time we've been recording ticks.
 */
#define	SCHED_TICK_SECS		10
#define	SCHED_TICK_TARG		(hz * SCHED_TICK_SECS)
#define	SCHED_TICK_MAX		(SCHED_TICK_TARG + hz)
#define	SCHED_TICK_SHIFT	10
#define	SCHED_TICK_HZ(ts)	((ts)->ts_ticks >> SCHED_TICK_SHIFT)
#define	SCHED_TICK_TOTAL(ts)	((ts)->ts_ltick - (ts)->ts_ftick)

/*
 * These macros determine priorities for non-interactive threads.  They are
 * assigned a priority based on their recent cpu utilization as expressed
 * by the ratio of ticks to the tick total.  NHALF priorities at the start
 * and end of the MIN to MAX timeshare range are only reachable with negative
 * or positive nice respectively.
 *
 * PRI_RANGE:	Priority range for utilization dependent priorities.
 * PRI_NRESV:	Number of nice values.
 * PRI_TICKS:	Compute a priority in PRI_RANGE from the ticks count and total.
 * PRI_NICE:	Determines the part of the priority inherited from nice.
 */
#define	SCHED_PRI_NRESV		(PRIO_MAX - PRIO_MIN)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_MIN		(PRI_MIN_TIMESHARE + SCHED_PRI_NHALF)
#define	SCHED_PRI_MAX		(PRI_MAX_TIMESHARE - SCHED_PRI_NHALF)
#define	SCHED_PRI_RANGE		(SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define	SCHED_PRI_TICKS(ts)						\
    (SCHED_TICK_HZ((ts)) /						\
    (max(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
#define	SCHED_PRI_NICE(nice)	(nice)
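/*
 * Illustrative numbers for the macros above, assuming hz = 1000 and the
 * stock priority layout where the timeshare range spans 64 priorities, so
 * SCHED_PRI_RANGE is 24 once SCHED_PRI_NHALF is reserved at each end:
 * a thread that was runnable for half of a full 10 second window has
 * SCHED_TICK_TOTAL = 10 * hz = 10000 and SCHED_TICK_HZ = 5000, giving
 * SCHED_PRI_TICKS = 5000 / (10000 / 24) = 12, i.e. half way through the
 * utilization dependent range before the SCHED_PRI_NICE() offset is added.
 */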

/*
 * These determine the interactivity of a process.  Interactivity differs from
 * cpu utilization in that it expresses the voluntary time slept vs time ran
 * while cpu utilization includes all time not running.  This more accurately
 * models the intent of the thread.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << SCHED_TICK_SHIFT)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << SCHED_TICK_SHIFT)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)
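/*
 * A rough sense of scale for the values above, assuming hz = 1000:
 * SCHED_SLP_RUN_MAX keeps at most five seconds worth of combined sleep and
 * run history (5 * hz, scaled up by SCHED_TICK_SHIFT to preserve precision),
 * and a thread is treated as interactive only while its score from
 * sched_interact_score() stays below SCHED_INTERACT_THRESH (30 out of a
 * possible 100).
 */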

/*
 * tickincr:		Converts a stathz tick into a hz domain scaled by
 *			the shift factor.  Without the shift the error rate
 *			due to rounding would be unacceptably high.
 * realstathz:		stathz is sometimes 0 and run off of hz.
 * sched_slice:		Runtime of each thread before rescheduling.
 */
static int sched_interact = SCHED_INTERACT_THRESH;
static int realstathz;
static int tickincr;
static int sched_slice;

/*
 * tdq - per processor runqs and statistics.
 */
struct tdq {
	struct runq	tdq_idle;		/* Queue of IDLE threads. */
	struct runq	tdq_timeshare;		/* timeshare run queue. */
	struct runq	tdq_realtime;		/* real-time run queue. */
	int		tdq_idx;		/* Current insert index. */
	int		tdq_ridx;		/* Current removal index. */
	int		tdq_load_timeshare;	/* Load for timeshare. */
	int		tdq_load;		/* Aggregate load. */
#ifdef SMP
	int		tdq_transferable;
	LIST_ENTRY(tdq)	tdq_siblings;		/* Next in tdq group. */
	struct tdq_group *tdq_group;		/* Our processor group. */
	volatile struct td_sched *tdq_assigned;	/* assigned by another CPU. */
#else
	int		tdq_sysload;		/* For loadavg, !ITHD load. */
#endif
};

#ifdef SMP
/*
 * tdq groups are groups of processors which can cheaply share threads.  When
 * one processor in the group goes idle it will check the runqs of the other
 * processors in its group prior to halting and waiting for an interrupt.
 * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
 * In a numa environment we'd want an idle bitmap per group and a two tiered
 * load balancer.
 */
struct tdq_group {
	int	tdg_cpus;		/* Count of CPUs in this tdq group. */
	cpumask_t tdg_cpumask;		/* Mask of cpus in this group. */
	cpumask_t tdg_idlemask;		/* Idle cpus in this group. */
	cpumask_t tdg_mask;		/* Bit mask for first cpu. */
	int	tdg_load;		/* Total load of this group. */
	int	tdg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, tdq) tdg_members;	/* Linked list of all members. */
};
#endif

/*
 * One thread queue per processor.
 */
#ifdef SMP
static cpumask_t tdq_idle;
static int tdg_maxid;
static struct tdq	tdq_cpu[MAXCPU];
static struct tdq_group tdq_groups[MAXCPU];
static int bal_tick;
static int gbal_tick;
static int balance_groups;

#define	TDQ_SELF()	(&tdq_cpu[PCPU_GET(cpuid)])
#define	TDQ_CPU(x)	(&tdq_cpu[(x)])
#define	TDQ_ID(x)	((x) - tdq_cpu)
#define	TDQ_GROUP(x)	(&tdq_groups[(x)])
#else	/* !SMP */
static struct tdq	tdq_cpu;

#define	TDQ_SELF()	(&tdq_cpu)
#define	TDQ_CPU(x)	(&tdq_cpu)
#endif

static struct td_sched *sched_choose(void);	/* XXX Should be thread * */
static void sched_priority(struct thread *);
static void sched_thread_priority(struct thread *, u_char);
static int sched_interact_score(struct thread *);
static void sched_interact_update(struct thread *);
static void sched_interact_fork(struct thread *);
static void sched_pctcpu_update(struct td_sched *);

/* Operations on per processor queues */
static struct td_sched * tdq_choose(struct tdq *);
static void tdq_setup(struct tdq *);
static void tdq_load_add(struct tdq *, struct td_sched *);
static void tdq_load_rem(struct tdq *, struct td_sched *);
static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
#ifdef SMP
static int tdq_transfer(struct tdq *, struct td_sched *, int);
static struct td_sched *runq_steal(struct runq *);
static void sched_balance(void);
static void sched_balance_groups(void);
static void sched_balance_group(struct tdq_group *);
static void sched_balance_pair(struct tdq *, struct tdq *);
static void sched_smp_tick(void);
static void tdq_move(struct tdq *, int);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct td_sched *, int);
static void tdq_assign(struct tdq *);
static struct td_sched *tdq_steal(struct tdq *, int);
#define	THREAD_CAN_MIGRATE(td)						\
    ((td)->td_pinned == 0 && (td)->td_pri_class != PRI_ITHD)
#endif

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL)

static void
runq_print(struct runq *rq)
{
	struct rqhead *rqh;
	struct td_sched *ts;
	int pri;
	int j;
	int i;

	for (i = 0; i < RQB_LEN; i++) {
		printf("\t\trunq bits %d 0x%zx\n",
		    i, rq->rq_status.rqb_bits[i]);
		for (j = 0; j < RQB_BPW; j++)
			if (rq->rq_status.rqb_bits[i] & (1ul << j)) {
				pri = j + (i << RQB_L2BPW);
				rqh = &rq->rq_queues[pri];
				TAILQ_FOREACH(ts, rqh, ts_procq) {
					printf("\t\t\ttd %p(%s) priority %d rqindex %d pri %d\n",
					    ts->ts_thread, ts->ts_thread->td_proc->p_comm,
					    ts->ts_thread->td_priority, ts->ts_rqindex, pri);
				}
			}
	}
}

void
tdq_print(int cpu)
{
	struct tdq *tdq;

	tdq = TDQ_CPU(cpu);

	printf("tdq:\n");
	printf("\tload:           %d\n", tdq->tdq_load);
	printf("\tload TIMESHARE: %d\n", tdq->tdq_load_timeshare);
	printf("\ttimeshare idx:  %d\n", tdq->tdq_idx);
	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
	printf("\trealtime runq:\n");
	runq_print(&tdq->tdq_realtime);
	printf("\ttimeshare runq:\n");
	runq_print(&tdq->tdq_timeshare);
	printf("\tidle runq:\n");
	runq_print(&tdq->tdq_idle);
#ifdef SMP
	printf("\tload transferable: %d\n", tdq->tdq_transferable);
#endif
}

static __inline void
tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
{
#ifdef SMP
	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
		tdq->tdq_transferable++;
		tdq->tdq_group->tdg_transferable++;
		ts->ts_flags |= TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		int pri;

		pri = ts->ts_thread->td_priority;
		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
		    ("Invalid priority %d on timeshare runq", pri));
		/*
		 * This queue contains only priorities between MIN and MAX
		 * realtime.  Use the whole queue to represent these values.
		 */
#define	TS_RQ_PPQ	(((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE) + 1) / RQ_NQS)
		if ((flags & SRQ_BORROWING) == 0) {
			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
			pri = (pri + tdq->tdq_idx) % RQ_NQS;
			/*
			 * This effectively shortens the queue by one so we
			 * can have a one slot difference between idx and
			 * ridx while we wait for threads to drain.
			 */
			if (tdq->tdq_ridx != tdq->tdq_idx &&
			    pri == tdq->tdq_ridx)
				pri = (pri - 1) % RQ_NQS;
		} else
			pri = tdq->tdq_ridx;
		runq_add_pri(ts->ts_runq, ts, pri, flags);
	} else
		runq_add(ts->ts_runq, ts, flags);
}
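/*
 * Worked example for the timeshare queue indexing above, assuming the usual
 * 64 entry runq (RQ_NQS == 64) and a 64 priority timeshare range, which
 * makes TS_RQ_PPQ equal 1:  a thread at priority PRI_MIN_TIMESHARE + 10
 * added while tdq_idx is 60 lands in queue (10 + 60) % 64 == 6, and if that
 * slot happens to equal tdq_ridx it is moved back one slot so the circular
 * queue never appears completely full while old entries drain.
 */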

static __inline void
tdq_runq_rem(struct tdq *tdq, struct td_sched *ts)
{
#ifdef SMP
	if (ts->ts_flags & TSF_XFERABLE) {
		tdq->tdq_transferable--;
		tdq->tdq_group->tdg_transferable--;
		ts->ts_flags &= ~TSF_XFERABLE;
	}
#endif
	if (ts->ts_runq == &tdq->tdq_timeshare) {
		if (tdq->tdq_idx != tdq->tdq_ridx)
			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
		else
			runq_remove_idx(ts->ts_runq, ts, NULL);
		/*
		 * For timeshare threads we update the priority here so
		 * the priority reflects the time we've been sleeping.
		 */
		ts->ts_ltick = ticks;
		sched_pctcpu_update(ts);
		sched_priority(ts->ts_thread);
	} else
		runq_remove(ts->ts_runq, ts);
}

static void
tdq_load_add(struct tdq *tdq, struct td_sched *ts)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->tdq_load_timeshare++;
	tdq->tdq_load++;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load++;
#else
		tdq->tdq_sysload++;
#endif
}

static void
tdq_load_rem(struct tdq *tdq, struct td_sched *ts)
{
	int class;
	mtx_assert(&sched_lock, MA_OWNED);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if (class == PRI_TIMESHARE)
		tdq->tdq_load_timeshare--;
	if (class != PRI_ITHD && (ts->ts_thread->td_proc->p_flag & P_NOLOAD) == 0)
#ifdef SMP
		tdq->tdq_group->tdg_load--;
#else
		tdq->tdq_sysload--;
#endif
	tdq->tdq_load--;
	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
	ts->ts_runq = NULL;
}

#ifdef SMP
static void
sched_smp_tick(void)
{
	struct tdq *tdq;

	tdq = TDQ_SELF();
	if (ticks >= bal_tick)
		sched_balance();
	if (ticks >= gbal_tick && balance_groups)
		sched_balance_groups();
	/*
	 * We could have been assigned a non real-time thread without an
	 * IPI.
	 */
	if (tdq->tdq_assigned)
		tdq_assign(tdq);	/* Potentially sets NEEDRESCHED */
}

/*
 * sched_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm chooses
 * algorithm simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi random algorithm below may work as well as any.
 *
 */
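/*
 * For example (illustrative numbers only):  with the most loaded group
 * carrying a load of 7 and the least loaded carrying 2, sched_balance_pair()
 * below computes diff = 5 and move = 3 (diff / 2 rounded up), then clips
 * move to the group's transferable count before calling tdq_move() that
 * many times.
 */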
static void
sched_balance(void)
{
	struct tdq_group *high;
	struct tdq_group *low;
	struct tdq_group *tdg;
	int cnt;
	int i;

	bal_tick = ticks + (random() % (hz * 2));
	if (smp_started == 0)
		return;
	low = high = NULL;
	i = random() % (tdg_maxid + 1);
	for (cnt = 0; cnt <= tdg_maxid; cnt++) {
		tdg = TDQ_GROUP(i);
		/*
		 * Find the CPU with the highest load that has some
		 * threads to transfer.
		 */
		if ((high == NULL || tdg->tdg_load > high->tdg_load)
		    && tdg->tdg_transferable)
			high = tdg;
		if (low == NULL || tdg->tdg_load < low->tdg_load)
			low = tdg;
		if (++i > tdg_maxid)
			i = 0;
	}
	if (low != NULL && high != NULL && high != low)
		sched_balance_pair(LIST_FIRST(&high->tdg_members),
		    LIST_FIRST(&low->tdg_members));
}

static void
sched_balance_groups(void)
{
	int i;

	gbal_tick = ticks + (random() % (hz * 2));
	mtx_assert(&sched_lock, MA_OWNED);
	if (smp_started)
		for (i = 0; i <= tdg_maxid; i++)
			sched_balance_group(TDQ_GROUP(i));
}

static void
sched_balance_group(struct tdq_group *tdg)
{
	struct tdq *tdq;
	struct tdq *high;
	struct tdq *low;
	int load;

	if (tdg->tdg_transferable == 0)
		return;
	low = NULL;
	high = NULL;
	LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
		load = tdq->tdq_load;
		if (high == NULL || load > high->tdq_load)
			high = tdq;
		if (low == NULL || load < low->tdq_load)
			low = tdq;
	}
	if (high != NULL && low != NULL && high != low)
		sched_balance_pair(high, low);
}

static void
sched_balance_pair(struct tdq *high, struct tdq *low)
{
	int transferable;
	int high_load;
	int low_load;
	int move;
	int diff;
	int i;

	/*
	 * If we're transferring within a group we have to use this specific
	 * tdq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
	if (high->tdq_group == low->tdq_group) {
		transferable = high->tdq_transferable;
		high_load = high->tdq_load;
		low_load = low->tdq_load;
	} else {
		transferable = high->tdq_group->tdg_transferable;
		high_load = high->tdq_group->tdg_load;
		low_load = low->tdq_group->tdg_load;
	}
	if (transferable == 0)
		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * threads we actually have to give up (transferable).
	 */
	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
		tdq_move(high, TDQ_ID(low));
	return;
}

static void
tdq_move(struct tdq *from, int cpu)
{
	struct tdq *tdq;
	struct tdq *to;
	struct td_sched *ts;

	tdq = from;
	to = TDQ_CPU(cpu);
	ts = tdq_steal(tdq, 1);
	if (ts == NULL) {
		struct tdq_group *tdg;

		tdg = tdq->tdq_group;
		LIST_FOREACH(tdq, &tdg->tdg_members, tdq_siblings) {
			if (tdq == from || tdq->tdq_transferable == 0)
				continue;
			ts = tdq_steal(tdq, 1);
			break;
		}
		if (ts == NULL)
			panic("tdq_move: No threads available with a "
			    "transferable count of %d\n",
			    tdg->tdg_transferable);
	}
	if (tdq == to)
		return;
	ts->ts_state = TSS_THREAD;
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
	tdq_notify(ts, cpu);
}

static int
tdq_idled(struct tdq *tdq)
{
	struct tdq_group *tdg;
	struct tdq *steal;
	struct td_sched *ts;

	tdg = tdq->tdq_group;
	/*
	 * If we're in a cpu group, try and steal threads from another cpu in
	 * the group before idling.
	 */
	if (tdg->tdg_cpus > 1 && tdg->tdg_transferable) {
		LIST_FOREACH(steal, &tdg->tdg_members, tdq_siblings) {
			if (steal == tdq || steal->tdq_transferable == 0)
				continue;
			ts = tdq_steal(steal, 0);
			if (ts == NULL)
				continue;
			ts->ts_state = TSS_THREAD;
			tdq_runq_rem(steal, ts);
			tdq_load_rem(steal, ts);
			ts->ts_cpu = PCPU_GET(cpuid);
			ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
			sched_add(ts->ts_thread, SRQ_YIELDING);
			return (0);
		}
	}
	/*
	 * We only set the idled bit when all of the cpus in the group are
	 * idle.  Otherwise we could get into a situation where a thread bounces
	 * back and forth between two idle cores on separate physical CPUs.
	 */
	tdg->tdg_idlemask |= PCPU_GET(cpumask);
	if (tdg->tdg_idlemask != tdg->tdg_cpumask)
		return (1);
	atomic_set_int(&tdq_idle, tdg->tdg_mask);
	return (1);
}

static void
tdq_assign(struct tdq *tdq)
{
	struct td_sched *nts;
	struct td_sched *ts;

	do {
		*(volatile struct td_sched **)&ts = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
		(uintptr_t)ts, (uintptr_t)NULL));
	for (; ts != NULL; ts = nts) {
		nts = ts->ts_assign;
		tdq->tdq_group->tdg_load--;
		tdq->tdq_load--;
		ts->ts_flags &= ~TSF_ASSIGNED;
		if (ts->ts_flags & TSF_REMOVED) {
			ts->ts_flags &= ~TSF_REMOVED;
			continue;
		}
		ts->ts_flags |= TSF_INTERNAL | TSF_HOLD;
		sched_add(ts->ts_thread, SRQ_YIELDING);
	}
}

static void
tdq_notify(struct td_sched *ts, int cpu)
{
	struct tdq *tdq;
	struct thread *td;
	struct pcpu *pcpu;
	int class;
	int prio;

	tdq = TDQ_CPU(cpu);
	class = PRI_BASE(ts->ts_thread->td_pri_class);
	if ((class != PRI_IDLE && class != PRI_ITHD)
	    && (tdq_idle & tdq->tdq_group->tdg_mask))
		atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
	tdq->tdq_group->tdg_load++;
	tdq->tdq_load++;
	ts->ts_cpu = cpu;
	ts->ts_flags |= TSF_ASSIGNED;
	prio = ts->ts_thread->td_priority;

	/*
	 * Place a thread on another cpu's queue and force a resched.
	 */
	do {
		*(volatile struct td_sched **)&ts->ts_assign = tdq->tdq_assigned;
	} while(!atomic_cmpset_ptr((volatile uintptr_t *)&tdq->tdq_assigned,
		(uintptr_t)ts->ts_assign, (uintptr_t)ts));
	/*
	 * Without sched_lock we could lose a race where we set NEEDRESCHED
	 * on a thread that is switched out before the IPI is delivered.  This
	 * would lead us to miss the resched.  This will be a problem once
	 * sched_lock is pushed down.
	 */
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ts->ts_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct td_sched *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct td_sched *ts;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1ul << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ts, rqh, ts_procq) {
				if (THREAD_CAN_MIGRATE(ts->ts_thread))
					return (ts);
			}
		}
	}
	return (NULL);
}

static struct td_sched *
tdq_steal(struct tdq *tdq, int stealidle)
{
	struct td_sched *ts;

	/*
	 * Steal from next first to try to get a non-interactive task that
	 * may not have run for a while.
	 * XXX Need to effect steal order for timeshare threads.
	 */
	if ((ts = runq_steal(&tdq->tdq_realtime)) != NULL)
		return (ts);
	if ((ts = runq_steal(&tdq->tdq_timeshare)) != NULL)
		return (ts);
	if (stealidle)
		return (runq_steal(&tdq->tdq_idle));
	return (NULL);
}

int
tdq_transfer(struct tdq *tdq, struct td_sched *ts, int class)
{
	struct tdq_group *ntdg;
	struct tdq_group *tdg;
	struct tdq *old;
	int cpu;
	int idx;

	if (smp_started == 0)
		return (0);
	cpu = 0;
	/*
	 * If our load exceeds a certain threshold we should attempt to
	 * reassign this thread.  The first candidate is the cpu that
	 * originally ran the thread.  If it is idle, assign it there,
	 * otherwise, pick an idle cpu.
	 *
	 * The threshold at which we start to reassign has a large impact
	 * on the overall performance of the system.  Tuned too high and
	 * some CPUs may idle.  Too low and there will be excess migration
	 * and context switches.
	 */
	old = TDQ_CPU(ts->ts_cpu);
	ntdg = old->tdq_group;
	tdg = tdq->tdq_group;
	if (tdq_idle) {
		if (tdq_idle & ntdg->tdg_mask) {
			cpu = ffs(ntdg->tdg_idlemask);
			if (cpu) {
				CTR2(KTR_SCHED,
				    "tdq_transfer: %p found old cpu %X "
				    "in idlemask.", ts, cpu);
				goto migrate;
			}
		}
		/*
		 * Multiple cpus could find this bit simultaneously
		 * but the race shouldn't be terrible.
		 */
		cpu = ffs(tdq_idle);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p found %X "
			    "in idlemask.", ts, cpu);
			goto migrate;
		}
	}
	idx = 0;
#if 0
	if (old->tdq_load < tdq->tdq_load) {
		cpu = ts->ts_cpu + 1;
		CTR2(KTR_SCHED, "tdq_transfer: %p old cpu %X "
		    "load less than ours.", ts, cpu);
		goto migrate;
	}
	/*
	 * No new CPU was found, look for one with less load.
	 */
	for (idx = 0; idx <= tdg_maxid; idx++) {
		ntdg = TDQ_GROUP(idx);
		if (ntdg->tdg_load /*+ (ntdg->tdg_cpus * 2)*/ < tdg->tdg_load) {
			cpu = ffs(ntdg->tdg_cpumask);
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X load less "
			    "than ours.", ts, cpu);
			goto migrate;
		}
	}
#endif
	/*
	 * If another cpu in this group has idled, assign a thread over
	 * to them after checking to see if there are idled groups.
	 */
	if (tdg->tdg_idlemask) {
		cpu = ffs(tdg->tdg_idlemask);
		if (cpu) {
			CTR2(KTR_SCHED, "tdq_transfer: %p cpu %X idle in "
			    "group.", ts, cpu);
			goto migrate;
		}
	}
	return (0);
migrate:
	/*
	 * Now that we've found an idle CPU, migrate the thread.
	 */
	cpu--;
	ts->ts_runq = NULL;
	tdq_notify(ts, cpu);

	return (1);
}

#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */

static struct td_sched *
tdq_choose(struct tdq *tdq)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	ts = runq_choose(&tdq->tdq_realtime);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_REALTIME,
		    ("tdq_choose: Invalid priority on realtime queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}
	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority <= PRI_MAX_TIMESHARE &&
		    ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
		    ("tdq_choose: Invalid priority on timeshare queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	ts = runq_choose(&tdq->tdq_idle);
	if (ts != NULL) {
		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
		    ("tdq_choose: Invalid priority on idle queue %d",
		    ts->ts_thread->td_priority));
		return (ts);
	}

	return (NULL);
}

static void
tdq_setup(struct tdq *tdq)
{
	runq_init(&tdq->tdq_realtime);
	runq_init(&tdq->tdq_timeshare);
	runq_init(&tdq->tdq_idle);
	tdq->tdq_load = 0;
	tdq->tdq_load_timeshare = 0;
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	/*
	 * To avoid divide-by-zero, set realstathz to a dummy value here in
	 * case sched_clock() is called before sched_initticks().
	 */
	realstathz = hz;
	sched_slice = (realstathz/7);	/* 140ms */
	tickincr = 1 << SCHED_TICK_SHIFT;

#ifdef SMP
	balance_groups = 0;
	/*
	 * Initialize the tdqs.
	 */
	for (i = 0; i < MAXCPU; i++) {
		struct tdq *tdq;

		tdq = &tdq_cpu[i];
		tdq->tdq_assigned = NULL;
		tdq_setup(&tdq_cpu[i]);
	}
	if (smp_topology == NULL) {
		struct tdq_group *tdg;
		struct tdq *tdq;
		int cpus;

		for (cpus = 0, i = 0; i < MAXCPU; i++) {
			if (CPU_ABSENT(i))
				continue;
			tdq = &tdq_cpu[i];
			tdg = &tdq_groups[cpus];
			/*
			 * Setup a tdq group with one member.
			 */
			tdq->tdq_transferable = 0;
			tdq->tdq_group = tdg;
			tdg->tdg_cpus = 1;
			tdg->tdg_idlemask = 0;
			tdg->tdg_cpumask = tdg->tdg_mask = 1 << i;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			LIST_INIT(&tdg->tdg_members);
			LIST_INSERT_HEAD(&tdg->tdg_members, tdq, tdq_siblings);
			cpus++;
		}
		tdg_maxid = cpus - 1;
	} else {
		struct tdq_group *tdg;
		struct cpu_group *cg;
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			cg = &smp_topology->ct_group[i];
			tdg = &tdq_groups[i];
			/*
			 * Initialize the group.
			 */
			tdg->tdg_idlemask = 0;
			tdg->tdg_load = 0;
			tdg->tdg_transferable = 0;
			tdg->tdg_cpus = cg->cg_count;
			tdg->tdg_cpumask = cg->cg_mask;
			LIST_INIT(&tdg->tdg_members);
			/*
			 * Find all of the group members and add them.
			 */
			for (j = 0; j < MAXCPU; j++) {
				if ((cg->cg_mask & (1 << j)) != 0) {
					if (tdg->tdg_mask == 0)
						tdg->tdg_mask = 1 << j;
					tdq_cpu[j].tdq_transferable = 0;
					tdq_cpu[j].tdq_group = tdg;
					LIST_INSERT_HEAD(&tdg->tdg_members,
					    &tdq_cpu[j], tdq_siblings);
				}
			}
			if (tdg->tdg_cpus > 1)
				balance_groups = 1;
		}
		tdg_maxid = smp_topology->ct_count - 1;
	}
	/*
	 * Stagger the group and global load balancer so they do not
	 * interfere with each other.
	 */
	bal_tick = ticks + hz;
	if (balance_groups)
		gbal_tick = ticks + (hz / 2);
#else
	tdq_setup(TDQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	tdq_load_add(TDQ_SELF(), &td_sched0);
	mtx_unlock_spin(&sched_lock);
}

/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
	mtx_lock_spin(&sched_lock);
	realstathz = stathz ? stathz : hz;
	sched_slice = (realstathz/7);	/* ~140ms */

	/*
	 * tickincr is shifted out by 10 to avoid rounding errors due to
	 * hz not being evenly divisible by stathz on all platforms.
	 */
	tickincr = (hz << SCHED_TICK_SHIFT) / realstathz;
	/*
	 * This does not work for values of stathz that are more than
	 * 1 << SCHED_TICK_SHIFT * hz.  In practice this does not happen.
	 */
	if (tickincr == 0)
		tickincr = 1;
	mtx_unlock_spin(&sched_lock);
}
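/*
 * Example of the tickincr computation above, assuming hz = 1000 and
 * stathz = 128:  tickincr = (1000 << 10) / 128 = 8000, so every stathz tick
 * adds 8000 to the shifted hz-domain counters (exactly 7.8125 hz ticks) and
 * the rounding error stays far below one hz tick.
 */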

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct thread *td)
{
	int score;
	int pri;

	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * If the score is interactive we place the thread in the realtime
	 * queue with a priority that is less than kernel and interrupt
	 * priorities.  These threads are not subject to nice restrictions.
	 *
	 * Scores greater than this are placed on the normal realtime queue
	 * where the priority is partially decided by the most recent cpu
	 * utilization and the rest is decided by nice value.
	 */
	score = sched_interact_score(td);
	if (score < sched_interact) {
		pri = PRI_MIN_REALTIME;
		pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact)
		    * score;
		KASSERT(pri >= PRI_MIN_REALTIME && pri <= PRI_MAX_REALTIME,
		    ("sched_priority: invalid interactive priority %d", pri));
	} else {
		pri = SCHED_PRI_MIN;
		if (td->td_sched->ts_ticks)
			pri += SCHED_PRI_TICKS(td->td_sched);
		pri += SCHED_PRI_NICE(td->td_proc->p_nice);
		if (!(pri >= PRI_MIN_TIMESHARE && pri <= PRI_MAX_TIMESHARE)) {
			static int once = 1;
			if (once) {
				printf("sched_priority: invalid priority %d",
				    pri);
				printf("nice %d, ticks %d ftick %d ltick %d tick pri %d\n",
				    td->td_proc->p_nice,
				    td->td_sched->ts_ticks,
				    td->td_sched->ts_ftick,
				    td->td_sched->ts_ltick,
				    SCHED_PRI_TICKS(td->td_sched));
				once = 0;
			}
			pri = min(max(pri, PRI_MIN_TIMESHARE),
			    PRI_MAX_TIMESHARE);
		}
	}
	sched_user_prio(td, pri);

	return;
}
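/*
 * Illustrative mapping performed by sched_priority() above:  a thread with
 * an interactivity score of 10 (below the default threshold of 30) is placed
 * roughly a third of the way into the realtime range, independent of nice;
 * a non-interactive thread that used about half of its measured window gets
 * roughly SCHED_PRI_MIN + SCHED_PRI_RANGE / 2 plus its nice value, clamped
 * to the timeshare range.
 */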

/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct thread *td)
{
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX]
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		td->td_sched->skg_runtime /= 2;
		td->td_sched->skg_slptime /= 2;
		return;
	}
	td->td_sched->skg_runtime = (td->td_sched->skg_runtime / 5) * 4;
	td->td_sched->skg_slptime = (td->td_sched->skg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct thread *td)
{
	int ratio;
	int sum;

	sum = td->td_sched->skg_runtime + td->td_sched->skg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		td->td_sched->skg_runtime /= ratio;
		td->td_sched->skg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct thread *td)
{
	int div;

	if (td->td_sched->skg_runtime > td->td_sched->skg_slptime) {
		div = max(1, td->td_sched->skg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (td->td_sched->skg_slptime / div)));
	} if (td->td_sched->skg_slptime > td->td_sched->skg_runtime) {
		div = max(1, td->td_sched->skg_slptime / SCHED_INTERACT_HALF);
		return (td->td_sched->skg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);

}
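/*
 * Worked examples for sched_interact_score() above:  a thread that has
 * voluntarily slept three times as long as it has run scores about
 * 50 / 3 = 16 and is considered interactive at the default threshold of 30;
 * one that has run three times as long as it has slept scores about
 * 50 + (50 - 16) = 84 and is scheduled purely by cpu utilization and nice.
 */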

/*
 * Called from proc0_init() to bootstrap the scheduler.
 */
void
schedinit(void)
{

	/*
	 * Set up the scheduler specific parts of proc0.
	 */
	proc0.p_sched = NULL; /* XXX */
	thread0.td_sched = &td_sched0;
	td_sched0.ts_ltick = ticks;
	td_sched0.ts_ftick = ticks;
	td_sched0.ts_thread = &thread0;
	td_sched0.ts_state = TSS_THREAD;
}

/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most sched_slice stathz ticks.
 */
int
sched_rr_interval(void)
{

	/* Convert sched_slice to hz */
	return (hz/(realstathz/sched_slice));
}

static void
sched_pctcpu_update(struct td_sched *ts)
{

	if (ts->ts_ticks == 0)
		return;
	if (ticks - (hz / 10) < ts->ts_ltick &&
	    SCHED_TICK_TOTAL(ts) < SCHED_TICK_MAX)
		return;
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ts->ts_ltick > ticks - SCHED_TICK_TARG)
		ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) *
		    SCHED_TICK_TARG;
	else
		ts->ts_ticks = 0;
	ts->ts_ltick = ticks;
	ts->ts_ftick = ts->ts_ltick - SCHED_TICK_TARG;
}

static void
sched_thread_priority(struct thread *td, u_char prio)
{
	struct td_sched *ts;

	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
	    curthread->td_proc->p_comm);
	ts = td->td_sched;
	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_priority == prio)
		return;

	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  This could be optimized to not re-add in some
		 * cases.
		 *
		 * Hold this td_sched on this cpu so that sched_prio() doesn't
		 * cause excessive migration.  We only want migration to
		 * happen as the result of a wakeup.
		 */
		ts->ts_flags |= TSF_HOLD;
		sched_rem(td);
		td->td_priority = prio;
		sched_add(td, SRQ_BORROWING);
		ts->ts_flags &= ~TSF_HOLD;
	} else
		td->td_priority = prio;
}

/*
 * Update a thread's priority when it is lent another thread's
 * priority.
 */
void
sched_lend_prio(struct thread *td, u_char prio)
{

	td->td_flags |= TDF_BORROWING;
	sched_thread_priority(td, prio);
}

/*
 * Restore a thread's priority when priority propagation is
 * over.  The prio argument is the minimum priority the thread
 * needs to have to satisfy other possible priority lending
 * requests.  If the thread's regular priority is less
 * important than prio, the thread will keep a priority boost
 * of prio.
 */
void
sched_unlend_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
	    td->td_base_pri <= PRI_MAX_TIMESHARE)
		base_pri = td->td_user_pri;
	else
		base_pri = td->td_base_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_BORROWING;
		sched_thread_priority(td, base_pri);
	} else
		sched_lend_prio(td, prio);
}

void
sched_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	/* First, update the base priority. */
	td->td_base_pri = prio;

	/*
	 * If the thread is borrowing another thread's priority, don't
	 * ever lower the priority.
	 */
	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
		return;

	/* Change the real priority. */
	oldprio = td->td_priority;
	sched_thread_priority(td, prio);

	/*
	 * If the thread is on a turnstile, then let the turnstile update
	 * its state.
	 */
	if (TD_ON_LOCK(td) && oldprio != prio)
		turnstile_adjust(td, oldprio);
}

void
sched_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_base_user_pri = prio;
	if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
		return;
	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_lend_user_prio(struct thread *td, u_char prio)
{
	u_char oldprio;

	td->td_flags |= TDF_UBORROWING;

	oldprio = td->td_user_pri;
	td->td_user_pri = prio;

	if (TD_ON_UPILOCK(td) && oldprio != prio)
		umtx_pi_adjust(td, oldprio);
}

void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
	u_char base_pri;

	base_pri = td->td_base_user_pri;
	if (prio >= base_pri) {
		td->td_flags &= ~TDF_UBORROWING;
		sched_user_prio(td, base_pri);
	} else
		sched_lend_user_prio(td, prio);
}

void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);

	tdq = TDQ_SELF();
	ts = td->td_sched;
	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;
	td->td_owepreempt = 0;
	/*
	 * If the thread has been assigned it may be in the process of switching
	 * to the new cpu.  This is the case in sched_bind().
	 */
	if (td == PCPU_GET(idlethread)) {
		TD_SET_CAN_RUN(td);
	} else if ((ts->ts_flags & TSF_ASSIGNED) == 0) {
		/* We are ending our run so make our slot available again */
		tdq_load_rem(tdq, ts);
		if (TD_IS_RUNNING(td)) {
			/*
			 * Don't allow the thread to migrate
			 * from a preemption.
			 */
			ts->ts_flags |= TSF_HOLD;
			setrunqueue(td, (flags & SW_PREEMPT) ?
			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
			    SRQ_OURSELF|SRQ_YIELDING);
			ts->ts_flags &= ~TSF_HOLD;
		}
	}
	if (newtd != NULL) {
		/*
		 * If we bring in a thread account for it as if it had been
		 * added to the run queue and then chosen.
		 */
		newtd->td_sched->ts_flags |= TSF_DIDRUN;
		TD_SET_RUNNING(newtd);
		tdq_load_add(TDQ_SELF(), newtd->td_sched);
	} else
		newtd = choosethread();
	if (td != newtd) {
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif

		cpu_switch(td, newtd);
#ifdef HWPMC_HOOKS
		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
	}
	sched_lock.mtx_lock = (uintptr_t)td;
	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct proc *p, int nice)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);

	p->p_nice = nice;
	FOREACH_THREAD_IN_PROC(p, td) {
		sched_priority(td);
		sched_prio(td, td->td_base_user_pri);
	}
}

void
sched_sleep(struct thread *td)
{

	mtx_assert(&sched_lock, MA_OWNED);

	td->td_sched->ts_slptime = ticks;
}

void
sched_wakeup(struct thread *td)
{
	int slptime;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * If we slept for more than a tick update our interactivity and
	 * priority.
	 */
	slptime = td->td_sched->ts_slptime;
	td->td_sched->ts_slptime = 0;
	if (slptime && slptime != ticks) {
		int hzticks;

		hzticks = (ticks - slptime) << SCHED_TICK_SHIFT;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			td->td_sched->skg_slptime = SCHED_SLP_RUN_MAX;
			td->td_sched->skg_runtime = 1;
		} else {
			td->td_sched->skg_slptime += hzticks;
			sched_interact_update(td);
		}
		sched_pctcpu_update(td->td_sched);
		sched_priority(td);
	}
	setrunqueue(td, SRQ_BORING);
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_fork_thread(td, child);
	/*
	 * Penalize the parent and child for forking.
/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct thread *td, struct thread *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_fork_thread(td, child);
	/*
	 * Penalize the parent and child for forking.
	 */
	sched_interact_fork(child);
	sched_priority(child);
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);
	sched_priority(td);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
	struct td_sched *ts;
	struct td_sched *ts2;

	/*
	 * Initialize child.
	 */
	sched_newthread(child);
	ts = td->td_sched;
	ts2 = child->td_sched;
	ts2->ts_cpu = ts->ts_cpu;
	ts2->ts_runq = NULL;
	/*
	 * Grab our parent's cpu estimation information and priority.
	 */
	ts2->ts_ticks = ts->ts_ticks;
	ts2->ts_ltick = ts->ts_ltick;
	ts2->ts_ftick = ts->ts_ftick;
	child->td_user_pri = td->td_user_pri;
	child->td_base_user_pri = td->td_base_user_pri;
	/*
	 * And update interactivity score.
	 */
	ts2->skg_slptime = ts->skg_slptime;
	ts2->skg_runtime = ts->skg_runtime;
	ts2->ts_slice = 1;	/* Attempt to quickly learn interactivity. */
}
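/*
 * A note on the fork path above (descriptive only): the child starts from a
 * copy of the parent's pctcpu tick window and interactivity history, which
 * sched_interact_fork() then penalizes, and ts_slice is forced to 1 so the
 * child is re-evaluated after a single slice.  The parent is charged one
 * tickincr of extra run time per fork, so a process that forks in a tight
 * loop steadily loses interactivity while one that forks occasionally is
 * barely affected.
 */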
void
sched_class(struct thread *td, int class)
{
	struct tdq *tdq;
	struct td_sched *ts;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (td->td_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(td->td_pri_class);
	ts = td->td_sched;
	if (ts->ts_state == TSS_ONRUNQ || td->td_state == TDS_RUNNING) {
		tdq = TDQ_CPU(ts->ts_cpu);
#ifdef SMP
		/*
		 * On SMP if we're on the RUNQ we must adjust the transferable
		 * count because we could be changing to or from an interrupt
		 * class.
		 */
		if (ts->ts_state == TSS_ONRUNQ) {
			if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
				tdq->tdq_transferable--;
				tdq->tdq_group->tdg_transferable--;
			}
			if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
				tdq->tdq_transferable++;
				tdq->tdq_group->tdg_transferable++;
			}
		}
#endif
		if (oclass == PRI_TIMESHARE)
			tdq->tdq_load_timeshare--;
		if (nclass == PRI_TIMESHARE)
			tdq->tdq_load_timeshare++;
	}

	td->td_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct thread *child)
{
	struct thread *td;

	CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	td = FIRST_THREAD_IN_PROC(p);
	sched_exit_thread(td, child);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{

	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
	    child, child->td_proc->p_comm, child->td_priority);

	tdq_load_rem(TDQ_CPU(child->td_sched->ts_cpu), child->td_sched);
#ifdef KSE
	/*
	 * KSE forks and exits so often that this penalty causes short-lived
	 * threads to always be non-interactive.  This causes mozilla to
	 * crawl under load.
	 */
	if ((td->td_pflags & TDP_SA) && td->td_proc == child->td_proc)
		return;
#endif
	/*
	 * Give the child's runtime to the parent without returning the
	 * sleep time as a penalty to the parent.  This causes shells that
	 * launch expensive things to mark their children as expensive.
	 */
	td->td_sched->skg_runtime += child->td_sched->skg_runtime;
	sched_interact_update(td);
	sched_priority(td);
}

void
sched_userret(struct thread *td)
{
	/*
	 * XXX we cheat slightly on the locking here to avoid locking in
	 * the usual case.  Setting td_priority here is essentially an
	 * incomplete workaround for not setting it properly elsewhere.
	 * Now that some interrupt handlers are threads, not setting it
	 * properly elsewhere can clobber it in the window between setting
	 * it here and returning to user mode, so don't waste time setting
	 * it perfectly here.
	 */
	KASSERT((td->td_flags & TDF_BORROWING) == 0,
	    ("thread with borrowed priority returning to userland"));
	if (td->td_priority != td->td_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = td->td_user_pri;
		td->td_base_pri = td->td_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}
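/*
 * A descriptive note on the tick handler below: tdq_idx is the circular
 * insertion index and tdq_ridx the removal index into the RQ_NQS timeshare
 * queues.  While the two indices coincide, the insertion index advances one
 * queue per tick, and the removal index follows only once its current queue
 * has been emptied, so threads that are already queued drain before the
 * insertion point wraps around onto them.
 */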
void
sched_clock(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef SMP
	sched_smp_tick();
#endif
	tdq = TDQ_SELF();
	/*
	 * Advance the insert index once for each tick to ensure that all
	 * threads get a chance to run.
	 */
	if (tdq->tdq_idx == tdq->tdq_ridx) {
		tdq->tdq_idx = (tdq->tdq_idx + 1) % RQ_NQS;
		if (TAILQ_EMPTY(&tdq->tdq_timeshare.rq_queues[tdq->tdq_ridx]))
			tdq->tdq_ridx = tdq->tdq_idx;
	}
	/* Adjust ticks for pctcpu. */
	ts = td->td_sched;
	ts->ts_ticks += tickincr;
	ts->ts_ltick = ticks;
	/*
	 * Update if we've exceeded our desired tick threshold by over one
	 * second.
	 */
	if (ts->ts_ftick + SCHED_TICK_MAX < ts->ts_ltick)
		sched_pctcpu_update(ts);
	/*
	 * We only do slicing code for TIMESHARE threads.
	 */
	if (td->td_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the thread so that we can compute our
	 * interactivity.
	 */
	td->td_sched->skg_runtime += tickincr;
	sched_interact_update(td);
	/*
	 * We used up one time slice.
	 */
	if (--ts->ts_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	sched_priority(td);
	tdq_load_rem(tdq, ts);
	ts->ts_slice = sched_slice;
	tdq_load_add(tdq, ts);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct tdq *tdq;
	int load;

	load = 1;

	tdq = TDQ_SELF();
#ifdef SMP
	if (tdq->tdq_assigned) {
		mtx_lock_spin(&sched_lock);
		tdq_assign(tdq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (tdq->tdq_load > 0)
			goto out;
	} else
		if (tdq->tdq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

struct td_sched *
sched_choose(void)
{
	struct tdq *tdq;
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
#ifdef SMP
restart:
	if (tdq->tdq_assigned)
		tdq_assign(tdq);
#endif
	ts = tdq_choose(tdq);
	if (ts) {
#ifdef SMP
		if (ts->ts_thread->td_priority <= PRI_MIN_IDLE)
			if (tdq_idled(tdq) == 0)
				goto restart;
#endif
		tdq_runq_rem(tdq, ts);
		ts->ts_state = TSS_THREAD;
		return (ts);
	}
#ifdef SMP
	if (tdq_idled(tdq) == 0)
		goto restart;
#endif
	return (NULL);
}
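/*
 * A note on the load test in sched_runnable() above (descriptive only): the
 * caller's own thread is included in tdq_load, so an ordinary thread only
 * reports runnable work when there is more than one unit of load, while the
 * idle thread reports it for any non-zero load.
 */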
void
sched_add(struct thread *td, int flags)
{
	struct tdq *tdq;
	struct td_sched *ts;
	int preemptive;
	int canmigrate;
	int class;

	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	tdq = TDQ_SELF();
	ts = td->td_sched;
	ts->ts_flags &= ~TSF_INTERNAL;
	class = PRI_BASE(td->td_pri_class);
	preemptive = !(flags & SRQ_YIELDING);
	canmigrate = 1;
#ifdef SMP
	if (ts->ts_flags & TSF_ASSIGNED) {
		if (ts->ts_flags & TSF_REMOVED)
			ts->ts_flags &= ~TSF_REMOVED;
		return;
	}
	canmigrate = THREAD_CAN_MIGRATE(td);
	/*
	 * Don't migrate running threads here.  Force the long term balancer
	 * to do it.
	 */
	if (ts->ts_flags & TSF_HOLD) {
		ts->ts_flags &= ~TSF_HOLD;
		canmigrate = 0;
	}
#endif
	KASSERT(ts->ts_state != TSS_ONRUNQ,
	    ("sched_add: thread %p (%s) already in run queue", td,
	    td->td_proc->p_comm));
	KASSERT(td->td_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ts->ts_runq == NULL,
	    ("sched_add: thread %p is still assigned to a run queue", td));
	/*
	 * Set the slice and pick the run queue.
	 */
	if (ts->ts_slice == 0)
		ts->ts_slice = sched_slice;
	if (class == PRI_TIMESHARE)
		sched_priority(td);
	if (td->td_priority <= PRI_MAX_REALTIME) {
		ts->ts_runq = &tdq->tdq_realtime;
		/*
		 * If the thread is not artificially pinned and it's in
		 * the realtime queue we directly dispatch it on this cpu
		 * for minimum latency.  Interrupt handlers may also have
		 * to complete on the cpu that dispatched them.
		 */
		if (td->td_pinned == 0)
			ts->ts_cpu = PCPU_GET(cpuid);
	} else if (td->td_priority <= PRI_MAX_TIMESHARE)
		ts->ts_runq = &tdq->tdq_timeshare;
	else
		ts->ts_runq = &tdq->tdq_idle;

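	/*
	 * Summary of the SMP placement below (a gloss on the existing code,
	 * not additional policy): a thread that may not migrate and has
	 * affinity for another cpu is forwarded there with tdq_notify();
	 * otherwise, if this cpu's group was marked idle the idle bitmasks
	 * are cleared, and failing that a queue with more than one unit of
	 * load may hand the thread to another cpu via tdq_transfer() before
	 * it is finally enqueued locally.
	 */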
#ifdef SMP
	/*
	 * If this thread is pinned or bound, notify the target cpu.
	 */
	if (!canmigrate && ts->ts_cpu != PCPU_GET(cpuid)) {
		ts->ts_runq = NULL;
		tdq_notify(ts, ts->ts_cpu);
		return;
	}
	/*
	 * If we had been idle, clear our bit in the group and potentially
	 * the global bitmap.  If not, see if we should transfer this thread.
	 */
	if ((class != PRI_IDLE && class != PRI_ITHD) &&
	    (tdq->tdq_group->tdg_idlemask & PCPU_GET(cpumask)) != 0) {
		/*
		 * Check to see if our group is unidling, and if so, remove it
		 * from the global idle mask.
		 */
		if (tdq->tdq_group->tdg_idlemask ==
		    tdq->tdq_group->tdg_cpumask)
			atomic_clear_int(&tdq_idle, tdq->tdq_group->tdg_mask);
		/*
		 * Now remove ourselves from the group specific idle mask.
		 */
		tdq->tdq_group->tdg_idlemask &= ~PCPU_GET(cpumask);
	} else if (canmigrate && tdq->tdq_load > 1)
		if (tdq_transfer(tdq, ts, class))
			return;
	ts->ts_cpu = PCPU_GET(cpuid);
#endif
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
	if (preemptive && maybe_preempt(td))
		return;
	ts->ts_state = TSS_ONRUNQ;

	tdq_runq_add(tdq, ts, flags);
	tdq_load_add(tdq, ts);
}

void
sched_rem(struct thread *td)
{
	struct tdq *tdq;
	struct td_sched *ts;

	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	if (ts->ts_flags & TSF_ASSIGNED) {
		ts->ts_flags |= TSF_REMOVED;
		return;
	}
	KASSERT((ts->ts_state == TSS_ONRUNQ),
	    ("sched_rem: thread not on run queue"));

	ts->ts_state = TSS_THREAD;
	tdq = TDQ_CPU(ts->ts_cpu);
	tdq_runq_rem(tdq, ts);
	tdq_load_rem(tdq, ts);
}

fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct td_sched *ts;

	pctcpu = 0;
	ts = td->td_sched;
	if (ts == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ts->ts_ticks) {
		int rtick;

		sched_pctcpu_update(ts);
		/* How many rticks per second? */
		rtick = min(SCHED_TICK_HZ(ts) / SCHED_TICK_SECS, hz);
		pctcpu = (FSCALE * ((FSCALE * rtick)/hz)) >> FSHIFT;
	}
	td->td_proc->p_swtime = ts->ts_ltick - ts->ts_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}
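/*
 * A worked example of the %cpu computation above (illustrative; FSCALE,
 * FSHIFT and the SCHED_TICK_* macros are defined elsewhere): ts_ticks is
 * kept scaled by SCHED_TICK_SHIFT, so SCHED_TICK_HZ() recovers the raw tick
 * count for the averaging window.  A thread that ran for the whole
 * SCHED_TICK_SECS window accumulates about hz ticks per second, rtick is
 * clamped to hz, and pctcpu comes out near FSCALE, i.e. 100%.  A thread that
 * ran half the time yields rtick of roughly hz / 2 and a pctcpu of about
 * FSCALE / 2.
 */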
void
sched_bind(struct thread *td, int cpu)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	KASSERT((ts->ts_flags & TSF_BOUND) == 0,
	    ("sched_bind: thread %p already bound.", td));
	ts->ts_flags |= TSF_BOUND;
#ifdef SMP
	if (PCPU_GET(cpuid) == cpu)
		return;
	/* sched_rem without the runq_remove */
	ts->ts_state = TSS_THREAD;
	tdq_load_rem(TDQ_CPU(ts->ts_cpu), ts);
	tdq_notify(ts, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	mi_switch(SW_VOL, NULL);
	sched_pin();
#endif
}

void
sched_unbind(struct thread *td)
{
	struct td_sched *ts;

	mtx_assert(&sched_lock, MA_OWNED);
	ts = td->td_sched;
	KASSERT(ts->ts_flags & TSF_BOUND,
	    ("sched_unbind: thread %p not bound.", td));
	mtx_assert(&sched_lock, MA_OWNED);
	ts->ts_flags &= ~TSF_BOUND;
#ifdef SMP
	sched_unpin();
#endif
}

int
sched_is_bound(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	return (td->td_sched->ts_flags & TSF_BOUND);
}

void
sched_relinquish(struct thread *td)
{
	mtx_lock_spin(&sched_lock);
	if (td->td_pri_class == PRI_TIMESHARE)
		sched_prio(td, PRI_MAX_TIMESHARE);
	mi_switch(SW_VOL, NULL);
	mtx_unlock_spin(&sched_lock);
}

int
sched_load(void)
{
#ifdef SMP
	int total;
	int i;

	total = 0;
	for (i = 0; i <= tdg_maxid; i++)
		total += TDQ_GROUP(i)->tdg_load;
	return (total);
#else
	return (TDQ_SELF()->tdq_sysload);
#endif
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}

void
sched_tick(void)
{
}

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
    "Scheduler name");
SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, tickincr, CTLFLAG_RD, &tickincr, 0, "");
SYSCTL_INT(_kern_sched, OID_AUTO, realstathz, CTLFLAG_RD, &realstathz, 0, "");

/* ps compat */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"