/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/sched_ule.c 112966 2003-04-02 06:46:43Z jeff $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatability crap for ps */
/*
 * NOTE(review): ccpu is a fixpt_t but is exported with SYSCTL_INT; this
 * relies on fixpt_t being int-sized — confirm against sys/types.h.
 */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

/* Real stat clock frequency: stathz if it ticks, else hz (set in sched_setup). */
int realstathz;

#define	SCHED_STRICT_RESCHED	1

/*
 * These datastructures are allocated within their parent datastructure but
 * are scheduler specific.
 */

/* Per-KSE scheduler state, hung off struct kse via ke_sched. */
struct ke_sched {
	int	ske_slice;		/* Remaining time slice, in ticks */
	struct	runq	*ske_runq;	/* Run queue this kse is (to be) on */
	/* The following variables are only used for pctcpu calculation */
	int	ske_ltick;	/* Last tick that we were running on */
	int	ske_ftick;	/* First tick that we were running on */
	int	ske_ticks;	/* Tick count */
	u_char	ske_cpu;	/* Cpu this kse is bound to */
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu

/* Per-ksegrp scheduler state; interactivity is modeled at ksegrp level. */
struct kg_sched {
	int	skg_slptime;	/* Number of ticks we vol. slept */
	int	skg_runtime;	/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

/* Per-thread scheduler state. */
struct td_sched {
	int	std_slptime;	/* ticks value when the thread went to sleep */
	int	std_schedflag;	/* TD_SCHED_* flags */
};
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

#define	TD_SCHED_BLOAD	0x0001	/*
				 * thread was counted as being in short
				 * term sleep.
				 */

/* Static scheduler state for proc0/thread0, wired up below. */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Reserved priorities for nice.
 * PRI_BASE:	The start of the dynamic range.
 * DYN_RANGE:	Number of priorities that are available int the dynamic
 *		priority range.
 * DYN_HALF:	Half of DYN_RANGE for convenience elsewhere.
 * PRI_DYN:	The dynamic priority which is derived from the number of ticks
 *		running vs the total number of ticks.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		PRIO_TOTAL
#define	SCHED_PRI_NHALF		(PRIO_TOTAL >> 2)
#define	SCHED_PRI_BASE		((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE)
#define	SCHED_DYN_RANGE		(SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define	SCHED_DYN_HALF		(SCHED_DYN_RANGE / 2)
#define	SCHED_PRI_DYN(run, total)	(((run) * SCHED_DYN_RANGE) / (total))


/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * INTERACT_RANGE:	Range of interactivity values.  Smaller is better.
 * INTERACT_HALF:	Convenience define, half of the interactivity range.
 * INTERACT_THRESH:	Threshhold for placement on the current runq.
 */
/* NB: slp/run times are kept scaled by << 10 (see sched_clock/sched_wakeup). */
#define	SCHED_SLP_RUN_MAX	((hz * 30) << 10)
#define	SCHED_SLP_RUN_THROTTLE	(10)
#define	SCHED_INTERACT_RANGE	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_RANGE / 2)
#define	SCHED_INTERACT_THRESH	(10)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number slices granted per val in the range of [0, max].
 * SLICE_NICE:	Determine the amount of slice granted to a scaled nice.
 */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 10)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)	\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NHALF))

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should effect how interactive a kg is.
 */
#define	SCHED_CURR(kg)	(sched_interact_score(kg) < SCHED_INTERACT_THRESH)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	60
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
	struct	runq	ksq_runqs[2];	/* Storage for curr/next queues */
	struct	runq	*ksq_curr;	/* Queue being drained this round */
	struct	runq	*ksq_next;	/* Queue filled for the next round */
	int		ksq_load;	/* Total runnable */
#ifdef SMP
	unsigned int	ksq_rslices;	/* Slices on run queue */
	unsigned int	ksq_bload;	/* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static int sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static int kseq_nice_min(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

/*
 * Add a kse to its pre-selected run queue (ke_runq must already be set)
 * and account for it in the per-cpu load counters.
 */
static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
	runq_add(ke->ke_runq, ke);
	kseq->ksq_load++;
#ifdef SMP
	kseq->ksq_rslices += ke->ke_slice;
#endif
}

/*
 * Remove a kse from its run queue and drop the load accounting added by
 * kseq_add().
 */
static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_load--;
	runq_remove(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
/* Count one more thread in short-term sleep on this cpu (see ksq_bload). */
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload++;
}

/* Undo the accounting done by kseq_sleep() when the thread wakes. */
static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload--;
}

/*
 * Return the kseq with the highest runnable-thread count, or NULL if every
 * cpu is idle.  Used by sched_choose() to steal work.
 * NOTE(review): the loop bound is `i < mp_maxid` — whether mp_maxid is the
 * count or the highest valid id should be confirmed against sys/smp.h.
 */
struct kseq *
kseq_load_highest(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	cpu = 0;
	load = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load > load) {
			load = kseq->ksq_load;
			cpu = i;
		}
	}
	if (load)
		return (KSEQ_CPU(cpu));

	return (NULL);
}
#endif

/*
 * Pick the best kse from the current queue; if it is empty, swap the
 * curr/next queues (starting a new "round") and retry once.  May still
 * return NULL if both queues are empty.
 */
struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;
		ke = runq_choose(kseq->ksq_curr);
	}

	return (ke);
}

/*
 * Return the lowest (i.e. least nice / most favored) kg_nice value among
 * the heads of the two run queues, or 0 if nothing is runnable.  Used by
 * sched_slice() as the base of the nice window.
 */
static int
kseq_nice_min(struct kseq *kseq)
{
	struct kse *ke0;
	struct kse *ke1;

	if (kseq->ksq_load == 0)
		return (0);

	ke0 = runq_choose(kseq->ksq_curr);
	ke1 = runq_choose(kseq->ksq_next);

	if (ke0 == NULL)
		return (ke1->ke_ksegrp->kg_nice);

	if (ke1 == NULL)
		return (ke0->ke_ksegrp->kg_nice);

	return (min(ke0->ke_ksegrp->kg_nice, ke1->ke_ksegrp->kg_nice));
}

/* Initialize one per-cpu kseq: empty curr/next queues, zeroed load. */
static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
	kseq->ksq_load = 0;
#ifdef SMP
	kseq->ksq_rslices = 0;
	kseq->ksq_bload = 0;
#endif
}

/*
 * SYSINIT hook: record the real stat clock frequency and initialize every
 * per-cpu queue pair under sched_lock.
 */
static void
sched_setup(void *dummy)
{
	int i;

	realstathz = stathz ? stathz : hz;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
	int pri;

	/* Only timeshare priorities are recomputed here. */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return (kg->kg_user_pri);

	/* Map interactivity score [0, RANGE) onto the dynamic priority band. */
	pri = sched_interact_score(kg) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE;
	pri += SCHED_PRI_BASE;
	pri += kg->kg_nice;

	/* Clamp into the legal timeshare range. */
	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.
 */
static void
sched_slice(struct kse *ke)
{
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get the minimum slice so that we
	 * quickly notice if it abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the ksegs nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse distance from the last nice ksegrp.
	 *
	 * If you are outside of the window you will get no slice and
	 * you will be reevaluated each time you are selected on the
	 * run queue.
	 *
	 */

	if (!SCHED_CURR(kg)) {
		struct kseq *kseq;
		int nice_base;
		int nice;

		kseq = KSEQ_CPU(ke->ke_cpu);
		nice_base = kseq_nice_min(kseq);
		/* Distance of this kseg's nice from the best nice on the cpu. */
		nice = kg->kg_nice + (0 - nice_base);

		if (kseq->ksq_load == 0 || kg->kg_nice < nice_base)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_PRI_NHALF)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else
			ke->ke_slice = 0;	/* outside the nice window */
	} else
		ke->ke_slice = SCHED_SLICE_MIN;

	/*
	 * Every time we grant a new slice check to see if we need to scale
	 * back the slp and run time in the kg.  This will cause us to forget
	 * old interactivity while maintaining the current ratio.
	 */
	if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
		kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
		kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
	}

	return;
}

/*
 * Compute the interactivity score of a ksegrp from the ratio of its
 * voluntary sleep time to its run time.  Result is in
 * [0, SCHED_INTERACT_RANGE): below SCHED_INTERACT_HALF means it sleeps
 * more than it runs (interactive); smaller is better.
 */
static int
sched_interact_score(struct ksegrp *kg)
{
	int big;
	int small;
	int base;

	if (kg->kg_runtime > kg->kg_slptime) {
		big = kg->kg_runtime;
		small = kg->kg_slptime;
		base = SCHED_INTERACT_HALF;	/* runs more than it sleeps */
	} else {
		big = kg->kg_slptime;
		small = kg->kg_runtime;
		base = 0;			/* sleeps more than it runs */
	}

	big /= SCHED_INTERACT_HALF;
	if (big != 0)
		small /= big;
	else
		small = 0;

	small += base;
	/* XXX Factor in nice */
	return (small);
}

/* Round-robin interval for this scheduler, in ticks. */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	/*
	 * Shift the tick count out so that the divide doesn't round away
	 * our results.
490111793Sjeff */ 491111793Sjeff ke->ke_ticks <<= 10; 492109864Sjeff ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) * 493109864Sjeff SCHED_CPU_TICKS; 494111793Sjeff ke->ke_ticks >>= 10; 495109864Sjeff ke->ke_ltick = ticks; 496109864Sjeff ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 497109864Sjeff} 498109864Sjeff 499109864Sjeff#ifdef SMP 500110267Sjeff/* XXX Should be changed to kseq_load_lowest() */ 501109864Sjeffint 502109864Sjeffsched_pickcpu(void) 503109864Sjeff{ 504110028Sjeff struct kseq *kseq; 505110028Sjeff int load; 506109864Sjeff int cpu; 507109864Sjeff int i; 508109864Sjeff 509109864Sjeff if (!smp_started) 510109864Sjeff return (0); 511109864Sjeff 512110028Sjeff load = 0; 513110028Sjeff cpu = 0; 514109864Sjeff 515109864Sjeff for (i = 0; i < mp_maxid; i++) { 516109864Sjeff if (CPU_ABSENT(i)) 517109864Sjeff continue; 518110028Sjeff kseq = KSEQ_CPU(i); 519110028Sjeff if (kseq->ksq_load < load) { 520109864Sjeff cpu = i; 521110028Sjeff load = kseq->ksq_load; 522109864Sjeff } 523109864Sjeff } 524109864Sjeff 525109864Sjeff CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu); 526109864Sjeff return (cpu); 527109864Sjeff} 528109864Sjeff#else 529109864Sjeffint 530109864Sjeffsched_pickcpu(void) 531109864Sjeff{ 532109864Sjeff return (0); 533109864Sjeff} 534109864Sjeff#endif 535109864Sjeff 536109864Sjeffvoid 537109864Sjeffsched_prio(struct thread *td, u_char prio) 538109864Sjeff{ 539109864Sjeff struct kse *ke; 540109864Sjeff struct runq *rq; 541109864Sjeff 542109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 543109864Sjeff ke = td->td_kse; 544109864Sjeff td->td_priority = prio; 545109864Sjeff 546109864Sjeff if (TD_ON_RUNQ(td)) { 547109864Sjeff rq = ke->ke_runq; 548109864Sjeff 549109864Sjeff runq_remove(rq, ke); 550109864Sjeff runq_add(rq, ke); 551109864Sjeff } 552109864Sjeff} 553109864Sjeff 554109864Sjeffvoid 555109864Sjeffsched_switchout(struct thread *td) 556109864Sjeff{ 557109864Sjeff struct kse *ke; 558109864Sjeff 559109864Sjeff mtx_assert(&sched_lock, 
	    MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = ke->ke_oncpu;
	ke->ke_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;

	/* Still runnable: put it straight back on a run queue. */
	if (TD_IS_RUNNING(td)) {
		setrunqueue(td);
		return;
	}
	td->td_kse->ke_runq = NULL;

	/*
	 * We will not be on the run queue.  So we must be
	 * sleeping or similar.
	 */
	if (td->td_proc->p_flag & P_THREADED)
		kse_reassign(ke);
}

/*
 * Called when a thread is switched in: note the cpu it now runs on and,
 * with strict rescheduling, request a reschedule if its priority no
 * longer matches its user priority.
 */
void
sched_switchin(struct thread *td)
{
	/* struct kse *ke = td->td_kse; */
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
	    td->td_priority != td->td_ksegrp->kg_user_pri)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Apply a new nice value to a ksegrp, recompute its user priority, and
 * flag every thread in the group for rescheduling.
 */
void
sched_nice(struct ksegrp *kg, int nice)
{
	struct thread *td;

	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		td->td_flags |= TDF_NEEDRESCHED;
	}
}

/*
 * Record the sleep start time and sleep priority.  On SMP, threads
 * sleeping at better than PZERO are counted in ksq_bload as short-term
 * sleepers.
 */
void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

#ifdef SMP
	if (td->td_priority < PZERO) {
		kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag |= TD_SCHED_BLOAD;
	}
#endif
}

/*
 * Wake a thread: credit its sleep time to the ksegrp's interactivity
 * accounting, undo any bload accounting, and put it back on a run queue.
 */
void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;

		kg = td->td_ksegrp;
		/* Sleep time is kept << 10, matching kg_runtime's scale. */
		kg->kg_slptime += (ticks - td->td_slptime) << 10;
		sched_priority(kg);
		td->td_slptime = 0;
	}
#ifdef SMP
	if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
		kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag &= ~TD_SCHED_BLOAD;
	}
#endif
	setrunqueue(td);
#if SCHED_STRICT_RESCHED
	/* Preempt curthread if the woken thread is more important. */
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
657109864Sjeff */ 658109864Sjeffvoid 659109864Sjeffsched_fork(struct ksegrp *kg, struct ksegrp *child) 660109864Sjeff{ 661109864Sjeff struct kse *ckse; 662109864Sjeff struct kse *pkse; 663109864Sjeff 664109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 665109864Sjeff ckse = FIRST_KSE_IN_KSEGRP(child); 666109864Sjeff pkse = FIRST_KSE_IN_KSEGRP(kg); 667109864Sjeff 668109864Sjeff /* XXX Need something better here */ 669110645Sjeff if (kg->kg_slptime > kg->kg_runtime) { 670111857Sjeff child->kg_slptime = SCHED_DYN_RANGE; 671111857Sjeff child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE; 672110645Sjeff } else { 673111857Sjeff child->kg_runtime = SCHED_DYN_RANGE; 674111857Sjeff child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE; 675110645Sjeff } 676110645Sjeff#if 0 677109864Sjeff child->kg_slptime = kg->kg_slptime; 678110645Sjeff child->kg_runtime = kg->kg_runtime; 679110645Sjeff#endif 680109864Sjeff child->kg_user_pri = kg->kg_user_pri; 681109864Sjeff 682110645Sjeff#if 0 683110260Sjeff if (pkse->ke_cpu != PCPU_GET(cpuid)) { 684110260Sjeff printf("pkse->ke_cpu = %d\n", pkse->ke_cpu); 685109970Sjeff printf("cpuid = %d", PCPU_GET(cpuid)); 686109970Sjeff Debugger("stop"); 687109970Sjeff } 688110645Sjeff#endif 689109970Sjeff 690109864Sjeff ckse->ke_slice = pkse->ke_slice; 691110260Sjeff ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */ 692109864Sjeff ckse->ke_runq = NULL; 693109864Sjeff /* 694109864Sjeff * Claim that we've been running for one second for statistical 695109864Sjeff * purposes. 696109864Sjeff */ 697109864Sjeff ckse->ke_ticks = 0; 698109864Sjeff ckse->ke_ltick = ticks; 699109864Sjeff ckse->ke_ftick = ticks - hz; 700109864Sjeff} 701109864Sjeff 702109864Sjeff/* 703109864Sjeff * Return some of the child's priority and interactivity to the parent. 
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	/* Parent inherits the exiting child's interactivity history. */
	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_slptime = child->kg_slptime;
	kg->kg_runtime = child->kg_runtime;
	sched_priority(kg);
}

/*
 * Per-tick scheduler clock: update pctcpu accounting, charge the tick to
 * the ksegrp's run time, and expire the slice when it runs out.
 */
void
sched_clock(struct thread *td)
{
	struct kse *ke;
#if SCHED_STRICT_RESCHED
	struct kse *nke;
	struct kseq *kseq;
#endif
	struct ksegrp *kg;


	ke = td->td_kse;
	kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;
	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	/* The idle kse is not charged slices or run time. */
	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
#if SCHED_STRICT_RESCHED
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		td->td_flags |= TDF_NEEDRESCHED;
#endif
	/*
	 * We used a tick charge it to the ksegrp so that we can compute our
	 * "interactivity".
	 */
	kg->kg_runtime += 1 << 10;

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue
	 */
	if (ke->ke_slice <= 0) {
		sched_priority(kg);
		sched_slice(ke);
		td->td_flags |= TDF_NEEDRESCHED;
		ke->ke_runq = NULL;	/* forces re-selection in sched_add() */
	}
}

/*
 * Report whether any cpu has runnable work.  Checks this cpu first; on
 * SMP any other cpu's load counts too, since sched_choose() can steal.
 */
int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_load)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processor's KSEs.  Just search until we
	 * verify that at least on other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

#if 0
		if (kseq->ksq_bload)
			return (0);
#endif

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load)
				return (1);
		}
	}
#endif
	return (0);
}

/*
 * On return to userland, drop back to the computed user priority if a
 * kernel-side priority change is still in effect.
 */
void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

/*
 * Select the next kse to run on this cpu; may steal from the most loaded
 * cpu if the local queues are empty.  Returns NULL when nothing runnable.
 */
struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
retry:
	ke = kseq_choose(kseq);

	if (ke) {
		ke->ke_state = KES_THREAD;
		kseq_rem(kseq, ke);

		/*
		 * If we dequeue a kse with a slice of zero it was below the
		 * nice threshold to acquire a slice.  Recalculate the slice
		 * to see if the situation has changed and then requeue.
		 */
		if (ke->ke_slice == 0) {
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			kseq_add(kseq, ke);
			goto retry;
		}
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
#if 0
		if (kseq->ksq_bload)
			return (NULL);
#endif
		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		kseq = kseq_load_highest();
		if (kseq == NULL)
			return (NULL);
		ke = kseq_choose(kseq);
		kseq_rem(kseq, ke);

		/* Rebind the stolen kse to this cpu. */
		ke->ke_state = KES_THREAD;
		ke->ke_runq = NULL;
		ke->ke_cpu = PCPU_GET(cpuid);
	}
#endif
	return (ke);
}

/*
 * Place a kse on a run queue: timeshare threads go to the curr or next
 * queue of their bound cpu depending on interactivity; everything else
 * goes to this cpu's curr queue.
 */
void
sched_add(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	/*
	 * Timeshare threads get placed on the appropriate queue on their
	 * bound cpu.
	 */
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
		kseq = KSEQ_CPU(ke->ke_cpu);

		if (ke->ke_runq == NULL) {
			if (SCHED_CURR(ke->ke_ksegrp))
				ke->ke_runq = kseq->ksq_curr;
			else
				ke->ke_runq = kseq->ksq_next;
		}
	/*
	 * If we're a real-time or interrupt thread place us on the curr
	 * queue for the current processor.  Hopefully this will yield the
	 * lowest latency response.
	 */
	} else {
		kseq = KSEQ_SELF();
		ke->ke_runq = kseq->ksq_curr;
	}
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_add(kseq, ke);
}

/*
 * Remove a kse from its run queue and drop the associated accounting.
 */
void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;

	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

/*
 * Compute the fixed-point fraction of cpu this kse has used over the
 * averaging window, for ps/top style reporting.
 */
fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;	/* NB: deliberately shadows the file-scope global */

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rtick per second ? */
		rtick = ke->ke_ticks / SCHED_CPU_TIME;
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

/* Sizes of the scheduler-private extensions, used by the allocator. */
int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}