sched_ule.c revision 113357
1109864Sjeff/*- 2113357Sjeff * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org> 3109864Sjeff * All rights reserved. 4109864Sjeff * 5109864Sjeff * Redistribution and use in source and binary forms, with or without 6109864Sjeff * modification, are permitted provided that the following conditions 7109864Sjeff * are met: 8109864Sjeff * 1. Redistributions of source code must retain the above copyright 9109864Sjeff * notice unmodified, this list of conditions, and the following 10109864Sjeff * disclaimer. 11109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright 12109864Sjeff * notice, this list of conditions and the following disclaimer in the 13109864Sjeff * documentation and/or other materials provided with the distribution. 14109864Sjeff * 15109864Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16109864Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17109864Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18109864Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19109864Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20109864Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21109864Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22109864Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23109864Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24109864Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25109864Sjeff * 26109864Sjeff * $FreeBSD: head/sys/kern/sched_ule.c 113357 2003-04-11 03:47:14Z jeff $ 27109864Sjeff */ 28109864Sjeff 29109864Sjeff#include <sys/param.h> 30109864Sjeff#include <sys/systm.h> 31109864Sjeff#include <sys/kernel.h> 32109864Sjeff#include <sys/ktr.h> 33109864Sjeff#include <sys/lock.h> 34109864Sjeff#include <sys/mutex.h> 35109864Sjeff#include <sys/proc.h> 36112966Sjeff#include <sys/resource.h> 37109864Sjeff#include <sys/sched.h> 38109864Sjeff#include <sys/smp.h> 39109864Sjeff#include <sys/sx.h> 40109864Sjeff#include <sys/sysctl.h> 41109864Sjeff#include <sys/sysproto.h> 42109864Sjeff#include <sys/vmmeter.h> 43109864Sjeff#ifdef DDB 44109864Sjeff#include <ddb/ddb.h> 45109864Sjeff#endif 46109864Sjeff#ifdef KTRACE 47109864Sjeff#include <sys/uio.h> 48109864Sjeff#include <sys/ktrace.h> 49109864Sjeff#endif 50109864Sjeff 51109864Sjeff#include <machine/cpu.h> 52109864Sjeff 53113357Sjeff#define KTR_ULE KTR_NFS 54113357Sjeff 55109864Sjeff/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 56109864Sjeff/* XXX This is bogus compatability crap for ps */ 57109864Sjeffstatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 58109864SjeffSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 59109864Sjeff 60109864Sjeffstatic void sched_setup(void *dummy); 61109864SjeffSYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 62109864Sjeff 63113357Sjeffstatic SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED"); 64113357Sjeff 65113357Sjeffstatic int sched_strict; 66113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, ""); 67113357Sjeff 68113357Sjeffstatic int slice_min = 1; 69113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, ""); 70113357Sjeff 71113357Sjeffstatic int slice_max = 2; 72113357SjeffSYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, ""); 73113357Sjeff 74111857Sjeffint realstathz; 75113357Sjeffint 
tickincr = 1; 76111857Sjeff 77109864Sjeff/* 78109864Sjeff * These datastructures are allocated within their parent datastructure but 79109864Sjeff * are scheduler specific. 80109864Sjeff */ 81109864Sjeff 82109864Sjeffstruct ke_sched { 83109864Sjeff int ske_slice; 84109864Sjeff struct runq *ske_runq; 85109864Sjeff /* The following variables are only used for pctcpu calculation */ 86109864Sjeff int ske_ltick; /* Last tick that we were running on */ 87109864Sjeff int ske_ftick; /* First tick that we were running on */ 88109864Sjeff int ske_ticks; /* Tick count */ 89113357Sjeff /* CPU that we have affinity for. */ 90110260Sjeff u_char ske_cpu; 91109864Sjeff}; 92109864Sjeff#define ke_slice ke_sched->ske_slice 93109864Sjeff#define ke_runq ke_sched->ske_runq 94109864Sjeff#define ke_ltick ke_sched->ske_ltick 95109864Sjeff#define ke_ftick ke_sched->ske_ftick 96109864Sjeff#define ke_ticks ke_sched->ske_ticks 97110260Sjeff#define ke_cpu ke_sched->ske_cpu 98109864Sjeff 99109864Sjeffstruct kg_sched { 100110645Sjeff int skg_slptime; /* Number of ticks we vol. slept */ 101110645Sjeff int skg_runtime; /* Number of ticks we were running */ 102109864Sjeff}; 103109864Sjeff#define kg_slptime kg_sched->skg_slptime 104110645Sjeff#define kg_runtime kg_sched->skg_runtime 105109864Sjeff 106109864Sjeffstruct td_sched { 107109864Sjeff int std_slptime; 108109864Sjeff}; 109109864Sjeff#define td_slptime td_sched->std_slptime 110109864Sjeff 111110267Sjeffstruct td_sched td_sched; 112109864Sjeffstruct ke_sched ke_sched; 113109864Sjeffstruct kg_sched kg_sched; 114109864Sjeff 115109864Sjeffstruct ke_sched *kse0_sched = &ke_sched; 116109864Sjeffstruct kg_sched *ksegrp0_sched = &kg_sched; 117109864Sjeffstruct p_sched *proc0_sched = NULL; 118109864Sjeffstruct td_sched *thread0_sched = &td_sched; 119109864Sjeff 120109864Sjeff/* 121109864Sjeff * This priority range has 20 priorities on either end that are reachable 122109864Sjeff * only through nice values. 
123111857Sjeff * 124111857Sjeff * PRI_RANGE: Total priority range for timeshare threads. 125111857Sjeff * PRI_NRESV: Reserved priorities for nice. 126111857Sjeff * PRI_BASE: The start of the dynamic range. 127111857Sjeff * DYN_RANGE: Number of priorities that are available int the dynamic 128111857Sjeff * priority range. 129109864Sjeff */ 130111857Sjeff#define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 131112966Sjeff#define SCHED_PRI_NRESV PRIO_TOTAL 132112970Sjeff#define SCHED_PRI_NHALF (PRIO_TOTAL / 2) 133113357Sjeff#define SCHED_PRI_NTHRESH (SCHED_PRI_NHALF - 1) 134111857Sjeff#define SCHED_PRI_BASE ((SCHED_PRI_NRESV / 2) + PRI_MIN_TIMESHARE) 135111857Sjeff#define SCHED_DYN_RANGE (SCHED_PRI_RANGE - SCHED_PRI_NRESV) 136113357Sjeff#define SCHED_PRI_INTERACT(score) \ 137113357Sjeff ((score) * SCHED_DYN_RANGE / SCHED_INTERACT_RANGE) 138109864Sjeff 139109864Sjeff/* 140111857Sjeff * These determine the interactivity of a process. 141109864Sjeff * 142110645Sjeff * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 143110645Sjeff * before throttling back. 144111857Sjeff * SLP_RUN_THROTTLE: Divisor for reducing slp/run time. 145111857Sjeff * INTERACT_RANGE: Range of interactivity values. Smaller is better. 146111857Sjeff * INTERACT_HALF: Convenience define, half of the interactivity range. 147111857Sjeff * INTERACT_THRESH: Threshhold for placement on the current runq. 148109864Sjeff */ 149113357Sjeff#define SCHED_SLP_RUN_MAX ((hz / 10) << 10) 150110645Sjeff#define SCHED_SLP_RUN_THROTTLE (10) 151111857Sjeff#define SCHED_INTERACT_RANGE (100) 152111857Sjeff#define SCHED_INTERACT_HALF (SCHED_INTERACT_RANGE / 2) 153111857Sjeff#define SCHED_INTERACT_THRESH (10) 154111857Sjeff 155109864Sjeff/* 156109864Sjeff * These parameters and macros determine the size of the time slice that is 157109864Sjeff * granted to each thread. 158109864Sjeff * 159109864Sjeff * SLICE_MIN: Minimum time slice granted, in units of ticks. 
160109864Sjeff * SLICE_MAX: Maximum time slice granted. 161109864Sjeff * SLICE_RANGE: Range of available time slices scaled by hz. 162112966Sjeff * SLICE_SCALE: The number slices granted per val in the range of [0, max]. 163112966Sjeff * SLICE_NICE: Determine the amount of slice granted to a scaled nice. 164109864Sjeff */ 165113357Sjeff#define SCHED_SLICE_MIN (slice_min) 166113357Sjeff#define SCHED_SLICE_MAX (slice_max) 167111857Sjeff#define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 168109864Sjeff#define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 169112966Sjeff#define SCHED_SLICE_NICE(nice) \ 170113357Sjeff (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_PRI_NTHRESH)) 171109864Sjeff 172109864Sjeff/* 173109864Sjeff * This macro determines whether or not the kse belongs on the current or 174109864Sjeff * next run queue. 175110645Sjeff * 176110645Sjeff * XXX nice value should effect how interactive a kg is. 177109864Sjeff */ 178113357Sjeff#define SCHED_INTERACTIVE(kg) \ 179113357Sjeff (sched_interact_score(kg) < SCHED_INTERACT_THRESH) 180113357Sjeff#define SCHED_CURR(kg, ke) SCHED_INTERACTIVE(kg) 181113357Sjeff#if 0 182113357Sjeff (ke->ke_thread->td_priority < PRI_MIN_TIMESHARE || SCHED_INTERACTIVE(kg)) 183113357Sjeff#endif 184109864Sjeff 185109864Sjeff/* 186109864Sjeff * Cpu percentage computation macros and defines. 187109864Sjeff * 188109864Sjeff * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 189109864Sjeff * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 190109864Sjeff */ 191109864Sjeff 192112971Sjeff#define SCHED_CPU_TIME 10 193109864Sjeff#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 194109864Sjeff 195109864Sjeff/* 196113357Sjeff * kseq - per processor runqs and statistics. 197109864Sjeff */ 198109864Sjeff 199113357Sjeff#define KSEQ_NCLASS (PRI_IDLE + 1) /* Number of run classes. 
*/ 200113357Sjeff 201109864Sjeffstruct kseq { 202113357Sjeff struct runq ksq_idle; /* Queue of IDLE threads. */ 203113357Sjeff struct runq ksq_timeshare[2]; /* Run queues for !IDLE. */ 204113357Sjeff struct runq *ksq_next; /* Next timeshare queue. */ 205113357Sjeff struct runq *ksq_curr; /* Current queue. */ 206113357Sjeff int ksq_loads[KSEQ_NCLASS]; /* Load for each class */ 207113357Sjeff int ksq_load; /* Aggregate load. */ 208113357Sjeff short ksq_nice[PRIO_TOTAL + 1]; /* KSEs in each nice bin. */ 209113357Sjeff short ksq_nicemin; /* Least nice. */ 210110267Sjeff#ifdef SMP 211110267Sjeff unsigned int ksq_rslices; /* Slices on run queue */ 212110267Sjeff#endif 213109864Sjeff}; 214109864Sjeff 215109864Sjeff/* 216109864Sjeff * One kse queue per processor. 217109864Sjeff */ 218110028Sjeff#ifdef SMP 219109864Sjeffstruct kseq kseq_cpu[MAXCPU]; 220110028Sjeff#define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 221110028Sjeff#define KSEQ_CPU(x) (&kseq_cpu[(x)]) 222110028Sjeff#else 223110028Sjeffstruct kseq kseq_cpu; 224110028Sjeff#define KSEQ_SELF() (&kseq_cpu) 225110028Sjeff#define KSEQ_CPU(x) (&kseq_cpu) 226110028Sjeff#endif 227109864Sjeff 228112966Sjeffstatic void sched_slice(struct kse *ke); 229113357Sjeffstatic void sched_priority(struct ksegrp *kg); 230111857Sjeffstatic int sched_interact_score(struct ksegrp *kg); 231109864Sjeffvoid sched_pctcpu_update(struct kse *ke); 232109864Sjeffint sched_pickcpu(void); 233109864Sjeff 234110267Sjeff/* Operations on per processor queues */ 235110028Sjeffstatic struct kse * kseq_choose(struct kseq *kseq); 236110028Sjeffstatic void kseq_setup(struct kseq *kseq); 237112994Sjeffstatic void kseq_add(struct kseq *kseq, struct kse *ke); 238113357Sjeffstatic void kseq_rem(struct kseq *kseq, struct kse *ke); 239113357Sjeffstatic void kseq_nice_add(struct kseq *kseq, int nice); 240113357Sjeffstatic void kseq_nice_rem(struct kseq *kseq, int nice); 241113357Sjeffvoid kseq_print(struct kseq *kseq); 242110267Sjeff#ifdef SMP 243110267Sjeffstruct 
kseq * kseq_load_highest(void); 244110267Sjeff#endif 245110028Sjeff 246113357Sjeffvoid 247113357Sjeffkseq_print(struct kseq *kseq) 248110267Sjeff{ 249113357Sjeff int i; 250112994Sjeff 251113357Sjeff if (kseq == NULL) 252113357Sjeff kseq = KSEQ_SELF(); 253112994Sjeff 254113357Sjeff printf("kseq:\n"); 255113357Sjeff printf("\tload: %d\n", kseq->ksq_load); 256113357Sjeff printf("\tload ITHD: %d\n", kseq->ksq_loads[PRI_ITHD]); 257113357Sjeff printf("\tload REALTIME: %d\n", kseq->ksq_loads[PRI_REALTIME]); 258113357Sjeff printf("\tload TIMESHARE: %d\n", kseq->ksq_loads[PRI_TIMESHARE]); 259113357Sjeff printf("\tload IDLE: %d\n", kseq->ksq_loads[PRI_IDLE]); 260113357Sjeff printf("\tnicemin:\t%d\n", kseq->ksq_nicemin); 261113357Sjeff printf("\tnice counts:\n"); 262113357Sjeff for (i = 0; i < PRIO_TOTAL + 1; i++) 263113357Sjeff if (kseq->ksq_nice[i]) 264113357Sjeff printf("\t\t%d = %d\n", 265113357Sjeff i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 266113357Sjeff} 267112994Sjeff 268113357Sjeffstatic void 269113357Sjeffkseq_add(struct kseq *kseq, struct kse *ke) 270113357Sjeff{ 271113357Sjeff kseq->ksq_loads[ke->ke_ksegrp->kg_pri_class]++; 272113357Sjeff kseq->ksq_load++; 273113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 274113357Sjeff CTR6(KTR_ULE, "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))", 275113357Sjeff ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority, 276113357Sjeff ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin); 277113357Sjeff if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 278113357Sjeff kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice); 279110267Sjeff#ifdef SMP 280110267Sjeff kseq->ksq_rslices += ke->ke_slice; 281110267Sjeff#endif 282110267Sjeff} 283113357Sjeff 284112994Sjeffstatic void 285110267Sjeffkseq_rem(struct kseq *kseq, struct kse *ke) 286110267Sjeff{ 287113357Sjeff kseq->ksq_loads[ke->ke_ksegrp->kg_pri_class]--; 288113357Sjeff kseq->ksq_load--; 289113357Sjeff ke->ke_runq = NULL; 290113357Sjeff if (ke->ke_ksegrp->kg_pri_class == 
PRI_TIMESHARE) 291113357Sjeff kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice); 292110267Sjeff#ifdef SMP 293110267Sjeff kseq->ksq_rslices -= ke->ke_slice; 294110267Sjeff#endif 295110267Sjeff} 296110267Sjeff 297113357Sjeffstatic void 298113357Sjeffkseq_nice_add(struct kseq *kseq, int nice) 299110267Sjeff{ 300113357Sjeff /* Normalize to zero. */ 301113357Sjeff kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 302113357Sjeff if (nice < kseq->ksq_nicemin || kseq->ksq_loads[PRI_TIMESHARE] == 0) 303113357Sjeff kseq->ksq_nicemin = nice; 304110267Sjeff} 305110267Sjeff 306113357Sjeffstatic void 307113357Sjeffkseq_nice_rem(struct kseq *kseq, int nice) 308110267Sjeff{ 309113357Sjeff int n; 310113357Sjeff 311113357Sjeff /* Normalize to zero. */ 312113357Sjeff n = nice + SCHED_PRI_NHALF; 313113357Sjeff kseq->ksq_nice[n]--; 314113357Sjeff KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 315113357Sjeff 316113357Sjeff /* 317113357Sjeff * If this wasn't the smallest nice value or there are more in 318113357Sjeff * this bucket we can just return. Otherwise we have to recalculate 319113357Sjeff * the smallest nice. 
320113357Sjeff */ 321113357Sjeff if (nice != kseq->ksq_nicemin || 322113357Sjeff kseq->ksq_nice[n] != 0 || 323113357Sjeff kseq->ksq_loads[PRI_TIMESHARE] == 0) 324113357Sjeff return; 325113357Sjeff 326113357Sjeff for (; n < SCHED_PRI_NRESV + 1; n++) 327113357Sjeff if (kseq->ksq_nice[n]) { 328113357Sjeff kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 329113357Sjeff return; 330113357Sjeff } 331110267Sjeff} 332110267Sjeff 333113357Sjeff#ifdef SMP 334110267Sjeffstruct kseq * 335110267Sjeffkseq_load_highest(void) 336110267Sjeff{ 337110267Sjeff struct kseq *kseq; 338110267Sjeff int load; 339110267Sjeff int cpu; 340110267Sjeff int i; 341110267Sjeff 342110267Sjeff cpu = 0; 343110267Sjeff load = 0; 344110267Sjeff 345110267Sjeff for (i = 0; i < mp_maxid; i++) { 346110267Sjeff if (CPU_ABSENT(i)) 347110267Sjeff continue; 348110267Sjeff kseq = KSEQ_CPU(i); 349113357Sjeff if (kseq->ksq_load > load) { 350113357Sjeff load = kseq->ksq_load; 351110267Sjeff cpu = i; 352110267Sjeff } 353110267Sjeff } 354110267Sjeff if (load) 355110267Sjeff return (KSEQ_CPU(cpu)); 356110267Sjeff 357110267Sjeff return (NULL); 358110267Sjeff} 359110267Sjeff#endif 360110267Sjeff 361110267Sjeffstruct kse * 362110267Sjeffkseq_choose(struct kseq *kseq) 363110267Sjeff{ 364110267Sjeff struct kse *ke; 365110267Sjeff struct runq *swap; 366110267Sjeff 367113357Sjeff swap = NULL; 368112994Sjeff 369113357Sjeff for (;;) { 370113357Sjeff ke = runq_choose(kseq->ksq_curr); 371113357Sjeff if (ke == NULL) { 372113357Sjeff /* 373113357Sjeff * We already swaped once and didn't get anywhere. 374113357Sjeff */ 375113357Sjeff if (swap) 376113357Sjeff break; 377113357Sjeff swap = kseq->ksq_curr; 378113357Sjeff kseq->ksq_curr = kseq->ksq_next; 379113357Sjeff kseq->ksq_next = swap; 380113357Sjeff continue; 381113357Sjeff } 382113357Sjeff /* 383113357Sjeff * If we encounter a slice of 0 the kse is in a 384113357Sjeff * TIMESHARE kse group and its nice was too far out 385113357Sjeff * of the range that receives slices. 
386113357Sjeff */ 387113357Sjeff if (ke->ke_slice == 0) { 388113357Sjeff runq_remove(ke->ke_runq, ke); 389113357Sjeff sched_slice(ke); 390113357Sjeff ke->ke_runq = kseq->ksq_next; 391113357Sjeff runq_add(ke->ke_runq, ke); 392113357Sjeff continue; 393113357Sjeff } 394113357Sjeff return (ke); 395110267Sjeff } 396110267Sjeff 397113357Sjeff return (runq_choose(&kseq->ksq_idle)); 398110267Sjeff} 399110267Sjeff 400109864Sjeffstatic void 401110028Sjeffkseq_setup(struct kseq *kseq) 402110028Sjeff{ 403113357Sjeff runq_init(&kseq->ksq_timeshare[0]); 404113357Sjeff runq_init(&kseq->ksq_timeshare[1]); 405112994Sjeff runq_init(&kseq->ksq_idle); 406113357Sjeff 407113357Sjeff kseq->ksq_curr = &kseq->ksq_timeshare[0]; 408113357Sjeff kseq->ksq_next = &kseq->ksq_timeshare[1]; 409113357Sjeff 410113357Sjeff kseq->ksq_loads[PRI_ITHD] = 0; 411113357Sjeff kseq->ksq_loads[PRI_REALTIME] = 0; 412113357Sjeff kseq->ksq_loads[PRI_TIMESHARE] = 0; 413113357Sjeff kseq->ksq_loads[PRI_IDLE] = 0; 414110267Sjeff#ifdef SMP 415110267Sjeff kseq->ksq_rslices = 0; 416110267Sjeff#endif 417110028Sjeff} 418110028Sjeff 419110028Sjeffstatic void 420109864Sjeffsched_setup(void *dummy) 421109864Sjeff{ 422109864Sjeff int i; 423109864Sjeff 424113357Sjeff slice_min = (hz/100); 425113357Sjeff slice_max = (hz/10); 426111857Sjeff 427109864Sjeff mtx_lock_spin(&sched_lock); 428109864Sjeff /* init kseqs */ 429110028Sjeff for (i = 0; i < MAXCPU; i++) 430110028Sjeff kseq_setup(KSEQ_CPU(i)); 431113357Sjeff 432113357Sjeff kseq_add(KSEQ_SELF(), &kse0); 433109864Sjeff mtx_unlock_spin(&sched_lock); 434109864Sjeff} 435109864Sjeff 436109864Sjeff/* 437109864Sjeff * Scale the scheduling priority according to the "interactivity" of this 438109864Sjeff * process. 
439109864Sjeff */ 440113357Sjeffstatic void 441109864Sjeffsched_priority(struct ksegrp *kg) 442109864Sjeff{ 443109864Sjeff int pri; 444109864Sjeff 445109864Sjeff if (kg->kg_pri_class != PRI_TIMESHARE) 446113357Sjeff return; 447109864Sjeff 448113357Sjeff pri = SCHED_PRI_INTERACT(sched_interact_score(kg)); 449111857Sjeff pri += SCHED_PRI_BASE; 450109864Sjeff pri += kg->kg_nice; 451109864Sjeff 452109864Sjeff if (pri > PRI_MAX_TIMESHARE) 453109864Sjeff pri = PRI_MAX_TIMESHARE; 454109864Sjeff else if (pri < PRI_MIN_TIMESHARE) 455109864Sjeff pri = PRI_MIN_TIMESHARE; 456109864Sjeff 457109864Sjeff kg->kg_user_pri = pri; 458109864Sjeff 459113357Sjeff return; 460109864Sjeff} 461109864Sjeff 462109864Sjeff/* 463112966Sjeff * Calculate a time slice based on the properties of the kseg and the runq 464112994Sjeff * that we're on. This is only for PRI_TIMESHARE ksegrps. 465109864Sjeff */ 466112966Sjeffstatic void 467112966Sjeffsched_slice(struct kse *ke) 468109864Sjeff{ 469113357Sjeff struct kseq *kseq; 470112966Sjeff struct ksegrp *kg; 471109864Sjeff 472112966Sjeff kg = ke->ke_ksegrp; 473113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 474109864Sjeff 475112966Sjeff /* 476112966Sjeff * Rationale: 477112966Sjeff * KSEs in interactive ksegs get the minimum slice so that we 478112966Sjeff * quickly notice if it abuses its advantage. 479112966Sjeff * 480112966Sjeff * KSEs in non-interactive ksegs are assigned a slice that is 481112966Sjeff * based on the ksegs nice value relative to the least nice kseg 482112966Sjeff * on the run queue for this cpu. 483112966Sjeff * 484112966Sjeff * If the KSE is less nice than all others it gets the maximum 485112966Sjeff * slice and other KSEs will adjust their slice relative to 486112966Sjeff * this when they first expire. 487112966Sjeff * 488112966Sjeff * There is 20 point window that starts relative to the least 489112966Sjeff * nice kse on the run queue. Slice size is determined by 490112966Sjeff * the kse distance from the last nice ksegrp. 
491112966Sjeff * 492112966Sjeff * If you are outside of the window you will get no slice and 493112966Sjeff * you will be reevaluated each time you are selected on the 494112966Sjeff * run queue. 495112966Sjeff * 496112966Sjeff */ 497109864Sjeff 498113357Sjeff if (!SCHED_INTERACTIVE(kg)) { 499112966Sjeff int nice; 500112966Sjeff 501113357Sjeff nice = kg->kg_nice + (0 - kseq->ksq_nicemin); 502113357Sjeff if (kseq->ksq_loads[PRI_TIMESHARE] == 0 || 503113357Sjeff kg->kg_nice < kseq->ksq_nicemin) 504112966Sjeff ke->ke_slice = SCHED_SLICE_MAX; 505113357Sjeff else if (nice <= SCHED_PRI_NTHRESH) 506112966Sjeff ke->ke_slice = SCHED_SLICE_NICE(nice); 507112966Sjeff else 508112966Sjeff ke->ke_slice = 0; 509112966Sjeff } else 510112966Sjeff ke->ke_slice = SCHED_SLICE_MIN; 511112966Sjeff 512113357Sjeff CTR6(KTR_ULE, 513113357Sjeff "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)", 514113357Sjeff ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin, 515113357Sjeff kseq->ksq_loads[PRI_TIMESHARE], SCHED_INTERACTIVE(kg)); 516113357Sjeff 517110645Sjeff /* 518112994Sjeff * Check to see if we need to scale back the slp and run time 519112994Sjeff * in the kg. This will cause us to forget old interactivity 520112994Sjeff * while maintaining the current ratio. 
521110645Sjeff */ 522113357Sjeff CTR4(KTR_ULE, "Slp vs Run %p (Slp %d, Run %d, Score %d)", 523113357Sjeff ke, kg->kg_slptime >> 10, kg->kg_runtime >> 10, 524113357Sjeff sched_interact_score(kg)); 525113357Sjeff 526110645Sjeff if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) { 527110645Sjeff kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE; 528110645Sjeff kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE; 529110645Sjeff } 530113357Sjeff CTR4(KTR_ULE, "Slp vs Run(2) %p (Slp %d, Run %d, Score %d)", 531113357Sjeff ke, kg->kg_slptime >> 10, kg->kg_runtime >> 10, 532113357Sjeff sched_interact_score(kg)); 533110645Sjeff 534112966Sjeff return; 535109864Sjeff} 536109864Sjeff 537111857Sjeffstatic int 538111857Sjeffsched_interact_score(struct ksegrp *kg) 539111857Sjeff{ 540111857Sjeff int big; 541111857Sjeff int small; 542111857Sjeff int base; 543111857Sjeff 544111857Sjeff if (kg->kg_runtime > kg->kg_slptime) { 545111857Sjeff big = kg->kg_runtime; 546111857Sjeff small = kg->kg_slptime; 547111857Sjeff base = SCHED_INTERACT_HALF; 548111857Sjeff } else { 549111857Sjeff big = kg->kg_slptime; 550111857Sjeff small = kg->kg_runtime; 551111857Sjeff base = 0; 552111857Sjeff } 553111857Sjeff 554111857Sjeff big /= SCHED_INTERACT_HALF; 555111857Sjeff if (big != 0) 556111857Sjeff small /= big; 557111857Sjeff else 558111857Sjeff small = 0; 559111857Sjeff 560111857Sjeff small += base; 561111857Sjeff /* XXX Factor in nice */ 562111857Sjeff return (small); 563111857Sjeff} 564111857Sjeff 565113357Sjeff/* 566113357Sjeff * This is only somewhat accurate since given many processes of the same 567113357Sjeff * priority they will switch when their slices run out, which will be 568113357Sjeff * at most SCHED_SLICE_MAX. 
569113357Sjeff */ 570109864Sjeffint 571109864Sjeffsched_rr_interval(void) 572109864Sjeff{ 573109864Sjeff return (SCHED_SLICE_MAX); 574109864Sjeff} 575109864Sjeff 576109864Sjeffvoid 577109864Sjeffsched_pctcpu_update(struct kse *ke) 578109864Sjeff{ 579109864Sjeff /* 580109864Sjeff * Adjust counters and watermark for pctcpu calc. 581113357Sjeff * 582111793Sjeff * Shift the tick count out so that the divide doesn't round away 583111793Sjeff * our results. 584111793Sjeff */ 585111793Sjeff ke->ke_ticks <<= 10; 586109864Sjeff ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) * 587109864Sjeff SCHED_CPU_TICKS; 588111793Sjeff ke->ke_ticks >>= 10; 589109864Sjeff ke->ke_ltick = ticks; 590109864Sjeff ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 591109864Sjeff} 592109864Sjeff 593109864Sjeff#ifdef SMP 594110267Sjeff/* XXX Should be changed to kseq_load_lowest() */ 595109864Sjeffint 596109864Sjeffsched_pickcpu(void) 597109864Sjeff{ 598110028Sjeff struct kseq *kseq; 599110028Sjeff int load; 600109864Sjeff int cpu; 601109864Sjeff int i; 602109864Sjeff 603109864Sjeff if (!smp_started) 604109864Sjeff return (0); 605109864Sjeff 606110028Sjeff load = 0; 607110028Sjeff cpu = 0; 608109864Sjeff 609109864Sjeff for (i = 0; i < mp_maxid; i++) { 610109864Sjeff if (CPU_ABSENT(i)) 611109864Sjeff continue; 612110028Sjeff kseq = KSEQ_CPU(i); 613113357Sjeff if (kseq->ksq_load < load) { 614109864Sjeff cpu = i; 615113357Sjeff load = kseq->ksq_load; 616109864Sjeff } 617109864Sjeff } 618109864Sjeff 619109864Sjeff CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu); 620109864Sjeff return (cpu); 621109864Sjeff} 622109864Sjeff#else 623109864Sjeffint 624109864Sjeffsched_pickcpu(void) 625109864Sjeff{ 626109864Sjeff return (0); 627109864Sjeff} 628109864Sjeff#endif 629109864Sjeff 630109864Sjeffvoid 631109864Sjeffsched_prio(struct thread *td, u_char prio) 632109864Sjeff{ 633109864Sjeff struct kse *ke; 634109864Sjeff struct runq *rq; 635109864Sjeff 636109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 
637109864Sjeff ke = td->td_kse; 638109864Sjeff td->td_priority = prio; 639109864Sjeff 640109864Sjeff if (TD_ON_RUNQ(td)) { 641109864Sjeff rq = ke->ke_runq; 642109864Sjeff 643109864Sjeff runq_remove(rq, ke); 644109864Sjeff runq_add(rq, ke); 645109864Sjeff } 646109864Sjeff} 647109864Sjeff 648109864Sjeffvoid 649109864Sjeffsched_switchout(struct thread *td) 650109864Sjeff{ 651109864Sjeff struct kse *ke; 652109864Sjeff 653109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 654109864Sjeff 655109864Sjeff ke = td->td_kse; 656109864Sjeff 657109864Sjeff td->td_last_kse = ke; 658113339Sjulian td->td_lastcpu = td->td_oncpu; 659113339Sjulian td->td_oncpu = NOCPU; 660111032Sjulian td->td_flags &= ~TDF_NEEDRESCHED; 661109864Sjeff 662109864Sjeff if (TD_IS_RUNNING(td)) { 663113357Sjeff runq_add(ke->ke_runq, ke); 664113357Sjeff /* setrunqueue(td); */ 665109864Sjeff return; 666111857Sjeff } 667113357Sjeff if (ke->ke_runq) 668113357Sjeff kseq_rem(KSEQ_CPU(ke->ke_cpu), ke); 669109864Sjeff /* 670109864Sjeff * We will not be on the run queue. So we must be 671109864Sjeff * sleeping or similar. 672109864Sjeff */ 673111585Sjulian if (td->td_proc->p_flag & P_THREADED) 674109864Sjeff kse_reassign(ke); 675109864Sjeff} 676109864Sjeff 677109864Sjeffvoid 678109864Sjeffsched_switchin(struct thread *td) 679109864Sjeff{ 680109864Sjeff /* struct kse *ke = td->td_kse; */ 681109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 682109864Sjeff 683113339Sjulian td->td_oncpu = PCPU_GET(cpuid); 684113357Sjeff 685109864Sjeff if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE && 686109864Sjeff td->td_priority != td->td_ksegrp->kg_user_pri) 687111032Sjulian curthread->td_flags |= TDF_NEEDRESCHED; 688109864Sjeff} 689109864Sjeff 690109864Sjeffvoid 691109864Sjeffsched_nice(struct ksegrp *kg, int nice) 692109864Sjeff{ 693113357Sjeff struct kse *ke; 694109864Sjeff struct thread *td; 695113357Sjeff struct kseq *kseq; 696109864Sjeff 697113357Sjeff /* 698113357Sjeff * We need to adjust the nice counts for running KSEs. 
699113357Sjeff */ 700113357Sjeff if (kg->kg_pri_class == PRI_TIMESHARE) 701113357Sjeff FOREACH_KSE_IN_GROUP(kg, ke) { 702113357Sjeff if (ke->ke_state != KES_ONRUNQ && 703113357Sjeff ke->ke_state != KES_THREAD) 704113357Sjeff continue; 705113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 706113357Sjeff kseq_nice_rem(kseq, kg->kg_nice); 707113357Sjeff kseq_nice_add(kseq, nice); 708113357Sjeff } 709109864Sjeff kg->kg_nice = nice; 710109864Sjeff sched_priority(kg); 711113357Sjeff FOREACH_THREAD_IN_GROUP(kg, td) 712111032Sjulian td->td_flags |= TDF_NEEDRESCHED; 713109864Sjeff} 714109864Sjeff 715109864Sjeffvoid 716109864Sjeffsched_sleep(struct thread *td, u_char prio) 717109864Sjeff{ 718109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 719109864Sjeff 720109864Sjeff td->td_slptime = ticks; 721109864Sjeff td->td_priority = prio; 722109864Sjeff 723113357Sjeff CTR2(KTR_ULE, "sleep kse %p (tick: %d)", 724113357Sjeff td->td_kse, td->td_slptime); 725109864Sjeff} 726109864Sjeff 727109864Sjeffvoid 728109864Sjeffsched_wakeup(struct thread *td) 729109864Sjeff{ 730109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 731109864Sjeff 732109864Sjeff /* 733109864Sjeff * Let the kseg know how long we slept for. This is because process 734109864Sjeff * interactivity behavior is modeled in the kseg. 
735109864Sjeff */ 736111788Sjeff if (td->td_slptime) { 737111788Sjeff struct ksegrp *kg; 738113357Sjeff int hzticks; 739109864Sjeff 740111788Sjeff kg = td->td_ksegrp; 741113357Sjeff hzticks = ticks - td->td_slptime; 742113357Sjeff kg->kg_slptime += hzticks << 10; 743111788Sjeff sched_priority(kg); 744113357Sjeff CTR2(KTR_ULE, "wakeup kse %p (%d ticks)", 745113357Sjeff td->td_kse, hzticks); 746111788Sjeff td->td_slptime = 0; 747109864Sjeff } 748109864Sjeff setrunqueue(td); 749109864Sjeff if (td->td_priority < curthread->td_priority) 750111032Sjulian curthread->td_flags |= TDF_NEEDRESCHED; 751109864Sjeff} 752109864Sjeff 753109864Sjeff/* 754109864Sjeff * Penalize the parent for creating a new child and initialize the child's 755109864Sjeff * priority. 756109864Sjeff */ 757109864Sjeffvoid 758113357Sjeffsched_fork(struct proc *p, struct proc *p1) 759109864Sjeff{ 760109864Sjeff 761109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 762109864Sjeff 763113357Sjeff sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); 764113357Sjeff sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); 765113357Sjeff sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); 766113357Sjeff} 767113357Sjeff 768113357Sjeffvoid 769113357Sjeffsched_fork_kse(struct kse *ke, struct kse *child) 770113357Sjeff{ 771113357Sjeff child->ke_slice = ke->ke_slice; 772113357Sjeff child->ke_cpu = ke->ke_cpu; /* sched_pickcpu(); */ 773113357Sjeff child->ke_runq = NULL; 774113357Sjeff 775113357Sjeff /* 776113357Sjeff * Claim that we've been running for one second for statistical 777113357Sjeff * purposes. 
778113357Sjeff */ 779113357Sjeff child->ke_ticks = 0; 780113357Sjeff child->ke_ltick = ticks; 781113357Sjeff child->ke_ftick = ticks - hz; 782113357Sjeff} 783113357Sjeff 784113357Sjeffvoid 785113357Sjeffsched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child) 786113357Sjeff{ 787109864Sjeff /* XXX Need something better here */ 788110645Sjeff if (kg->kg_slptime > kg->kg_runtime) { 789111857Sjeff child->kg_slptime = SCHED_DYN_RANGE; 790111857Sjeff child->kg_runtime = kg->kg_slptime / SCHED_DYN_RANGE; 791110645Sjeff } else { 792111857Sjeff child->kg_runtime = SCHED_DYN_RANGE; 793111857Sjeff child->kg_slptime = kg->kg_runtime / SCHED_DYN_RANGE; 794110645Sjeff } 795113357Sjeff 796109864Sjeff child->kg_user_pri = kg->kg_user_pri; 797113357Sjeff child->kg_nice = kg->kg_nice; 798113357Sjeff} 799109864Sjeff 800113357Sjeffvoid 801113357Sjeffsched_fork_thread(struct thread *td, struct thread *child) 802113357Sjeff{ 803113357Sjeff} 804113357Sjeff 805113357Sjeffvoid 806113357Sjeffsched_class(struct ksegrp *kg, int class) 807113357Sjeff{ 808113357Sjeff struct kseq *kseq; 809113357Sjeff struct kse *ke; 810113357Sjeff 811113357Sjeff if (kg->kg_pri_class == class) 812113357Sjeff return; 813113357Sjeff 814113357Sjeff FOREACH_KSE_IN_GROUP(kg, ke) { 815113357Sjeff if (ke->ke_state != KES_ONRUNQ && 816113357Sjeff ke->ke_state != KES_THREAD) 817113357Sjeff continue; 818113357Sjeff kseq = KSEQ_CPU(ke->ke_cpu); 819113357Sjeff 820113357Sjeff kseq->ksq_loads[kg->kg_pri_class]--; 821113357Sjeff kseq->ksq_loads[class]++; 822113357Sjeff 823113357Sjeff if (kg->kg_pri_class == PRI_TIMESHARE) 824113357Sjeff kseq_nice_rem(kseq, kg->kg_nice); 825113357Sjeff else if (class == PRI_TIMESHARE) 826113357Sjeff kseq_nice_add(kseq, kg->kg_nice); 827109970Sjeff } 828109970Sjeff 829113357Sjeff kg->kg_pri_class = class; 830109864Sjeff} 831109864Sjeff 832109864Sjeff/* 833109864Sjeff * Return some of the child's priority and interactivity to the parent. 
 */
void
sched_exit(struct proc *p, struct proc *child)
{
	struct ksegrp *kg;
	struct kse *ke;

	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * NOTE(review): despite the comment block above, nothing is
	 * currently handed back to the parent; this only removes the
	 * exiting child's kse from its cpu's queue accounting.
	 */
	kg = FIRST_KSEGRP_IN_PROC(child);
	ke = FIRST_KSE_IN_KSEGRP(kg);
	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

/*
 * Statistics-clock hook: charge the current kse with a tick, update
 * pctcpu bookkeeping, account runtime for interactivity, and requeue
 * timeshare KSEs whose slice has expired.  Called with sched_lock held.
 */
void
sched_clock(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct thread *td;
#if 0
	struct kse *nke;
#endif

	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	td = ke->ke_thread;
	kg = ke->ke_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	/* The idle kse is never sliced or requeued. */
	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
	    ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);

	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
	kseq = KSEQ_SELF();
#if 0
	/*
	 * NOTE(review): this disabled block is incomplete — the second
	 * condition is missing an operator before SCHED_PRIO_SLOP and
	 * would not compile if enabled.  Left as-is since it is dead code.
	 */
	if (kseq->ksq_load > 1 && (nke = kseq_choose(kseq)) != NULL) {
		if (sched_strict &&
		    nke->ke_thread->td_priority < td->td_priority)
			td->td_flags |= TDF_NEEDRESCHED;
		else if (nke->ke_thread->td_priority <
		    td->td_priority SCHED_PRIO_SLOP)

		if (nke->ke_thread->td_priority < td->td_priority)
			td->td_flags |= TDF_NEEDRESCHED;
	}
#endif
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute
	 * our interactivity.  (<<10 matches the fixed-point slptime scale.)
	 */
	kg->kg_runtime += tickincr << 10;

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
#ifdef SMP
	kseq->ksq_rslice--;
#endif

	if (ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.  Interactive
	 * groups go back on the current queue; batch groups wait on 'next'.
	 */
	kseq_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

/*
 * Report whether any runnable work exists: first on our own queue, then
 * (SMP) on any other cpu's queue, since sched_choose() may steal from them.
 */
int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_load)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processor's KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load)
				return (1);
		}
	}
#endif
	return (0);
}

/*
 * On return to user mode, drop any priority boost (e.g. from priority
 * propagation) back to the ksegrp's computed user priority.
 */
void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

/*
 * Pick the next kse to run on this cpu.  If our queue is empty and SMP is
 * active, attempt to steal one from the most heavily loaded cpu's queue,
 * rebinding the stolen kse to this cpu.  Returns NULL if nothing runnable.
 */
struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;
#ifdef SMP
	int steal;

	steal = 0;
#endif

	kseq = KSEQ_SELF();
#ifdef SMP
retry:
#endif
	ke = kseq_choose(kseq);
	if (ke) {
		runq_remove(ke->ke_runq, ke);
		ke->ke_state = KES_THREAD;

		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
			CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
			    ke, ke->ke_runq, ke->ke_slice,
			    ke->ke_thread->td_priority);
		}
#ifdef SMP
		/*
		 * If we've stolen this thread we need to kill the pointer
		 * to the run queue and reset the cpu id.
		 */
		if (steal) {
			kseq_rem(kseq, ke);
			ke->ke_cpu = PCPU_GET(cpuid);
			kseq_add(KSEQ_SELF(), ke);
		}
#endif
		return (ke);
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		steal = 1;
		if ((kseq = kseq_load_highest()) != NULL)
			goto retry;
	}
#endif

	return (NULL);
}

/*
 * Place a kse on a run queue, choosing the queue and slice by priority
 * class: ithd/realtime always go on the current queue with a full slice,
 * timeshare goes on current or next per its interactivity, and idle-class
 * KSEs normally sit on the idle queue (unless priority-propagated up).
 */
void
sched_add(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	kg = ke->ke_ksegrp;

	/*
	 * NOTE(review): debugger tripwire — a non-NULL ke_runq here means
	 * the kse still thinks it is queued; drop into DDB to investigate.
	 */
	if (ke->ke_runq)
		Debugger("hrm?");

	switch (kg->kg_pri_class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		kseq = KSEQ_SELF();
		if (ke->ke_runq == NULL)
			kseq_add(kseq, ke);
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		break;
	case PRI_TIMESHARE:
		kseq = KSEQ_CPU(ke->ke_cpu);
		if (ke->ke_runq == NULL) {
			if (SCHED_CURR(kg, ke))
				ke->ke_runq = kseq->ksq_curr;
			else
				ke->ke_runq = kseq->ksq_next;
			kseq_add(kseq, ke);
		}
		break;
	case PRI_IDLE:
		kseq = KSEQ_CPU(ke->ke_cpu);

		if (ke->ke_runq == NULL)
			kseq_add(kseq, ke);
		/*
		 * This is for priority prop.
		 */
		if (ke->ke_thread->td_priority < PRI_MAX_TIMESHARE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.\n");
		break;
	}

	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	runq_add(ke->ke_runq, ke);
}

/*
 * Remove a kse from its run queue and drop the associated accounting.
 */
void
sched_rem(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */
	/*
	 * NOTE(review): deliberate tripwire in this revision — sched_rem is
	 * not expected to be reached, so it panics unconditionally and the
	 * removal code below is currently unreachable.
	 */
	panic("WTF\n");

	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq = KSEQ_CPU(ke->ke_cpu);
	runq_remove(ke->ke_runq, ke);
	kseq_rem(kseq, ke);
}

/*
 * Compute the 4.3BSD-compatible %cpu estimate for a kse from its tick
 * history, and refresh p_swtime as a side effect.
 */
fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;

	pctcpu = 0;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rtick per second ? */
		rtick = ke->ke_ticks / SCHED_CPU_TIME;
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

/*
 * Sizing hooks: tell the allocator how much room each scheduler-private
 * extension needs alongside the core structure.
 */
int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}