sched_ule.c revision 109971
1109864Sjeff/*- 2109864Sjeff * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org> 3109864Sjeff * All rights reserved. 4109864Sjeff * 5109864Sjeff * Redistribution and use in source and binary forms, with or without 6109864Sjeff * modification, are permitted provided that the following conditions 7109864Sjeff * are met: 8109864Sjeff * 1. Redistributions of source code must retain the above copyright 9109864Sjeff * notice unmodified, this list of conditions, and the following 10109864Sjeff * disclaimer. 11109864Sjeff * 2. Redistributions in binary form must reproduce the above copyright 12109864Sjeff * notice, this list of conditions and the following disclaimer in the 13109864Sjeff * documentation and/or other materials provided with the distribution. 14109864Sjeff * 15109864Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16109864Sjeff * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17109864Sjeff * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18109864Sjeff * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19109864Sjeff * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20109864Sjeff * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21109864Sjeff * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22109864Sjeff * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23109864Sjeff * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24109864Sjeff * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25109864Sjeff * 26109864Sjeff * $FreeBSD: head/sys/kern/sched_ule.c 109971 2003-01-28 09:30:17Z jeff $ 27109864Sjeff */ 28109864Sjeff 29109864Sjeff#include <sys/param.h> 30109864Sjeff#include <sys/systm.h> 31109864Sjeff#include <sys/kernel.h> 32109864Sjeff#include <sys/ktr.h> 33109864Sjeff#include <sys/lock.h> 34109864Sjeff#include <sys/mutex.h> 35109864Sjeff#include <sys/proc.h> 36109864Sjeff#include <sys/sched.h> 37109864Sjeff#include <sys/smp.h> 38109864Sjeff#include <sys/sx.h> 39109864Sjeff#include <sys/sysctl.h> 40109864Sjeff#include <sys/sysproto.h> 41109864Sjeff#include <sys/vmmeter.h> 42109864Sjeff#ifdef DDB 43109864Sjeff#include <ddb/ddb.h> 44109864Sjeff#endif 45109864Sjeff#ifdef KTRACE 46109864Sjeff#include <sys/uio.h> 47109864Sjeff#include <sys/ktrace.h> 48109864Sjeff#endif 49109864Sjeff 50109864Sjeff#include <machine/cpu.h> 51109864Sjeff 52109864Sjeff/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 53109864Sjeff/* XXX This is bogus compatability crap for ps */ 54109864Sjeffstatic fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 55109864SjeffSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 56109864Sjeff 57109864Sjeffstatic void sched_setup(void *dummy); 58109864SjeffSYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 59109864Sjeff 60109864Sjeff/* 61109864Sjeff * These datastructures are allocated within their parent datastructure but 62109864Sjeff * are scheduler specific. 63109864Sjeff */ 64109864Sjeff 65109864Sjeffstruct ke_sched { 66109864Sjeff int ske_slice; 67109864Sjeff struct runq *ske_runq; 68109864Sjeff /* The following variables are only used for pctcpu calculation */ 69109864Sjeff int ske_ltick; /* Last tick that we were running on */ 70109864Sjeff int ske_ftick; /* First tick that we were running on */ 71109864Sjeff int ske_ticks; /* Tick count */ 72109864Sjeff}; 73109864Sjeff#define ke_slice ke_sched->ske_slice 74109864Sjeff#define ke_runq ke_sched->ske_runq 75109864Sjeff#define ke_ltick ke_sched->ske_ltick 76109864Sjeff#define ke_ftick ke_sched->ske_ftick 77109864Sjeff#define ke_ticks ke_sched->ske_ticks 78109864Sjeff 79109864Sjeffstruct kg_sched { 80109864Sjeff int skg_slptime; 81109864Sjeff}; 82109864Sjeff#define kg_slptime kg_sched->skg_slptime 83109864Sjeff 84109864Sjeffstruct td_sched { 85109864Sjeff int std_slptime; 86109864Sjeff}; 87109864Sjeff#define td_slptime td_sched->std_slptime 88109864Sjeff 89109864Sjeffstruct ke_sched ke_sched; 90109864Sjeffstruct kg_sched kg_sched; 91109864Sjeffstruct td_sched td_sched; 92109864Sjeff 93109864Sjeffstruct ke_sched *kse0_sched = &ke_sched; 94109864Sjeffstruct kg_sched *ksegrp0_sched = &kg_sched; 95109864Sjeffstruct p_sched *proc0_sched = NULL; 96109864Sjeffstruct td_sched *thread0_sched = &td_sched; 97109864Sjeff 98109864Sjeff/* 99109864Sjeff * This priority range has 20 priorities on either end that are reachable 100109864Sjeff * only through nice values. 101109864Sjeff */ 102109864Sjeff#define SCHED_PRI_NRESV 40 103109864Sjeff#define SCHED_PRI_RANGE ((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) - \ 104109864Sjeff SCHED_PRI_NRESV) 105109864Sjeff 106109864Sjeff/* 107109864Sjeff * These determine how sleep time effects the priority of a process. 108109864Sjeff * 109109864Sjeff * SLP_MAX: Maximum amount of accrued sleep time. 110109864Sjeff * SLP_SCALE: Scale the number of ticks slept across the dynamic priority 111109864Sjeff * range. 112109864Sjeff * SLP_TOPRI: Convert a number of ticks slept into a priority value. 113109864Sjeff * SLP_DECAY: Reduce the sleep time to 50% for every granted slice. 114109864Sjeff */ 115109864Sjeff#define SCHED_SLP_MAX (hz * 2) 116109864Sjeff#define SCHED_SLP_SCALE(slp) (((slp) * SCHED_PRI_RANGE) / SCHED_SLP_MAX) 117109864Sjeff#define SCHED_SLP_TOPRI(slp) (SCHED_PRI_RANGE - SCHED_SLP_SCALE((slp)) + \ 118109864Sjeff SCHED_PRI_NRESV / 2) 119109864Sjeff#define SCHED_SLP_DECAY(slp) ((slp) / 2) /* XXX Multiple kses break */ 120109864Sjeff 121109864Sjeff/* 122109864Sjeff * These parameters and macros determine the size of the time slice that is 123109864Sjeff * granted to each thread. 124109864Sjeff * 125109864Sjeff * SLICE_MIN: Minimum time slice granted, in units of ticks. 126109864Sjeff * SLICE_MAX: Maximum time slice granted. 127109864Sjeff * SLICE_RANGE: Range of available time slices scaled by hz. 128109864Sjeff * SLICE_SCALE: The number slices granted per unit of pri or slp. 129109864Sjeff * PRI_TOSLICE: Compute a slice size that is proportional to the priority. 130109864Sjeff * SLP_TOSLICE: Compute a slice size that is inversely proportional to the 131109864Sjeff * amount of time slept. (smaller slices for interactive ksegs) 132109864Sjeff * PRI_COMP: This determines what fraction of the actual slice comes from 133109864Sjeff * the slice size computed from the priority. 134109864Sjeff * SLP_COMP: This determines what component of the actual slice comes from 135109864Sjeff * the slize size computed from the sleep time. 136109864Sjeff */ 137109864Sjeff#define SCHED_SLICE_MIN (hz / 100) 138109864Sjeff#define SCHED_SLICE_MAX (hz / 10) 139109864Sjeff#define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 140109864Sjeff#define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 141109864Sjeff#define SCHED_PRI_TOSLICE(pri) \ 142109864Sjeff (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE)) 143109864Sjeff#define SCHED_SLP_TOSLICE(slp) \ 144109864Sjeff (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_SLP_MAX)) 145109864Sjeff#define SCHED_SLP_COMP(slice) (((slice) / 5) * 3) /* 60% */ 146109864Sjeff#define SCHED_PRI_COMP(slice) (((slice) / 5) * 2) /* 40% */ 147109864Sjeff 148109864Sjeff/* 149109864Sjeff * This macro determines whether or not the kse belongs on the current or 150109864Sjeff * next run queue. 151109864Sjeff */ 152109864Sjeff#define SCHED_CURR(kg) ((kg)->kg_slptime > (hz / 4) || \ 153109864Sjeff (kg)->kg_pri_class != PRI_TIMESHARE) 154109864Sjeff 155109864Sjeff/* 156109864Sjeff * Cpu percentage computation macros and defines. 157109864Sjeff * 158109864Sjeff * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 159109864Sjeff * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 160109864Sjeff */ 161109864Sjeff 162109864Sjeff#define SCHED_CPU_TIME 60 163109864Sjeff#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 164109864Sjeff 165109864Sjeff/* 166109864Sjeff * kseq - pair of runqs per processor 167109864Sjeff */ 168109864Sjeff 169109864Sjeffstruct kseq { 170109864Sjeff struct runq ksq_runqs[2]; 171109864Sjeff struct runq *ksq_curr; 172109864Sjeff struct runq *ksq_next; 173109864Sjeff int ksq_load; /* Total runnable */ 174109864Sjeff}; 175109864Sjeff 176109864Sjeff/* 177109864Sjeff * One kse queue per processor. 178109864Sjeff */ 179109864Sjeffstruct kseq kseq_cpu[MAXCPU]; 180109864Sjeff 181109864Sjeffstatic int sched_slice(struct ksegrp *kg); 182109864Sjeffstatic int sched_priority(struct ksegrp *kg); 183109864Sjeffvoid sched_pctcpu_update(struct kse *ke); 184109970Sjeffvoid sched_check_runqs(void); 185109864Sjeffint sched_pickcpu(void); 186109864Sjeff 187109864Sjeffstatic void 188109864Sjeffsched_setup(void *dummy) 189109864Sjeff{ 190109864Sjeff int i; 191109864Sjeff 192109864Sjeff mtx_lock_spin(&sched_lock); 193109864Sjeff /* init kseqs */ 194109864Sjeff for (i = 0; i < MAXCPU; i++) { 195109864Sjeff kseq_cpu[i].ksq_load = 0; 196109864Sjeff kseq_cpu[i].ksq_curr = &kseq_cpu[i].ksq_runqs[0]; 197109864Sjeff kseq_cpu[i].ksq_next = &kseq_cpu[i].ksq_runqs[1]; 198109864Sjeff runq_init(kseq_cpu[i].ksq_curr); 199109864Sjeff runq_init(kseq_cpu[i].ksq_next); 200109864Sjeff } 201109864Sjeff mtx_unlock_spin(&sched_lock); 202109864Sjeff} 203109864Sjeff 204109864Sjeff/* 205109864Sjeff * Scale the scheduling priority according to the "interactivity" of this 206109864Sjeff * process. 207109864Sjeff */ 208109864Sjeffstatic int 209109864Sjeffsched_priority(struct ksegrp *kg) 210109864Sjeff{ 211109864Sjeff int pri; 212109864Sjeff 213109864Sjeff if (kg->kg_pri_class != PRI_TIMESHARE) 214109864Sjeff return (kg->kg_user_pri); 215109864Sjeff 216109864Sjeff pri = SCHED_SLP_TOPRI(kg->kg_slptime); 217109864Sjeff CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d", 218109864Sjeff kg->kg_slptime, pri); 219109864Sjeff 220109864Sjeff pri += PRI_MIN_TIMESHARE; 221109864Sjeff pri += kg->kg_nice; 222109864Sjeff 223109864Sjeff if (pri > PRI_MAX_TIMESHARE) 224109864Sjeff pri = PRI_MAX_TIMESHARE; 225109864Sjeff else if (pri < PRI_MIN_TIMESHARE) 226109864Sjeff pri = PRI_MIN_TIMESHARE; 227109864Sjeff 228109864Sjeff kg->kg_user_pri = pri; 229109864Sjeff 230109864Sjeff return (kg->kg_user_pri); 231109864Sjeff} 232109864Sjeff 233109864Sjeff/* 234109864Sjeff * Calculate a time slice based on the process priority. 235109864Sjeff */ 236109864Sjeffstatic int 237109864Sjeffsched_slice(struct ksegrp *kg) 238109864Sjeff{ 239109864Sjeff int pslice; 240109864Sjeff int sslice; 241109864Sjeff int slice; 242109864Sjeff int pri; 243109864Sjeff 244109864Sjeff pri = kg->kg_user_pri; 245109864Sjeff pri -= PRI_MIN_TIMESHARE; 246109864Sjeff pslice = SCHED_PRI_TOSLICE(pri); 247109864Sjeff sslice = SCHED_SLP_TOSLICE(kg->kg_slptime); 248109864Sjeff slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice); 249109864Sjeff kg->kg_slptime = SCHED_SLP_DECAY(kg->kg_slptime); 250109864Sjeff 251109864Sjeff CTR4(KTR_RUNQ, 252109864Sjeff "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d", 253109864Sjeff pri, sslice, pslice, slice); 254109864Sjeff 255109864Sjeff if (slice < SCHED_SLICE_MIN) 256109864Sjeff slice = SCHED_SLICE_MIN; 257109864Sjeff else if (slice > SCHED_SLICE_MAX) 258109864Sjeff slice = SCHED_SLICE_MAX; 259109864Sjeff 260109864Sjeff return (slice); 261109864Sjeff} 262109864Sjeff 263109864Sjeffint 264109864Sjeffsched_rr_interval(void) 265109864Sjeff{ 266109864Sjeff return (SCHED_SLICE_MAX); 267109864Sjeff} 268109864Sjeff 269109864Sjeffvoid 270109864Sjeffsched_pctcpu_update(struct kse *ke) 271109864Sjeff{ 272109864Sjeff /* 273109864Sjeff * Adjust counters and watermark for pctcpu calc. 274109864Sjeff */ 275109864Sjeff ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) * 276109864Sjeff SCHED_CPU_TICKS; 277109864Sjeff ke->ke_ltick = ticks; 278109864Sjeff ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 279109864Sjeff} 280109864Sjeff 281109864Sjeff#ifdef SMP 282109864Sjeffint 283109864Sjeffsched_pickcpu(void) 284109864Sjeff{ 285109864Sjeff int cpu; 286109864Sjeff int load; 287109864Sjeff int i; 288109864Sjeff 289109864Sjeff if (!smp_started) 290109864Sjeff return (0); 291109864Sjeff 292109864Sjeff cpu = PCPU_GET(cpuid); 293109864Sjeff load = kseq_cpu[cpu].ksq_load; 294109864Sjeff 295109864Sjeff for (i = 0; i < mp_maxid; i++) { 296109864Sjeff if (CPU_ABSENT(i)) 297109864Sjeff continue; 298109864Sjeff if (kseq_cpu[i].ksq_load < load) { 299109864Sjeff cpu = i; 300109864Sjeff load = kseq_cpu[i].ksq_load; 301109864Sjeff } 302109864Sjeff } 303109864Sjeff 304109864Sjeff CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu); 305109864Sjeff return (cpu); 306109864Sjeff} 307109864Sjeff#else 308109864Sjeffint 309109864Sjeffsched_pickcpu(void) 310109864Sjeff{ 311109864Sjeff return (0); 312109864Sjeff} 313109864Sjeff#endif 314109864Sjeff 315109864Sjeffvoid 316109864Sjeffsched_prio(struct thread *td, u_char prio) 317109864Sjeff{ 318109864Sjeff struct kse *ke; 319109864Sjeff struct runq *rq; 320109864Sjeff 321109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 322109864Sjeff ke = td->td_kse; 323109864Sjeff td->td_priority = prio; 324109864Sjeff 325109864Sjeff if (TD_ON_RUNQ(td)) { 326109864Sjeff rq = ke->ke_runq; 327109864Sjeff 328109864Sjeff runq_remove(rq, ke); 329109864Sjeff runq_add(rq, ke); 330109864Sjeff } 331109864Sjeff} 332109864Sjeff 333109864Sjeffvoid 334109864Sjeffsched_switchout(struct thread *td) 335109864Sjeff{ 336109864Sjeff struct kse *ke; 337109864Sjeff 338109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 339109864Sjeff 340109864Sjeff ke = td->td_kse; 341109864Sjeff 342109864Sjeff td->td_last_kse = ke; 343109864Sjeff td->td_lastcpu = ke->ke_oncpu; 344109864Sjeff ke->ke_flags &= ~KEF_NEEDRESCHED; 345109864Sjeff 346109864Sjeff if (TD_IS_RUNNING(td)) { 347109864Sjeff setrunqueue(td); 348109864Sjeff return; 349109864Sjeff } else 350109864Sjeff td->td_kse->ke_runq = NULL; 351109864Sjeff 352109864Sjeff /* 353109864Sjeff * We will not be on the run queue. So we must be 354109864Sjeff * sleeping or similar. 355109864Sjeff */ 356109864Sjeff if (td->td_proc->p_flag & P_KSES) 357109864Sjeff kse_reassign(ke); 358109864Sjeff} 359109864Sjeff 360109864Sjeffvoid 361109864Sjeffsched_switchin(struct thread *td) 362109864Sjeff{ 363109864Sjeff /* struct kse *ke = td->td_kse; */ 364109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 365109864Sjeff 366109864Sjeff td->td_kse->ke_oncpu = PCPU_GET(cpuid); /* XXX */ 367109864Sjeff if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE && 368109864Sjeff td->td_priority != td->td_ksegrp->kg_user_pri) 369109864Sjeff curthread->td_kse->ke_flags |= KEF_NEEDRESCHED; 370109864Sjeff} 371109864Sjeff 372109864Sjeffvoid 373109864Sjeffsched_nice(struct ksegrp *kg, int nice) 374109864Sjeff{ 375109864Sjeff struct thread *td; 376109864Sjeff 377109864Sjeff kg->kg_nice = nice; 378109864Sjeff sched_priority(kg); 379109864Sjeff FOREACH_THREAD_IN_GROUP(kg, td) { 380109864Sjeff td->td_kse->ke_flags |= KEF_NEEDRESCHED; 381109864Sjeff } 382109864Sjeff} 383109864Sjeff 384109864Sjeffvoid 385109864Sjeffsched_sleep(struct thread *td, u_char prio) 386109864Sjeff{ 387109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 388109864Sjeff 389109864Sjeff td->td_slptime = ticks; 390109864Sjeff td->td_priority = prio; 391109864Sjeff 392109864Sjeff /* 393109864Sjeff * If this is an interactive task clear its queue so it moves back 394109864Sjeff * on to curr when it wakes up. Otherwise let it stay on the queue 395109864Sjeff * that it was assigned to. 396109864Sjeff */ 397109864Sjeff if (SCHED_CURR(td->td_kse->ke_ksegrp)) 398109864Sjeff td->td_kse->ke_runq = NULL; 399109864Sjeff} 400109864Sjeff 401109864Sjeffvoid 402109864Sjeffsched_wakeup(struct thread *td) 403109864Sjeff{ 404109864Sjeff struct ksegrp *kg; 405109864Sjeff 406109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 407109864Sjeff 408109864Sjeff /* 409109864Sjeff * Let the kseg know how long we slept for. This is because process 410109864Sjeff * interactivity behavior is modeled in the kseg. 411109864Sjeff */ 412109864Sjeff kg = td->td_ksegrp; 413109864Sjeff 414109864Sjeff if (td->td_slptime) { 415109864Sjeff kg->kg_slptime += ticks - td->td_slptime; 416109864Sjeff if (kg->kg_slptime > SCHED_SLP_MAX) 417109864Sjeff kg->kg_slptime = SCHED_SLP_MAX; 418109864Sjeff td->td_priority = sched_priority(kg); 419109864Sjeff } 420109864Sjeff td->td_slptime = 0; 421109864Sjeff setrunqueue(td); 422109864Sjeff if (td->td_priority < curthread->td_priority) 423109864Sjeff curthread->td_kse->ke_flags |= KEF_NEEDRESCHED; 424109864Sjeff} 425109864Sjeff 426109864Sjeff/* 427109864Sjeff * Penalize the parent for creating a new child and initialize the child's 428109864Sjeff * priority. 429109864Sjeff */ 430109864Sjeffvoid 431109864Sjeffsched_fork(struct ksegrp *kg, struct ksegrp *child) 432109864Sjeff{ 433109864Sjeff struct kse *ckse; 434109864Sjeff struct kse *pkse; 435109864Sjeff 436109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 437109864Sjeff ckse = FIRST_KSE_IN_KSEGRP(child); 438109864Sjeff pkse = FIRST_KSE_IN_KSEGRP(kg); 439109864Sjeff 440109864Sjeff /* XXX Need something better here */ 441109864Sjeff child->kg_slptime = kg->kg_slptime; 442109864Sjeff child->kg_user_pri = kg->kg_user_pri; 443109864Sjeff 444109970Sjeff if (pkse->ke_oncpu != PCPU_GET(cpuid)) { 445109970Sjeff printf("pkse->ke_oncpu = %d\n", pkse->ke_oncpu); 446109970Sjeff printf("cpuid = %d", PCPU_GET(cpuid)); 447109970Sjeff Debugger("stop"); 448109970Sjeff } 449109970Sjeff 450109864Sjeff ckse->ke_slice = pkse->ke_slice; 451109970Sjeff ckse->ke_oncpu = pkse->ke_oncpu; /* sched_pickcpu(); */ 452109864Sjeff ckse->ke_runq = NULL; 453109864Sjeff /* 454109864Sjeff * Claim that we've been running for one second for statistical 455109864Sjeff * purposes. 456109864Sjeff */ 457109864Sjeff ckse->ke_ticks = 0; 458109864Sjeff ckse->ke_ltick = ticks; 459109864Sjeff ckse->ke_ftick = ticks - hz; 460109864Sjeff} 461109864Sjeff 462109864Sjeff/* 463109864Sjeff * Return some of the child's priority and interactivity to the parent. 464109864Sjeff */ 465109864Sjeffvoid 466109864Sjeffsched_exit(struct ksegrp *kg, struct ksegrp *child) 467109864Sjeff{ 468109864Sjeff struct kseq *kseq; 469109864Sjeff struct kse *ke; 470109864Sjeff 471109864Sjeff /* XXX Need something better here */ 472109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 473109864Sjeff kg->kg_slptime = child->kg_slptime; 474109864Sjeff sched_priority(kg); 475109864Sjeff 476109864Sjeff /* 477109864Sjeff * We drop the load here so that the running process leaves us with a 478109864Sjeff * load of at least one. 479109864Sjeff */ 480109864Sjeff ke = FIRST_KSE_IN_KSEGRP(kg); 481109864Sjeff kseq = &kseq_cpu[ke->ke_oncpu]; 482109864Sjeff} 483109864Sjeff 484109864Sjeffint sched_clock_switches; 485109864Sjeff 486109864Sjeffvoid 487109864Sjeffsched_clock(struct thread *td) 488109864Sjeff{ 489109864Sjeff struct kse *ke; 490109970Sjeff#if 0 491109864Sjeff struct kse *nke; 492109970Sjeff#endif 493109864Sjeff struct ksegrp *kg; 494109864Sjeff struct kseq *kseq; 495109864Sjeff int cpu; 496109864Sjeff 497109864Sjeff cpu = PCPU_GET(cpuid); 498109864Sjeff kseq = &kseq_cpu[cpu]; 499109864Sjeff 500109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 501109864Sjeff KASSERT((td != NULL), ("schedclock: null thread pointer")); 502109864Sjeff ke = td->td_kse; 503109864Sjeff kg = td->td_ksegrp; 504109864Sjeff 505109971Sjeff ke->ke_ticks += 10000; 506109971Sjeff ke->ke_ltick = ticks; 507109971Sjeff /* Go up to one second beyond our max and then trim back down */ 508109971Sjeff if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 509109971Sjeff sched_pctcpu_update(ke); 510109971Sjeff 511109864Sjeff if (td->td_kse->ke_flags & KEF_IDLEKSE) { 512109864Sjeff#if 0 513109864Sjeff if (nke && nke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) { 514109864Sjeff printf("Idle running with %s on the runq!\n", 515109864Sjeff nke->ke_proc->p_comm); 516109864Sjeff Debugger("stop"); 517109864Sjeff } 518109864Sjeff#endif 519109864Sjeff return; 520109864Sjeff } 521109970Sjeff#if 0 522109970Sjeff nke = runq_choose(kseq->ksq_curr); 523109970Sjeff 524109864Sjeff if (nke && nke->ke_thread && 525109864Sjeff nke->ke_thread->td_priority < td->td_priority) { 526109864Sjeff sched_clock_switches++; 527109864Sjeff ke->ke_flags |= KEF_NEEDRESCHED; 528109864Sjeff } 529109970Sjeff#endif 530109864Sjeff 531109864Sjeff /* 532109864Sjeff * We used a tick, decrease our total sleep time. This decreases our 533109864Sjeff * "interactivity". 534109864Sjeff */ 535109864Sjeff if (kg->kg_slptime) 536109864Sjeff kg->kg_slptime--; 537109864Sjeff /* 538109864Sjeff * We used up one time slice. 539109864Sjeff */ 540109864Sjeff ke->ke_slice--; 541109864Sjeff /* 542109864Sjeff * We're out of time, recompute priorities and requeue 543109864Sjeff */ 544109864Sjeff if (ke->ke_slice == 0) { 545109864Sjeff struct kseq *kseq; 546109864Sjeff 547109864Sjeff kseq = &kseq_cpu[ke->ke_oncpu]; 548109864Sjeff 549109864Sjeff td->td_priority = sched_priority(kg); 550109864Sjeff ke->ke_slice = sched_slice(kg); 551109864Sjeff ke->ke_flags |= KEF_NEEDRESCHED; 552109864Sjeff ke->ke_runq = NULL; 553109864Sjeff } 554109864Sjeff} 555109864Sjeff 556109970Sjeffvoid sched_print_load(void); 557109970Sjeff 558109970Sjeffvoid 559109970Sjeffsched_print_load(void) 560109970Sjeff{ 561109970Sjeff int cpu; 562109970Sjeff 563109970Sjeff for (cpu = 0; cpu < mp_maxid; cpu++) { 564109970Sjeff if (CPU_ABSENT(cpu)) 565109970Sjeff continue; 566109970Sjeff printf("%d: %d\n", cpu, kseq_cpu[cpu].ksq_load); 567109970Sjeff } 568109970Sjeff} 569109970Sjeff 570109864Sjeffint 571109864Sjeffsched_runnable(void) 572109864Sjeff{ 573109864Sjeff struct kseq *kseq; 574109864Sjeff int cpu; 575109864Sjeff 576109864Sjeff cpu = PCPU_GET(cpuid); 577109864Sjeff kseq = &kseq_cpu[cpu]; 578109864Sjeff 579109970Sjeff if (runq_check(kseq->ksq_curr)) 580109970Sjeff return (1); 581109970Sjeff 582109970Sjeff if (runq_check(kseq->ksq_next)) 583109970Sjeff return (1); 584109970Sjeff#ifdef SMP 585109970Sjeff if (smp_started) { 586109970Sjeff int i; 587109970Sjeff 588109970Sjeff for (i = 0; i < mp_maxid; i++) { 589109970Sjeff if (CPU_ABSENT(i)) 590109970Sjeff continue; 591109970Sjeff if (kseq_cpu[i].ksq_load && i != cpu) 592109970Sjeff return (1); 593109970Sjeff } 594109970Sjeff } 595109970Sjeff#endif 596109970Sjeff return (0); 597109864Sjeff} 598109864Sjeff 599109864Sjeffvoid 600109864Sjeffsched_userret(struct thread *td) 601109864Sjeff{ 602109864Sjeff struct ksegrp *kg; 603109864Sjeff 604109864Sjeff kg = td->td_ksegrp; 605109864Sjeff 606109864Sjeff if (td->td_priority != kg->kg_user_pri) { 607109864Sjeff mtx_lock_spin(&sched_lock); 608109864Sjeff td->td_priority = kg->kg_user_pri; 609109864Sjeff mtx_unlock_spin(&sched_lock); 610109864Sjeff } 611109864Sjeff} 612109864Sjeff 613109970Sjeffvoid 614109970Sjeffsched_check_runqs(void) 615109970Sjeff{ 616109970Sjeff struct kseq *kseq; 617109970Sjeff int cpu; 618109970Sjeff 619109970Sjeff for (cpu = 0; cpu < mp_maxid; cpu++) { 620109970Sjeff if (CPU_ABSENT(cpu)) 621109970Sjeff continue; 622109970Sjeff kseq = &kseq_cpu[cpu]; 623109970Sjeff if (kseq->ksq_load != 624109970Sjeff (runq_depth(kseq->ksq_curr) + runq_depth(kseq->ksq_next))) { 625109970Sjeff printf("CPU: %d\tload: %d\tcurr: %d\tnext: %d\n", 626109970Sjeff cpu, kseq->ksq_load, runq_depth(kseq->ksq_curr), 627109970Sjeff runq_depth(kseq->ksq_next)); 628109970Sjeff Debugger("Imbalance"); 629109970Sjeff } 630109970Sjeff } 631109970Sjeff} 632109970Sjeff 633109970Sjeffstruct kse * sched_choose_kseq(struct kseq *kseq); 634109970Sjeff 635109864Sjeffstruct kse * 636109970Sjeffsched_choose_kseq(struct kseq *kseq) 637109864Sjeff{ 638109864Sjeff struct kse *ke; 639109864Sjeff struct runq *swap; 640109864Sjeff 641109864Sjeff if ((ke = runq_choose(kseq->ksq_curr)) == NULL) { 642109864Sjeff swap = kseq->ksq_curr; 643109864Sjeff kseq->ksq_curr = kseq->ksq_next; 644109864Sjeff kseq->ksq_next = swap; 645109864Sjeff ke = runq_choose(kseq->ksq_curr); 646109864Sjeff } 647109970Sjeff 648109970Sjeff return (ke); 649109970Sjeff} 650109970Sjeff 651109970Sjeffstruct kse * 652109970Sjeffsched_choose(void) 653109970Sjeff{ 654109970Sjeff struct kse *ke; 655109970Sjeff int cpu; 656109970Sjeff 657109970Sjeff cpu = PCPU_GET(cpuid); 658109970Sjeff ke = sched_choose_kseq(&kseq_cpu[cpu]); 659109970Sjeff 660109864Sjeff if (ke) { 661109864Sjeff runq_remove(ke->ke_runq, ke); 662109864Sjeff ke->ke_state = KES_THREAD; 663109970Sjeff#ifdef SMP 664109970Sjeff kseq_cpu[cpu].ksq_load--; 665109970Sjeff#if 0 666109970Sjeff sched_check_runqs(); 667109970Sjeff#endif 668109970Sjeff#endif 669109864Sjeff } 670109864Sjeff 671109970Sjeff#ifdef SMP 672109970Sjeff if (ke == NULL && smp_started) { 673109970Sjeff int load; 674109970Sjeff int me; 675109970Sjeff int i; 676109970Sjeff 677109970Sjeff me = cpu; 678109970Sjeff 679109970Sjeff /* 680109970Sjeff * Find the cpu with the highest load and steal one proc. 681109970Sjeff */ 682109970Sjeff for (load = 0, i = 0; i < mp_maxid; i++) { 683109970Sjeff if (CPU_ABSENT(i) || i == me) 684109970Sjeff continue; 685109970Sjeff if (kseq_cpu[i].ksq_load > load) { 686109970Sjeff load = kseq_cpu[i].ksq_load; 687109970Sjeff cpu = i; 688109970Sjeff } 689109970Sjeff } 690109970Sjeff if (load) { 691109970Sjeff ke = sched_choose_kseq(&kseq_cpu[cpu]); 692109970Sjeff kseq_cpu[cpu].ksq_load--; 693109970Sjeff ke->ke_state = KES_THREAD; 694109970Sjeff runq_remove(ke->ke_runq, ke); 695109970Sjeff ke->ke_runq = NULL; 696109970Sjeff ke->ke_oncpu = me; 697109970Sjeff } 698109970Sjeff 699109970Sjeff } 700109970Sjeff#endif 701109864Sjeff return (ke); 702109864Sjeff} 703109864Sjeff 704109864Sjeffvoid 705109864Sjeffsched_add(struct kse *ke) 706109864Sjeff{ 707109864Sjeff 708109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 709109864Sjeff KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE")); 710109864Sjeff KASSERT((ke->ke_thread->td_kse != NULL), 711109864Sjeff ("runq_add: No KSE on thread")); 712109864Sjeff KASSERT(ke->ke_state != KES_ONRUNQ, 713109864Sjeff ("runq_add: kse %p (%s) already in run queue", ke, 714109864Sjeff ke->ke_proc->p_comm)); 715109864Sjeff KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 716109864Sjeff ("runq_add: process swapped out")); 717109864Sjeff 718109864Sjeff 719109864Sjeff if (ke->ke_runq == NULL) { 720109970Sjeff struct kseq *kseq; 721109970Sjeff 722109970Sjeff kseq = &kseq_cpu[ke->ke_oncpu]; 723109864Sjeff if (SCHED_CURR(ke->ke_ksegrp)) 724109864Sjeff ke->ke_runq = kseq->ksq_curr; 725109864Sjeff else 726109864Sjeff ke->ke_runq = kseq->ksq_next; 727109864Sjeff } 728109864Sjeff ke->ke_ksegrp->kg_runq_kses++; 729109864Sjeff ke->ke_state = KES_ONRUNQ; 730109864Sjeff 731109864Sjeff runq_add(ke->ke_runq, ke); 732109970Sjeff#ifdef SMP 733109970Sjeff kseq_cpu[ke->ke_oncpu].ksq_load++; 734109970Sjeff#if 0 735109970Sjeff sched_check_runqs(); 736109970Sjeff#endif 737109970Sjeff#endif 738109864Sjeff} 739109864Sjeff 740109864Sjeffvoid 741109864Sjeffsched_rem(struct kse *ke) 742109864Sjeff{ 743109864Sjeff mtx_assert(&sched_lock, MA_OWNED); 744109864Sjeff /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */ 745109864Sjeff 746109864Sjeff runq_remove(ke->ke_runq, ke); 747109864Sjeff ke->ke_runq = NULL; 748109864Sjeff ke->ke_state = KES_THREAD; 749109864Sjeff ke->ke_ksegrp->kg_runq_kses--; 750109970Sjeff#ifdef SMP 751109970Sjeff kseq_cpu[ke->ke_oncpu].ksq_load--; 752109970Sjeff#if 0 753109970Sjeff sched_check_runqs(); 754109970Sjeff#endif 755109970Sjeff#endif 756109864Sjeff} 757109864Sjeff 758109864Sjefffixpt_t 759109864Sjeffsched_pctcpu(struct kse *ke) 760109864Sjeff{ 761109864Sjeff fixpt_t pctcpu; 762109864Sjeff 763109864Sjeff pctcpu = 0; 764109864Sjeff 765109864Sjeff if (ke->ke_ticks) { 766109864Sjeff int rtick; 767109864Sjeff 768109864Sjeff /* Update to account for time potentially spent sleeping */ 769109864Sjeff ke->ke_ltick = ticks; 770109864Sjeff sched_pctcpu_update(ke); 771109864Sjeff 772109864Sjeff /* How many rtick per second ? */ 773109864Sjeff rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000); 774109864Sjeff pctcpu = (FSCALE * ((FSCALE * rtick)/stathz)) >> FSHIFT; 775109864Sjeff } 776109864Sjeff 777109864Sjeff ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 778109864Sjeff 779109864Sjeff return (pctcpu); 780109864Sjeff} 781109864Sjeff 782109864Sjeffint 783109864Sjeffsched_sizeof_kse(void) 784109864Sjeff{ 785109864Sjeff return (sizeof(struct kse) + sizeof(struct ke_sched)); 786109864Sjeff} 787109864Sjeff 788109864Sjeffint 789109864Sjeffsched_sizeof_ksegrp(void) 790109864Sjeff{ 791109864Sjeff return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 792109864Sjeff} 793109864Sjeff 794109864Sjeffint 795109864Sjeffsched_sizeof_proc(void) 796109864Sjeff{ 797109864Sjeff return (sizeof(struct proc)); 798109864Sjeff} 799109864Sjeff 800109864Sjeffint 801109864Sjeffsched_sizeof_thread(void) 802109864Sjeff{ 803109864Sjeff return (sizeof(struct thread) + sizeof(struct td_sched)); 804109864Sjeff} 805