sched_ule.c revision 110226
/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/sched_ule.c 110226 2003-02-02 08:24:32Z scottl $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
	int		ske_slice;
	struct runq	*ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int		ske_ltick;	/* Last tick that we were running on */
	int		ske_ftick;	/* First tick that we were running on */
	int		ske_ticks;	/* Tick count */
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks

struct kg_sched {
	int	skg_slptime;
};
#define	kg_slptime	kg_sched->skg_slptime

struct td_sched {
	int	std_slptime;
};
#define	td_slptime	td_sched->std_slptime

struct ke_sched ke_sched;
struct kg_sched kg_sched;
struct td_sched td_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;
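
/*
 * The defines above are accessor shorthands for the scheduler-private
 * fields: given a struct kse pointer, ke->ke_slice expands to
 * ke->ke_sched->ske_slice, and likewise for the kg_ and td_ variants, so
 * the rest of this file can treat the private data as ordinary members.
 */
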
/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define	SCHED_PRI_NRESV	40
#define	SCHED_PRI_RANGE	((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) - \
    SCHED_PRI_NRESV)

/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_MAX:	Maximum amount of accrued sleep time.
 * SLP_SCALE:	Scale the number of ticks slept across the dynamic priority
 *		range.
 * SLP_TOPRI:	Convert a number of ticks slept into a priority value.
 * SLP_DECAY:	Reduce the sleep time to 50% for every granted slice.
 */
#define	SCHED_SLP_MAX	(hz * 2)
#define	SCHED_SLP_SCALE(slp)	(((slp) * SCHED_PRI_RANGE) / SCHED_SLP_MAX)
#define	SCHED_SLP_TOPRI(slp)	(SCHED_PRI_RANGE - SCHED_SLP_SCALE((slp)) + \
    SCHED_PRI_NRESV / 2)
#define	SCHED_SLP_DECAY(slp)	((slp) / 2)	/* XXX Multiple kses break */
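
/*
 * A worked example of the sleep-time mapping (illustrative only; it
 * assumes hz = 100 and the stock PRI_MIN_TIMESHARE/PRI_MAX_TIMESHARE
 * values of 160 and 223, which make SCHED_PRI_RANGE = 64 - 40 = 24):
 *
 *	SCHED_SLP_MAX        = 200 ticks (two seconds)
 *	SCHED_SLP_SCALE(100) = (100 * 24) / 200 = 12
 *	SCHED_SLP_TOPRI(100) = 24 - 12 + 20     = 32
 *
 * sched_priority() below then adds PRI_MIN_TIMESHARE and kg_nice, so
 * more accrued sleep yields a numerically lower (better) priority.
 */
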
/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per unit of pri or slp.
 * PRI_TOSLICE:	Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE:	Compute a slice size that is inversely proportional to the
 *		amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the priority.
 * SLP_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the sleep time.
 */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 10)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_PRI_TOSLICE(pri) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define	SCHED_SLP_TOSLICE(slp) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_SLP_MAX))
#define	SCHED_SLP_COMP(slice)	(((slice) / 5) * 3)	/* 60% */
#define	SCHED_PRI_COMP(slice)	(((slice) / 5) * 2)	/* 40% */
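
/*
 * A worked slice computation (illustrative only; assumes hz = 100, so
 * SCHED_SLICE_MIN = 1 tick, SCHED_SLICE_MAX = 10 ticks, and
 * SCHED_SLICE_RANGE = 10).  For a kseg at the best timeshare priority
 * (pri offset 0) with no accrued sleep (slp = 0):
 *
 *	pslice = 10 - (0 * 10) / SCHED_PRI_RANGE = 10
 *	sslice = 10 - (0 * 10) / SCHED_SLP_MAX   = 10
 *	slice  = (10 / 5) * 3 + (10 / 5) * 2     = 6 + 4 = 10 ticks
 *
 * A kseg holding the full SCHED_SLP_MAX of sleep gets sslice = 0, so
 * interactive ksegs receive shorter slices but run more often.
 */
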
/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define	SCHED_CURR(kg)	((kg)->kg_slptime > (hz / 4) || \
    (kg)->kg_pri_class != PRI_TIMESHARE)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	60
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
	struct runq	ksq_runqs[2];
	struct runq	*ksq_curr;
	struct runq	*ksq_next;
	int		ksq_load;	/* Total runnable */
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);

static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_load = 0;
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
}

static void
sched_setup(void *dummy)
{
	int i;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}
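
/*
 * Each kseq keeps two runqs so that expired kses can be parked on
 * ksq_next while ksq_curr drains; kseq_choose() below swaps the two
 * pointers once the current queue is empty.  SCHED_CURR() above decides
 * which queue a kse with no assigned runq is inserted on: ksegs with
 * more than hz / 4 ticks of accrued sleep, and all non-timeshare
 * classes, go back on the current queue so they run before the swap.
 */
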
/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return (kg->kg_user_pri);

	pri = SCHED_SLP_TOPRI(kg->kg_slptime);
	CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
	    kg->kg_slptime, pri);

	pri += PRI_MIN_TIMESHARE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the process priority.
 */
static int
sched_slice(struct ksegrp *kg)
{
	int pslice;
	int sslice;
	int slice;
	int pri;

	pri = kg->kg_user_pri;
	pri -= PRI_MIN_TIMESHARE;
	pslice = SCHED_PRI_TOSLICE(pri);
	sslice = SCHED_SLP_TOSLICE(kg->kg_slptime);
	slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);
	kg->kg_slptime = SCHED_SLP_DECAY(kg->kg_slptime);

	CTR4(KTR_RUNQ,
	    "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
	    pri, sslice, pslice, slice);

	if (slice < SCHED_SLICE_MIN)
		slice = SCHED_SLICE_MIN;
	else if (slice > SCHED_SLICE_MAX)
		slice = SCHED_SLICE_MAX;

	return (slice);
}

int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
	    SCHED_CPU_TICKS;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}
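
/*
 * The update above rescales ke_ticks as if the rate observed over the
 * real window (ke_ltick - ke_ftick) had been sustained for a full
 * SCHED_CPU_TICKS window, then slides the window forward to end at the
 * current tick.  For example, a kse that accumulated T ticks over two
 * windows' worth of time is left holding T / 2.
 */
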
#ifdef SMP
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	if (!smp_started)
		return (0);

	load = 0;
	cpu = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load < load) {
			cpu = i;
			load = kseq->ksq_load;
		}
	}

	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#else
int
sched_pickcpu(void)
{
	return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	td->td_priority = prio;

	if (TD_ON_RUNQ(td)) {
		rq = ke->ke_runq;

		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

void
sched_switchout(struct thread *td)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = ke->ke_oncpu;
	ke->ke_flags &= ~KEF_NEEDRESCHED;

	if (TD_IS_RUNNING(td)) {
		setrunqueue(td);
		return;
	} else
		td->td_kse->ke_runq = NULL;

	/*
	 * We will not be on the run queue.  So we must be
	 * sleeping or similar.
	 */
	if (td->td_proc->p_flag & P_KSES)
		kse_reassign(ke);
}

void
sched_switchin(struct thread *td)
{
	/* struct kse *ke = td->td_kse; */
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);	/* XXX */
	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
	    td->td_priority != td->td_ksegrp->kg_user_pri)
		curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct thread *td;

	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		td->td_kse->ke_flags |= KEF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

	/*
	 * If this is an interactive task clear its queue so it moves back
	 * on to curr when it wakes up.  Otherwise let it stay on the queue
	 * that it was assigned to.
	 */
	if (SCHED_CURR(td->td_kse->ke_ksegrp))
		td->td_kse->ke_runq = NULL;
#if 0
	if (td->td_priority < PZERO)
		kseq_cpu[td->td_kse->ke_oncpu].ksq_load++;
#endif
}

void
sched_wakeup(struct thread *td)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	kg = td->td_ksegrp;

	if (td->td_slptime) {
		kg->kg_slptime += ticks - td->td_slptime;
		if (kg->kg_slptime > SCHED_SLP_MAX)
			kg->kg_slptime = SCHED_SLP_MAX;
		td->td_priority = sched_priority(kg);
	}
	td->td_slptime = 0;
#if 0
	if (td->td_priority < PZERO)
		kseq_cpu[td->td_kse->ke_oncpu].ksq_load--;
#endif
	setrunqueue(td);
	if (td->td_priority < curthread->td_priority)
		curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
}
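
/*
 * An illustrative walk through the wakeup path above (assuming hz =
 * 100): a thread that blocks for half a second adds 50 ticks to
 * kg_slptime, capped at SCHED_SLP_MAX = 200, pushing the kseg toward
 * the interactive end of the range computed by sched_priority().
 * sched_clock() and SCHED_SLP_DECAY() eat that credit back as the
 * thread consumes cpu time.
 */
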
/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
	struct kse *ckse;
	struct kse *pkse;

	mtx_assert(&sched_lock, MA_OWNED);
	ckse = FIRST_KSE_IN_KSEGRP(child);
	pkse = FIRST_KSE_IN_KSEGRP(kg);

	/* XXX Need something better here */
	child->kg_slptime = kg->kg_slptime;
	child->kg_user_pri = kg->kg_user_pri;

	if (pkse->ke_oncpu != PCPU_GET(cpuid)) {
		printf("pkse->ke_oncpu = %d\n", pkse->ke_oncpu);
		printf("cpuid = %d", PCPU_GET(cpuid));
		Debugger("stop");
	}

	ckse->ke_slice = pkse->ke_slice;
	ckse->ke_oncpu = pkse->ke_oncpu;	/* sched_pickcpu(); */
	ckse->ke_runq = NULL;
	/*
	 * Claim that we've been running for one second for statistical
	 * purposes.
	 */
	ckse->ke_ticks = 0;
	ckse->ke_ltick = ticks;
	ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_slptime = child->kg_slptime;
	sched_priority(kg);
}

void
sched_clock(struct thread *td)
{
	struct kse *ke;
	struct kse *nke;
	struct kseq *kseq;
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	ke = td->td_kse;
	kg = td->td_ksegrp;

	/* Adjust ticks for pctcpu */
	ke->ke_ticks += 10000;
	ke->ke_ltick = ticks;
	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		ke->ke_flags |= KEF_NEEDRESCHED;
	/*
	 * We used a tick, decrease our total sleep time.  This decreases our
	 * "interactivity".
	 */
	if (kg->kg_slptime)
		kg->kg_slptime--;
	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	if (ke->ke_slice == 0) {
		td->td_priority = sched_priority(kg);
		ke->ke_slice = sched_slice(kg);
		ke->ke_flags |= KEF_NEEDRESCHED;
		ke->ke_runq = NULL;
	}
}
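
/*
 * A note on the magic 10000 above: each sched_clock() call credits the
 * kse with 10000 units rather than one tick, so that sched_pctcpu()
 * below can divide by (SCHED_CPU_TIME * 10000) and recover an average
 * ticks-per-second rate without losing everything to integer division.
 */
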
int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_load)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processors' KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load)
				return (1);
		}
	}
#endif
	return (0);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;
		ke = runq_choose(kseq->ksq_curr);
	}

	return (ke);
}
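
/*
 * kseq_choose() above is where the two runqs trade places: once every
 * kse on ksq_curr has drained, the pointers swap and what was ksq_next
 * becomes the new current queue.  Kses that expired a slice were queued
 * on ksq_next by sched_add(), so they are guaranteed to run after the
 * swap, which keeps pure cpu hogs from being starved indefinitely.
 */
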
struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
	ke = kseq_choose(kseq);

	if (ke) {
		runq_remove(ke->ke_runq, ke);
		kseq->ksq_load--;
		ke->ke_state = KES_THREAD;
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
		int load;
		int cpu;
		int i;

		load = 0;
		cpu = 0;

		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load > load) {
				load = kseq->ksq_load;
				cpu = i;
			}
		}
		if (load) {
			kseq = KSEQ_CPU(cpu);
			ke = kseq_choose(kseq);
			kseq->ksq_load--;
			ke->ke_state = KES_THREAD;
			runq_remove(ke->ke_runq, ke);
			ke->ke_runq = NULL;
			ke->ke_oncpu = PCPU_GET(cpuid);
		}

	}
#endif
	return (ke);
}

void
sched_add(struct kse *ke)
{

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("runq_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("runq_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("runq_add: process swapped out"));

	if (ke->ke_runq == NULL) {
		struct kseq *kseq;

		kseq = KSEQ_CPU(ke->ke_oncpu);
		if (SCHED_CURR(ke->ke_ksegrp))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
	}
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	runq_add(ke->ke_runq, ke);
	KSEQ_CPU(ke->ke_oncpu)->ksq_load++;
}

void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	runq_remove(ke->ke_runq, ke);
	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	KSEQ_CPU(ke->ke_oncpu)->ksq_load--;
}

fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rticks per second? */
		rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}
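
/*
 * Illustrative figures for the formula above (assuming sched_clock()
 * fires once per statclock tick): a kse that ran on every tick of the
 * averaging window has rtick == realstathz, so pctcpu evaluates to
 * (FSCALE * ((FSCALE * realstathz) / realstathz)) >> FSHIFT == FSCALE,
 * i.e. 100%; one that ran half the time comes out near FSCALE / 2.
 */
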
int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}
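
/*
 * These sizeof hooks tell the allocator how much extra room to reserve
 * for the scheduler-private ke_sched, kg_sched, and td_sched structures
 * declared at the top of this file; this is presumably what the
 * "allocated within their parent data structure" comment refers to.
 */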