sched_ule.c revision 110267
/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/sched_ule.c 110267 2003-02-03 05:30:07Z jeff $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)
/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
	int	ske_slice;
	struct	runq *ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int	ske_ltick;	/* Last tick that we were running on */
	int	ske_ftick;	/* First tick that we were running on */
	int	ske_ticks;	/* Tick count */
	u_char	ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu

struct kg_sched {
	int	skg_slptime;
};
#define	kg_slptime	kg_sched->skg_slptime

struct td_sched {
	int	std_slptime;
	int	std_schedflag;
};
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

/* Thread was counted as being in short term sleep. */
#define	TD_SCHED_BLOAD	0x0001

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define	SCHED_PRI_NRESV	40
#define	SCHED_PRI_RANGE	((PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) - \
    SCHED_PRI_NRESV)

/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_MAX:	Maximum amount of accrued sleep time.
 * SLP_SCALE:	Scale the number of ticks slept across the dynamic priority
 *		range.
 * SLP_TOPRI:	Convert a number of ticks slept into a priority value.
 * SLP_DECAY:	Reduce the sleep time to 50% for every granted slice.
 */
#define	SCHED_SLP_MAX	(hz * 2)
#define	SCHED_SLP_SCALE(slp)	(((slp) * SCHED_PRI_RANGE) / SCHED_SLP_MAX)
#define	SCHED_SLP_TOPRI(slp)	(SCHED_PRI_RANGE - SCHED_SLP_SCALE((slp)) + \
    SCHED_PRI_NRESV / 2)
#define	SCHED_SLP_DECAY(slp)	((slp) / 2)	/* XXX Multiple kses break */
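/*
 * Worked example of the sleep-to-priority mapping (illustrative only, not
 * part of the original file): assume hz = 100 and the timeshare range of
 * the day, PRI_MIN_TIMESHARE = 160 and PRI_MAX_TIMESHARE = 223.  Then
 * SCHED_SLP_MAX = 200 ticks and SCHED_PRI_RANGE = 64 - 40 = 24.
 *
 *	slp = 0   (never sleeps):	SLP_TOPRI = 24 - 0  + 20 = 44
 *	slp = 100 (sleeps half):	SLP_TOPRI = 24 - 12 + 20 = 32
 *	slp = 200 (max sleeper):	SLP_TOPRI = 24 - 24 + 20 = 20
 *
 * sched_priority() below adds PRI_MIN_TIMESHARE and kg_nice to this, so
 * only nice values of +/-20 reach the 20 reserved priorities on each end.
 */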
/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per unit of pri or slp.
 * PRI_TOSLICE:	Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE:	Compute a slice size that is inversely proportional to the
 *		amount of time slept.  (smaller slices for interactive ksegs)
 * PRI_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the priority.
 * SLP_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the sleep time.
 */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 4)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_PRI_TOSLICE(pri) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define	SCHED_SLP_TOSLICE(slp) \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_SLP_MAX))
#define	SCHED_SLP_COMP(slice)	(((slice) / 5) * 3)	/* 60% */
#define	SCHED_PRI_COMP(slice)	(((slice) / 5) * 2)	/* 40% */

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define	SCHED_CURR(kg)	((kg)->kg_slptime > (hz / 4) || \
    (kg)->kg_pri_class != PRI_TIMESHARE)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */
#define	SCHED_CPU_TIME	60
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - a pair of runqs per processor.
 */
struct kseq {
	struct	runq ksq_runqs[2];
	struct	runq *ksq_curr;
	struct	runq *ksq_next;
	int	ksq_load;	/* Total runnable */
#ifdef SMP
	unsigned int	ksq_rslices;	/* Slices on run queue */
	unsigned int	ksq_bload;	/* Threads waiting on IO */
#endif
};
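/*
 * Worked slice example (illustrative, assuming hz = 100, which gives
 * SCHED_SLICE_MIN = 1, SCHED_SLICE_MAX = 25, SCHED_SLICE_RANGE = 25):
 *
 *	CPU hog, kg_slptime = 0:	SLP_TOSLICE(0)   = 25 ticks
 *	Interactive, kg_slptime = 200:	SLP_TOSLICE(200) = 0 ticks
 *
 * The final slice blends 60% of the sleep-derived size with 40% of the
 * priority-derived size; e.g. sslice = 25 and pslice = 0 gives
 * (25/5)*3 + (0/5)*2 = 15 ticks.  sched_slice() then clamps the result
 * into [SLICE_MIN, SLICE_MAX], so batch work gets long slices and
 * interactive work gets short ones (but runs far more often).
 */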
/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse *kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq *kseq_load_highest(void);
#endif

static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
	runq_add(ke->ke_runq, ke);
	kseq->ksq_load++;
#ifdef SMP
	kseq->ksq_rslices += ke->ke_slice;
#endif
}

static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_load--;
	runq_remove(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload++;
}

static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload--;
}

struct kseq *
kseq_load_highest(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	cpu = 0;
	load = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load > load) {
			load = kseq->ksq_load;
			cpu = i;
		}
	}
	if (load)
		return (KSEQ_CPU(cpu));

	return (NULL);
}
#endif

struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;
		ke = runq_choose(kseq->ksq_curr);
	}

	return (ke);
}
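/*
 * The pair of runqs above implements a simple epoch scheme: kses whose
 * slices expire are queued on ksq_next while ksq_curr drains, and
 * kseq_choose() swaps the two pointers once ksq_curr is empty, so every
 * runnable kse is serviced once per epoch before any kse is serviced
 * twice.  A compiled-out sketch of the consumer pattern (dispatch() is
 * hypothetical; sched_choose() below is the real consumer):
 */
#if 0
	struct kseq *kseq = KSEQ_SELF();
	struct kse *ke;

	while ((ke = kseq_choose(kseq)) != NULL) {
		kseq_rem(kseq, ke);	/* dequeue from whichever runq */
		dispatch(ke);		/* hypothetical: run the kse */
	}
#endif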
static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
	kseq->ksq_load = 0;
#ifdef SMP
	kseq->ksq_rslices = 0;
	kseq->ksq_bload = 0;
#endif
}

static void
sched_setup(void *dummy)
{
	int i;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static int
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return (kg->kg_user_pri);

	pri = SCHED_SLP_TOPRI(kg->kg_slptime);
	CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
	    kg->kg_slptime, pri);

	pri += PRI_MIN_TIMESHARE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the process priority.
 */
static int
sched_slice(struct ksegrp *kg)
{
	int pslice;
	int sslice;
	int slice;
	int pri;

	pri = kg->kg_user_pri;
	pri -= PRI_MIN_TIMESHARE;
	pslice = SCHED_PRI_TOSLICE(pri);
	sslice = SCHED_SLP_TOSLICE(kg->kg_slptime);
	slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);
	kg->kg_slptime = SCHED_SLP_DECAY(kg->kg_slptime);

	CTR4(KTR_RUNQ,
	    "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
	    pri, sslice, pslice, slice);

	if (slice < SCHED_SLICE_MIN)
		slice = SCHED_SLICE_MIN;
	else if (slice > SCHED_SLICE_MAX)
		slice = SCHED_SLICE_MAX;

	return (slice);
}

int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}
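/*
 * Because sched_slice() halves kg_slptime on every grant (SLP_DECAY),
 * accrued sleep credit decays geometrically once a kseg turns CPU bound.
 * For example, starting from the maximum of 200 ticks (hz = 100):
 *
 *	grant 1: 200 -> 100	grant 3: 50 -> 25	grant 5: 12 -> 6
 *	grant 2: 100 -> 50	grant 4: 25 -> 12	...
 *
 * so roughly eight consecutive slice grants erase all interactivity
 * history for the group.
 */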
void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
	    SCHED_CPU_TICKS;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	if (!smp_started)
		return (0);

	load = -1;
	cpu = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		/* Pick the cpu with the lowest load. */
		if (load == -1 || kseq->ksq_load < load) {
			cpu = i;
			load = kseq->ksq_load;
		}
	}

	CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#else
int
sched_pickcpu(void)
{
	return (0);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;
	struct runq *rq;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	td->td_priority = prio;

	if (TD_ON_RUNQ(td)) {
		rq = ke->ke_runq;

		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

void
sched_switchout(struct thread *td)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = ke->ke_oncpu;
	ke->ke_oncpu = NOCPU;
	ke->ke_flags &= ~KEF_NEEDRESCHED;

	if (TD_IS_RUNNING(td)) {
		setrunqueue(td);
		return;
	} else
		td->td_kse->ke_runq = NULL;

	/*
	 * We will not be on the run queue.  So we must be
	 * sleeping or similar.
	 */
	if (td->td_proc->p_flag & P_KSES)
		kse_reassign(ke);
}
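/*
 * Worked example for sched_pctcpu_update() (illustrative, assuming
 * hz = 100, so SCHED_CPU_TICKS = 6000): if a kse has been off the cpu
 * long enough that its window ke_ltick - ke_ftick has grown to 7000
 * ticks, the accumulated ke_ticks count is rescaled by roughly
 * 6000/7000 and ke_ftick is pulled up to ticks - 6000.  The per-window
 * average is preserved while history older than 60 seconds ages out.
 */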
void
sched_switchin(struct thread *td)
{
	/* struct kse *ke = td->td_kse; */
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
	if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
	    td->td_priority != td->td_ksegrp->kg_user_pri)
		curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
#endif
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct thread *td;

	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td) {
		td->td_kse->ke_flags |= KEF_NEEDRESCHED;
	}
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

	/*
	 * If this is an interactive task, clear its queue so it moves back
	 * on to curr when it wakes up.  Otherwise let it stay on the queue
	 * that it was assigned to.
	 */
	if (SCHED_CURR(td->td_kse->ke_ksegrp))
		td->td_kse->ke_runq = NULL;
#ifdef SMP
	if (td->td_priority < PZERO) {
		kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag |= TD_SCHED_BLOAD;
	}
#endif
}

void
sched_wakeup(struct thread *td)
{
	struct ksegrp *kg;

	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	kg = td->td_ksegrp;

	if (td->td_slptime) {
		kg->kg_slptime += ticks - td->td_slptime;
		if (kg->kg_slptime > SCHED_SLP_MAX)
			kg->kg_slptime = SCHED_SLP_MAX;
		td->td_priority = sched_priority(kg);
	}
	td->td_slptime = 0;
#ifdef SMP
	if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
		kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
		td->td_schedflag &= ~TD_SCHED_BLOAD;
	}
#endif
	setrunqueue(td);
#if SCHED_STRICT_RESCHED
	if (td->td_priority < curthread->td_priority)
		curthread->td_kse->ke_flags |= KEF_NEEDRESCHED;
#endif
}
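/*
 * Example of the wakeup path above (illustrative, hz = 100): a kseg that
 * blocks for 150 ticks has kg_slptime raised by 150 (clamped at
 * SCHED_SLP_MAX = 200), which sched_priority() converts into a better
 * (numerically lower) user priority.  Since kg_slptime now exceeds
 * hz / 4, SCHED_CURR() is true, so the kse goes back onto ksq_curr and
 * runs ahead of expired batch work in the current epoch.
 */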
/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
	struct kse *ckse;
	struct kse *pkse;

	mtx_assert(&sched_lock, MA_OWNED);
	ckse = FIRST_KSE_IN_KSEGRP(child);
	pkse = FIRST_KSE_IN_KSEGRP(kg);

	/* XXX Need something better here */
	child->kg_slptime = kg->kg_slptime;
	child->kg_user_pri = kg->kg_user_pri;

	if (pkse->ke_cpu != PCPU_GET(cpuid)) {
		printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
		printf("cpuid = %d\n", PCPU_GET(cpuid));
		Debugger("stop");
	}

	ckse->ke_slice = pkse->ke_slice;
	ckse->ke_cpu = pkse->ke_cpu;	/* sched_pickcpu(); */
	ckse->ke_runq = NULL;
	/*
	 * Claim that we've been running for one second for statistical
	 * purposes.
	 */
	ckse->ke_ticks = 0;
	ckse->ke_ltick = ticks;
	ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_slptime = child->kg_slptime;
	sched_priority(kg);
}
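/*
 * The synthetic one-second window set up in sched_fork() (ke_ticks = 0,
 * ke_ftick = ticks - hz) means a freshly forked kse reports 0% cpu until
 * it accumulates real run time, and it keeps sched_pctcpu_update() from
 * dividing by a zero-length window (ke_ltick - ke_ftick == hz).
 */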
void
sched_clock(struct thread *td)
{
	struct kse *ke;
#if SCHED_STRICT_RESCHED
	struct kse *nke;
	struct kseq *kseq;
#endif
	struct ksegrp *kg;

	ke = td->td_kse;
	kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks += 10000;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
#if SCHED_STRICT_RESCHED
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		ke->ke_flags |= KEF_NEEDRESCHED;
#endif
	/*
	 * We used a tick, decrease our total sleep time.  This decreases our
	 * "interactivity".
	 */
	if (kg->kg_slptime)
		kg->kg_slptime--;
	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	if (ke->ke_slice == 0) {
		td->td_priority = sched_priority(kg);
		ke->ke_slice = sched_slice(kg);
		ke->ke_flags |= KEF_NEEDRESCHED;
		ke->ke_runq = NULL;
	}
}

int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_load)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processors' KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

#if 0
		if (kseq->ksq_bload)
			return (0);
#endif

		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load)
				return (1);
		}
	}
#endif
	return (0);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
	ke = kseq_choose(kseq);

	if (ke) {
		ke->ke_state = KES_THREAD;
		kseq_rem(kseq, ke);
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
#if 0
		if (kseq->ksq_bload)
			return (NULL);
#endif
		/*
		 * Find the cpu with the highest load and steal one proc.
		 */
		kseq = kseq_load_highest();
		if (kseq == NULL)
			return (NULL);
		ke = kseq_choose(kseq);
		kseq_rem(kseq, ke);

		ke->ke_state = KES_THREAD;
		ke->ke_runq = NULL;
		ke->ke_cpu = PCPU_GET(cpuid);
	}
#endif
	return (ke);
}
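/*
 * Note on the steal path above: an idle cpu only migrates work after its
 * own kseq is completely empty, and the stolen kse is rebound to the
 * stealing cpu (ke_cpu), so subsequent sched_add() calls keep it on its
 * new queue.  With the currently disabled (#if 0) ksq_bload checks
 * enabled, a cpu with threads in short term sleep would decline to run
 * or steal work, holding capacity for its imminent wakeups.
 */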
void
sched_add(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	kseq = KSEQ_CPU(ke->ke_cpu);

	if (ke->ke_runq == NULL) {
		if (SCHED_CURR(ke->ke_ksegrp))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
	}
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_add(kseq, ke);
}

void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;

	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rticks per second? */
		rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000);
		pctcpu = (FSCALE * ((FSCALE * rtick) / realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}
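/*
 * Worked fixed-point example for sched_pctcpu() (illustrative, assuming
 * the traditional stathz = 128, FSHIFT = 11, FSCALE = 2048): a kse that
 * ran for half of the 60 second window saw about 64 sched_clock() calls
 * per second, each adding 10000 to ke_ticks, so
 *
 *	rtick  = ke_ticks / 600000 = 64
 *	pctcpu = (2048 * ((2048 * 64) / 128)) >> 11 = 1024
 *
 * i.e. 1024/2048 = 0.50 as a fixpt_t, which ps(1) reports as 50%.
 */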