/* sched_ule.c revision 111793 */
/*-
 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/sched_ule.c 111793 2003-03-03 05:29:09Z jeff $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
/*
 * NOTE(review): ccpu is a fixpt_t exported through SYSCTL_INT; this relies
 * on fixpt_t being int-sized -- confirm if fixpt_t ever changes width.
 */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

#define SCHED_STRICT_RESCHED    1

/*
 * These datastructures are allocated within their parent datastructure but
 * are scheduler specific.
 */

/* Per-KSE scheduler state, hung off struct kse. */
struct ke_sched {
        int             ske_slice;      /* Remaining time slice, in ticks */
        struct runq     *ske_runq;      /* Run queue this kse belongs on */
        /* The following variables are only used for pctcpu calculation */
        int             ske_ltick;      /* Last tick that we were running on */
        int             ske_ftick;      /* First tick that we were running on */
        int             ske_ticks;      /* Tick count */
        u_char          ske_cpu;        /* CPU this kse is bound to */
};
#define ke_slice        ke_sched->ske_slice
#define ke_runq         ke_sched->ske_runq
#define ke_ltick        ke_sched->ske_ltick
#define ke_ftick        ke_sched->ske_ftick
#define ke_ticks        ke_sched->ske_ticks
#define ke_cpu          ke_sched->ske_cpu

/* Per-ksegrp scheduler state: interactivity history lives here. */
struct kg_sched {
        int     skg_slptime;            /* Number of ticks we vol. slept */
        int     skg_runtime;            /* Number of ticks we were running */
};
#define kg_slptime      kg_sched->skg_slptime
#define kg_runtime      kg_sched->skg_runtime

/* Per-thread scheduler state. */
struct td_sched {
        int     std_slptime;
        int     std_schedflag;
};
#define td_slptime      td_sched->std_slptime
#define td_schedflag    td_sched->std_schedflag

#define TD_SCHED_BLOAD  0x0001          /*
                                         * thread was counted as being in short
                                         * term sleep.
                                         */

/* Static scheduler state for proc0/thread0, wired up before malloc works. */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define SCHED_PRI_RANGE         (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define SCHED_PRI_NRESV         40
#define SCHED_PRI_BASE          (SCHED_PRI_NRESV / 2)
#define SCHED_PRI_DYN           (SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define SCHED_PRI_DYN_HALF      (SCHED_PRI_DYN / 2)

/*
 * These determine how sleep time affects the priority of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * SLP_RATIO:	Compute a bounded ratio of slp time vs run time.
 * SLP_TOPRI:	Convert a number of ticks slept and ticks ran into a priority
 */
#define SCHED_SLP_RUN_MAX       ((hz * 30) * 1024)
#define SCHED_SLP_RUN_THROTTLE  (10)

/*
 * Return s/(b/SCHED_PRI_DYN_HALF): a ratio of sleep vs run time bounded by
 * scaling the divisor into the dynamic priority range.  Returns 0 when the
 * scaled divisor underflows to zero (b < SCHED_PRI_DYN_HALF).
 */
static __inline int
sched_slp_ratio(int b, int s)
{
        b /= SCHED_PRI_DYN_HALF;
        if (b == 0)
                return (0);
        s /= b;
        return (s);
}

#define SCHED_SLP_TOPRI(slp, run)                                       \
    ((((slp) > (run))?                                                  \
    sched_slp_ratio((slp), (run)):                                      \
    SCHED_PRI_DYN_HALF + (SCHED_PRI_DYN_HALF - sched_slp_ratio((run), (slp))))+ \
    SCHED_PRI_NRESV / 2)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number slices granted per unit of pri or slp.
 * PRI_TOSLICE:	Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE:	Compute a slice size that is inversely proportional to the
 *		amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the priority.
 * SLP_COMP:	This determines what component of the actual slice comes from
 *		the slice size computed from the sleep time.
 */
#define SCHED_SLICE_MIN         (hz / 100)
#define SCHED_SLICE_MAX         (hz / 4)
#define SCHED_SLICE_RANGE       (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define SCHED_SLICE_SCALE(val, max)     (((val) * SCHED_SLICE_RANGE) / (max))
#define SCHED_PRI_TOSLICE(pri)                                          \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define SCHED_SLP_TOSLICE(slp)                                          \
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_PRI_DYN))
#define SCHED_SLP_COMP(slice)   (((slice) / 5) * 3)     /* 60% */
#define SCHED_PRI_COMP(slice)   (((slice) / 5) * 2)     /* 40% */

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 *
 * XXX nice value should affect how interactive a kg is.
 */
#define SCHED_CURR(kg)  (((kg)->kg_slptime > (kg)->kg_runtime &&        \
        sched_slp_ratio((kg)->kg_slptime, (kg)->kg_runtime) > 4))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define SCHED_CPU_TIME  60
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
        struct runq     ksq_runqs[2];
        struct runq     *ksq_curr;      /* Queue being drained this round */
        struct runq     *ksq_next;      /* Queue filled for the next round */
        int             ksq_load;       /* Total runnable */
#ifdef SMP
        unsigned int    ksq_rslices;    /* Slices on run queue */
        unsigned int    ksq_bload;      /* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq     kseq_cpu[MAXCPU];
#define KSEQ_SELF()     (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x)     (&kseq_cpu[(x)])
#else
struct kseq     kseq_cpu;
#define KSEQ_SELF()     (&kseq_cpu)
#define KSEQ_CPU(x)     (&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

/*
 * Enqueue a kse on its assigned run queue and account for its load and
 * slice contribution.  Caller holds sched_lock.
 */
static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
        runq_add(ke->ke_runq, ke);
        kseq->ksq_load++;
#ifdef SMP
        kseq->ksq_rslices += ke->ke_slice;
#endif
}

/*
 * Remove a kse from its run queue, undoing the accounting done by
 * kseq_add().  Caller holds sched_lock.
 */
static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_load--;
        runq_remove(ke->ke_runq, ke);
#ifdef SMP
        kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
/* Count a thread entering a short-term (interruptible) sleep on this kseq. */
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload++;
}

/* Undo kseq_sleep() accounting when the thread wakes. */
static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
        kseq->ksq_bload--;
}

/*
 * Scan all present CPUs and return the kseq with the highest runnable load,
 * or NULL when every queue is empty.  Used by sched_choose() to steal work.
 */
struct kseq *
kseq_load_highest(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        cpu = 0;
        load = 0;

        /*
         * NOTE(review): this assumes mp_maxid is one past the highest CPU
         * id; if mp_maxid is the highest valid id the `<' comparison skips
         * the last CPU -- confirm against this branch's smp(4) semantics.
         */
        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_load > load) {
                        load = kseq->ksq_load;
                        cpu = i;
                }
        }
        if (load)
                return (KSEQ_CPU(cpu));

        return (NULL);
}
#endif

/*
 * Pick the next kse from a kseq.  When the current queue is empty the
 * current/next queues are swapped, which is what ages "next" work into
 * runnable work.  May still return NULL if both queues are empty.
 */
struct kse *
kseq_choose(struct kseq *kseq)
{
        struct kse *ke;
        struct runq *swap;

        if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
                swap = kseq->ksq_curr;
                kseq->ksq_curr = kseq->ksq_next;
                kseq->ksq_next = swap;
                ke = runq_choose(kseq->ksq_curr);
        }

        return (ke);
}

/* Initialize one per-CPU kseq: both runqs empty, all counters zero. */
static void
kseq_setup(struct kseq *kseq)
{
        kseq->ksq_curr = &kseq->ksq_runqs[0];
        kseq->ksq_next = &kseq->ksq_runqs[1];
        runq_init(kseq->ksq_curr);
        runq_init(kseq->ksq_next);
        kseq->ksq_load = 0;
#ifdef SMP
        kseq->ksq_rslices = 0;
        kseq->ksq_bload = 0;
#endif
}

/* SYSINIT hook: set up the per-CPU queues for every possible CPU slot. */
static void
sched_setup(void *dummy)
{
        int i;

        mtx_lock_spin(&sched_lock);
        /* init kseqs */
        for (i = 0; i < MAXCPU; i++)
                kseq_setup(KSEQ_CPU(i));
        mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.  Non-timesharing classes keep their existing user priority.
 * Updates and returns kg->kg_user_pri, clamped to the timeshare range.
 */
static int
sched_priority(struct ksegrp *kg)
{
        int pri;

        if (kg->kg_pri_class != PRI_TIMESHARE)
                return (kg->kg_user_pri);

        pri = SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime);
        CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d",
            kg->kg_slptime, pri);

        pri += PRI_MIN_TIMESHARE;
        pri += kg->kg_nice;

        if (pri > PRI_MAX_TIMESHARE)
                pri = PRI_MAX_TIMESHARE;
        else if (pri < PRI_MIN_TIMESHARE)
                pri = PRI_MIN_TIMESHARE;

        kg->kg_user_pri = pri;

        return (kg->kg_user_pri);
}

/*
 * Calculate a time slice based on the process priority.
 * The result blends a priority-derived slice (40%) with a sleep-derived
 * slice (60%) and is clamped to [SCHED_SLICE_MIN, SCHED_SLICE_MAX].
 * Also throttles the accumulated slp/run history as a side effect.
 */
static int
sched_slice(struct ksegrp *kg)
{
        int pslice;
        int sslice;
        int slice;
        int pri;

        pri = kg->kg_user_pri;
        pri -= PRI_MIN_TIMESHARE;
        pslice = SCHED_PRI_TOSLICE(pri);
        sslice = SCHED_PRI_TOSLICE(SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime));
/*
SCHED_SLP_TOSLICE(SCHED_SLP_RATIO(
	kg->kg_slptime, kg->kg_runtime));
*/
        slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice);

        CTR4(KTR_RUNQ,
            "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d",
            pri, sslice, pslice, slice);

        if (slice < SCHED_SLICE_MIN)
                slice = SCHED_SLICE_MIN;
        else if (slice > SCHED_SLICE_MAX)
                slice = SCHED_SLICE_MAX;

        /*
         * Every time we grant a new slice check to see if we need to scale
         * back the slp and run time in the kg. This will cause us to forget
         * old interactivity while maintaining the current ratio.
         */
        if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) {
                kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE;
                kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE;
        }

        return (slice);
}

/* Round-robin interval reported to the rest of the kernel, in ticks. */
int
sched_rr_interval(void)
{
        return (SCHED_SLICE_MAX);
}

/*
 * Adjust counters and watermark for pctcpu calc: rescale the tick count so
 * it reflects the last SCHED_CPU_TICKS window ending now.
 *
 * NOTE(review): the divide assumes ke_ltick != ke_ftick; no zero guard here
 * -- confirm callers can never invoke this with an empty tick window.
 */
void
sched_pctcpu_update(struct kse *ke)
{
        /*
         * Shift the tick count out so that the divide doesn't round away
         * our results.
         */
        ke->ke_ticks <<= 10;
        ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) *
                    SCHED_CPU_TICKS;
        ke->ke_ticks >>= 10;
        ke->ke_ltick = ticks;
        ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

#ifdef SMP
/* XXX Should be changed to kseq_load_lowest() */
/*
 * Pick a CPU for a new kse.
 *
 * NOTE(review): `load' starts at 0 and ksq_load is never negative, so the
 * `ksq_load < load' test can never fire and this always returns cpu 0.
 * It looks like it should track the LOWEST load (seed `load' from the
 * first present CPU or INT_MAX) -- confirm intent before relying on it.
 */
int
sched_pickcpu(void)
{
        struct kseq *kseq;
        int load;
        int cpu;
        int i;

        if (!smp_started)
                return (0);

        load = 0;
        cpu = 0;

        /* NOTE(review): same `i < mp_maxid' bound question as above. */
        for (i = 0; i < mp_maxid; i++) {
                if (CPU_ABSENT(i))
                        continue;
                kseq = KSEQ_CPU(i);
                if (kseq->ksq_load < load) {
                        cpu = i;
                        load = kseq->ksq_load;
                }
        }

        CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu);
        return (cpu);
}
#else
/* Uniprocessor: there is only CPU 0. */
int
sched_pickcpu(void)
{
        return (0);
}
#endif

/*
 * Change a thread's priority.  If it is on a run queue it is removed and
 * re-added so it lands in the bucket matching its new priority.
 */
void
sched_prio(struct thread *td, u_char prio)
{
        struct kse *ke;
        struct runq *rq;

        mtx_assert(&sched_lock, MA_OWNED);
        ke = td->td_kse;
        td->td_priority = prio;

        if (TD_ON_RUNQ(td)) {
                rq = ke->ke_runq;

                runq_remove(rq, ke);
                runq_add(rq, ke);
        }
}

/*
 * Called when a thread is switched out.  A still-runnable thread goes back
 * on a run queue; otherwise its kse may be handed to another thread (KSE
 * threading) and its runq assignment is cleared.
 */
void
sched_switchout(struct thread *td)
{
        struct kse *ke;

        mtx_assert(&sched_lock, MA_OWNED);

        ke = td->td_kse;

        td->td_last_kse = ke;
        td->td_lastcpu = ke->ke_oncpu;
        ke->ke_oncpu = NOCPU;
        td->td_flags &= ~TDF_NEEDRESCHED;

        if (TD_IS_RUNNING(td)) {
                setrunqueue(td);
                return;
        } else
                td->td_kse->ke_runq = NULL;

        /*
         * We will not be on the run queue. So we must be
         * sleeping or similar.
         */
        if (td->td_proc->p_flag & P_THREADED)
                kse_reassign(ke);
}

/*
 * Called when a thread is switched in: record the CPU it now runs on and,
 * with strict resched, force a resched if it is running above its computed
 * user priority.
 */
void
sched_switchin(struct thread *td)
{
        /* struct kse *ke = td->td_kse; */
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_kse->ke_oncpu = PCPU_GET(cpuid);
#if SCHED_STRICT_RESCHED
        if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE &&
            td->td_priority != td->td_ksegrp->kg_user_pri)
                curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Apply a new nice value to a ksegrp, recompute its priority, and flag
 * every thread in the group for reschedule.
 */
void
sched_nice(struct ksegrp *kg, int nice)
{
        struct thread *td;

        kg->kg_nice = nice;
        sched_priority(kg);
        FOREACH_THREAD_IN_GROUP(kg, td) {
                td->td_flags |= TDF_NEEDRESCHED;
        }
}

/*
 * Record the start of a sleep and the sleep priority.  Interactive tasks
 * have their queue assignment cleared so they re-enter on the curr queue;
 * short-term sleepers are counted in the per-cpu bload on SMP.
 */
void
sched_sleep(struct thread *td, u_char prio)
{
        mtx_assert(&sched_lock, MA_OWNED);

        td->td_slptime = ticks;
        td->td_priority = prio;

        /*
         * If this is an interactive task clear its queue so it moves back
         * on to curr when it wakes up. Otherwise let it stay on the queue
         * that it was assigned to.
         */
        if (SCHED_CURR(td->td_kse->ke_ksegrp))
                td->td_kse->ke_runq = NULL;
#ifdef SMP
        if (td->td_priority < PZERO) {
                kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag |= TD_SCHED_BLOAD;
        }
#endif
}

/*
 * Wake a thread: credit its ksegrp with the slept ticks (scaled by 1024 to
 * match kg_runtime units), recompute priority, undo bload accounting, and
 * put it back on a run queue.
 */
void
sched_wakeup(struct thread *td)
{
        mtx_assert(&sched_lock, MA_OWNED);

        /*
         * Let the kseg know how long we slept for. This is because process
         * interactivity behavior is modeled in the kseg.
         */
        if (td->td_slptime) {
                struct ksegrp *kg;

                kg = td->td_ksegrp;
                kg->kg_slptime += (ticks - td->td_slptime) * 1024;
                sched_priority(kg);
                td->td_slptime = 0;
        }
#ifdef SMP
        if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) {
                kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
                td->td_schedflag &= ~TD_SCHED_BLOAD;
        }
#endif
        setrunqueue(td);
#if SCHED_STRICT_RESCHED
        if (td->td_priority < curthread->td_priority)
                curthread->td_flags |= TDF_NEEDRESCHED;
#endif
}

/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct ksegrp *kg, struct ksegrp *child)
{
        struct kse *ckse;
        struct kse *pkse;

        mtx_assert(&sched_lock, MA_OWNED);
        ckse = FIRST_KSE_IN_KSEGRP(child);
        pkse = FIRST_KSE_IN_KSEGRP(kg);

        /* XXX Need something better here */
        if (kg->kg_slptime > kg->kg_runtime) {
                child->kg_slptime = SCHED_PRI_DYN;
                child->kg_runtime = kg->kg_slptime / SCHED_PRI_DYN;
        } else {
                child->kg_runtime = SCHED_PRI_DYN;
                child->kg_slptime = kg->kg_runtime / SCHED_PRI_DYN;
        }
#if 0
        child->kg_slptime = kg->kg_slptime;
        child->kg_runtime = kg->kg_runtime;
#endif
        child->kg_user_pri = kg->kg_user_pri;

#if 0
        if (pkse->ke_cpu != PCPU_GET(cpuid)) {
                printf("pkse->ke_cpu = %d\n", pkse->ke_cpu);
                printf("cpuid = %d", PCPU_GET(cpuid));
                Debugger("stop");
        }
#endif

        ckse->ke_slice = pkse->ke_slice;
        ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */
        ckse->ke_runq = NULL;
        /*
         * Claim that we've been running for one second for statistical
         * purposes.
         */
        ckse->ke_ticks = 0;
        ckse->ke_ltick = ticks;
        ckse->ke_ftick = ticks - hz;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
        /* XXX Need something better here */
        mtx_assert(&sched_lock, MA_OWNED);
        kg->kg_slptime = child->kg_slptime;
        kg->kg_runtime = child->kg_runtime;
        sched_priority(kg);
}

/*
 * Per-tick scheduler clock: charge the running thread one tick of cpu and
 * one tick of run history, and requeue/resched it when its slice expires.
 *
 * NOTE(review): td is dereferenced (td->td_kse) before the
 * KASSERT(td != NULL) below, so the assert can never catch a NULL td.
 */
void
sched_clock(struct thread *td)
{
        struct kse *ke;
#if SCHED_STRICT_RESCHED
        struct kse *nke;
        struct kseq *kseq;
#endif
        struct ksegrp *kg;


        ke = td->td_kse;
        kg = td->td_ksegrp;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((td != NULL), ("schedclock: null thread pointer"));

        /* Adjust ticks for pctcpu */
        ke->ke_ticks++;
        ke->ke_ltick = ticks;
        /* Go up to one second beyond our max and then trim back down */
        if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
                sched_pctcpu_update(ke);

        if (td->td_kse->ke_flags & KEF_IDLEKSE)
                return;

        /*
         * Check for a higher priority task on the run queue. This can happen
         * on SMP if another processor woke up a process on our runq.
         */
#if SCHED_STRICT_RESCHED
        kseq = KSEQ_SELF();
        nke = runq_choose(kseq->ksq_curr);

        if (nke && nke->ke_thread &&
            nke->ke_thread->td_priority < td->td_priority)
                td->td_flags |= TDF_NEEDRESCHED;
#endif
        /*
         * We used a tick; charge it to the ksegrp so that we can compute our
         * "interactivity".
         */
        kg->kg_runtime += 1024;

        /*
         * We used up one time slice.
         */
        ke->ke_slice--;
        /*
         * We're out of time, recompute priorities and requeue
         */
        if (ke->ke_slice == 0) {
                td->td_priority = sched_priority(kg);
                ke->ke_slice = sched_slice(kg);
                td->td_flags |= TDF_NEEDRESCHED;
                ke->ke_runq = NULL;
        }
}

/*
 * Report whether any runnable work exists for this CPU (or, on SMP, on any
 * CPU, since we may steal).
 */
int
sched_runnable(void)
{
        struct kseq *kseq;

        kseq = KSEQ_SELF();

        if (kseq->ksq_load)
                return (1);
#ifdef SMP
        /*
         * For SMP we may steal other processor's KSEs. Just search until we
         * verify that at least one other cpu has a runnable task.
         */
        if (smp_started) {
                int i;

#if 0
                if (kseq->ksq_bload)
                        return (0);
#endif

                /* NOTE(review): same `i < mp_maxid' bound question as above. */
                for (i = 0; i < mp_maxid; i++) {
                        if (CPU_ABSENT(i))
                                continue;
                        kseq = KSEQ_CPU(i);
                        if (kseq->ksq_load)
                                return (1);
                }
        }
#endif
        return (0);
}

/*
 * On return to userland, drop the thread back to its computed user priority
 * if a kernel-side priority boost is still in effect.
 */
void
sched_userret(struct thread *td)
{
        struct ksegrp *kg;

        kg = td->td_ksegrp;

        if (td->td_priority != kg->kg_user_pri) {
                mtx_lock_spin(&sched_lock);
                td->td_priority = kg->kg_user_pri;
                mtx_unlock_spin(&sched_lock);
        }
}

/*
 * Choose the next kse to run on this CPU.  Falls back to stealing from the
 * most loaded CPU on SMP when the local queues are empty; a stolen kse is
 * rebound to this CPU.  Returns NULL if nothing is runnable anywhere.
 */
struct kse *
sched_choose(void)
{
        struct kseq *kseq;
        struct kse *ke;

        kseq = KSEQ_SELF();
        ke = kseq_choose(kseq);

        if (ke) {
                ke->ke_state = KES_THREAD;
                kseq_rem(kseq, ke);
        }

#ifdef SMP
        if (ke == NULL && smp_started) {
#if 0
                if (kseq->ksq_bload)
                        return (NULL);
#endif
                /*
                 * Find the cpu with the highest load and steal one proc.
                 */
                kseq = kseq_load_highest();
                if (kseq == NULL)
                        return (NULL);
                ke = kseq_choose(kseq);
                kseq_rem(kseq, ke);

                ke->ke_state = KES_THREAD;
                ke->ke_runq = NULL;
                ke->ke_cpu = PCPU_GET(cpuid);
        }
#endif
        return (ke);
}

/*
 * Add a kse to a run queue.  Timeshare kses go to their bound CPU's queue
 * (curr or next depending on interactivity); everything else goes on the
 * local curr queue for low latency.
 */
void
sched_add(struct kse *ke)
{
        struct kseq *kseq;

        mtx_assert(&sched_lock, MA_OWNED);
        KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
        KASSERT((ke->ke_thread->td_kse != NULL),
            ("sched_add: No KSE on thread"));
        KASSERT(ke->ke_state != KES_ONRUNQ,
            ("sched_add: kse %p (%s) already in run queue", ke,
            ke->ke_proc->p_comm));
        KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
            ("sched_add: process swapped out"));

        /*
         * Timeshare threads get placed on the appropriate queue on their
         * bound cpu.
         */
        if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
                kseq = KSEQ_CPU(ke->ke_cpu);

                if (ke->ke_runq == NULL) {
                        if (SCHED_CURR(ke->ke_ksegrp))
                                ke->ke_runq = kseq->ksq_curr;
                        else
                                ke->ke_runq = kseq->ksq_next;
                }
        /*
         * If we're a real-time or interrupt thread place us on the curr
         * queue for the current processor. Hopefully this will yield the
         * lowest latency response.
         */
        } else {
                kseq = KSEQ_SELF();
                ke->ke_runq = kseq->ksq_curr;
        }
        ke->ke_ksegrp->kg_runq_kses++;
        ke->ke_state = KES_ONRUNQ;

        kseq_add(kseq, ke);
}

/*
 * Remove a kse from its run queue and clear its queue assignment.
 */
void
sched_rem(struct kse *ke)
{
        mtx_assert(&sched_lock, MA_OWNED);
        /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

        ke->ke_runq = NULL;
        ke->ke_state = KES_THREAD;
        ke->ke_ksegrp->kg_runq_kses--;

        kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

/*
 * Compute the fixed-point %cpu for a kse from its tick history over the
 * SCHED_CPU_TIME averaging window.  Also refreshes p_swtime as a side
 * effect.
 */
fixpt_t
sched_pctcpu(struct kse *ke)
{
        fixpt_t pctcpu;
        int realstathz;

        pctcpu = 0;
        realstathz = stathz ? stathz : hz;

        if (ke->ke_ticks) {
                int rtick;

                /* Update to account for time potentially spent sleeping */
                ke->ke_ltick = ticks;
                sched_pctcpu_update(ke);

                /* How many rtick per second ? */
                rtick = ke->ke_ticks / SCHED_CPU_TIME;
                pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
        }

        ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

        return (pctcpu);
}

/* Sizes reported to the allocator: base struct plus scheduler extension. */
int
sched_sizeof_kse(void)
{
        return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
        return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
        return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
        return (sizeof(struct thread) + sizeof(struct td_sched));
}