/* sched_ule.c revision 110646 */
1/*- 2 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 * 26 * $FreeBSD: head/sys/kern/sched_ule.c 110646 2003-02-10 14:11:23Z jeff $ 27 */ 28 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/kernel.h> 32#include <sys/ktr.h> 33#include <sys/lock.h> 34#include <sys/mutex.h> 35#include <sys/proc.h> 36#include <sys/sched.h> 37#include <sys/smp.h> 38#include <sys/sx.h> 39#include <sys/sysctl.h> 40#include <sys/sysproto.h> 41#include <sys/vmmeter.h> 42#ifdef DDB 43#include <ddb/ddb.h> 44#endif 45#ifdef KTRACE 46#include <sys/uio.h> 47#include <sys/ktrace.h> 48#endif 49 50#include <machine/cpu.h> 51 52/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 53/* XXX This is bogus compatability crap for ps */ 54static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 55SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); 56 57static void sched_setup(void *dummy); 58SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL) 59 60#define SCHED_STRICT_RESCHED 1 61 62/* 63 * These datastructures are allocated within their parent datastructure but 64 * are scheduler specific. 65 */ 66 67struct ke_sched { 68 int ske_slice; 69 struct runq *ske_runq; 70 /* The following variables are only used for pctcpu calculation */ 71 int ske_ltick; /* Last tick that we were running on */ 72 int ske_ftick; /* First tick that we were running on */ 73 int ske_ticks; /* Tick count */ 74 u_char ske_cpu; 75}; 76#define ke_slice ke_sched->ske_slice 77#define ke_runq ke_sched->ske_runq 78#define ke_ltick ke_sched->ske_ltick 79#define ke_ftick ke_sched->ske_ftick 80#define ke_ticks ke_sched->ske_ticks 81#define ke_cpu ke_sched->ske_cpu 82 83struct kg_sched { 84 int skg_slptime; /* Number of ticks we vol. 
slept */ 85 int skg_runtime; /* Number of ticks we were running */ 86}; 87#define kg_slptime kg_sched->skg_slptime 88#define kg_runtime kg_sched->skg_runtime 89 90struct td_sched { 91 int std_slptime; 92 int std_schedflag; 93}; 94#define td_slptime td_sched->std_slptime 95#define td_schedflag td_sched->std_schedflag 96 97#define TD_SCHED_BLOAD 0x0001 /* 98 * thread was counted as being in short 99 * term sleep. 100 */ 101struct td_sched td_sched; 102struct ke_sched ke_sched; 103struct kg_sched kg_sched; 104 105struct ke_sched *kse0_sched = &ke_sched; 106struct kg_sched *ksegrp0_sched = &kg_sched; 107struct p_sched *proc0_sched = NULL; 108struct td_sched *thread0_sched = &td_sched; 109 110/* 111 * This priority range has 20 priorities on either end that are reachable 112 * only through nice values. 113 */ 114#define SCHED_PRI_RANGE (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1) 115#define SCHED_PRI_NRESV 40 116#define SCHED_PRI_BASE (SCHED_PRI_NRESV / 2) 117#define SCHED_PRI_DYN (SCHED_PRI_RANGE - SCHED_PRI_NRESV) 118#define SCHED_PRI_DYN_HALF (SCHED_PRI_DYN / 2) 119 120/* 121 * These determine how sleep time effects the priority of a process. 122 * 123 * SLP_RUN_MAX: Maximum amount of sleep time + run time we'll accumulate 124 * before throttling back. 125 * SLP_RUN_THORTTLE: Divisor for reducing slp/run time. 126 * SLP_RATIO: Compute a bounded ratio of slp time vs run time. 127 * SLP_TOPRI: Convert a number of ticks slept and ticks ran into a priority 128 */ 129#define SCHED_SLP_RUN_MAX ((hz * 30) * 1024) 130#define SCHED_SLP_RUN_THROTTLE (10) 131static __inline int 132sched_slp_ratio(int b, int s) 133{ 134 b /= SCHED_PRI_DYN_HALF; 135 if (b == 0) 136 return (0); 137 s /= b; 138 return (s); 139} 140#define SCHED_SLP_TOPRI(slp, run) \ 141 ((((slp) > (run))? 
\ 142 sched_slp_ratio((slp), (run)): \ 143 SCHED_PRI_DYN_HALF + (SCHED_PRI_DYN_HALF - sched_slp_ratio((run), (slp))))+ \ 144 SCHED_PRI_NRESV / 2) 145/* 146 * These parameters and macros determine the size of the time slice that is 147 * granted to each thread. 148 * 149 * SLICE_MIN: Minimum time slice granted, in units of ticks. 150 * SLICE_MAX: Maximum time slice granted. 151 * SLICE_RANGE: Range of available time slices scaled by hz. 152 * SLICE_SCALE: The number slices granted per unit of pri or slp. 153 * PRI_TOSLICE: Compute a slice size that is proportional to the priority. 154 * SLP_TOSLICE: Compute a slice size that is inversely proportional to the 155 * amount of time slept. (smaller slices for interactive ksegs) 156 * PRI_COMP: This determines what fraction of the actual slice comes from 157 * the slice size computed from the priority. 158 * SLP_COMP: This determines what component of the actual slice comes from 159 * the slize size computed from the sleep time. 160 */ 161#define SCHED_SLICE_MIN (hz / 100) 162#define SCHED_SLICE_MAX (hz / 4) 163#define SCHED_SLICE_RANGE (SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1) 164#define SCHED_SLICE_SCALE(val, max) (((val) * SCHED_SLICE_RANGE) / (max)) 165#define SCHED_PRI_TOSLICE(pri) \ 166 (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE)) 167#define SCHED_SLP_TOSLICE(slp) \ 168 (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_PRI_DYN)) 169#define SCHED_SLP_COMP(slice) (((slice) / 5) * 3) /* 60% */ 170#define SCHED_PRI_COMP(slice) (((slice) / 5) * 2) /* 40% */ 171 172/* 173 * This macro determines whether or not the kse belongs on the current or 174 * next run queue. 175 * 176 * XXX nice value should effect how interactive a kg is. 177 */ 178#define SCHED_CURR(kg) (((kg)->kg_slptime > (kg)->kg_runtime && \ 179 sched_slp_ratio((kg)->kg_slptime, (kg)->kg_runtime) > 4) || \ 180 (kg)->kg_pri_class != PRI_TIMESHARE) 181 182/* 183 * Cpu percentage computation macros and defines. 
184 * 185 * SCHED_CPU_TIME: Number of seconds to average the cpu usage across. 186 * SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across. 187 */ 188 189#define SCHED_CPU_TIME 60 190#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME) 191 192/* 193 * kseq - pair of runqs per processor 194 */ 195 196struct kseq { 197 struct runq ksq_runqs[2]; 198 struct runq *ksq_curr; 199 struct runq *ksq_next; 200 int ksq_load; /* Total runnable */ 201#ifdef SMP 202 unsigned int ksq_rslices; /* Slices on run queue */ 203 unsigned int ksq_bload; /* Threads waiting on IO */ 204#endif 205}; 206 207/* 208 * One kse queue per processor. 209 */ 210#ifdef SMP 211struct kseq kseq_cpu[MAXCPU]; 212#define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)]) 213#define KSEQ_CPU(x) (&kseq_cpu[(x)]) 214#else 215struct kseq kseq_cpu; 216#define KSEQ_SELF() (&kseq_cpu) 217#define KSEQ_CPU(x) (&kseq_cpu) 218#endif 219 220static int sched_slice(struct ksegrp *kg); 221static int sched_priority(struct ksegrp *kg); 222void sched_pctcpu_update(struct kse *ke); 223int sched_pickcpu(void); 224 225/* Operations on per processor queues */ 226static struct kse * kseq_choose(struct kseq *kseq); 227static void kseq_setup(struct kseq *kseq); 228static __inline void kseq_add(struct kseq *kseq, struct kse *ke); 229static __inline void kseq_rem(struct kseq *kseq, struct kse *ke); 230#ifdef SMP 231static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke); 232static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke); 233struct kseq * kseq_load_highest(void); 234#endif 235 236static __inline void 237kseq_add(struct kseq *kseq, struct kse *ke) 238{ 239 runq_add(ke->ke_runq, ke); 240 kseq->ksq_load++; 241#ifdef SMP 242 kseq->ksq_rslices += ke->ke_slice; 243#endif 244} 245static __inline void 246kseq_rem(struct kseq *kseq, struct kse *ke) 247{ 248 kseq->ksq_load--; 249 runq_remove(ke->ke_runq, ke); 250#ifdef SMP 251 kseq->ksq_rslices -= ke->ke_slice; 252#endif 253} 254 255#ifdef SMP 256static __inline 
void 257kseq_sleep(struct kseq *kseq, struct kse *ke) 258{ 259 kseq->ksq_bload++; 260} 261 262static __inline void 263kseq_wakeup(struct kseq *kseq, struct kse *ke) 264{ 265 kseq->ksq_bload--; 266} 267 268struct kseq * 269kseq_load_highest(void) 270{ 271 struct kseq *kseq; 272 int load; 273 int cpu; 274 int i; 275 276 cpu = 0; 277 load = 0; 278 279 for (i = 0; i < mp_maxid; i++) { 280 if (CPU_ABSENT(i)) 281 continue; 282 kseq = KSEQ_CPU(i); 283 if (kseq->ksq_load > load) { 284 load = kseq->ksq_load; 285 cpu = i; 286 } 287 } 288 if (load) 289 return (KSEQ_CPU(cpu)); 290 291 return (NULL); 292} 293#endif 294 295struct kse * 296kseq_choose(struct kseq *kseq) 297{ 298 struct kse *ke; 299 struct runq *swap; 300 301 if ((ke = runq_choose(kseq->ksq_curr)) == NULL) { 302 swap = kseq->ksq_curr; 303 kseq->ksq_curr = kseq->ksq_next; 304 kseq->ksq_next = swap; 305 ke = runq_choose(kseq->ksq_curr); 306 } 307 308 return (ke); 309} 310 311 312static void 313kseq_setup(struct kseq *kseq) 314{ 315 kseq->ksq_curr = &kseq->ksq_runqs[0]; 316 kseq->ksq_next = &kseq->ksq_runqs[1]; 317 runq_init(kseq->ksq_curr); 318 runq_init(kseq->ksq_next); 319 kseq->ksq_load = 0; 320#ifdef SMP 321 kseq->ksq_rslices = 0; 322 kseq->ksq_bload = 0; 323#endif 324} 325 326static void 327sched_setup(void *dummy) 328{ 329 int i; 330 331 mtx_lock_spin(&sched_lock); 332 /* init kseqs */ 333 for (i = 0; i < MAXCPU; i++) 334 kseq_setup(KSEQ_CPU(i)); 335 mtx_unlock_spin(&sched_lock); 336} 337 338/* 339 * Scale the scheduling priority according to the "interactivity" of this 340 * process. 
341 */ 342static int 343sched_priority(struct ksegrp *kg) 344{ 345 int pri; 346 347 if (kg->kg_pri_class != PRI_TIMESHARE) 348 return (kg->kg_user_pri); 349 350 pri = SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime); 351 CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d", 352 kg->kg_slptime, pri); 353 354 pri += PRI_MIN_TIMESHARE; 355 pri += kg->kg_nice; 356 357 if (pri > PRI_MAX_TIMESHARE) 358 pri = PRI_MAX_TIMESHARE; 359 else if (pri < PRI_MIN_TIMESHARE) 360 pri = PRI_MIN_TIMESHARE; 361 362 kg->kg_user_pri = pri; 363 364 return (kg->kg_user_pri); 365} 366 367/* 368 * Calculate a time slice based on the process priority. 369 */ 370static int 371sched_slice(struct ksegrp *kg) 372{ 373 int pslice; 374 int sslice; 375 int slice; 376 int pri; 377 378 pri = kg->kg_user_pri; 379 pri -= PRI_MIN_TIMESHARE; 380 pslice = SCHED_PRI_TOSLICE(pri); 381 sslice = SCHED_PRI_TOSLICE(SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime)); 382/* 383SCHED_SLP_TOSLICE(SCHED_SLP_RATIO( 384 kg->kg_slptime, kg->kg_runtime)); 385*/ 386 slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice); 387 388 CTR4(KTR_RUNQ, 389 "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d", 390 pri, sslice, pslice, slice); 391 392 if (slice < SCHED_SLICE_MIN) 393 slice = SCHED_SLICE_MIN; 394 else if (slice > SCHED_SLICE_MAX) 395 slice = SCHED_SLICE_MAX; 396 397 /* 398 * Every time we grant a new slice check to see if we need to scale 399 * back the slp and run time in the kg. This will cause us to forget 400 * old interactivity while maintaining the current ratio. 401 */ 402 if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) { 403 kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE; 404 kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE; 405 } 406 407 return (slice); 408} 409 410int 411sched_rr_interval(void) 412{ 413 return (SCHED_SLICE_MAX); 414} 415 416void 417sched_pctcpu_update(struct kse *ke) 418{ 419 /* 420 * Adjust counters and watermark for pctcpu calc. 
421 */ 422 ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) * 423 SCHED_CPU_TICKS; 424 ke->ke_ltick = ticks; 425 ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 426} 427 428#ifdef SMP 429/* XXX Should be changed to kseq_load_lowest() */ 430int 431sched_pickcpu(void) 432{ 433 struct kseq *kseq; 434 int load; 435 int cpu; 436 int i; 437 438 if (!smp_started) 439 return (0); 440 441 load = 0; 442 cpu = 0; 443 444 for (i = 0; i < mp_maxid; i++) { 445 if (CPU_ABSENT(i)) 446 continue; 447 kseq = KSEQ_CPU(i); 448 if (kseq->ksq_load < load) { 449 cpu = i; 450 load = kseq->ksq_load; 451 } 452 } 453 454 CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu); 455 return (cpu); 456} 457#else 458int 459sched_pickcpu(void) 460{ 461 return (0); 462} 463#endif 464 465void 466sched_prio(struct thread *td, u_char prio) 467{ 468 struct kse *ke; 469 struct runq *rq; 470 471 mtx_assert(&sched_lock, MA_OWNED); 472 ke = td->td_kse; 473 td->td_priority = prio; 474 475 if (TD_ON_RUNQ(td)) { 476 rq = ke->ke_runq; 477 478 runq_remove(rq, ke); 479 runq_add(rq, ke); 480 } 481} 482 483void 484sched_switchout(struct thread *td) 485{ 486 struct kse *ke; 487 488 mtx_assert(&sched_lock, MA_OWNED); 489 490 ke = td->td_kse; 491 492 td->td_last_kse = ke; 493 td->td_lastcpu = ke->ke_oncpu; 494 ke->ke_oncpu = NOCPU; 495 ke->ke_flags &= ~KEF_NEEDRESCHED; 496 497 if (TD_IS_RUNNING(td)) { 498 setrunqueue(td); 499 return; 500 } else 501 td->td_kse->ke_runq = NULL; 502 503 /* 504 * We will not be on the run queue. So we must be 505 * sleeping or similar. 
506 */ 507 if (td->td_proc->p_flag & P_KSES) 508 kse_reassign(ke); 509} 510 511void 512sched_switchin(struct thread *td) 513{ 514 /* struct kse *ke = td->td_kse; */ 515 mtx_assert(&sched_lock, MA_OWNED); 516 517 td->td_kse->ke_oncpu = PCPU_GET(cpuid); 518#if SCHED_STRICT_RESCHED 519 if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE && 520 td->td_priority != td->td_ksegrp->kg_user_pri) 521 curthread->td_kse->ke_flags |= KEF_NEEDRESCHED; 522#endif 523} 524 525void 526sched_nice(struct ksegrp *kg, int nice) 527{ 528 struct thread *td; 529 530 kg->kg_nice = nice; 531 sched_priority(kg); 532 FOREACH_THREAD_IN_GROUP(kg, td) { 533 td->td_kse->ke_flags |= KEF_NEEDRESCHED; 534 } 535} 536 537void 538sched_sleep(struct thread *td, u_char prio) 539{ 540 mtx_assert(&sched_lock, MA_OWNED); 541 542 td->td_slptime = ticks; 543 td->td_priority = prio; 544 545 /* 546 * If this is an interactive task clear its queue so it moves back 547 * on to curr when it wakes up. Otherwise let it stay on the queue 548 * that it was assigned to. 549 */ 550 if (SCHED_CURR(td->td_kse->ke_ksegrp)) 551 td->td_kse->ke_runq = NULL; 552#ifdef SMP 553 if (td->td_priority < PZERO) { 554 kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 555 td->td_schedflag |= TD_SCHED_BLOAD; 556 } 557#endif 558} 559 560void 561sched_wakeup(struct thread *td) 562{ 563 struct ksegrp *kg; 564 565 mtx_assert(&sched_lock, MA_OWNED); 566 567 /* 568 * Let the kseg know how long we slept for. This is because process 569 * interactivity behavior is modeled in the kseg. 
570 */ 571 kg = td->td_ksegrp; 572 573 if (td->td_slptime) { 574 kg->kg_slptime += (ticks - td->td_slptime) * 1024; 575 td->td_priority = sched_priority(kg); 576 } 577 td->td_slptime = 0; 578#ifdef SMP 579 if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) { 580 kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 581 td->td_schedflag &= ~TD_SCHED_BLOAD; 582 } 583#endif 584 setrunqueue(td); 585#if SCHED_STRICT_RESCHED 586 if (td->td_priority < curthread->td_priority) 587 curthread->td_kse->ke_flags |= KEF_NEEDRESCHED; 588#endif 589} 590 591/* 592 * Penalize the parent for creating a new child and initialize the child's 593 * priority. 594 */ 595void 596sched_fork(struct ksegrp *kg, struct ksegrp *child) 597{ 598 struct kse *ckse; 599 struct kse *pkse; 600 601 mtx_assert(&sched_lock, MA_OWNED); 602 ckse = FIRST_KSE_IN_KSEGRP(child); 603 pkse = FIRST_KSE_IN_KSEGRP(kg); 604 605 /* XXX Need something better here */ 606 if (kg->kg_slptime > kg->kg_runtime) { 607 child->kg_slptime = SCHED_PRI_DYN; 608 child->kg_runtime = kg->kg_slptime / SCHED_PRI_DYN; 609 } else { 610 child->kg_runtime = SCHED_PRI_DYN; 611 child->kg_slptime = kg->kg_runtime / SCHED_PRI_DYN; 612 } 613#if 0 614 child->kg_slptime = kg->kg_slptime; 615 child->kg_runtime = kg->kg_runtime; 616#endif 617 child->kg_user_pri = kg->kg_user_pri; 618 619#if 0 620 if (pkse->ke_cpu != PCPU_GET(cpuid)) { 621 printf("pkse->ke_cpu = %d\n", pkse->ke_cpu); 622 printf("cpuid = %d", PCPU_GET(cpuid)); 623 Debugger("stop"); 624 } 625#endif 626 627 ckse->ke_slice = pkse->ke_slice; 628 ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */ 629 ckse->ke_runq = NULL; 630 /* 631 * Claim that we've been running for one second for statistical 632 * purposes. 633 */ 634 ckse->ke_ticks = 0; 635 ckse->ke_ltick = ticks; 636 ckse->ke_ftick = ticks - hz; 637} 638 639/* 640 * Return some of the child's priority and interactivity to the parent. 
641 */ 642void 643sched_exit(struct ksegrp *kg, struct ksegrp *child) 644{ 645 /* XXX Need something better here */ 646 mtx_assert(&sched_lock, MA_OWNED); 647 kg->kg_slptime = child->kg_slptime; 648 kg->kg_runtime = child->kg_runtime; 649 sched_priority(kg); 650} 651 652void 653sched_clock(struct thread *td) 654{ 655 struct kse *ke; 656#if SCHED_STRICT_RESCHED 657 struct kse *nke; 658 struct kseq *kseq; 659#endif 660 struct ksegrp *kg; 661 662 663 ke = td->td_kse; 664 kg = td->td_ksegrp; 665 666 mtx_assert(&sched_lock, MA_OWNED); 667 KASSERT((td != NULL), ("schedclock: null thread pointer")); 668 669 /* Adjust ticks for pctcpu */ 670 ke->ke_ticks += 10000; 671 ke->ke_ltick = ticks; 672 /* Go up to one second beyond our max and then trim back down */ 673 if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick) 674 sched_pctcpu_update(ke); 675 676 if (td->td_kse->ke_flags & KEF_IDLEKSE) 677 return; 678 679 /* 680 * Check for a higher priority task on the run queue. This can happen 681 * on SMP if another processor woke up a process on our runq. 682 */ 683#if SCHED_STRICT_RESCHED 684 kseq = KSEQ_SELF(); 685 nke = runq_choose(kseq->ksq_curr); 686 687 if (nke && nke->ke_thread && 688 nke->ke_thread->td_priority < td->td_priority) 689 ke->ke_flags |= KEF_NEEDRESCHED; 690#endif 691 /* 692 * We used a tick charge it to the ksegrp so that we can compute our 693 * "interactivity". 694 */ 695 kg->kg_runtime += 1024; 696 697 /* 698 * We used up one time slice. 699 */ 700 ke->ke_slice--; 701 /* 702 * We're out of time, recompute priorities and requeue 703 */ 704 if (ke->ke_slice == 0) { 705 td->td_priority = sched_priority(kg); 706 ke->ke_slice = sched_slice(kg); 707 ke->ke_flags |= KEF_NEEDRESCHED; 708 ke->ke_runq = NULL; 709 } 710} 711 712int 713sched_runnable(void) 714{ 715 struct kseq *kseq; 716 717 kseq = KSEQ_SELF(); 718 719 if (kseq->ksq_load) 720 return (1); 721#ifdef SMP 722 /* 723 * For SMP we may steal other processor's KSEs. 
Just search until we 724 * verify that at least on other cpu has a runnable task. 725 */ 726 if (smp_started) { 727 int i; 728 729#if 0 730 if (kseq->ksq_bload) 731 return (0); 732#endif 733 734 for (i = 0; i < mp_maxid; i++) { 735 if (CPU_ABSENT(i)) 736 continue; 737 kseq = KSEQ_CPU(i); 738 if (kseq->ksq_load) 739 return (1); 740 } 741 } 742#endif 743 return (0); 744} 745 746void 747sched_userret(struct thread *td) 748{ 749 struct ksegrp *kg; 750 751 kg = td->td_ksegrp; 752 753 if (td->td_priority != kg->kg_user_pri) { 754 mtx_lock_spin(&sched_lock); 755 td->td_priority = kg->kg_user_pri; 756 mtx_unlock_spin(&sched_lock); 757 } 758} 759 760struct kse * 761sched_choose(void) 762{ 763 struct kseq *kseq; 764 struct kse *ke; 765 766 kseq = KSEQ_SELF(); 767 ke = kseq_choose(kseq); 768 769 if (ke) { 770 ke->ke_state = KES_THREAD; 771 kseq_rem(kseq, ke); 772 } 773 774#ifdef SMP 775 if (ke == NULL && smp_started) { 776#if 0 777 if (kseq->ksq_bload) 778 return (NULL); 779#endif 780 /* 781 * Find the cpu with the highest load and steal one proc. 
782 */ 783 kseq = kseq_load_highest(); 784 if (kseq == NULL) 785 return (NULL); 786 ke = kseq_choose(kseq); 787 kseq_rem(kseq, ke); 788 789 ke->ke_state = KES_THREAD; 790 ke->ke_runq = NULL; 791 ke->ke_cpu = PCPU_GET(cpuid); 792 } 793#endif 794 return (ke); 795} 796 797void 798sched_add(struct kse *ke) 799{ 800 struct kseq *kseq; 801 802 mtx_assert(&sched_lock, MA_OWNED); 803 KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE")); 804 KASSERT((ke->ke_thread->td_kse != NULL), 805 ("sched_add: No KSE on thread")); 806 KASSERT(ke->ke_state != KES_ONRUNQ, 807 ("sched_add: kse %p (%s) already in run queue", ke, 808 ke->ke_proc->p_comm)); 809 KASSERT(ke->ke_proc->p_sflag & PS_INMEM, 810 ("sched_add: process swapped out")); 811 812 kseq = KSEQ_CPU(ke->ke_cpu); 813 814 if (ke->ke_runq == NULL) { 815 if (SCHED_CURR(ke->ke_ksegrp)) 816 ke->ke_runq = kseq->ksq_curr; 817 else 818 ke->ke_runq = kseq->ksq_next; 819 } 820 ke->ke_ksegrp->kg_runq_kses++; 821 ke->ke_state = KES_ONRUNQ; 822 823 kseq_add(kseq, ke); 824} 825 826void 827sched_rem(struct kse *ke) 828{ 829 mtx_assert(&sched_lock, MA_OWNED); 830 /* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */ 831 832 ke->ke_runq = NULL; 833 ke->ke_state = KES_THREAD; 834 ke->ke_ksegrp->kg_runq_kses--; 835 836 kseq_rem(KSEQ_CPU(ke->ke_cpu), ke); 837} 838 839fixpt_t 840sched_pctcpu(struct kse *ke) 841{ 842 fixpt_t pctcpu; 843 int realstathz; 844 845 pctcpu = 0; 846 realstathz = stathz ? stathz : hz; 847 848 if (ke->ke_ticks) { 849 int rtick; 850 851 /* Update to account for time potentially spent sleeping */ 852 ke->ke_ltick = ticks; 853 sched_pctcpu_update(ke); 854 855 /* How many rtick per second ? 
*/ 856 rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000); 857 pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT; 858 } 859 860 ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick; 861 862 return (pctcpu); 863} 864 865int 866sched_sizeof_kse(void) 867{ 868 return (sizeof(struct kse) + sizeof(struct ke_sched)); 869} 870 871int 872sched_sizeof_ksegrp(void) 873{ 874 return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); 875} 876 877int 878sched_sizeof_proc(void) 879{ 880 return (sizeof(struct proc)); 881} 882 883int 884sched_sizeof_thread(void) 885{ 886 return (sizeof(struct thread) + sizeof(struct td_sched)); 887} 888