sched_ule.c revision 110645
1/*- 2 * Copyright (c) 2003, Jeffrey Roberson <jeff@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 * $FreeBSD: head/sys/kern/sched_ule.c 110645 2003-02-10 14:03:45Z jeff $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
/*
 * NOTE(review): ccpu is fixpt_t but exported via SYSCTL_INT; this relies
 * on fixpt_t being int-sized -- confirm before changing either type.
 */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

/*
 * These datastructures are allocated within their parent datastructure but
 * are scheduler specific.
 */

/* Per-KSE scheduler state, hung off struct kse via ke_sched. */
struct ke_sched {
	int	ske_slice;		/* Remaining time slice, in ticks. */
	struct	runq *ske_runq;		/* Run queue this kse is assigned to. */
	/* The following variables are only used for pctcpu calculation */
	int	ske_ltick;		/* Last tick that we were running on */
	int	ske_ftick;		/* First tick that we were running on */
	int	ske_ticks;		/* Tick count */
	u_char	ske_cpu;		/* Cpu this kse is bound to. */
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu

/*
 * Per-ksegrp scheduler state.  Interactivity (slp vs. run time) is
 * tracked at the ksegrp level.
 */
struct kg_sched {
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

/* Per-thread scheduler state. */
struct td_sched {
	int	std_slptime;		/* Tick count when we went to sleep. */
	int	std_schedflag;		/* TD_SCHED_* flags. */
};
#define	td_slptime	td_sched->std_slptime
#define	td_schedflag	td_sched->std_schedflag

#define	TD_SCHED_BLOAD	0x0001		/*
					 * thread was counted as being in short
					 * term sleep.
					 */

/* Static scheduler state for proc0/thread0, usable before malloc works. */
struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * This priority range has 20 priorities on either end that are reachable
 * only through nice values.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		40
#define	SCHED_PRI_BASE		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_DYN		(SCHED_PRI_RANGE - SCHED_PRI_NRESV)
#define	SCHED_PRI_DYN_HALF	(SCHED_PRI_DYN / 2)

/*
 * These determine how sleep time effects the priority of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_THROTTLE:	Divisor for reducing slp/run time.
 * SLP_RATIO:	Compute a bounded ratio of slp time vs run time.
 * SLP_TOPRI:	Convert a number of ticks slept and ticks ran into a priority
 */
#define	SCHED_SLP_RUN_MAX	((hz * 30) * 1024)
#define	SCHED_SLP_RUN_THROTTLE	(10)

/*
 * Return a bounded ratio of 's' to 'b', scaled such that the result is
 * roughly SCHED_PRI_DYN_HALF when s == b.  Returns 0 when 'b' is smaller
 * than SCHED_PRI_DYN_HALF, which also avoids a division by zero.
 */
static __inline int
sched_slp_ratio(int b, int s)
{
	b /= SCHED_PRI_DYN_HALF;
	if (b == 0)
		return (0);
	s /= b;
	return (s);
}

#define	SCHED_SLP_TOPRI(slp, run)					\
	((((slp) > (run))?						\
	sched_slp_ratio((slp), (run)):					\
	SCHED_PRI_DYN_HALF + (SCHED_PRI_DYN_HALF - sched_slp_ratio((run), (slp))))+ \
	SCHED_PRI_NRESV / 2)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number slices granted per unit of pri or slp.
 * PRI_TOSLICE:	Compute a slice size that is proportional to the priority.
 * SLP_TOSLICE:	Compute a slice size that is inversely proportional to the
 *		amount of time slept. (smaller slices for interactive ksegs)
 * PRI_COMP:	This determines what fraction of the actual slice comes from
 *		the slice size computed from the priority.
 * SLP_COMP:	This determines what component of the actual slice comes from
 *		the slice size computed from the sleep time.
 */
#define	SCHED_SLICE_MIN		(hz / 100)
#define	SCHED_SLICE_MAX		(hz / 4)
#define	SCHED_SLICE_RANGE	(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_PRI_TOSLICE(pri)						\
	(SCHED_SLICE_MAX - SCHED_SLICE_SCALE((pri), SCHED_PRI_RANGE))
#define	SCHED_SLP_TOSLICE(slp)						\
	(SCHED_SLICE_MAX - SCHED_SLICE_SCALE((slp), SCHED_PRI_DYN))
#define	SCHED_SLP_COMP(slice)	(((slice) / 5) * 3)	/* 60% */
#define	SCHED_PRI_COMP(slice)	(((slice) / 5) * 2)	/* 40% */

/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.  Non-timeshare classes always go on the current queue.
 *
 * XXX nice value should effect how interactive a kg is.
 */
#define	SCHED_CURR(kg)	(((kg)->kg_slptime > (kg)->kg_runtime &&	\
	sched_slp_ratio((kg)->kg_slptime, (kg)->kg_runtime) > 4) ||	\
	(kg)->kg_pri_class != PRI_TIMESHARE)

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	60
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - pair of runqs per processor
 */

struct kseq {
	struct	runq ksq_runqs[2];	/* Storage for curr and next. */
	struct	runq *ksq_curr;		/* Queue being drained this round. */
	struct	runq *ksq_next;		/* Queue filled for the next round. */
	int	ksq_load;		/* Total runnable */
#ifdef SMP
	unsigned int	ksq_rslices;	/* Slices on run queue */
	unsigned int	ksq_bload;	/* Threads waiting on IO */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
struct kseq	kseq_cpu[MAXCPU];
#define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
#else
struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static int sched_slice(struct ksegrp *kg);
static int sched_priority(struct ksegrp *kg);
void sched_pctcpu_update(struct kse *ke);
int sched_pickcpu(void);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static __inline void kseq_add(struct kseq *kseq, struct kse *ke);
static __inline void kseq_rem(struct kseq *kseq, struct kse *ke);
#ifdef SMP
static __inline void kseq_sleep(struct kseq *kseq, struct kse *ke);
static __inline void kseq_wakeup(struct kseq *kseq, struct kse *ke);
struct kseq * kseq_load_highest(void);
#endif

/*
 * Enqueue a kse on its assigned run queue (ke_runq) and update the
 * load accounting on this kseq.
 */
static __inline void
kseq_add(struct kseq *kseq, struct kse *ke)
{
	runq_add(ke->ke_runq, ke);
	kseq->ksq_load++;
#ifdef SMP
	kseq->ksq_rslices += ke->ke_slice;
#endif
}

/*
 * Remove a kse from its run queue and undo kseq_add()'s accounting.
 */
static __inline void
kseq_rem(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_load--;
	runq_remove(ke->ke_runq, ke);
#ifdef SMP
	kseq->ksq_rslices -= ke->ke_slice;
#endif
}

#ifdef SMP
/* Count a kse as blocked (short term sleep) on this kseq. */
static __inline void
kseq_sleep(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload++;
}

/* Undo kseq_sleep()'s accounting when the kse wakes up. */
static __inline void
kseq_wakeup(struct kseq *kseq, struct kse *ke)
{
	kseq->ksq_bload--;
}

/*
 * Return the kseq with the largest runnable load, or NULL if every
 * present cpu is idle.  Nothing is dequeued here; the caller steals
 * from the returned queue.
 */
struct kseq *
kseq_load_highest(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	cpu = 0;
	load = 0;

	/*
	 * NOTE(review): loop runs i < mp_maxid; if mp_maxid is the highest
	 * valid cpu id (rather than a count) the last cpu is never
	 * examined -- confirm mp_maxid semantics for this branch.
	 */
	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load > load) {
			load = kseq->ksq_load;
			cpu = i;
		}
	}
	if (load)
		return (KSEQ_CPU(cpu));

	return (NULL);
}
#endif

/*
 * Pick the next kse to run from a kseq.  When the current queue is
 * empty, curr and next are swapped so that kses queued for the next
 * round become runnable; may still return NULL if both are empty.
 */
struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	if ((ke = runq_choose(kseq->ksq_curr)) == NULL) {
		swap = kseq->ksq_curr;
		kseq->ksq_curr = kseq->ksq_next;
		kseq->ksq_next = swap;
		ke = runq_choose(kseq->ksq_curr);
	}

	return (ke);
}


/* Initialize one per-cpu kseq to an empty state. */
static void
kseq_setup(struct kseq *kseq)
{
	kseq->ksq_curr = &kseq->ksq_runqs[0];
	kseq->ksq_next = &kseq->ksq_runqs[1];
	runq_init(kseq->ksq_curr);
	runq_init(kseq->ksq_next);
	kseq->ksq_load = 0;
#ifdef SMP
	kseq->ksq_rslices = 0;
	kseq->ksq_bload = 0;
#endif
}

/* SYSINIT hook: set up every per-cpu run queue pair at boot. */
static void
sched_setup(void *dummy)
{
	int i;

	mtx_lock_spin(&sched_lock);
	/* init kseqs */
	for (i = 0; i < MAXCPU; i++)
		kseq_setup(KSEQ_CPU(i));
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
339 */ 340static int 341sched_priority(struct ksegrp *kg) 342{ 343 int pri; 344 345 if (kg->kg_pri_class != PRI_TIMESHARE) 346 return (kg->kg_user_pri); 347 348 pri = SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime); 349 CTR2(KTR_RUNQ, "sched_priority: slptime: %d\tpri: %d", 350 kg->kg_slptime, pri); 351 352 pri += PRI_MIN_TIMESHARE; 353 pri += kg->kg_nice; 354 355 if (pri > PRI_MAX_TIMESHARE) 356 pri = PRI_MAX_TIMESHARE; 357 else if (pri < PRI_MIN_TIMESHARE) 358 pri = PRI_MIN_TIMESHARE; 359 360 kg->kg_user_pri = pri; 361 362 return (kg->kg_user_pri); 363} 364 365/* 366 * Calculate a time slice based on the process priority. 367 */ 368static int 369sched_slice(struct ksegrp *kg) 370{ 371 int pslice; 372 int sslice; 373 int slice; 374 int pri; 375 376 pri = kg->kg_user_pri; 377 pri -= PRI_MIN_TIMESHARE; 378 pslice = SCHED_PRI_TOSLICE(pri); 379 sslice = SCHED_PRI_TOSLICE(SCHED_SLP_TOPRI(kg->kg_slptime, kg->kg_runtime)); 380/* 381SCHED_SLP_TOSLICE(SCHED_SLP_RATIO( 382 kg->kg_slptime, kg->kg_runtime)); 383*/ 384 slice = SCHED_SLP_COMP(sslice) + SCHED_PRI_COMP(pslice); 385 386 CTR4(KTR_RUNQ, 387 "sched_slice: pri: %d\tsslice: %d\tpslice: %d\tslice: %d", 388 pri, sslice, pslice, slice); 389 390 if (slice < SCHED_SLICE_MIN) 391 slice = SCHED_SLICE_MIN; 392 else if (slice > SCHED_SLICE_MAX) 393 slice = SCHED_SLICE_MAX; 394 395 /* 396 * Every time we grant a new slice check to see if we need to scale 397 * back the slp and run time in the kg. This will cause us to forget 398 * old interactivity while maintaining the current ratio. 399 */ 400 if ((kg->kg_runtime + kg->kg_slptime) > SCHED_SLP_RUN_MAX) { 401 kg->kg_runtime /= SCHED_SLP_RUN_THROTTLE; 402 kg->kg_slptime /= SCHED_SLP_RUN_THROTTLE; 403 } 404 405 return (slice); 406} 407 408int 409sched_rr_interval(void) 410{ 411 return (SCHED_SLICE_MAX); 412} 413 414void 415sched_pctcpu_update(struct kse *ke) 416{ 417 /* 418 * Adjust counters and watermark for pctcpu calc. 
419 */ 420 ke->ke_ticks = (ke->ke_ticks / (ke->ke_ltick - ke->ke_ftick)) * 421 SCHED_CPU_TICKS; 422 ke->ke_ltick = ticks; 423 ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS; 424} 425 426#ifdef SMP 427/* XXX Should be changed to kseq_load_lowest() */ 428int 429sched_pickcpu(void) 430{ 431 struct kseq *kseq; 432 int load; 433 int cpu; 434 int i; 435 436 if (!smp_started) 437 return (0); 438 439 load = 0; 440 cpu = 0; 441 442 for (i = 0; i < mp_maxid; i++) { 443 if (CPU_ABSENT(i)) 444 continue; 445 kseq = KSEQ_CPU(i); 446 if (kseq->ksq_load < load) { 447 cpu = i; 448 load = kseq->ksq_load; 449 } 450 } 451 452 CTR1(KTR_RUNQ, "sched_pickcpu: %d", cpu); 453 return (cpu); 454} 455#else 456int 457sched_pickcpu(void) 458{ 459 return (0); 460} 461#endif 462 463void 464sched_prio(struct thread *td, u_char prio) 465{ 466 struct kse *ke; 467 struct runq *rq; 468 469 mtx_assert(&sched_lock, MA_OWNED); 470 ke = td->td_kse; 471 td->td_priority = prio; 472 473 if (TD_ON_RUNQ(td)) { 474 rq = ke->ke_runq; 475 476 runq_remove(rq, ke); 477 runq_add(rq, ke); 478 } 479} 480 481void 482sched_switchout(struct thread *td) 483{ 484 struct kse *ke; 485 486 mtx_assert(&sched_lock, MA_OWNED); 487 488 ke = td->td_kse; 489 490 td->td_last_kse = ke; 491 td->td_lastcpu = ke->ke_oncpu; 492 ke->ke_oncpu = NOCPU; 493 ke->ke_flags &= ~KEF_NEEDRESCHED; 494 495 if (TD_IS_RUNNING(td)) { 496 setrunqueue(td); 497 return; 498 } else 499 td->td_kse->ke_runq = NULL; 500 501 /* 502 * We will not be on the run queue. So we must be 503 * sleeping or similar. 
504 */ 505 if (td->td_proc->p_flag & P_KSES) 506 kse_reassign(ke); 507} 508 509void 510sched_switchin(struct thread *td) 511{ 512 /* struct kse *ke = td->td_kse; */ 513 mtx_assert(&sched_lock, MA_OWNED); 514 515 td->td_kse->ke_oncpu = PCPU_GET(cpuid); 516#if SCHED_STRICT_RESCHED 517 if (td->td_ksegrp->kg_pri_class == PRI_TIMESHARE && 518 td->td_priority != td->td_ksegrp->kg_user_pri) 519 curthread->td_kse->ke_flags |= KEF_NEEDRESCHED; 520#endif 521} 522 523void 524sched_nice(struct ksegrp *kg, int nice) 525{ 526 struct thread *td; 527 528 kg->kg_nice = nice; 529 sched_priority(kg); 530 FOREACH_THREAD_IN_GROUP(kg, td) { 531 td->td_kse->ke_flags |= KEF_NEEDRESCHED; 532 } 533} 534 535void 536sched_sleep(struct thread *td, u_char prio) 537{ 538 mtx_assert(&sched_lock, MA_OWNED); 539 540 td->td_slptime = ticks; 541 td->td_priority = prio; 542 543 /* 544 * If this is an interactive task clear its queue so it moves back 545 * on to curr when it wakes up. Otherwise let it stay on the queue 546 * that it was assigned to. 547 */ 548 if (SCHED_CURR(td->td_kse->ke_ksegrp)) 549 td->td_kse->ke_runq = NULL; 550#ifdef SMP 551 if (td->td_priority < PZERO) { 552 kseq_sleep(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 553 td->td_schedflag |= TD_SCHED_BLOAD; 554 } 555#endif 556} 557 558void 559sched_wakeup(struct thread *td) 560{ 561 struct ksegrp *kg; 562 563 mtx_assert(&sched_lock, MA_OWNED); 564 565 /* 566 * Let the kseg know how long we slept for. This is because process 567 * interactivity behavior is modeled in the kseg. 
568 */ 569 kg = td->td_ksegrp; 570 571 if (td->td_slptime) { 572 kg->kg_slptime += (ticks - td->td_slptime) * 1024; 573 td->td_priority = sched_priority(kg); 574 } 575 td->td_slptime = 0; 576#ifdef SMP 577 if (td->td_priority < PZERO && td->td_schedflag & TD_SCHED_BLOAD) { 578 kseq_wakeup(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse); 579 td->td_schedflag &= ~TD_SCHED_BLOAD; 580 } 581#endif 582 setrunqueue(td); 583#if SCHED_STRICT_RESCHED 584 if (td->td_priority < curthread->td_priority) 585 curthread->td_kse->ke_flags |= KEF_NEEDRESCHED; 586#endif 587} 588 589/* 590 * Penalize the parent for creating a new child and initialize the child's 591 * priority. 592 */ 593void 594sched_fork(struct ksegrp *kg, struct ksegrp *child) 595{ 596 struct kse *ckse; 597 struct kse *pkse; 598 599 mtx_assert(&sched_lock, MA_OWNED); 600 ckse = FIRST_KSE_IN_KSEGRP(child); 601 pkse = FIRST_KSE_IN_KSEGRP(kg); 602 603 /* XXX Need something better here */ 604 if (kg->kg_slptime > kg->kg_runtime) { 605 child->kg_slptime = SCHED_PRI_DYN; 606 child->kg_runtime = kg->kg_slptime / SCHED_PRI_DYN; 607 } else { 608 child->kg_runtime = SCHED_PRI_DYN; 609 child->kg_slptime = kg->kg_runtime / SCHED_PRI_DYN; 610 } 611#if 0 612 child->kg_slptime = kg->kg_slptime; 613 child->kg_runtime = kg->kg_runtime; 614#endif 615 child->kg_user_pri = kg->kg_user_pri; 616 617#if 0 618 if (pkse->ke_cpu != PCPU_GET(cpuid)) { 619 printf("pkse->ke_cpu = %d\n", pkse->ke_cpu); 620 printf("cpuid = %d", PCPU_GET(cpuid)); 621 Debugger("stop"); 622 } 623#endif 624 625 ckse->ke_slice = pkse->ke_slice; 626 ckse->ke_cpu = pkse->ke_cpu; /* sched_pickcpu(); */ 627 ckse->ke_runq = NULL; 628 /* 629 * Claim that we've been running for one second for statistical 630 * purposes. 631 */ 632 ckse->ke_ticks = 0; 633 ckse->ke_ltick = ticks; 634 ckse->ke_ftick = ticks - hz; 635} 636 637/* 638 * Return some of the child's priority and interactivity to the parent. 
 */
void
sched_exit(struct ksegrp *kg, struct ksegrp *child)
{
	/* XXX Need something better here */
	mtx_assert(&sched_lock, MA_OWNED);
	kg->kg_slptime = child->kg_slptime;
	kg->kg_runtime = child->kg_runtime;
	sched_priority(kg);
}

/*
 * Statistics clock hook: charge the tick to the running kse/ksegrp,
 * and when the slice runs out, recompute priority and slice and mark
 * the thread for rescheduling.
 */
void
sched_clock(struct thread *td)
{
	struct kse *ke;
#if SCHED_STRICT_RESCHED
	struct kse *nke;
	struct kseq *kseq;
#endif
	struct ksegrp *kg;


	ke = td->td_kse;
	kg = td->td_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks += 10000;
	ke->ke_ltick = ticks;
	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	/* The idle kse never consumes a slice. */
	if (td->td_kse->ke_flags & KEF_IDLEKSE)
		return;

	/*
	 * Check for a higher priority task on the run queue.  This can happen
	 * on SMP if another processor woke up a process on our runq.
	 */
#if SCHED_STRICT_RESCHED
	kseq = KSEQ_SELF();
	nke = runq_choose(kseq->ksq_curr);

	if (nke && nke->ke_thread &&
	    nke->ke_thread->td_priority < td->td_priority)
		ke->ke_flags |= KEF_NEEDRESCHED;
#endif
	/*
	 * We used a tick charge it to the ksegrp so that we can compute our
	 * "interactivity".
	 */
	kg->kg_runtime += 1024;

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	/*
	 * We're out of time, recompute priorities and requeue
	 */
	if (ke->ke_slice == 0) {
		td->td_priority = sched_priority(kg);
		ke->ke_slice = sched_slice(kg);
		ke->ke_flags |= KEF_NEEDRESCHED;
		ke->ke_runq = NULL;
	}
}

/*
 * Return nonzero if any cpu has a runnable task, zero otherwise.
 */
int
sched_runnable(void)
{
	struct kseq *kseq;

	kseq = KSEQ_SELF();

	if (kseq->ksq_load)
		return (1);
#ifdef SMP
	/*
	 * For SMP we may steal other processor's KSEs.  Just search until we
	 * verify that at least one other cpu has a runnable task.
	 */
	if (smp_started) {
		int i;

		/*
		 * NOTE(review): loop runs i < mp_maxid; if mp_maxid is the
		 * highest valid cpu id the last cpu is never checked --
		 * confirm mp_maxid semantics.
		 */
		for (i = 0; i < mp_maxid; i++) {
			if (CPU_ABSENT(i))
				continue;
			kseq = KSEQ_CPU(i);
			if (kseq->ksq_load)
				return (1);
		}
	}
#endif
	return (0);
}

/*
 * Restore a thread's priority to its user priority when returning to
 * userland.
 */
void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

/*
 * Choose the next kse to run on this cpu.  When the local queues are
 * empty on SMP, try to steal one from the most loaded cpu.
 */
struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	kseq = KSEQ_SELF();
	ke = kseq_choose(kseq);

	if (ke) {
		ke->ke_state = KES_THREAD;
		kseq_rem(kseq, ke);
	}

#ifdef SMP
	if (ke == NULL && smp_started) {
		/*
		 * Find the cpu with the highest load and steal one proc.
		 *
		 * NOTE(review): kseq_choose() here can return NULL if the
		 * remote queue was drained after kseq_load_highest()
		 * sampled it; kseq_rem() would then dereference NULL --
		 * confirm whether sched_lock makes that impossible.
		 */
		kseq = kseq_load_highest();
		if (kseq == NULL)
			return (NULL);
		ke = kseq_choose(kseq);
		kseq_rem(kseq, ke);

		ke->ke_state = KES_THREAD;
		ke->ke_runq = NULL;
		ke->ke_cpu = PCPU_GET(cpuid);
	}
#endif
	return (ke);
}

/*
 * Add a kse to this cpu's run queues.  A kse with no assigned queue is
 * placed on curr or next depending on its ksegrp's interactivity.
 */
void
sched_add(struct kse *ke)
{
	struct kseq *kseq;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));

	kseq = KSEQ_CPU(ke->ke_cpu);

	if (ke->ke_runq == NULL) {
		if (SCHED_CURR(ke->ke_ksegrp))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
	}
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	kseq_add(kseq, ke);
}

/*
 * Remove a kse from its run queue and release it back to thread state.
 */
void
sched_rem(struct kse *ke)
{
	mtx_assert(&sched_lock, MA_OWNED);
	/* KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue")); */

	ke->ke_runq = NULL;
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;

	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
}

/*
 * Compute the fixed-point cpu percentage for a kse from its tick
 * counters; also refreshes p_swtime as a side effect.
 */
fixpt_t
sched_pctcpu(struct kse *ke)
{
	fixpt_t pctcpu;
	int realstathz;

	pctcpu = 0;
	realstathz = stathz ? stathz : hz;

	if (ke->ke_ticks) {
		int rtick;

		/* Update to account for time potentially spent sleeping */
		ke->ke_ltick = ticks;
		sched_pctcpu_update(ke);

		/* How many rtick per second ? */
		rtick = ke->ke_ticks / (SCHED_CPU_TIME * 10000);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;

	return (pctcpu);
}

/* Sizes of the base structures plus their scheduler-private parts. */
int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}