sched_ule.c revision 122094
/*-
 * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/sched_ule.c 122094 2003-11-05 05:30:12Z jeff $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/vmmeter.h>
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>
#include <machine/smp.h>

#define	KTR_ULE	KTR_NFS

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
/* XXX This is bogus compatibility crap for ps */
static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");

static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)

static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "SCHED");

static int sched_strict;
SYSCTL_INT(_kern_sched, OID_AUTO, strict, CTLFLAG_RD, &sched_strict, 0, "");

static int slice_min = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");

static int slice_max = 10;
SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");

int realstathz;
int tickincr = 1;

#ifdef SMP
/* Callout to handle load balancing SMP systems. */
static struct callout kseq_lb_callout;
#endif

/*
 * These data structures are allocated within their parent data structure
 * but are scheduler specific.
 */

struct ke_sched {
	int		ske_slice;
	struct runq	*ske_runq;
	/* The following variables are only used for pctcpu calculation */
	int		ske_ltick;	/* Last tick that we were running on */
	int		ske_ftick;	/* First tick that we were running on */
	int		ske_ticks;	/* Tick count */
	/* CPU that we have affinity for. */
	u_char		ske_cpu;
};
#define	ke_slice	ke_sched->ske_slice
#define	ke_runq		ke_sched->ske_runq
#define	ke_ltick	ke_sched->ske_ltick
#define	ke_ftick	ke_sched->ske_ftick
#define	ke_ticks	ke_sched->ske_ticks
#define	ke_cpu		ke_sched->ske_cpu
#define	ke_assign	ke_procq.tqe_next

#define	KEF_ASSIGNED	KEF_SCHED0	/* KSE is being migrated. */
#define	KEF_PINNED	KEF_SCHED1	/* KSE is temporarily bound. */
#define	KEF_BOUND	KEF_SCHED2	/* KSE can not migrate. */

struct kg_sched {
	int	skg_slptime;		/* Number of ticks we vol. slept */
	int	skg_runtime;		/* Number of ticks we were running */
};
#define	kg_slptime	kg_sched->skg_slptime
#define	kg_runtime	kg_sched->skg_runtime

struct td_sched {
	int	std_slptime;
};
#define	td_slptime	td_sched->std_slptime

struct td_sched td_sched;
struct ke_sched ke_sched;
struct kg_sched kg_sched;

struct ke_sched *kse0_sched = &ke_sched;
struct kg_sched *ksegrp0_sched = &kg_sched;
struct p_sched *proc0_sched = NULL;
struct td_sched *thread0_sched = &td_sched;

/*
 * The priority is primarily determined by the interactivity score.  Thus, we
 * give lower (better) priorities to kse groups that use less CPU.  The nice
 * value is then directly added to this to allow nice to have some effect
 * on latency.
 *
 * PRI_RANGE:	Total priority range for timeshare threads.
 * PRI_NRESV:	Number of nice values.
 * PRI_BASE:	The start of the dynamic range.
 */
#define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
#define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
#define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
#define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
#define	SCHED_PRI_INTERACT(score)					\
    ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)

/*
 * These determine the interactivity of a process.
 *
 * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
 *		before throttling back.
 * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
 * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
 * INTERACT_THRESH:	Threshold for placement on the current runq.
 */
#define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
#define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
#define	SCHED_INTERACT_MAX	(100)
#define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
#define	SCHED_INTERACT_THRESH	(30)

/*
 * These parameters and macros determine the size of the time slice that is
 * granted to each thread.
 *
 * SLICE_MIN:	Minimum time slice granted, in units of ticks.
 * SLICE_MAX:	Maximum time slice granted.
 * SLICE_RANGE:	Range of available time slices scaled by hz.
 * SLICE_SCALE:	The number of slices granted per val in the range of
 *		[0, max].
 * SLICE_NICE:	Determines the amount of slice granted to a scaled nice.
 * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
 */
#define	SCHED_SLICE_MIN			(slice_min)
#define	SCHED_SLICE_MAX			(slice_max)
#define	SCHED_SLICE_NTHRESH		(SCHED_PRI_NHALF - 1)
#define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
#define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
#define	SCHED_SLICE_NICE(nice)						\
    (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))
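
/*
 * Worked example for the macros above (illustrative only; the exact
 * numbers depend on the priority and nice ranges defined in
 * <sys/priority.h> and <sys/resource.h>).  With a 64 entry timeshare
 * priority range and a nice range of [-20, 20]:
 *
 *	SCHED_PRI_RANGE = 64, SCHED_PRI_NRESV = 41, SCHED_PRI_NHALF = 20
 *
 * so an interactivity score of 30 (the threshold) maps to
 * SCHED_PRI_INTERACT(30) = 30 * 64 / 100 = 19 priority steps above
 * SCHED_PRI_BASE, before the nice value is added in sched_priority().
 */
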
/*
 * This macro determines whether or not the kse belongs on the current or
 * next run queue.
 */
#define	SCHED_INTERACTIVE(kg)						\
    (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
#define	SCHED_CURR(kg, ke)						\
    (ke->ke_thread->td_priority != kg->kg_user_pri ||			\
    SCHED_INTERACTIVE(kg))

/*
 * Cpu percentage computation macros and defines.
 *
 * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
 * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
 */

#define	SCHED_CPU_TIME	10
#define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)

/*
 * kseq - per processor runqs and statistics.
 */

#define	KSEQ_NCLASS	(PRI_IDLE + 1)	/* Number of run classes. */

struct kseq {
	struct runq	ksq_idle;		/* Queue of IDLE threads. */
	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
	struct runq	*ksq_next;		/* Next timeshare queue. */
	struct runq	*ksq_curr;		/* Current queue. */
	int		ksq_load_timeshare;	/* Load for timeshare. */
	int		ksq_load;		/* Aggregate load. */
	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
	short		ksq_nicemin;		/* Least nice. */
#ifdef SMP
	int		ksq_load_transferable;	/* kses that may be migrated. */
	int		ksq_idled;
	unsigned int	ksq_rslices;	/* Slices on run queue */
	int		ksq_cpus;	/* Count of CPUs in this kseq. */
	struct kse	*ksq_assigned;	/* KSEs assigned by another CPU. */
#endif
};

/*
 * One kse queue per processor.
 */
#ifdef SMP
static int kseq_idle;
static struct kseq	kseq_cpu[MAXCPU];
static struct kseq	*kseq_idmap[MAXCPU];
#define	KSEQ_SELF()	(kseq_idmap[PCPU_GET(cpuid)])
#define	KSEQ_CPU(x)	(kseq_idmap[(x)])
#else
static struct kseq	kseq_cpu;
#define	KSEQ_SELF()	(&kseq_cpu)
#define	KSEQ_CPU(x)	(&kseq_cpu)
#endif

static void sched_slice(struct kse *ke);
static void sched_priority(struct ksegrp *kg);
static int sched_interact_score(struct ksegrp *kg);
static void sched_interact_update(struct ksegrp *kg);
static void sched_interact_fork(struct ksegrp *kg);
static void sched_pctcpu_update(struct kse *ke);

/* Operations on per processor queues */
static struct kse * kseq_choose(struct kseq *kseq);
static void kseq_setup(struct kseq *kseq);
static void kseq_add(struct kseq *kseq, struct kse *ke);
static void kseq_rem(struct kseq *kseq, struct kse *ke);
static void kseq_nice_add(struct kseq *kseq, int nice);
static void kseq_nice_rem(struct kseq *kseq, int nice);
void kseq_print(int cpu);
#ifdef SMP
#if 0
static int sched_pickcpu(void);
#endif
static struct kse *runq_steal(struct runq *rq);
static void kseq_balance(void *arg);
static void kseq_move(struct kseq *from, int cpu);
static __inline void kseq_setidle(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
static void kseq_assign(struct kseq *);
static struct kse *kseq_steal(struct kseq *kseq);
#define	KSE_CAN_MIGRATE(ke, class)					\
    ((class) != PRI_ITHD && ((ke)->ke_flags & (KEF_BOUND|KEF_PINNED)) == 0)
#endif

void
kseq_print(int cpu)
{
	struct kseq *kseq;
	int i;

	kseq = KSEQ_CPU(cpu);

	printf("kseq:\n");
	printf("\tload: %d\n", kseq->ksq_load);
	printf("\tload TIMESHARE: %d\n", kseq->ksq_load_timeshare);
#ifdef SMP
	printf("\tload transferable: %d\n", kseq->ksq_load_transferable);
#endif
	printf("\tnicemin:\t%d\n", kseq->ksq_nicemin);
	printf("\tnice counts:\n");
	for (i = 0; i < SCHED_PRI_NRESV; i++)
		if (kseq->ksq_nice[i])
			printf("\t\t%d = %d\n",
= %d\n", 292 i - SCHED_PRI_NHALF, kseq->ksq_nice[i]); 293} 294 295static void 296kseq_add(struct kseq *kseq, struct kse *ke) 297{ 298 int class; 299 mtx_assert(&sched_lock, MA_OWNED); 300 class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 301 if (class == PRI_TIMESHARE) 302 kseq->ksq_load_timeshare++; 303#ifdef SMP 304 if (KSE_CAN_MIGRATE(ke, class)) 305 kseq->ksq_load_transferable++; 306 kseq->ksq_rslices += ke->ke_slice; 307#endif 308 kseq->ksq_load++; 309 if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 310 CTR6(KTR_ULE, "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))", 311 ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority, 312 ke->ke_ksegrp->kg_nice, kseq->ksq_nicemin); 313 if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 314 kseq_nice_add(kseq, ke->ke_ksegrp->kg_nice); 315} 316 317static void 318kseq_rem(struct kseq *kseq, struct kse *ke) 319{ 320 int class; 321 mtx_assert(&sched_lock, MA_OWNED); 322 class = PRI_BASE(ke->ke_ksegrp->kg_pri_class); 323 if (class == PRI_TIMESHARE) 324 kseq->ksq_load_timeshare--; 325#ifdef SMP 326 if (KSE_CAN_MIGRATE(ke, class)) 327 kseq->ksq_load_transferable--; 328 kseq->ksq_rslices -= ke->ke_slice; 329#endif 330 kseq->ksq_load--; 331 ke->ke_runq = NULL; 332 if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) 333 kseq_nice_rem(kseq, ke->ke_ksegrp->kg_nice); 334} 335 336static void 337kseq_nice_add(struct kseq *kseq, int nice) 338{ 339 mtx_assert(&sched_lock, MA_OWNED); 340 /* Normalize to zero. */ 341 kseq->ksq_nice[nice + SCHED_PRI_NHALF]++; 342 if (nice < kseq->ksq_nicemin || kseq->ksq_load_timeshare == 1) 343 kseq->ksq_nicemin = nice; 344} 345 346static void 347kseq_nice_rem(struct kseq *kseq, int nice) 348{ 349 int n; 350 351 mtx_assert(&sched_lock, MA_OWNED); 352 /* Normalize to zero. */ 353 n = nice + SCHED_PRI_NHALF; 354 kseq->ksq_nice[n]--; 355 KASSERT(kseq->ksq_nice[n] >= 0, ("Negative nice count.")); 356 357 /* 358 * If this wasn't the smallest nice value or there are more in 359 * this bucket we can just return. Otherwise we have to recalculate 360 * the smallest nice. 361 */ 362 if (nice != kseq->ksq_nicemin || 363 kseq->ksq_nice[n] != 0 || 364 kseq->ksq_load_timeshare == 0) 365 return; 366 367 for (; n < SCHED_PRI_NRESV; n++) 368 if (kseq->ksq_nice[n]) { 369 kseq->ksq_nicemin = n - SCHED_PRI_NHALF; 370 return; 371 } 372} 373 374#ifdef SMP 375/* 376 * kseq_balance is a simple CPU load balancing algorithm. It operates by 377 * finding the least loaded and most loaded cpu and equalizing their load 378 * by migrating some processes. 379 * 380 * Dealing only with two CPUs at a time has two advantages. Firstly, most 381 * installations will only have 2 cpus. Secondly, load balancing too much at 382 * once can have an unpleasant effect on the system. The scheduler rarely has 383 * enough information to make perfect decisions. So this algorithm chooses 384 * algorithm simplicity and more gradual effects on load in larger systems. 385 * 386 * It could be improved by considering the priorities and slices assigned to 387 * each task prior to balancing them. There are many pathological cases with 388 * any approach and so the semi random algorithm below may work as well as any. 
#ifdef SMP
/*
 * kseq_balance is a simple CPU load balancing algorithm.  It operates by
 * finding the least loaded and most loaded cpu and equalizing their load
 * by migrating some processes.
 *
 * Dealing only with two CPUs at a time has two advantages.  Firstly, most
 * installations will only have 2 cpus.  Secondly, load balancing too much at
 * once can have an unpleasant effect on the system.  The scheduler rarely has
 * enough information to make perfect decisions.  So this algorithm favors
 * simplicity and more gradual effects on load in larger systems.
 *
 * It could be improved by considering the priorities and slices assigned to
 * each task prior to balancing them.  There are many pathological cases with
 * any approach and so the semi-random algorithm below may work as well as any.
 *
 */
static void
kseq_balance(void *arg)
{
	struct kseq *kseq;
	int high_load;
	int low_load;
	int high_cpu;
	int low_cpu;
	int move;
	int diff;
	int i;

	high_cpu = 0;
	low_cpu = 0;
	high_load = 0;
	low_load = -1;

	mtx_lock_spin(&sched_lock);
	if (smp_started == 0)
		goto out;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load > high_load) {
			high_load = kseq->ksq_load;
			high_cpu = i;
		}
		if (low_load == -1 || kseq->ksq_load < low_load) {
			low_load = kseq->ksq_load;
			low_cpu = i;
		}
	}

	kseq = KSEQ_CPU(high_cpu);

	high_load = kseq->ksq_load_transferable;
	/*
	 * Nothing to do.
	 */
	if (high_load < kseq->ksq_cpus + 1)
		goto out;

	high_load -= kseq->ksq_cpus;

	if (low_load >= high_load)
		goto out;

	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;

	for (i = 0; i < move; i++)
		kseq_move(kseq, low_cpu);

out:
	mtx_unlock_spin(&sched_lock);
	callout_reset(&kseq_lb_callout, hz, kseq_balance, NULL);

	return;
}

static void
kseq_move(struct kseq *from, int cpu)
{
	struct kse *ke;

	ke = kseq_steal(from);
	runq_remove(ke->ke_runq, ke);
	ke->ke_state = KES_THREAD;
	kseq_rem(from, ke);

	ke->ke_cpu = cpu;
	kseq_notify(ke, cpu);
}

static __inline void
kseq_setidle(struct kseq *kseq)
{
	if (kseq->ksq_idled)
		return;
	kseq->ksq_idled = 1;
	atomic_set_int(&kseq_idle, PCPU_GET(cpumask));
	return;
}

static void
kseq_assign(struct kseq *kseq)
{
	struct kse *nke;
	struct kse *ke;

	do {
		ke = kseq->ksq_assigned;
	} while (!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
	for (; ke != NULL; ke = nke) {
		nke = ke->ke_assign;
		ke->ke_flags &= ~KEF_ASSIGNED;
		sched_add(ke->ke_thread);
	}
}
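
/*
 * Note (descriptive, added for clarity): ksq_assigned is a small
 * lock-free LIFO.  kseq_notify() below pushes a kse onto the list head
 * with a compare-and-swap, and kseq_assign() above detaches the whole
 * chain by swapping the head to NULL before walking it.  For example,
 * if another cpu pushes kse A and then kse B for this cpu, the next
 * kseq_assign() here sees the chain B -> A, clears KEF_ASSIGNED on
 * each, and re-adds them locally via sched_add().
 */
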
static void
kseq_notify(struct kse *ke, int cpu)
{
	struct kseq *kseq;
	struct thread *td;
	struct pcpu *pcpu;

	ke->ke_flags |= KEF_ASSIGNED;

	kseq = KSEQ_CPU(cpu);

	/*
	 * Place a KSE on another cpu's queue and force a resched.
	 */
	do {
		ke->ke_assign = kseq->ksq_assigned;
	} while (!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
	pcpu = pcpu_find(cpu);
	td = pcpu->pc_curthread;
	if (ke->ke_thread->td_priority < td->td_priority ||
	    td == pcpu->pc_idlethread) {
		td->td_flags |= TDF_NEEDRESCHED;
		ipi_selected(1 << cpu, IPI_AST);
	}
}

static struct kse *
runq_steal(struct runq *rq)
{
	struct rqhead *rqh;
	struct rqbits *rqb;
	struct kse *ke;
	int word;
	int bit;

	mtx_assert(&sched_lock, MA_OWNED);
	rqb = &rq->rq_status;
	for (word = 0; word < RQB_LEN; word++) {
		if (rqb->rqb_bits[word] == 0)
			continue;
		for (bit = 0; bit < RQB_BPW; bit++) {
			if ((rqb->rqb_bits[word] & (1 << bit)) == 0)
				continue;
			rqh = &rq->rq_queues[bit + (word << RQB_L2BPW)];
			TAILQ_FOREACH(ke, rqh, ke_procq) {
				if (KSE_CAN_MIGRATE(ke,
				    PRI_BASE(ke->ke_ksegrp->kg_pri_class)))
					return (ke);
			}
		}
	}
	return (NULL);
}

static struct kse *
kseq_steal(struct kseq *kseq)
{
	struct kse *ke;

	if ((ke = runq_steal(kseq->ksq_curr)) != NULL)
		return (ke);
	if ((ke = runq_steal(kseq->ksq_next)) != NULL)
		return (ke);
	return (runq_steal(&kseq->ksq_idle));
}
#endif	/* SMP */

/*
 * Pick the highest priority task we have and return it.
 */
static struct kse *
kseq_choose(struct kseq *kseq)
{
	struct kse *ke;
	struct runq *swap;

	mtx_assert(&sched_lock, MA_OWNED);
	swap = NULL;

	for (;;) {
		ke = runq_choose(kseq->ksq_curr);
		if (ke == NULL) {
			/*
			 * We already swapped once and didn't get anywhere.
			 */
			if (swap)
				break;
			swap = kseq->ksq_curr;
			kseq->ksq_curr = kseq->ksq_next;
			kseq->ksq_next = swap;
			continue;
		}
		/*
		 * If we encounter a slice of 0 the kse is in a
		 * TIMESHARE kse group and its nice was too far out
		 * of the range that receives slices.
		 */
		if (ke->ke_slice == 0) {
			runq_remove(ke->ke_runq, ke);
			sched_slice(ke);
			ke->ke_runq = kseq->ksq_next;
			runq_add(ke->ke_runq, ke);
			continue;
		}
		return (ke);
	}

	return (runq_choose(&kseq->ksq_idle));
}
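
/*
 * Queue swap example (descriptive, added for clarity): timeshare kses
 * live on either ksq_curr or ksq_next.  Once ksq_curr is empty,
 * kseq_choose() above exchanges the two pointers, so everything that
 * accumulated on the "next" queue becomes the new current queue.  A
 * non-interactive kse that exhausts its slice in sched_clock() is
 * requeued on ksq_next and therefore cannot run again until the
 * current queue has drained.
 */
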
static void
kseq_setup(struct kseq *kseq)
{
	runq_init(&kseq->ksq_timeshare[0]);
	runq_init(&kseq->ksq_timeshare[1]);
	runq_init(&kseq->ksq_idle);
	kseq->ksq_curr = &kseq->ksq_timeshare[0];
	kseq->ksq_next = &kseq->ksq_timeshare[1];
	kseq->ksq_load = 0;
	kseq->ksq_load_timeshare = 0;
#ifdef SMP
	kseq->ksq_load_transferable = 0;
	kseq->ksq_rslices = 0;
	kseq->ksq_idled = 0;
	kseq->ksq_assigned = NULL;
#endif
}

static void
sched_setup(void *dummy)
{
#ifdef SMP
	int i;
#endif

	slice_min = (hz/100);	/* 10ms */
	slice_max = (hz/7);	/* ~140ms */

#ifdef SMP
	/* init kseqs */
	/* Create the idmap. */
#ifdef ULE_HTT_EXPERIMENTAL
	if (smp_topology == NULL) {
#else
	if (1) {
#endif
		for (i = 0; i < MAXCPU; i++) {
			kseq_setup(&kseq_cpu[i]);
			kseq_idmap[i] = &kseq_cpu[i];
			kseq_cpu[i].ksq_cpus = 1;
		}
	} else {
		int j;

		for (i = 0; i < smp_topology->ct_count; i++) {
			struct cpu_group *cg;

			cg = &smp_topology->ct_group[i];
			kseq_setup(&kseq_cpu[i]);

			for (j = 0; j < MAXCPU; j++)
				if ((cg->cg_mask & (1 << j)) != 0)
					kseq_idmap[j] = &kseq_cpu[i];
			kseq_cpu[i].ksq_cpus = cg->cg_count;
		}
	}
	callout_init(&kseq_lb_callout, CALLOUT_MPSAFE);
	kseq_balance(NULL);
#else
	kseq_setup(KSEQ_SELF());
#endif
	mtx_lock_spin(&sched_lock);
	kseq_add(KSEQ_SELF(), &kse0);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Scale the scheduling priority according to the "interactivity" of this
 * process.
 */
static void
sched_priority(struct ksegrp *kg)
{
	int pri;

	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;

	pri = SCHED_PRI_INTERACT(sched_interact_score(kg));
	pri += SCHED_PRI_BASE;
	pri += kg->kg_nice;

	if (pri > PRI_MAX_TIMESHARE)
		pri = PRI_MAX_TIMESHARE;
	else if (pri < PRI_MIN_TIMESHARE)
		pri = PRI_MIN_TIMESHARE;

	kg->kg_user_pri = pri;

	return;
}

/*
 * Calculate a time slice based on the properties of the kseg and the runq
 * that we're on.  This is only for PRI_TIMESHARE ksegrps.
 */
static void
sched_slice(struct kse *ke)
{
	struct kseq *kseq;
	struct ksegrp *kg;

	kg = ke->ke_ksegrp;
	kseq = KSEQ_CPU(ke->ke_cpu);

	/*
	 * Rationale:
	 * KSEs in interactive ksegs get the minimum slice so that we
	 * quickly notice if it abuses its advantage.
	 *
	 * KSEs in non-interactive ksegs are assigned a slice that is
	 * based on the kseg's nice value relative to the least nice kseg
	 * on the run queue for this cpu.
	 *
	 * If the KSE is less nice than all others it gets the maximum
	 * slice and other KSEs will adjust their slice relative to
	 * this when they first expire.
	 *
	 * There is a 20 point window that starts relative to the least
	 * nice kse on the run queue.  Slice size is determined by
	 * the kse's distance from the least nice ksegrp.
	 *
	 * If the kse is outside of the window it will get no slice
	 * and will be reevaluated each time it is selected on the
	 * run queue.  The exception to this is nice 0 ksegs when
	 * a nice -20 is running.  They are always granted a minimum
	 * slice.
	 */
	if (!SCHED_INTERACTIVE(kg)) {
		int nice;

		nice = kg->kg_nice + (0 - kseq->ksq_nicemin);
		if (kseq->ksq_load_timeshare == 0 ||
		    kg->kg_nice < kseq->ksq_nicemin)
			ke->ke_slice = SCHED_SLICE_MAX;
		else if (nice <= SCHED_SLICE_NTHRESH)
			ke->ke_slice = SCHED_SLICE_NICE(nice);
		else if (kg->kg_nice == 0)
			ke->ke_slice = SCHED_SLICE_MIN;
		else
			ke->ke_slice = 0;
	} else
		ke->ke_slice = SCHED_SLICE_MIN;

	CTR6(KTR_ULE,
	    "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
	    ke, ke->ke_slice, kg->kg_nice, kseq->ksq_nicemin,
	    kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg));

	return;
}
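
/*
 * Slice sizing example (illustrative; assumes hz = 1000, so slice_min
 * is 10 ticks and slice_max ~142 ticks as set in sched_setup(), and
 * the stock nice range): with a least nice value of -5 on this cpu, a
 * ksegrp at nice -5 is 0 steps from ksq_nicemin and receives
 * SCHED_SLICE_NICE(0) = slice_max; one at nice 0 is 5 steps away and
 * receives slice_max - 5 * (slice_max - slice_min + 1) / 19, roughly
 * 107 ticks.  Anything more than SCHED_SLICE_NTHRESH (19) steps away
 * gets a slice of 0 (except a nice 0 ksegrp, which still gets
 * slice_min) and is re-evaluated when kseq_choose() finds it.
 */
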
/*
 * This routine enforces a maximum limit on the amount of scheduling history
 * kept.  It is called after either the slptime or runtime is adjusted.
 * This routine will not operate correctly when slp or run times have been
 * adjusted to more than double their maximum.
 */
static void
sched_interact_update(struct ksegrp *kg)
{
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum < SCHED_SLP_RUN_MAX)
		return;
	/*
	 * If we have exceeded by more than 1/5th then the algorithm below
	 * will not bring us back into range.  Dividing by two here forces
	 * us into the range of [3/5 * SCHED_SLP_RUN_MAX, SCHED_SLP_RUN_MAX].
	 */
	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
		kg->kg_runtime /= 2;
		kg->kg_slptime /= 2;
		return;
	}
	kg->kg_runtime = (kg->kg_runtime / 5) * 4;
	kg->kg_slptime = (kg->kg_slptime / 5) * 4;
}

static void
sched_interact_fork(struct ksegrp *kg)
{
	int ratio;
	int sum;

	sum = kg->kg_runtime + kg->kg_slptime;
	if (sum > SCHED_SLP_RUN_FORK) {
		ratio = sum / SCHED_SLP_RUN_FORK;
		kg->kg_runtime /= ratio;
		kg->kg_slptime /= ratio;
	}
}

static int
sched_interact_score(struct ksegrp *kg)
{
	int div;

	if (kg->kg_runtime > kg->kg_slptime) {
		div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
		return (SCHED_INTERACT_HALF +
		    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
	} else if (kg->kg_slptime > kg->kg_runtime) {
		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
		return (kg->kg_runtime / div);
	}

	/*
	 * This can happen if slptime and runtime are 0.
	 */
	return (0);
}
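
/*
 * Score example (descriptive, numbers chosen only for illustration):
 * with SCHED_INTERACT_HALF = 50, a ksegrp that has slept 100 units and
 * run 25 gives div = max(1, 100 / 50) = 2 and a score of 25 / 2 = 12,
 * which is below SCHED_INTERACT_THRESH (30) and therefore interactive.
 * The reverse (100 run, 25 slept) gives 50 + (50 - 25 / 2) = 88, well
 * into the non-interactive half of the scale.
 */
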
/*
 * This is only somewhat accurate since given many processes of the same
 * priority they will switch when their slices run out, which will be
 * at most SCHED_SLICE_MAX.
 */
int
sched_rr_interval(void)
{
	return (SCHED_SLICE_MAX);
}

static void
sched_pctcpu_update(struct kse *ke)
{
	/*
	 * Adjust counters and watermark for pctcpu calc.
	 */
	if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
		/*
		 * Shift the tick count out so that the divide doesn't
		 * round away our results.
		 */
		ke->ke_ticks <<= 10;
		ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
		    SCHED_CPU_TICKS;
		ke->ke_ticks >>= 10;
	} else
		ke->ke_ticks = 0;
	ke->ke_ltick = ticks;
	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
}

#if 0
/* XXX Should be changed to kseq_load_lowest() */
int
sched_pickcpu(void)
{
	struct kseq *kseq;
	int load;
	int cpu;
	int i;

	mtx_assert(&sched_lock, MA_OWNED);
	if (!smp_started)
		return (0);

	load = 0;
	cpu = 0;

	for (i = 0; i < mp_maxid; i++) {
		if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
			continue;
		kseq = KSEQ_CPU(i);
		if (kseq->ksq_load < load) {
			cpu = i;
			load = kseq->ksq_load;
		}
	}

	CTR1(KTR_ULE, "sched_pickcpu: %d", cpu);
	return (cpu);
}
#endif

void
sched_prio(struct thread *td, u_char prio)
{
	struct kse *ke;

	ke = td->td_kse;
	mtx_assert(&sched_lock, MA_OWNED);
	if (TD_ON_RUNQ(td)) {
		/*
		 * If the priority has been elevated due to priority
		 * propagation, we may have to move ourselves to a new
		 * queue.  We still call adjustrunqueue below in case kse
		 * needs to fix things up.
		 */
		if (prio < td->td_priority && ke &&
		    (ke->ke_flags & KEF_ASSIGNED) == 0 &&
		    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
			runq_remove(ke->ke_runq, ke);
			ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
			runq_add(ke->ke_runq, ke);
		}
		adjustrunqueue(td, prio);
	} else
		td->td_priority = prio;
}

void
sched_switch(struct thread *td)
{
	struct thread *newtd;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);

	ke = td->td_kse;

	td->td_last_kse = ke;
	td->td_lastcpu = td->td_oncpu;
	td->td_oncpu = NOCPU;
	td->td_flags &= ~TDF_NEEDRESCHED;

	if (TD_IS_RUNNING(td)) {
		if (td->td_proc->p_flag & P_SA) {
			kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
			setrunqueue(td);
		} else {
			/*
			 * This queue is always correct except for idle threads
			 * which have a higher priority due to priority
			 * propagation.
			 */
			if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE) {
				if (td->td_priority < PRI_MIN_IDLE)
					ke->ke_runq = KSEQ_SELF()->ksq_curr;
				else
					ke->ke_runq = &KSEQ_SELF()->ksq_idle;
			}
			runq_add(ke->ke_runq, ke);
			/* setrunqueue(td); */
		}
	} else {
		if (ke->ke_runq)
			kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
		/*
		 * We will not be on the run queue.  So we must be
		 * sleeping or similar.
		 */
		if (td->td_proc->p_flag & P_SA)
			kse_reassign(ke);
	}
	newtd = choosethread();
	if (td != newtd)
		cpu_switch(td, newtd);
	sched_lock.mtx_lock = (uintptr_t)td;

	td->td_oncpu = PCPU_GET(cpuid);
}

void
sched_nice(struct ksegrp *kg, int nice)
{
	struct kse *ke;
	struct thread *td;
	struct kseq *kseq;

	PROC_LOCK_ASSERT(kg->kg_proc, MA_OWNED);
	mtx_assert(&sched_lock, MA_OWNED);
	/*
	 * We need to adjust the nice counts for running KSEs.
	 */
	if (kg->kg_pri_class == PRI_TIMESHARE)
		FOREACH_KSE_IN_GROUP(kg, ke) {
			if (ke->ke_runq == NULL)
				continue;
			kseq = KSEQ_CPU(ke->ke_cpu);
			kseq_nice_rem(kseq, kg->kg_nice);
			kseq_nice_add(kseq, nice);
		}
	kg->kg_nice = nice;
	sched_priority(kg);
	FOREACH_THREAD_IN_GROUP(kg, td)
		td->td_flags |= TDF_NEEDRESCHED;
}

void
sched_sleep(struct thread *td, u_char prio)
{
	mtx_assert(&sched_lock, MA_OWNED);

	td->td_slptime = ticks;
	td->td_priority = prio;

	CTR2(KTR_ULE, "sleep kse %p (tick: %d)",
	    td->td_kse, td->td_slptime);
}

void
sched_wakeup(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);

	/*
	 * Let the kseg know how long we slept for.  This is because process
	 * interactivity behavior is modeled in the kseg.
	 */
	if (td->td_slptime) {
		struct ksegrp *kg;
		int hzticks;

		kg = td->td_ksegrp;
		hzticks = (ticks - td->td_slptime) << 10;
		if (hzticks >= SCHED_SLP_RUN_MAX) {
			kg->kg_slptime = SCHED_SLP_RUN_MAX;
			kg->kg_runtime = 1;
		} else {
			kg->kg_slptime += hzticks;
			sched_interact_update(kg);
		}
		sched_priority(kg);
		if (td->td_kse)
			sched_slice(td->td_kse);
		CTR2(KTR_ULE, "wakeup kse %p (%d ticks)",
		    td->td_kse, hzticks);
		td->td_slptime = 0;
	}
	setrunqueue(td);
}
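
/*
 * Sleep credit example (descriptive): a thread that slept for two
 * seconds wakes with hzticks = (2 * hz) << 10.  With SCHED_SLP_RUN_MAX
 * = (5 * hz) << 10 this is below the cap, so the full amount is added
 * to kg_slptime and sched_interact_update() then rescales the history
 * if the combined sleep + run time has grown too large.
 */
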
/*
 * Penalize the parent for creating a new child and initialize the child's
 * priority.
 */
void
sched_fork(struct proc *p, struct proc *p1)
{

	mtx_assert(&sched_lock, MA_OWNED);

	sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1));
	sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1));
	sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1));
}

void
sched_fork_kse(struct kse *ke, struct kse *child)
{

	child->ke_slice = 1;	/* Attempt to quickly learn interactivity. */
	child->ke_cpu = ke->ke_cpu; /* sched_pickcpu(); */
	child->ke_runq = NULL;

	/* Grab our parent's cpu estimation information. */
	child->ke_ticks = ke->ke_ticks;
	child->ke_ltick = ke->ke_ltick;
	child->ke_ftick = ke->ke_ftick;
}

void
sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);

	child->kg_slptime = kg->kg_slptime;
	child->kg_runtime = kg->kg_runtime;
	child->kg_user_pri = kg->kg_user_pri;
	child->kg_nice = kg->kg_nice;
	sched_interact_fork(child);
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)",
	    kg->kg_proc->p_pid, kg->kg_slptime, kg->kg_runtime,
	    child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime);
}

void
sched_fork_thread(struct thread *td, struct thread *child)
{
}

void
sched_class(struct ksegrp *kg, int class)
{
	struct kseq *kseq;
	struct kse *ke;
	int nclass;
	int oclass;

	mtx_assert(&sched_lock, MA_OWNED);
	if (kg->kg_pri_class == class)
		return;

	nclass = PRI_BASE(class);
	oclass = PRI_BASE(kg->kg_pri_class);
	FOREACH_KSE_IN_GROUP(kg, ke) {
		if (ke->ke_state != KES_ONRUNQ &&
		    ke->ke_state != KES_THREAD)
			continue;
		kseq = KSEQ_CPU(ke->ke_cpu);

#ifdef SMP
		if (KSE_CAN_MIGRATE(ke, oclass))
			kseq->ksq_load_transferable--;
		if (KSE_CAN_MIGRATE(ke, nclass))
			kseq->ksq_load_transferable++;
#endif
		if (oclass == PRI_TIMESHARE)
			kseq->ksq_load_timeshare--;
		if (nclass == PRI_TIMESHARE)
			kseq->ksq_load_timeshare++;

		if (kg->kg_pri_class == PRI_TIMESHARE)
			kseq_nice_rem(kseq, kg->kg_nice);
		else if (class == PRI_TIMESHARE)
			kseq_nice_add(kseq, kg->kg_nice);
	}

	kg->kg_pri_class = class;
}

/*
 * Return some of the child's priority and interactivity to the parent.
 */
void
sched_exit(struct proc *p, struct proc *child)
{
	mtx_assert(&sched_lock, MA_OWNED);
	sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child));
	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child));
}

void
sched_exit_kse(struct kse *ke, struct kse *child)
{
	kseq_rem(KSEQ_CPU(child->ke_cpu), child);
}

void
sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child)
{
	/* kg->kg_slptime += child->kg_slptime; */
	kg->kg_runtime += child->kg_runtime;
	sched_interact_update(kg);
}

void
sched_exit_thread(struct thread *td, struct thread *child)
{
}
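
/*
 * Illustrative note for the stathz handling in sched_clock() below:
 * with hz = 1000 and stathz = 128 (both assumed values), realstathz
 * becomes 128 and tickincr = 1000 / 128 = 7, so each stat clock tick
 * charges the running ksegrp roughly 7 << 10 units of run time.  The
 * tickincr = 1 fallback guards against stathz values larger than hz,
 * where the division would otherwise truncate to zero.
 */
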
void
sched_clock(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;

	/*
	 * sched_setup() apparently happens prior to stathz being set.  We
	 * need to resolve the timers earlier in the boot so we can avoid
	 * calculating this here.
	 */
	if (realstathz == 0) {
		realstathz = stathz ? stathz : hz;
		tickincr = hz / realstathz;
		/*
		 * XXX This does not work for values of stathz that are much
		 * larger than hz.
		 */
		if (tickincr == 0)
			tickincr = 1;
	}

	ke = td->td_kse;
	kg = ke->ke_ksegrp;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td != NULL), ("schedclock: null thread pointer"));

	/* Adjust ticks for pctcpu */
	ke->ke_ticks++;
	ke->ke_ltick = ticks;

	/* Go up to one second beyond our max and then trim back down */
	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
		sched_pctcpu_update(ke);

	if (td->td_flags & TDF_IDLETD)
		return;

	CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
	    ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
	/*
	 * We only do slicing code for TIMESHARE ksegrps.
	 */
	if (kg->kg_pri_class != PRI_TIMESHARE)
		return;
	/*
	 * We used a tick; charge it to the ksegrp so that we can compute
	 * our interactivity.
	 */
	kg->kg_runtime += tickincr << 10;
	sched_interact_update(kg);

	/*
	 * We used up one time slice.
	 */
	ke->ke_slice--;
	kseq = KSEQ_SELF();
#ifdef SMP
	kseq->ksq_rslices--;
#endif

	if (ke->ke_slice > 0)
		return;
	/*
	 * We're out of time, recompute priorities and requeue.
	 */
	kseq_rem(kseq, ke);
	sched_priority(kg);
	sched_slice(ke);
	if (SCHED_CURR(kg, ke))
		ke->ke_runq = kseq->ksq_curr;
	else
		ke->ke_runq = kseq->ksq_next;
	kseq_add(kseq, ke);
	td->td_flags |= TDF_NEEDRESCHED;
}

int
sched_runnable(void)
{
	struct kseq *kseq;
	int load;

	load = 1;

	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned) {
		mtx_lock_spin(&sched_lock);
		kseq_assign(kseq);
		mtx_unlock_spin(&sched_lock);
	}
#endif
	if ((curthread->td_flags & TDF_IDLETD) != 0) {
		if (kseq->ksq_load > 0)
			goto out;
	} else
		if (kseq->ksq_load - 1 > 0)
			goto out;
	load = 0;
out:
	return (load);
}

void
sched_userret(struct thread *td)
{
	struct ksegrp *kg;

	kg = td->td_ksegrp;

	if (td->td_priority != kg->kg_user_pri) {
		mtx_lock_spin(&sched_lock);
		td->td_priority = kg->kg_user_pri;
		mtx_unlock_spin(&sched_lock);
	}
}

struct kse *
sched_choose(void)
{
	struct kseq *kseq;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	kseq = KSEQ_SELF();
#ifdef SMP
	if (kseq->ksq_assigned)
		kseq_assign(kseq);
#endif
	ke = kseq_choose(kseq);
	if (ke) {
#ifdef SMP
		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
			kseq_setidle(kseq);
#endif
		runq_remove(ke->ke_runq, ke);
		ke->ke_state = KES_THREAD;

		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
			CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
			    ke, ke->ke_runq, ke->ke_slice,
			    ke->ke_thread->td_priority);
		}
		return (ke);
	}
#ifdef SMP
	kseq_setidle(kseq);
#endif
	return (NULL);
}

void
sched_add(struct thread *td)
{
	struct kseq *kseq;
	struct ksegrp *kg;
	struct kse *ke;
	int class;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
	kg = td->td_ksegrp;
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	kseq = KSEQ_SELF();
	KASSERT((ke->ke_thread != NULL), ("sched_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("sched_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("sched_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("sched_add: process swapped out"));
	KASSERT(ke->ke_runq == NULL,
	    ("sched_add: KSE %p is still assigned to a run queue", ke));

	class = PRI_BASE(kg->kg_pri_class);
	switch (class) {
	case PRI_ITHD:
	case PRI_REALTIME:
		ke->ke_runq = kseq->ksq_curr;
		ke->ke_slice = SCHED_SLICE_MAX;
		ke->ke_cpu = PCPU_GET(cpuid);
		break;
	case PRI_TIMESHARE:
#ifdef SMP
		if (ke->ke_cpu != PCPU_GET(cpuid)) {
			kseq_notify(ke, ke->ke_cpu);
			return;
		}
#endif
		if (SCHED_CURR(kg, ke))
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = kseq->ksq_next;
		break;
	case PRI_IDLE:
#ifdef SMP
		if (ke->ke_cpu != PCPU_GET(cpuid)) {
			kseq_notify(ke, ke->ke_cpu);
			return;
		}
#endif
		/*
		 * This is for priority propagation.
		 */
		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
			ke->ke_runq = kseq->ksq_curr;
		else
			ke->ke_runq = &kseq->ksq_idle;
		ke->ke_slice = SCHED_SLICE_MIN;
		break;
	default:
		panic("Unknown pri class.");
		break;
	}
#ifdef SMP
	/*
	 * If there are any idle processors, give them our extra load.
	 */
	if (kseq_idle && KSE_CAN_MIGRATE(ke, class) &&
	    kseq->ksq_load_transferable >= kseq->ksq_cpus) {
		int cpu;

		/*
		 * Multiple cpus could find this bit simultaneously but the
		 * race shouldn't be terrible.
		 */
		cpu = ffs(kseq_idle);
		if (cpu) {
			cpu--;
			atomic_clear_int(&kseq_idle, 1 << cpu);
			ke->ke_cpu = cpu;
			ke->ke_runq = NULL;
			kseq_notify(ke, cpu);
			return;
		}
	}
	if (kseq->ksq_idled &&
	    (class == PRI_TIMESHARE || class == PRI_REALTIME)) {
		atomic_clear_int(&kseq_idle, PCPU_GET(cpumask));
		kseq->ksq_idled = 0;
	}
#endif
	if (td->td_priority < curthread->td_priority)
		curthread->td_flags |= TDF_NEEDRESCHED;

	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;

	runq_add(ke->ke_runq, ke);
	kseq_add(kseq, ke);
}

void
sched_rem(struct thread *td)
{
	struct kseq *kseq;
	struct kse *ke;

	ke = td->td_kse;
	/*
	 * It is safe to just return here because sched_rem() is only ever
	 * used in places where we're immediately going to add the
	 * kse back on again.  In that case it'll be added with the correct
	 * thread and priority when the caller drops the sched_lock.
	 */
	if (ke->ke_flags & KEF_ASSIGNED)
		return;
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));

	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq = KSEQ_CPU(ke->ke_cpu);
	runq_remove(ke->ke_runq, ke);
	kseq_rem(kseq, ke);
}
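
/*
 * Fixed point example for sched_pctcpu() below (illustrative): a kse
 * that was on cpu for every stat tick over the ~SCHED_CPU_TIME second
 * averaging window accumulates roughly realstathz * SCHED_CPU_TIME
 * ticks, so rtick works out to realstathz and pctcpu to FSCALE, i.e.
 * 100%.  Half as many ticks yields FSCALE / 2, which ps(1) and top(1)
 * display as 50%.
 */
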
fixpt_t
sched_pctcpu(struct thread *td)
{
	fixpt_t pctcpu;
	struct kse *ke;

	pctcpu = 0;
	ke = td->td_kse;
	if (ke == NULL)
		return (0);

	mtx_lock_spin(&sched_lock);
	if (ke->ke_ticks) {
		int rtick;

		/*
		 * Don't update more frequently than twice a second.  Allowing
		 * this causes the cpu usage to decay away too quickly due to
		 * rounding errors.
		 */
		if (ke->ke_ltick < (ticks - (hz / 2)))
			sched_pctcpu_update(ke);
		/* How many rticks per second? */
		rtick = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
		pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
	}

	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
	mtx_unlock_spin(&sched_lock);

	return (pctcpu);
}

void
sched_pin(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags |= KEF_PINNED;
}

void
sched_unpin(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_PINNED;
}

void
sched_bind(struct thread *td, int cpu)
{
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	ke = td->td_kse;
#ifndef SMP
	ke->ke_flags |= KEF_BOUND;
#else
	if (PCPU_GET(cpuid) == cpu) {
		ke->ke_flags |= KEF_BOUND;
		return;
	}
	/* sched_rem without the runq_remove */
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
	kseq_rem(KSEQ_CPU(ke->ke_cpu), ke);
	ke->ke_cpu = cpu;
	kseq_notify(ke, cpu);
	/* When we return from mi_switch we'll be on the correct cpu. */
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
#endif
}

void
sched_unbind(struct thread *td)
{
	mtx_assert(&sched_lock, MA_OWNED);
	td->td_kse->ke_flags &= ~KEF_BOUND;
}

int
sched_sizeof_kse(void)
{
	return (sizeof(struct kse) + sizeof(struct ke_sched));
}

int
sched_sizeof_ksegrp(void)
{
	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
}

int
sched_sizeof_proc(void)
{
	return (sizeof(struct proc));
}

int
sched_sizeof_thread(void)
{
	return (sizeof(struct thread) + sizeof(struct td_sched));
}