kern_switch.c revision 143884
/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg).  If there are X of these KSEs actually running at the
moment in question, then there are at most M (N-X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running.  The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned.  If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue).  If it is also the last assigned, we know M was 1 and must
now be 0.  Since the thread is no longer queued, that pointer must be
removed from it.  Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, and we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not.. i.e. is it 'earlier'
on the list than that thread or later..  If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).
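
(A purely illustrative example: with M = 2 available KSEs and runnable
threads T1, T2, T3 queued in that priority order, T1 and T2 are preassigned
to the KSEs and kg_last_assigned points at T2.  If a new thread with a
priority between T1 and T2 becomes runnable, it is inserted before T2,
takes over T2's KSE, and kg_last_assigned is backed up to point at the
new thread.)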

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP.  If this situation changes, the KSEs are
reassigned to keep this true.
***/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 143884 2005-03-20 17:05:12Z rwatson $");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else  /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#include <machine/critical.h>
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

#define td_kse td_sched

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
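 * For a threaded process the chosen thread is also unlinked from its
 * ksegrp run queue here, and kg_last_assigned is backed up if it was
 * pointing at that thread.  During a panic only system threads and the
 * thread already running in the panic context are allowed to run.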
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run idlethread on AP's */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		ke->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	ke = sched_choose();
	if (ke) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_proc->p_flag & P_HADTHREADS) {
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
		}
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a surplus system slot, try to assign a new runnable thread to it.
 * Called from:
 *  sched_thread_exit()  (local)
 *  sched_switch()  (local)
 *  remrunqueue()  (local)  (not at the moment)
 */
static void
slot_fill(struct ksegrp *kg)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	while (kg->kg_avail_opennings > 0) {
		/*
		 * Find the first unassigned thread
		 */
		if ((td = kg->kg_last_assigned) != NULL)
			td = TAILQ_NEXT(td, td_runq);
		else
			td = TAILQ_FIRST(&kg->kg_runq);

		/*
		 * If we found one, send it to the system scheduler.
		 */
		if (td) {
			kg->kg_last_assigned = td;
			sched_add(td, SRQ_YIELDING);
			CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
		} else {
			/* no threads to use up the slots. quit now */
			break;
		}
	}
}

#ifdef SCHED_4BSD
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	TD_SET_CAN_RUN(td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
		/* remove from sys run queue and free up a slot */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	if (ke->ke_state == KES_ONRUNQ) {
		/*
		 * This thread has been assigned to the system run queue.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread.  Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		/* slot_fill(kg); */ /* will replace it with another */
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(td);
			sched_add(td, SRQ_BORING);
		}
		return;
	}

	/* It is a threaded process */
	kg = td->td_ksegrp;
	if (ke->ke_state == KES_ONRUNQ) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(td);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	TD_SET_CAN_RUN(td);
	td->td_priority = newpri;
	setrunqueue(td, SRQ_BORING);
}

/*
 * This function is called when a thread is about to be put on a
 * ksegrp run queue because it has been made runnable or its
 * priority has been adjusted and the ksegrp does not have a
 * free kse slot.  It determines if a thread from the same ksegrp
 * should be preempted.  If so, it tries to switch threads
 * if the thread is on the same cpu or notifies another cpu that
 * it should switch threads.
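 *
 * On a uniprocessor kernel only the currently running thread is a
 * candidate for preemption.  The SMP version scans the cpus (skipping
 * idle and stopped ones) for the one running the worst priority thread
 * of the same ksegrp, and either switches locally or marks that thread
 * with TDF_NEEDRESCHED and sends its cpu an AST IPI.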
 */

static void
maybe_preempt_in_ksegrp(struct thread *td)
#if !defined(SMP)
{
	struct thread *running_thread;

#ifndef FULL_PREEMPTION
	int pri;
	pri = td->td_priority;
	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD))
		return;
#endif
	mtx_assert(&sched_lock, MA_OWNED);
	running_thread = curthread;

	if (running_thread->td_ksegrp != td->td_ksegrp)
		return;

	if (td->td_priority > running_thread->td_priority)
		return;
#ifdef PREEMPTION
	if (running_thread->td_critnest > 1)
		running_thread->td_pflags |= TDP_OWEPREEMPT;
	else
		mi_switch(SW_INVOL, NULL);

#else
	running_thread->td_flags |= TDF_NEEDRESCHED;
#endif
	return;
}

#else /* SMP */
{
	struct thread *running_thread;
	int worst_pri;
	struct ksegrp *kg;
	cpumask_t cpumask, dontuse;
	struct pcpu *pc;
	struct pcpu *best_pcpu;
	struct thread *cputhread;

#ifndef FULL_PREEMPTION
	int pri;
	pri = td->td_priority;
	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD))
		return;
#endif

	mtx_assert(&sched_lock, MA_OWNED);

	running_thread = curthread;

#if !defined(KSEG_PEEMPT_BEST_CPU)
	if (running_thread->td_ksegrp != td->td_ksegrp) {
#endif
		kg = td->td_ksegrp;

		/* if someone is ahead of this thread, wait our turn */
		if (td != TAILQ_FIRST(&kg->kg_runq))
			return;

		worst_pri = td->td_priority;
		best_pcpu = NULL;
		dontuse = stopped_cpus | idle_cpus_mask;

		/*
		 * Find a cpu with the worst priority that runs a thread from
		 * the same ksegrp - if multiple exist, give preference first
		 * to the thread's last run cpu and then to the current cpu.
		 */

		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
			cpumask = pc->pc_cpumask;
			cputhread = pc->pc_curthread;

			if ((cpumask & dontuse) ||
			    cputhread->td_ksegrp != kg)
				continue;

			if (cputhread->td_priority > worst_pri) {
				worst_pri = cputhread->td_priority;
				best_pcpu = pc;
				continue;
			}

			if (cputhread->td_priority == worst_pri &&
			    best_pcpu != NULL &&
			    (td->td_lastcpu == pc->pc_cpuid ||
				(PCPU_GET(cpumask) == cpumask &&
				    td->td_lastcpu != best_pcpu->pc_cpuid)))
				best_pcpu = pc;
		}

		/* Check if we need to preempt someone */
		if (best_pcpu == NULL)
			return;

		if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
			best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
			ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
			return;
		}
#if !defined(KSEG_PEEMPT_BEST_CPU)
	}
#endif

	if (td->td_priority > running_thread->td_priority)
		return;
#ifdef PREEMPTION
	if (running_thread->td_critnest > 1)
		running_thread->td_pflags |= TDP_OWEPREEMPT;
	else
		mi_switch(SW_INVOL, NULL);

#else
	running_thread->td_flags |= TDF_NEEDRESCHED;
#endif
	return;
}
#endif /* !SMP */


int limitcount;
void
setrunqueue(struct thread *td, int flags)
{
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
	    td, td->td_ksegrp, td->td_proc->p_pid);
	CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("setrunqueue: trying to run inhibitted thread"));
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
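
	/*
	 * For a threaded process the code below works in three steps:
	 * reclaim system run queue slots from lower priority assigned
	 * threads if we have to, insert the thread into the ksegrp run
	 * queue in priority order, and then, if a slot is free, hand it
	 * to the first thread that does not yet have one.
	 */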
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		if (kg->kg_avail_opennings != 1) {
			if (limitcount < 1) {
				limitcount++;
				printf("pid %d: corrected slot count (%d->1)\n",
				    td->td_proc->p_pid, kg->kg_avail_opennings);

			}
			kg->kg_avail_opennings = 1;
		}
		sched_add(td, flags);
		return;
	}

	/*
	 * If the concurrency has reduced, and we would go in the
	 * assigned section, then keep removing entries from the
	 * system run queue, until we are not in that section
	 * or there is room for us to be put in that section.
	 * What we MUST avoid is the case where threads of lower
	 * priority than the new one are scheduled, but the new one
	 * cannot be scheduled itself.  That would lead to a non-contiguous
	 * set of scheduled threads, and everything would break.
	 */
	tda = kg->kg_last_assigned;
	while ((kg->kg_avail_opennings <= 0) &&
	    (tda && (tda->td_priority > td->td_priority))) {
		/*
		 * None free, but there is one we can commandeer.
		 */
		CTR2(KTR_RUNQ,
		    "setrunqueue: kg:%p: take slot from td: %p", kg, tda);
		sched_rem(tda);
		tda = kg->kg_last_assigned =
		    TAILQ_PREV(tda, threadqueue, td_runq);
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a slot to use, then put the thread on the system
	 * run queue and if needed, readjust the last_assigned pointer.
	 * It may be that we need to schedule something anyhow
	 * even if the available slots are negative so that
	 * all the items < last_assigned are scheduled.
	 */
	if (kg->kg_avail_opennings > 0) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the slot.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			td2 = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * give the next slot to whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
		}
		sched_add(td2, flags);
	} else {
		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
		    td, td->td_ksegrp, td->td_proc->p_pid);
		if ((flags & SRQ_YIELDING) == 0)
			maybe_preempt_in_ksegrp(td);
	}
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
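 *
 * critical_enter() and critical_exit() maintain a per-thread nesting
 * count (td_critnest).  A preemption that cannot happen immediately is
 * recorded in TDP_OWEPREEMPT and is performed when the outermost
 * critical section is exited in critical_exit() below.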
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter(td);
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
	if (td->td_critnest == 1) {
		if (td->td_pflags & TDP_WAKEPROC0) {
			td->td_pflags &= ~TDP_WAKEPROC0;
			wakeup(&proc0);
		}
#ifdef PREEMPTION
		mtx_assert(&sched_lock, MA_NOTOWNED);
		if (td->td_pflags & TDP_OWEPREEMPT) {
			mtx_lock_spin(&sched_lock);
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
#endif
		td->td_critnest = 0;
		cpu_critical_exit(td);
	} else {
		td->td_critnest--;
	}
	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

/*
 * This function is called when a thread is about to be put on run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines if the new thread should be immediately preempted to.  If so,
 * it switches to it and eventually returns true.  If not, it returns false
 * so that the caller may place the thread on an appropriate run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The kernel is in the throes of crashing (panicstr).
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.  Note that this prevents curthread from
	 *    trying to preempt to itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyways, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - If the new thread's priority is not a realtime priority and
	 *    the current thread's priority is not an idle priority and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	KASSERT((ctd->td_kse != NULL && ctd->td_kse->ke_thread == ctd),
	    ("thread has no (or wrong) sched-private part."));
	KASSERT((td->td_inhibitors == 0),
	    ("maybe_preempt: trying to run inhibitted thread"));
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
	    TD_IS_INHIBITED(ctd) || td->td_kse->ke_state != KES_THREAD)
		return (0);
#ifndef FULL_PREEMPTION
	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
	    !(cpri >= PRI_MIN_IDLE))
		return (0);
#endif
	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_pflags |= TDP_OWEPREEMPT;
		return (0);
	}

	/*
	 * Thread is runnable but not yet put on system run queue.
	 */
	MPASS(TD_ON_RUNQ(td));
	MPASS(td->td_sched->ke_state != KES_ONRUNQ);
	if (td->td_proc->p_flag & P_HADTHREADS) {
		/*
		 * If this is a threaded process we actually ARE on the
		 * ksegrp run queue so take it off that first.
		 * Also undo any damage done to the last_assigned pointer.
		 * XXX Fix setrunqueue so this isn't needed
		 */
		struct ksegrp *kg;

		kg = td->td_ksegrp;
		if (kg->kg_last_assigned == td)
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	}

	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL|SW_PREEMPT, td);
	return (1);
#else
	return (0);
#endif
}

#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
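 * The bits are held in an array of RQB_LEN words that is scanned from
 * word 0 upwards, so the lowest numbered (highest priority) non-empty
 * queue is found first; RQB_FFS() finds the first set bit within a word.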
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	}
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority process on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
		/* fuzz == 1 is normal.. 0 or less are ignored */
		if (runq_fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = runq_fuzz;
			int cpu = PCPU_GET(cpuid);
			struct kse *ke2;
			ke2 = ke = TAILQ_FIRST(rqh);

			while (count-- && ke2) {
				if (ke2->ke_thread->td_lastcpu == cpu) {
					ke = ke2;
					break;
				}
				ke2 = TAILQ_NEXT(ke2, ke_procq);
			}
		} else
#endif
			ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}

/****** functions that are temporarily here ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;

/*
 * Allocate scheduler specific per-process resources.
 * The thread and ksegrp have already been linked in.
 * In this case just set the default concurrency value.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
{

	/* This can go in sched_fork */
	sched_init_concurrency(kg);
}

/*
 * A thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
	struct td_sched *ke;

	ke = (struct td_sched *) (td + 1);
	bzero(ke, sizeof(*ke));
	td->td_sched = ke;
	ke->ke_thread = td;
	ke->ke_state = KES_THREAD;
}

/*
 * Set up an initial concurrency of 1
 * and set the given thread (if given) to be using that
 * concurrency slot.
 * May be used "offline"..before the ksegrp is attached to the world
 * and thus wouldn't need schedlock in that case.
 * Called from:
 *  thr_create()
 *  proc_init() (UMA) via sched_newproc()
 */
void
sched_init_concurrency(struct ksegrp *kg)
{

	CTR1(KTR_RUNQ, "kg %p init slots and concurrency to 1", kg);
	kg->kg_concurrency = 1;
	kg->kg_avail_opennings = 1;
}

/*
 * Change the concurrency of an existing ksegrp to N
 * Called from:
 *  kse_create()
 *  kse_exit()
 *  thread_exit()
 *  thread_single()
 */
void
sched_set_concurrency(struct ksegrp *kg, int concurrency)
{

	CTR4(KTR_RUNQ, "kg %p set concurrency to %d, slots %d -> %d",
	    kg,
	    concurrency,
	    kg->kg_avail_opennings,
	    kg->kg_avail_opennings + (concurrency - kg->kg_concurrency));
	kg->kg_avail_opennings += (concurrency - kg->kg_concurrency);
	kg->kg_concurrency = concurrency;
}

/*
 * Called from thread_exit() for all exiting threads.
 *
 * Not to be confused with sched_exit_thread()
 * that is only called from thread_exit() for threads exiting
 * without the rest of the process exiting because it is also called from
 * sched_exit() and we wouldn't want to call it twice.
 * XXX This can probably be fixed.
 */
void
sched_thread_exit(struct thread *td)
{

	SLOT_RELEASE(td->td_ksegrp);
	slot_fill(td->td_ksegrp);
}

#endif /* KERN_SWITCH_INCLUDE */