kern_switch.c revision 159154
/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic:

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg).  If there are X of these KSEs actually running at the
moment in question, then there are at most M = N - X of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running.  The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned.  If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue in order to run, we know
it was associated with the highest priority thread in the queue (at the head
of the queue).  If it is also the last assigned, we know M was 1 and must
now be 0.  Since the thread is no longer queued, that pointer must be
removed from it.  Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available; we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, so we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not, i.e. whether it is 'earlier'
on the list than that thread or later.  If it's earlier, the KSE is
removed from the last assigned thread (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP.  If this situation changes, the KSEs are
reassigned to keep this true.
***/
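/*
 * A concrete instance of the bookkeeping described above: with N = 4
 * processors and X = 3 KSEs actually running, at most M = 1 KSE sits on
 * the run queue.  It carries the priority of the single highest-priority
 * queued thread, and kg_last_assigned points at that thread; every thread
 * queued behind it has no KSE and must wait for a slot to become free.
 */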
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 159154 2006-06-01 22:45:56Z cognet $");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else  /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
#endif

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

#define	td_kse td_sched

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
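/*
 * Being read-only, the knob above can only be inspected from user space,
 * e.g. with sysctl(8):
 *
 *	$ sysctl kern.sched.preemption
 *	kern.sched.preemption: 1
 *
 * (The value shown is for a kernel built with "options PREEMPTION".)
 */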
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run idlethread on APs */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		ke->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	ke = sched_choose();
	if (ke) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_proc->p_flag & P_HADTHREADS) {
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
		}
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a surplus system slot, try to assign a new runnable thread to it.
 * Called from:
 *  sched_thread_exit()  (local)
 *  sched_switch()  (local)
 *  remrunqueue()  (local)  (not at the moment)
 */
static void
slot_fill(struct ksegrp *kg)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	while (kg->kg_avail_opennings > 0) {
		/*
		 * Find the first unassigned thread
		 */
		if ((td = kg->kg_last_assigned) != NULL)
			td = TAILQ_NEXT(td, td_runq);
		else
			td = TAILQ_FIRST(&kg->kg_runq);

		/*
		 * If we found one, send it to the system scheduler.
		 */
		if (td) {
			kg->kg_last_assigned = td;
			sched_add(td, SRQ_YIELDING);
			CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
		} else {
			/* no threads to use up the slots. quit now */
			break;
		}
	}
}
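/*
 * An illustration of the walk in slot_fill() above: if the ksegrp has
 * three queued threads T1 > T2 > T3 (by priority), kg_last_assigned
 * points at T1 and two openings are available, the loop hands T2 and
 * then T3 to sched_add() (assuming each call consumes one opening),
 * leaving kg_last_assigned pointing at T3.
 */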
#ifdef SCHED_4BSD
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	TD_SET_CAN_RUN(td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
		/* remove from sys run queue and free up a slot */
		sched_rem(td);
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	if (ke->ke_state == KES_ONRUNQ) {
		/*
		 * This thread has been assigned to the system run queue.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread.  Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(td);
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		/* slot_fill(kg); */ /* will replace it with another */
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(td);
			sched_add(td, SRQ_BORING);
		}
		return;
	}

	/* It is a threaded process */
	kg = td->td_ksegrp;
	if (ke->ke_state == KES_ONRUNQ
#ifdef SCHED_ULE
	    || ((ke->ke_flags & KEF_ASSIGNED) != 0 &&
		(ke->ke_flags & KEF_REMOVED) == 0)
#endif
	   ) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(td);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	TD_SET_CAN_RUN(td);
	td->td_priority = newpri;
	setrunqueue(td, SRQ_BORING);
}
/*
 * This function is called when a thread is about to be put on a
 * ksegrp run queue because it has been made runnable or its
 * priority has been adjusted and the ksegrp does not have a
 * free kse slot.  It determines if a thread from the same ksegrp
 * should be preempted.  If so, it tries to switch threads
 * if the thread is on the same cpu, or notifies another cpu that
 * it should switch threads.
 */

static void
maybe_preempt_in_ksegrp(struct thread *td)
#if !defined(SMP)
{
	struct thread *running_thread;

	mtx_assert(&sched_lock, MA_OWNED);
	running_thread = curthread;

	if (running_thread->td_ksegrp != td->td_ksegrp)
		return;

	if (td->td_priority >= running_thread->td_priority)
		return;
#ifdef PREEMPTION
#ifndef FULL_PREEMPTION
	if (td->td_priority > PRI_MAX_ITHD) {
		running_thread->td_flags |= TDF_NEEDRESCHED;
		return;
	}
#endif /* FULL_PREEMPTION */

	if (running_thread->td_critnest > 1)
		running_thread->td_owepreempt = 1;
	else
		mi_switch(SW_INVOL, NULL);

#else /* PREEMPTION */
	running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* PREEMPTION */
	return;
}

#else /* SMP */
{
	struct thread *running_thread;
	int worst_pri;
	struct ksegrp *kg;
	cpumask_t cpumask, dontuse;
	struct pcpu *pc;
	struct pcpu *best_pcpu;
	struct thread *cputhread;

	mtx_assert(&sched_lock, MA_OWNED);

	running_thread = curthread;

#if !defined(KSEG_PEEMPT_BEST_CPU)
	if (running_thread->td_ksegrp != td->td_ksegrp) {
#endif
		kg = td->td_ksegrp;

		/* if someone is ahead of this thread, wait our turn */
		if (td != TAILQ_FIRST(&kg->kg_runq))
			return;

		worst_pri = td->td_priority;
		best_pcpu = NULL;
		dontuse   = stopped_cpus | idle_cpus_mask;

		/*
		 * Find the cpu with the worst priority that runs a thread
		 * from the same ksegrp; if several qualify, prefer the
		 * thread's last-run cpu first, and then the current cpu.
		 */

		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
			cpumask   = pc->pc_cpumask;
			cputhread = pc->pc_curthread;

			if ((cpumask & dontuse) ||
			    cputhread->td_ksegrp != kg)
				continue;

			if (cputhread->td_priority > worst_pri) {
				worst_pri = cputhread->td_priority;
				best_pcpu = pc;
				continue;
			}

			if (cputhread->td_priority == worst_pri &&
			    best_pcpu != NULL &&
			    (td->td_lastcpu == pc->pc_cpuid ||
				(PCPU_GET(cpumask) == cpumask &&
				    td->td_lastcpu != best_pcpu->pc_cpuid)))
				best_pcpu = pc;
		}

		/* Check if we need to preempt someone */
		if (best_pcpu == NULL)
			return;

#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
#if !defined(FULL_PREEMPTION)
		if (td->td_priority <= PRI_MAX_ITHD)
#endif /* ! FULL_PREEMPTION */
		{
			ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT);
			return;
		}
#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */

		if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
			best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
			ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
			return;
		}
#if !defined(KSEG_PEEMPT_BEST_CPU)
	}
#endif

	if (td->td_priority >= running_thread->td_priority)
		return;
#ifdef PREEMPTION

#if !defined(FULL_PREEMPTION)
	if (td->td_priority > PRI_MAX_ITHD) {
		running_thread->td_flags |= TDF_NEEDRESCHED;
	}
#endif /* ! FULL_PREEMPTION */

	if (running_thread->td_critnest > 1)
		running_thread->td_owepreempt = 1;
	else
		mi_switch(SW_INVOL, NULL);

#else /* PREEMPTION */
	running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* PREEMPTION */
	return;
}
#endif /* !SMP */

int limitcount;
void
setrunqueue(struct thread *td, int flags)
{
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
	    td, td->td_ksegrp, td->td_proc->p_pid);
	CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
	    td, td->td_proc->p_comm, td->td_priority, curthread,
	    curthread->td_proc->p_comm);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td->td_inhibitors == 0),
	    ("setrunqueue: trying to run inhibited thread"));
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		if (kg->kg_avail_opennings != 1) {
			if (limitcount < 1) {
				limitcount++;
				printf("pid %d: corrected slot count (%d->1)\n",
				    td->td_proc->p_pid, kg->kg_avail_opennings);
			}
			kg->kg_avail_opennings = 1;
		}
		sched_add(td, flags);
		return;
	}

	/*
	 * If the concurrency has reduced, and we would go in the
	 * assigned section, then keep removing entries from the
	 * system run queue, until we are not in that section
	 * or there is room for us to be put in that section.
	 * What we MUST avoid is the case where threads of lower
	 * priority than the new one are scheduled, but the new one
	 * cannot be scheduled itself.  That would lead to a
	 * non-contiguous set of scheduled threads, and everything
	 * would break.
	 */
	tda = kg->kg_last_assigned;
	while ((kg->kg_avail_opennings <= 0) &&
	    (tda && (tda->td_priority > td->td_priority))) {
		/*
		 * None free, but there is one we can commandeer.
		 */
		CTR2(KTR_RUNQ,
		    "setrunqueue: kg:%p: take slot from td: %p", kg, tda);
		sched_rem(tda);
		tda = kg->kg_last_assigned =
		    TAILQ_PREV(tda, threadqueue, td_runq);
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a slot to use, then put the thread on the system
	 * run queue and if needed, readjust the last_assigned pointer.
	 * It may be that we need to schedule something anyhow,
	 * even if the available slots are negative, so that
	 * all the items < last_assigned are scheduled.
	 */
	if (kg->kg_avail_opennings > 0) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the slot.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			td2 = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * give the next slot to whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
		}
		sched_add(td2, flags);
	} else {
		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
		    td, td->td_ksegrp, td->td_proc->p_pid);
		if ((flags & SRQ_YIELDING) == 0)
			maybe_preempt_in_ksegrp(td);
	}
}
/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
#ifdef PREEMPTION
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		mtx_assert(&sched_lock, MA_NOTOWNED);
		if (td->td_owepreempt) {
			td->td_critnest = 1;
			mtx_lock_spin(&sched_lock);
			td->td_critnest--;
			mi_switch(SW_INVOL, NULL);
			mtx_unlock_spin(&sched_lock);
		}
	} else
#endif
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}
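/*
 * Typical (illustrative) use of the pair above: code that manipulates
 * per-CPU state brackets itself so that it cannot be preempted, and thus
 * migrated, between the two calls:
 *
 *	critical_enter();
 *	... touch PCPU data ...
 *	critical_exit();
 *
 * Sections nest via td_critnest; a preemption deferred while inside one
 * (recorded in td_owepreempt) is only acted upon when the outermost
 * section is exited.
 */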
/*
 * This function is called when a thread is about to be put on a run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines whether the current thread should be preempted in favor of the
 * new thread.  If so, it switches to it and eventually returns true.  If not,
 * it returns false so that the caller may place the thread on an appropriate
 * run queue.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
	struct thread *ctd;
	int cpri, pri;
#endif

	mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
	/*
	 * The new thread should not preempt the current thread if any of the
	 * following conditions are true:
	 *
	 *  - The kernel is in the throes of crashing (panicstr).
	 *  - The current thread has a higher (numerically lower) or
	 *    equivalent priority.  Note that this prevents curthread from
	 *    trying to preempt to itself.
	 *  - It is too early in the boot for context switches (cold is set).
	 *  - The current thread has an inhibitor set or is in the process of
	 *    exiting.  In this case, the current thread is about to switch
	 *    out anyways, so there's no point in preempting.  If we did,
	 *    the current thread would not be properly resumed as well, so
	 *    just avoid that whole landmine.
	 *  - If the new thread's priority is not a realtime priority and
	 *    the current thread's priority is not an idle priority and
	 *    FULL_PREEMPTION is disabled.
	 *
	 * If all of these conditions are false, but the current thread is in
	 * a nested critical section, then we have to defer the preemption
	 * until we exit the critical section.  Otherwise, switch immediately
	 * to the new thread.
	 */
	ctd = curthread;
	KASSERT((ctd->td_kse != NULL && ctd->td_kse->ke_thread == ctd),
	    ("thread has no (or wrong) sched-private part."));
	KASSERT((td->td_inhibitors == 0),
	    ("maybe_preempt: trying to run inhibited thread"));
	pri = td->td_priority;
	cpri = ctd->td_priority;
	if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
	    TD_IS_INHIBITED(ctd) || td->td_kse->ke_state != KES_THREAD)
		return (0);
#ifndef FULL_PREEMPTION
	if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
		return (0);
#endif

	if (ctd->td_critnest > 1) {
		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
		    ctd->td_critnest);
		ctd->td_owepreempt = 1;
		return (0);
	}

	/*
	 * Thread is runnable but not yet put on system run queue.
	 */
	MPASS(TD_ON_RUNQ(td));
	MPASS(td->td_sched->ke_state != KES_ONRUNQ);
	if (td->td_proc->p_flag & P_HADTHREADS) {
		/*
		 * If this is a threaded process we actually ARE on the
		 * ksegrp run queue so take it off that first.
		 * Also undo any damage done to the last_assigned pointer.
		 * XXX Fix setrunqueue so this isn't needed
		 */
		struct ksegrp *kg;

		kg = td->td_ksegrp;
		if (kg->kg_last_assigned == td)
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	}

	TD_SET_RUNNING(td);
	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mi_switch(SW_INVOL|SW_PREEMPT, td);
	return (1);
#else
	return (0);
#endif
}

#if 0
#ifndef PREEMPTION
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
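/*
 * A worked example of the status-bit indexing used above and below
 * (the numbers assume the usual RQ_PPQ of 4 and 32-bit status words):
 * a thread at priority 100 maps to queue 100 / RQ_PPQ = 25, so
 * RQB_WORD(25) selects word 0 and RQB_BIT(25) is bit 25 within it;
 * runq_findbit() then recovers index 25 as RQB_FFS(word) + (0 << RQB_L2BPW).
 */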
/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
	} else {
		TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	}
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

#if defined(SMP) && defined(SCHED_4BSD)
int runq_fuzz = 1;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
#endif

/*
 * Find the highest priority process on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
		/* fuzz == 1 is normal.. 0 or less are ignored */
		if (runq_fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = runq_fuzz;
			int cpu = PCPU_GET(cpuid);
			struct kse *ke2;
			ke2 = ke = TAILQ_FIRST(rqh);

			while (count-- && ke2) {
				/* check each candidate's last cpu */
				if (ke2->ke_thread->td_lastcpu == cpu) {
					ke = ke2;
					break;
				}
				ke2 = TAILQ_NEXT(ke2, ke_procq);
			}
		} else
#endif
			ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}
/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}

/****** functions that are temporarily here ***********/
#include <vm/uma.h>
extern struct mtx kse_zombie_lock;

/*
 * Allocate scheduler specific per-process resources.
 * The thread and ksegrp have already been linked in.
 * In this case just set the default concurrency value.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
{

	/* This can go in sched_fork */
	sched_init_concurrency(kg);
}

/*
 * A thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
	struct td_sched *ke;

	ke = (struct td_sched *)(td + 1);
	bzero(ke, sizeof(*ke));
	td->td_sched = ke;
	ke->ke_thread = td;
	ke->ke_state = KES_THREAD;
}

/*
 * Set up an initial concurrency of 1
 * and set the given thread (if given) to be using that
 * concurrency slot.
 * May be used "offline", before the ksegrp is attached to the world,
 * and thus wouldn't need schedlock in that case.
 * Called from:
 *  thr_create()
 *  proc_init() (UMA) via sched_newproc()
 */
void
sched_init_concurrency(struct ksegrp *kg)
{

	CTR1(KTR_RUNQ, "kg %p init slots and concurrency to 1", kg);
	kg->kg_concurrency = 1;
	kg->kg_avail_opennings = 1;
}

/*
 * Change the concurrency of an existing ksegrp to N
 * Called from:
 *  kse_create()
 *  kse_exit()
 *  thread_exit()
 *  thread_single()
 */
void
sched_set_concurrency(struct ksegrp *kg, int concurrency)
{

	CTR4(KTR_RUNQ, "kg %p set concurrency to %d, slots %d -> %d",
	    kg,
	    concurrency,
	    kg->kg_avail_opennings,
	    kg->kg_avail_opennings + (concurrency - kg->kg_concurrency));
	kg->kg_avail_opennings += (concurrency - kg->kg_concurrency);
	kg->kg_concurrency = concurrency;
}

/*
 * Called from thread_exit() for all exiting threads.
 *
 * Not to be confused with sched_exit_thread(), which is only called from
 * thread_exit() for threads exiting without the rest of the process exiting,
 * because it is also called from sched_exit() and we wouldn't want to call
 * it twice.
 * XXX This can probably be fixed.
 */
void
sched_thread_exit(struct thread *td)
{

	SLOT_RELEASE(td->td_ksegrp);
	slot_fill(td->td_ksegrp);
}

#endif /* KERN_SWITCH_INCLUDE */