kern_switch.c revision 166557
1/*- 2 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 166557 2007-02-08 01:52:25Z jeff $"); 30 31#include "opt_sched.h" 32 33#ifndef KERN_SWITCH_INCLUDE 34#include <sys/param.h> 35#include <sys/systm.h> 36#include <sys/kdb.h> 37#include <sys/kernel.h> 38#include <sys/ktr.h> 39#include <sys/lock.h> 40#include <sys/mutex.h> 41#include <sys/proc.h> 42#include <sys/queue.h> 43#include <sys/sched.h> 44#else /* KERN_SWITCH_INCLUDE */ 45#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 46#include <sys/smp.h> 47#endif 48#if defined(SMP) && defined(SCHED_4BSD) 49#include <sys/sysctl.h> 50#endif 51 52/* Uncomment this to enable logging of critical_enter/exit. */ 53#if 0 54#define KTR_CRITICAL KTR_SCHED 55#else 56#define KTR_CRITICAL 0 57#endif 58 59#ifdef FULL_PREEMPTION 60#ifndef PREEMPTION 61#error "The FULL_PREEMPTION option requires the PREEMPTION option" 62#endif 63#endif 64 65CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); 66 67/* 68 * kern.sched.preemption allows user space to determine if preemption support 69 * is compiled in or not. It is not currently a boot or runtime flag that 70 * can be changed. 71 */ 72#ifdef PREEMPTION 73static int kern_sched_preemption = 1; 74#else 75static int kern_sched_preemption = 0; 76#endif 77SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD, 78 &kern_sched_preemption, 0, "Kernel preemption enabled"); 79 80/************************************************************************ 81 * Functions that manipulate runnability from a thread perspective. * 82 ************************************************************************/ 83/* 84 * Select the thread that will be run next. 85 */ 86struct thread * 87choosethread(void) 88{ 89 struct thread *td; 90 91#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) 92 if (smp_active == 0 && PCPU_GET(cpuid) != 0) { 93 /* Shutting down, run idlethread on AP's */ 94 td = PCPU_GET(idlethread); 95 CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); 96 TD_SET_RUNNING(td); 97 return (td); 98 } 99#endif 100 101retry: 102 td = sched_choose(); 103 104 /* 105 * If we are in panic, only allow system threads, 106 * plus the one we are running in, to be run. 107 */ 108 if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 && 109 (td->td_flags & TDF_INPANIC) == 0)) { 110 /* note that it is no longer on the run queue */ 111 TD_SET_CAN_RUN(td); 112 goto retry; 113 } 114 115 TD_SET_RUNNING(td); 116 return (td); 117} 118 119/* 120 * Kernel thread preemption implementation. Critical sections mark 121 * regions of code in which preemptions are not allowed. 122 */ 123void 124critical_enter(void) 125{ 126 struct thread *td; 127 128 td = curthread; 129 td->td_critnest++; 130 CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td, 131 (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest); 132} 133 134void 135critical_exit(void) 136{ 137 struct thread *td; 138 139 td = curthread; 140 KASSERT(td->td_critnest != 0, 141 ("critical_exit: td_critnest == 0")); 142#ifdef PREEMPTION 143 if (td->td_critnest == 1) { 144 td->td_critnest = 0; 145 mtx_assert(&sched_lock, MA_NOTOWNED); 146 if (td->td_owepreempt) { 147 td->td_critnest = 1; 148 mtx_lock_spin(&sched_lock); 149 td->td_critnest--; 150 mi_switch(SW_INVOL, NULL); 151 mtx_unlock_spin(&sched_lock); 152 } 153 } else 154#endif 155 td->td_critnest--; 156 157 CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td, 158 (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest); 159} 160 161/* 162 * This function is called when a thread is about to be put on run queue 163 * because it has been made runnable or its priority has been adjusted. It 164 * determines if the new thread should be immediately preempted to. If so, 165 * it switches to it and eventually returns true. If not, it returns false 166 * so that the caller may place the thread on an appropriate run queue. 167 */ 168int 169maybe_preempt(struct thread *td) 170{ 171#ifdef PREEMPTION 172 struct thread *ctd; 173 int cpri, pri; 174#endif 175 176 mtx_assert(&sched_lock, MA_OWNED); 177#ifdef PREEMPTION 178 /* 179 * The new thread should not preempt the current thread if any of the 180 * following conditions are true: 181 * 182 * - The kernel is in the throes of crashing (panicstr). 183 * - The current thread has a higher (numerically lower) or 184 * equivalent priority. Note that this prevents curthread from 185 * trying to preempt to itself. 186 * - It is too early in the boot for context switches (cold is set). 187 * - The current thread has an inhibitor set or is in the process of 188 * exiting. In this case, the current thread is about to switch 189 * out anyways, so there's no point in preempting. If we did, 190 * the current thread would not be properly resumed as well, so 191 * just avoid that whole landmine. 192 * - If the new thread's priority is not a realtime priority and 193 * the current thread's priority is not an idle priority and 194 * FULL_PREEMPTION is disabled. 195 * 196 * If all of these conditions are false, but the current thread is in 197 * a nested critical section, then we have to defer the preemption 198 * until we exit the critical section. Otherwise, switch immediately 199 * to the new thread. 200 */ 201 ctd = curthread; 202 KASSERT ((ctd->td_sched != NULL && ctd->td_sched->ts_thread == ctd), 203 ("thread has no (or wrong) sched-private part.")); 204 KASSERT((td->td_inhibitors == 0), 205 ("maybe_preempt: trying to run inhibited thread")); 206 pri = td->td_priority; 207 cpri = ctd->td_priority; 208 if (panicstr != NULL || pri >= cpri || cold /* || dumping */ || 209 TD_IS_INHIBITED(ctd)) 210 return (0); 211#ifndef FULL_PREEMPTION 212 if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE) 213 return (0); 214#endif 215 216 if (ctd->td_critnest > 1) { 217 CTR1(KTR_PROC, "maybe_preempt: in critical section %d", 218 ctd->td_critnest); 219 ctd->td_owepreempt = 1; 220 return (0); 221 } 222 223 /* 224 * Thread is runnable but not yet put on system run queue. 225 */ 226 MPASS(TD_ON_RUNQ(td)); 227 TD_SET_RUNNING(td); 228 CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td, 229 td->td_proc->p_pid, td->td_proc->p_comm); 230 mi_switch(SW_INVOL|SW_PREEMPT, td); 231 return (1); 232#else 233 return (0); 234#endif 235} 236 237#if 0 238#ifndef PREEMPTION 239/* XXX: There should be a non-static version of this. */ 240static void 241printf_caddr_t(void *data) 242{ 243 printf("%s", (char *)data); 244} 245static char preempt_warning[] = 246 "WARNING: Kernel preemption is disabled, expect reduced performance.\n"; 247SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t, 248 preempt_warning) 249#endif 250#endif 251 252/************************************************************************ 253 * SYSTEM RUN QUEUE manipulations and tests * 254 ************************************************************************/ 255/* 256 * Initialize a run structure. 257 */ 258void 259runq_init(struct runq *rq) 260{ 261 int i; 262 263 bzero(rq, sizeof *rq); 264 for (i = 0; i < RQ_NQS; i++) 265 TAILQ_INIT(&rq->rq_queues[i]); 266} 267 268/* 269 * Clear the status bit of the queue corresponding to priority level pri, 270 * indicating that it is empty. 271 */ 272static __inline void 273runq_clrbit(struct runq *rq, int pri) 274{ 275 struct rqbits *rqb; 276 277 rqb = &rq->rq_status; 278 CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d", 279 rqb->rqb_bits[RQB_WORD(pri)], 280 rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri), 281 RQB_BIT(pri), RQB_WORD(pri)); 282 rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri); 283} 284 285/* 286 * Find the index of the first non-empty run queue. This is done by 287 * scanning the status bits, a set bit indicates a non-empty queue. 288 */ 289static __inline int 290runq_findbit(struct runq *rq) 291{ 292 struct rqbits *rqb; 293 int pri; 294 int i; 295 296 rqb = &rq->rq_status; 297 for (i = 0; i < RQB_LEN; i++) 298 if (rqb->rqb_bits[i]) { 299 pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW); 300 CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d", 301 rqb->rqb_bits[i], i, pri); 302 return (pri); 303 } 304 305 return (-1); 306} 307 308static __inline int 309runq_findbit_from(struct runq *rq, u_char start) 310{ 311 struct rqbits *rqb; 312 int bit; 313 int pri; 314 int i; 315 316 rqb = &rq->rq_status; 317 bit = start & (RQB_BPW -1); 318 pri = 0; 319 CTR1(KTR_RUNQ, "runq_findbit_from: start %d", start); 320again: 321 for (i = RQB_WORD(start); i < RQB_LEN; i++) { 322 CTR3(KTR_RUNQ, "runq_findbit_from: bits %d = %#x bit = %d", 323 i, rqb->rqb_bits[i], bit); 324 if (rqb->rqb_bits[i]) { 325 if (bit != 0) { 326 for (pri = bit; pri < RQB_BPW; pri++) 327 if (rqb->rqb_bits[i] & (1ul << pri)) 328 break; 329 bit = 0; 330 if (pri >= RQB_BPW) 331 continue; 332 } else 333 pri = RQB_FFS(rqb->rqb_bits[i]); 334 pri += (i << RQB_L2BPW); 335 CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d", 336 rqb->rqb_bits[i], i, pri); 337 return (pri); 338 } 339 bit = 0; 340 } 341 if (start != 0) { 342 CTR0(KTR_RUNQ, "runq_findbit_from: restarting"); 343 start = 0; 344 goto again; 345 } 346 347 return (-1); 348} 349 350/* 351 * Set the status bit of the queue corresponding to priority level pri, 352 * indicating that it is non-empty. 353 */ 354static __inline void 355runq_setbit(struct runq *rq, int pri) 356{ 357 struct rqbits *rqb; 358 359 rqb = &rq->rq_status; 360 CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d", 361 rqb->rqb_bits[RQB_WORD(pri)], 362 rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri), 363 RQB_BIT(pri), RQB_WORD(pri)); 364 rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri); 365} 366 367/* 368 * Add the thread to the queue specified by its priority, and set the 369 * corresponding status bit. 370 */ 371void 372runq_add(struct runq *rq, struct td_sched *ts, int flags) 373{ 374 struct rqhead *rqh; 375 int pri; 376 377 pri = ts->ts_thread->td_priority / RQ_PPQ; 378 ts->ts_rqindex = pri; 379 runq_setbit(rq, pri); 380 rqh = &rq->rq_queues[pri]; 381 CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p", 382 ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh); 383 if (flags & SRQ_PREEMPTED) { 384 TAILQ_INSERT_HEAD(rqh, ts, ts_procq); 385 } else { 386 TAILQ_INSERT_TAIL(rqh, ts, ts_procq); 387 } 388} 389 390void 391runq_add_pri(struct runq *rq, struct td_sched *ts, u_char pri, int flags) 392{ 393 struct rqhead *rqh; 394 395 KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri)); 396 ts->ts_rqindex = pri; 397 runq_setbit(rq, pri); 398 rqh = &rq->rq_queues[pri]; 399 CTR5(KTR_RUNQ, "runq_add_pri: td=%p ke=%p pri=%d idx=%d rqh=%p", 400 ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh); 401 if (flags & SRQ_PREEMPTED) { 402 TAILQ_INSERT_HEAD(rqh, ts, ts_procq); 403 } else { 404 TAILQ_INSERT_TAIL(rqh, ts, ts_procq); 405 } 406} 407/* 408 * Return true if there are runnable processes of any priority on the run 409 * queue, false otherwise. Has no side effects, does not modify the run 410 * queue structure. 411 */ 412int 413runq_check(struct runq *rq) 414{ 415 struct rqbits *rqb; 416 int i; 417 418 rqb = &rq->rq_status; 419 for (i = 0; i < RQB_LEN; i++) 420 if (rqb->rqb_bits[i]) { 421 CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d", 422 rqb->rqb_bits[i], i); 423 return (1); 424 } 425 CTR0(KTR_RUNQ, "runq_check: empty"); 426 427 return (0); 428} 429 430#if defined(SMP) && defined(SCHED_4BSD) 431int runq_fuzz = 1; 432SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, ""); 433#endif 434 435/* 436 * Find the highest priority process on the run queue. 437 */ 438struct td_sched * 439runq_choose(struct runq *rq) 440{ 441 struct rqhead *rqh; 442 struct td_sched *ts; 443 int pri; 444 445 mtx_assert(&sched_lock, MA_OWNED); 446 while ((pri = runq_findbit(rq)) != -1) { 447 rqh = &rq->rq_queues[pri]; 448#if defined(SMP) && defined(SCHED_4BSD) 449 /* fuzz == 1 is normal.. 0 or less are ignored */ 450 if (runq_fuzz > 1) { 451 /* 452 * In the first couple of entries, check if 453 * there is one for our CPU as a preference. 454 */ 455 int count = runq_fuzz; 456 int cpu = PCPU_GET(cpuid); 457 struct td_sched *ts2; 458 ts2 = ts = TAILQ_FIRST(rqh); 459 460 while (count-- && ts2) { 461 if (ts->ts_thread->td_lastcpu == cpu) { 462 ts = ts2; 463 break; 464 } 465 ts2 = TAILQ_NEXT(ts2, ts_procq); 466 } 467 } else 468#endif 469 ts = TAILQ_FIRST(rqh); 470 KASSERT(ts != NULL, ("runq_choose: no proc on busy queue")); 471 CTR3(KTR_RUNQ, 472 "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh); 473 return (ts); 474 } 475 CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri); 476 477 return (NULL); 478} 479 480struct td_sched * 481runq_choose_from(struct runq *rq, u_char idx) 482{ 483 struct rqhead *rqh; 484 struct td_sched *ts; 485 int pri; 486 487 mtx_assert(&sched_lock, MA_OWNED); 488 if ((pri = runq_findbit_from(rq, idx)) != -1) { 489 rqh = &rq->rq_queues[pri]; 490 ts = TAILQ_FIRST(rqh); 491 KASSERT(ts != NULL, ("runq_choose: no proc on busy queue")); 492 CTR4(KTR_RUNQ, 493 "runq_choose_from: pri=%d kse=%p idx=%d rqh=%p", 494 pri, ts, ts->ts_rqindex, rqh); 495 return (ts); 496 } 497 CTR1(KTR_RUNQ, "runq_choose_from: idleproc pri=%d", pri); 498 499 return (NULL); 500} 501/* 502 * Remove the thread from the queue specified by its priority, and clear the 503 * corresponding status bit if the queue becomes empty. 504 * Caller must set state afterwards. 505 */ 506void 507runq_remove(struct runq *rq, struct td_sched *ts) 508{ 509 510 runq_remove_idx(rq, ts, NULL); 511} 512 513void 514runq_remove_idx(struct runq *rq, struct td_sched *ts, u_char *idx) 515{ 516 struct rqhead *rqh; 517 u_char pri; 518 519 KASSERT(ts->ts_thread->td_proc->p_sflag & PS_INMEM, 520 ("runq_remove_idx: process swapped out")); 521 pri = ts->ts_rqindex; 522 rqh = &rq->rq_queues[pri]; 523 CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p", 524 ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh); 525 TAILQ_REMOVE(rqh, ts, ts_procq); 526 if (TAILQ_EMPTY(rqh)) { 527 CTR0(KTR_RUNQ, "runq_remove_idx: empty"); 528 runq_clrbit(rq, pri); 529 if (idx != NULL && *idx == pri) 530 *idx = (pri + 1) % RQ_NQS; 531 } 532} 533 534/****** functions that are temporarily here ***********/ 535#include <vm/uma.h> 536extern struct mtx kse_zombie_lock; 537 538/* 539 * Allocate scheduler specific per-process resources. 540 * The thread and proc have already been linked in. 541 * 542 * Called from: 543 * proc_init() (UMA init method) 544 */ 545void 546sched_newproc(struct proc *p, struct thread *td) 547{ 548} 549 550/* 551 * thread is being either created or recycled. 552 * Fix up the per-scheduler resources associated with it. 553 * Called from: 554 * sched_fork_thread() 555 * thread_dtor() (*may go away) 556 * thread_init() (*may go away) 557 */ 558void 559sched_newthread(struct thread *td) 560{ 561 struct td_sched *ts; 562 563 ts = (struct td_sched *) (td + 1); 564 bzero(ts, sizeof(*ts)); 565 td->td_sched = ts; 566 ts->ts_thread = td; 567} 568 569/* 570 * Called from: 571 * thr_create() 572 * proc_init() (UMA) via sched_newproc() 573 */ 574void 575sched_init_concurrency(struct proc *p) 576{ 577} 578 579/* 580 * Change the concurrency of an existing proc to N 581 * Called from: 582 * kse_create() 583 * kse_exit() 584 * thread_exit() 585 * thread_single() 586 */ 587void 588sched_set_concurrency(struct proc *p, int concurrency) 589{ 590} 591 592/* 593 * Called from thread_exit() for all exiting thread 594 * 595 * Not to be confused with sched_exit_thread() 596 * that is only called from thread_exit() for threads exiting 597 * without the rest of the process exiting because it is also called from 598 * sched_exit() and we wouldn't want to call it twice. 599 * XXX This can probably be fixed. 600 */ 601void 602sched_thread_exit(struct thread *td) 603{ 604} 605 606#endif /* KERN_SWITCH_INCLUDE */ 607