/* kern_switch.c revision 177435 */
1/*- 2 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 177435 2008-03-20 05:51:16Z jeff $"); 30 31#include "opt_sched.h" 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/kdb.h> 36#include <sys/kernel.h> 37#include <sys/ktr.h> 38#include <sys/lock.h> 39#include <sys/mutex.h> 40#include <sys/proc.h> 41#include <sys/queue.h> 42#include <sys/sched.h> 43#include <sys/smp.h> 44#include <sys/sysctl.h> 45 46#include <machine/cpu.h> 47 48/* Uncomment this to enable logging of critical_enter/exit. 
*/ 49#if 0 50#define KTR_CRITICAL KTR_SCHED 51#else 52#define KTR_CRITICAL 0 53#endif 54 55#ifdef FULL_PREEMPTION 56#ifndef PREEMPTION 57#error "The FULL_PREEMPTION option requires the PREEMPTION option" 58#endif 59#endif 60 61CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); 62 63/* 64 * kern.sched.preemption allows user space to determine if preemption support 65 * is compiled in or not. It is not currently a boot or runtime flag that 66 * can be changed. 67 */ 68#ifdef PREEMPTION 69static int kern_sched_preemption = 1; 70#else 71static int kern_sched_preemption = 0; 72#endif 73SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD, 74 &kern_sched_preemption, 0, "Kernel preemption enabled"); 75 76#ifdef SCHED_STATS 77long switch_preempt; 78long switch_owepreempt; 79long switch_turnstile; 80long switch_sleepq; 81long switch_sleepqtimo; 82long switch_relinquish; 83long switch_needresched; 84static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats"); 85SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, ""); 86SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, ""); 87SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, ""); 88SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, ""); 89SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, ""); 90SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, ""); 91SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, ""); 92static int 93sysctl_stats_reset(SYSCTL_HANDLER_ARGS) 94{ 95 int error; 96 int val; 97 98 val = 0; 99 error = sysctl_handle_int(oidp, &val, 0, req); 100 if (error != 0 || req->newptr == NULL) 101 return (error); 102 if (val == 0) 103 return (0); 104 switch_preempt = 0; 105 switch_owepreempt = 0; 106 switch_turnstile = 0; 107 switch_sleepq = 0; 108 switch_sleepqtimo = 0; 109 
switch_relinquish = 0; 110 switch_needresched = 0; 111 112 return (0); 113} 114 115SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL, 116 0, sysctl_stats_reset, "I", "Reset scheduler statistics"); 117#endif 118 119/************************************************************************ 120 * Functions that manipulate runnability from a thread perspective. * 121 ************************************************************************/ 122/* 123 * Select the thread that will be run next. 124 */ 125struct thread * 126choosethread(void) 127{ 128 struct thread *td; 129 130retry: 131 td = sched_choose(); 132 133 /* 134 * If we are in panic, only allow system threads, 135 * plus the one we are running in, to be run. 136 */ 137 if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 && 138 (td->td_flags & TDF_INPANIC) == 0)) { 139 /* note that it is no longer on the run queue */ 140 TD_SET_CAN_RUN(td); 141 goto retry; 142 } 143 144 TD_SET_RUNNING(td); 145 return (td); 146} 147 148/* 149 * Kernel thread preemption implementation. Critical sections mark 150 * regions of code in which preemptions are not allowed. 
151 */ 152void 153critical_enter(void) 154{ 155 struct thread *td; 156 157 td = curthread; 158 td->td_critnest++; 159 CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td, 160 (long)td->td_proc->p_pid, td->td_name, td->td_critnest); 161} 162 163void 164critical_exit(void) 165{ 166 struct thread *td; 167 168 td = curthread; 169 KASSERT(td->td_critnest != 0, 170 ("critical_exit: td_critnest == 0")); 171 172 if (td->td_critnest == 1) { 173 td->td_critnest = 0; 174 if (td->td_owepreempt) { 175 td->td_critnest = 1; 176 thread_lock(td); 177 td->td_critnest--; 178 SCHED_STAT_INC(switch_owepreempt); 179 mi_switch(SW_INVOL|SW_PREEMPT, NULL); 180 thread_unlock(td); 181 } 182 } else 183 td->td_critnest--; 184 185 CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td, 186 (long)td->td_proc->p_pid, td->td_name, td->td_critnest); 187} 188 189/************************************************************************ 190 * SYSTEM RUN QUEUE manipulations and tests * 191 ************************************************************************/ 192/* 193 * Initialize a run structure. 194 */ 195void 196runq_init(struct runq *rq) 197{ 198 int i; 199 200 bzero(rq, sizeof *rq); 201 for (i = 0; i < RQ_NQS; i++) 202 TAILQ_INIT(&rq->rq_queues[i]); 203} 204 205/* 206 * Clear the status bit of the queue corresponding to priority level pri, 207 * indicating that it is empty. 208 */ 209static __inline void 210runq_clrbit(struct runq *rq, int pri) 211{ 212 struct rqbits *rqb; 213 214 rqb = &rq->rq_status; 215 CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d", 216 rqb->rqb_bits[RQB_WORD(pri)], 217 rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri), 218 RQB_BIT(pri), RQB_WORD(pri)); 219 rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri); 220} 221 222/* 223 * Find the index of the first non-empty run queue. This is done by 224 * scanning the status bits, a set bit indicates a non-empty queue. 
225 */ 226static __inline int 227runq_findbit(struct runq *rq) 228{ 229 struct rqbits *rqb; 230 int pri; 231 int i; 232 233 rqb = &rq->rq_status; 234 for (i = 0; i < RQB_LEN; i++) 235 if (rqb->rqb_bits[i]) { 236 pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW); 237 CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d", 238 rqb->rqb_bits[i], i, pri); 239 return (pri); 240 } 241 242 return (-1); 243} 244 245static __inline int 246runq_findbit_from(struct runq *rq, u_char pri) 247{ 248 struct rqbits *rqb; 249 rqb_word_t mask; 250 int i; 251 252 /* 253 * Set the mask for the first word so we ignore priorities before 'pri'. 254 */ 255 mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1)); 256 rqb = &rq->rq_status; 257again: 258 for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) { 259 mask = rqb->rqb_bits[i] & mask; 260 if (mask == 0) 261 continue; 262 pri = RQB_FFS(mask) + (i << RQB_L2BPW); 263 CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d", 264 mask, i, pri); 265 return (pri); 266 } 267 if (pri == 0) 268 return (-1); 269 /* 270 * Wrap back around to the beginning of the list just once so we 271 * scan the whole thing. 272 */ 273 pri = 0; 274 goto again; 275} 276 277/* 278 * Set the status bit of the queue corresponding to priority level pri, 279 * indicating that it is non-empty. 280 */ 281static __inline void 282runq_setbit(struct runq *rq, int pri) 283{ 284 struct rqbits *rqb; 285 286 rqb = &rq->rq_status; 287 CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d", 288 rqb->rqb_bits[RQB_WORD(pri)], 289 rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri), 290 RQB_BIT(pri), RQB_WORD(pri)); 291 rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri); 292} 293 294/* 295 * Add the thread to the queue specified by its priority, and set the 296 * corresponding status bit. 
297 */ 298void 299runq_add(struct runq *rq, struct thread *td, int flags) 300{ 301 struct rqhead *rqh; 302 int pri; 303 304 pri = td->td_priority / RQ_PPQ; 305 td->td_rqindex = pri; 306 runq_setbit(rq, pri); 307 rqh = &rq->rq_queues[pri]; 308 CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p", 309 td, td->td_priority, pri, rqh); 310 if (flags & SRQ_PREEMPTED) { 311 TAILQ_INSERT_HEAD(rqh, td, td_runq); 312 } else { 313 TAILQ_INSERT_TAIL(rqh, td, td_runq); 314 } 315} 316 317void 318runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags) 319{ 320 struct rqhead *rqh; 321 322 KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri)); 323 td->td_rqindex = pri; 324 runq_setbit(rq, pri); 325 rqh = &rq->rq_queues[pri]; 326 CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p", 327 td, td->td_priority, pri, rqh); 328 if (flags & SRQ_PREEMPTED) { 329 TAILQ_INSERT_HEAD(rqh, td, td_runq); 330 } else { 331 TAILQ_INSERT_TAIL(rqh, td, td_runq); 332 } 333} 334/* 335 * Return true if there are runnable processes of any priority on the run 336 * queue, false otherwise. Has no side effects, does not modify the run 337 * queue structure. 338 */ 339int 340runq_check(struct runq *rq) 341{ 342 struct rqbits *rqb; 343 int i; 344 345 rqb = &rq->rq_status; 346 for (i = 0; i < RQB_LEN; i++) 347 if (rqb->rqb_bits[i]) { 348 CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d", 349 rqb->rqb_bits[i], i); 350 return (1); 351 } 352 CTR0(KTR_RUNQ, "runq_check: empty"); 353 354 return (0); 355} 356 357/* 358 * Find the highest priority process on the run queue. 359 */ 360struct thread * 361runq_choose_fuzz(struct runq *rq, int fuzz) 362{ 363 struct rqhead *rqh; 364 struct thread *td; 365 int pri; 366 367 while ((pri = runq_findbit(rq)) != -1) { 368 rqh = &rq->rq_queues[pri]; 369 /* fuzz == 1 is normal.. 0 or less are ignored */ 370 if (fuzz > 1) { 371 /* 372 * In the first couple of entries, check if 373 * there is one for our CPU as a preference. 
374 */ 375 int count = fuzz; 376 int cpu = PCPU_GET(cpuid); 377 struct thread *td2; 378 td2 = td = TAILQ_FIRST(rqh); 379 380 while (count-- && td2) { 381 if (td->td_lastcpu == cpu) { 382 td = td2; 383 break; 384 } 385 td2 = TAILQ_NEXT(td2, td_runq); 386 } 387 } else 388 td = TAILQ_FIRST(rqh); 389 KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue")); 390 CTR3(KTR_RUNQ, 391 "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh); 392 return (td); 393 } 394 CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri); 395 396 return (NULL); 397} 398 399/* 400 * Find the highest priority process on the run queue. 401 */ 402struct thread * 403runq_choose(struct runq *rq) 404{ 405 struct rqhead *rqh; 406 struct thread *td; 407 int pri; 408 409 while ((pri = runq_findbit(rq)) != -1) { 410 rqh = &rq->rq_queues[pri]; 411 td = TAILQ_FIRST(rqh); 412 KASSERT(td != NULL, ("runq_choose: no thread on busy queue")); 413 CTR3(KTR_RUNQ, 414 "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh); 415 return (td); 416 } 417 CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri); 418 419 return (NULL); 420} 421 422struct thread * 423runq_choose_from(struct runq *rq, u_char idx) 424{ 425 struct rqhead *rqh; 426 struct thread *td; 427 int pri; 428 429 if ((pri = runq_findbit_from(rq, idx)) != -1) { 430 rqh = &rq->rq_queues[pri]; 431 td = TAILQ_FIRST(rqh); 432 KASSERT(td != NULL, ("runq_choose: no thread on busy queue")); 433 CTR4(KTR_RUNQ, 434 "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p", 435 pri, td, td->td_rqindex, rqh); 436 return (td); 437 } 438 CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri); 439 440 return (NULL); 441} 442/* 443 * Remove the thread from the queue specified by its priority, and clear the 444 * corresponding status bit if the queue becomes empty. 445 * Caller must set state afterwards. 
446 */ 447void 448runq_remove(struct runq *rq, struct thread *td) 449{ 450 451 runq_remove_idx(rq, td, NULL); 452} 453 454void 455runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx) 456{ 457 struct rqhead *rqh; 458 u_char pri; 459 460 KASSERT(td->td_flags & TDF_INMEM, 461 ("runq_remove_idx: thread swapped out")); 462 pri = td->td_rqindex; 463 KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri)); 464 rqh = &rq->rq_queues[pri]; 465 CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p", 466 td, td->td_priority, pri, rqh); 467 TAILQ_REMOVE(rqh, td, td_runq); 468 if (TAILQ_EMPTY(rqh)) { 469 CTR0(KTR_RUNQ, "runq_remove_idx: empty"); 470 runq_clrbit(rq, pri); 471 if (idx != NULL && *idx == pri) 472 *idx = (pri + 1) % RQ_NQS; 473 } 474} 475