kern_switch.c revision 177419
/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 177419 2008-03-20 02:14:02Z jeff $");

#include "opt_sched.h"

#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#include <sys/smp.h>
#endif

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
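
/*
 * Illustrative note (an editorial addition, not part of revision 177419):
 * since the OID above is CTLFLAG_RD, user space can only read the flag,
 * e.g. with sysctl(8).  On a kernel built with "options PREEMPTION" this
 * would print:
 *
 *      $ sysctl kern.sched.preemption
 *      kern.sched.preemption: 1
 *
 * Attempts to set the value should fail, since the flag cannot be changed
 * at boot or run time.
 */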

#ifdef SCHED_STATS
long switch_preempt;
long switch_owepreempt;
long switch_turnstile;
long switch_sleepq;
long switch_sleepqtimo;
long switch_relinquish;
long switch_needresched;
static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, "");
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        int error;
        int val;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        switch_preempt = 0;
        switch_owepreempt = 0;
        switch_turnstile = 0;
        switch_sleepq = 0;
        switch_sleepqtimo = 0;
        switch_relinquish = 0;
        switch_needresched = 0;

        return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.    *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
        struct thread *td;

retry:
        td = sched_choose();

        /*
         * If we are in panic, only allow system threads,
         * plus the one we are running in, to be run.
         */
        if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* note that it is no longer on the run queue */
                TD_SET_CAN_RUN(td);
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
        struct thread *td;

        td = curthread;
        td->td_critnest++;
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
        struct thread *td;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));

        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                if (td->td_owepreempt) {
                        td->td_critnest = 1;
                        thread_lock(td);
                        td->td_critnest--;
                        SCHED_STAT_INC(switch_owepreempt);
                        mi_switch(SW_INVOL|SW_PREEMPT, NULL);
                        thread_unlock(td);
                }
        } else
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
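
/*
 * Usage sketch (an editorial addition, not part of revision 177419):
 * critical sections are typically used to guard per-CPU state against
 * preemption and migration.  Nesting is tracked in td_critnest, so the
 * pattern below is safe even when a critical section is already held;
 * example_percpu_update() and the "somefield" pcpu member are
 * hypothetical names:
 *
 *      static void
 *      example_percpu_update(int newval)
 *      {
 *              critical_enter();
 *              PCPU_SET(somefield, newval);    // hypothetical pcpu member
 *              critical_exit();
 *      }
 *
 * A preemption that arrives while td_critnest > 0 merely sets
 * td_owepreempt; critical_exit() performs the deferred mi_switch() once
 * the outermost section is left.
 */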

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicating a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities
         * before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}
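
/*
 * Worked example (an editorial addition; assumes the stock runq.h values
 * RQB_L2BPW == 5, hence RQB_BPW == 32, and RQ_NQS == 64, so RQB_LEN == 2,
 * with RQB_FFS() returning a zero-based bit index): if only queues 3 and
 * 40 are non-empty, then rqb_bits[0] == 0x8 and rqb_bits[1] == 0x100.
 * runq_findbit() returns RQB_FFS(0x8) + (0 << 5) == 3; once queue 3
 * drains it returns RQB_FFS(0x100) + (1 << 5) == 8 + 32 == 40.
 * runq_findbit_from(rq, 41) masks bit 8 (queue 40) out of word 1, finds
 * nothing above it, and wraps around once to return queue 3.
 */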

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct td_sched *ts, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = ts->ts_thread->td_priority / RQ_PPQ;
        ts->ts_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p",
            ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
        } else {
                TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
        }
}

void
runq_add_pri(struct runq *rq, struct td_sched *ts, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        ts->ts_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_add_pri: td=%p ts=%p pri=%d idx=%d rqh=%p",
            ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
        } else {
                TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
        }
}
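
/*
 * Worked example (an editorial addition; assumes the stock definitions
 * RQ_PPQ == 4 and RQ_NQS == 64, i.e. 256 priority levels folded into 64
 * queues): a thread with td_priority 100 lands on queue 100 / RQ_PPQ ==
 * 25, which it shares with priorities 100 through 103.  SRQ_PREEMPTED
 * inserts at the head of that queue, so a preempted thread runs again
 * before its same-queue peers, while a newly awakened thread is appended
 * at the tail.
 */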

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the
 * run queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

/*
 * Find the highest priority process on the run queue.
 */
struct td_sched *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
        struct rqhead *rqh;
        struct td_sched *ts;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                /* fuzz == 1 is normal; 0 or less is ignored. */
                if (fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct td_sched *ts2;
                        ts2 = ts = TAILQ_FIRST(rqh);

                        while (count-- && ts2) {
                                if (ts2->ts_thread->td_lastcpu == cpu) {
                                        ts = ts2;
                                        break;
                                }
                                ts2 = TAILQ_NEXT(ts2, ts_procq);
                        }
                } else
                        ts = TAILQ_FIRST(rqh);
                KASSERT(ts != NULL, ("runq_choose_fuzz: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose_fuzz: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
                return (ts);
        }
        CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

        return (NULL);
}

/*
 * Find the highest priority process on the run queue.
 */
struct td_sched *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct td_sched *ts;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                ts = TAILQ_FIRST(rqh);
                KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
                return (ts);
        }
        CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

        return (NULL);
}

struct td_sched *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct td_sched *ts;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                ts = TAILQ_FIRST(rqh);
                KASSERT(ts != NULL, ("runq_choose_from: no proc on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d td_sched=%p idx=%d rqh=%p",
                    pri, ts, ts->ts_rqindex, rqh);
                return (ts);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idleproc pri=%d", pri);

        return (NULL);
}
/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct td_sched *ts)
{

        runq_remove_idx(rq, ts, NULL);
}

void
runq_remove_idx(struct runq *rq, struct td_sched *ts, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(ts->ts_thread->td_flags & TDF_INMEM,
            ("runq_remove_idx: thread swapped out"));
        pri = ts->ts_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR5(KTR_RUNQ, "runq_remove_idx: td=%p, ts=%p pri=%d %d rqh=%p",
            ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
        {
                struct td_sched *nts;

                TAILQ_FOREACH(nts, rqh, ts_procq)
                        if (nts == ts)
                                break;
                if (ts != nts)
                        panic("runq_remove_idx: ts %p not on rqindex %d",
                            ts, pri);
        }
        TAILQ_REMOVE(rqh, ts, ts_procq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}

/****** functions that are temporarily here ***********/
#include <vm/uma.h>

/*
 * Allocate scheduler specific per-process resources.
 * The thread and proc have already been linked in.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct thread *td)
{
}

/*
 * A thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * Called from:
 *  sched_fork_thread()
 *  thread_dtor()  (*may go away)
 *  thread_init()  (*may go away)
 */
void
sched_newthread(struct thread *td)
{
        struct td_sched *ts;

        ts = (struct td_sched *)(td + 1);
        bzero(ts, sizeof(*ts));
        td->td_sched = ts;
        ts->ts_thread = td;
}

#endif /* KERN_SWITCH_INCLUDE */