/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
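
/*
 * The assertion above pins down the run queue bookkeeping: the status
 * bitmap (RQB_LEN words of RQB_BPW bits each) must provide exactly one
 * bit per run queue, so each of the RQ_NQS queues has a corresponding
 * empty/non-empty flag.
 */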

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
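
/*
 * For example, "sysctl kern.sched.preemption" from user space reports 1
 * on a kernel built with "options PREEMPTION" and 0 otherwise; attempts
 * to set it fail since the OID is read-only (CTLFLAG_RD).
 */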

/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

/* Switch reasons from mi_switch(). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
	struct sysctl_oid *p;
	uintptr_t counter;
	int error;
	int val;
	int i;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val == 0)
		return (0);
	/*
	 * Traverse the list of children of _kern_sched_stats and reset each
	 * to 0.  Skip the reset entry.
	 */
	SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
		if (p == oidp || p->oid_arg1 == NULL)
			continue;
		counter = (uintptr_t)p->oid_arg1;
		CPU_FOREACH(i) {
			*(long *)(dpcpu_off[i] + counter) = 0;
		}
	}
	return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
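
/*
 * With SCHED_STATS compiled in, the counters appear under
 * "sysctl kern.sched.stats" and, for example,
 * "sysctl kern.sched.stats.reset=1" zeroes every per-CPU counter via
 * the handler above.
 */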
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
	struct thread *td;

retry:
	td = sched_choose();

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
	struct thread *td;
	int flags;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));

	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		if (td->td_owepreempt && !kdb_active) {
			td->td_critnest = 1;
			thread_lock(td);
			td->td_critnest--;
			flags = SW_INVOL | SW_PREEMPT;
			if (TD_IS_IDLETHREAD(td))
				flags |= SWT_IDLE;
			else
				flags |= SWT_OWEPREEMPT;
			mi_switch(flags, NULL);
			thread_unlock(td);
		}
	} else
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
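
/*
 * Typical usage, as a sketch (callers throughout the kernel follow this
 * pattern when touching per-CPU state that must not be preempted):
 *
 *	critical_enter();
 *	... access PCPU data ...
 *	critical_exit();
 *
 * Sections nest via td_critnest, and a preemption that arrives while
 * nested is deferred; the owepreempt path above pays it off when the
 * outermost section exits.
 */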

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
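
/*
 * Worked example, assuming 32-bit status words (RQB_BPW == 32,
 * RQB_L2BPW == 5): if the lowest set bit overall is bit 8 of word 1,
 * runq_findbit() returns 8 + (1 << 5) == 40, i.e. queue index 40.
 */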

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
	struct rqbits *rqb;
	rqb_word_t mask;
	int i;

	/*
	 * Set the mask for the first word so we ignore priorities before 'pri'.
	 */
	mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
	rqb = &rq->rq_status;
again:
	for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
		mask = rqb->rqb_bits[i] & mask;
		if (mask == 0)
			continue;
		pri = RQB_FFS(mask) + (i << RQB_L2BPW);
		CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
		    mask, i, pri);
		return (pri);
	}
	if (pri == 0)
		return (-1);
	/*
	 * Wrap back around to the beginning of the list just once so we
	 * scan the whole thing.
	 */
	pri = 0;
	goto again;
}
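
/*
 * Continuing the example above (RQB_BPW == 32): a search starting at
 * queue index 40 masks off bits 0-7 of word 1, scans from word 1 to
 * the last word, and only if that finds nothing wraps around once to
 * rescan from index 0, so the whole bitmap is covered.
 */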

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
	struct rqhead *rqh;
	int pri;

	pri = td->td_priority / RQ_PPQ;
	td->td_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
	    td, td->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, td, td_runq);
	} else {
		TAILQ_INSERT_TAIL(rqh, td, td_runq);
	}
}
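
/*
 * Worked example, assuming the stock RQ_PPQ of 4 priorities per queue:
 * a thread with td_priority 120 is placed on queue 120 / 4 == 30.
 * SRQ_PREEMPTED queues at the head so a preempted thread runs again
 * ahead of its queue-mates; all other inserts are FIFO at the tail.
 */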

void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
	struct rqhead *rqh;

	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
	td->td_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
	    td, td->td_priority, pri, rqh);
	if (flags & SRQ_PREEMPTED) {
		TAILQ_INSERT_HEAD(rqh, td, td_runq);
	} else {
		TAILQ_INSERT_TAIL(rqh, td, td_runq);
	}
}

/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the
 * run queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority thread on the run queue, preferring, within
 * the first 'fuzz' entries of the best queue, one that last ran on this
 * CPU.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		/* fuzz == 1 is normal; 0 or less is ignored. */
		if (fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = fuzz;
			int cpu = PCPU_GET(cpuid);
			struct thread *td2;
			td2 = td = TAILQ_FIRST(rqh);

			while (count-- && td2) {
				if (td2->td_lastcpu == cpu) {
					td = td2;
					break;
				}
				td2 = TAILQ_NEXT(td2, td_runq);
			}
		} else
			td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL,
		    ("runq_choose_fuzz: no thread on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose_fuzz: idlethread pri=%d", pri);

	return (NULL);
}
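
/*
 * Sketch of the fuzz effect: with fuzz == 2 and the best queue holding
 * threads A then B, where B last ran on the calling CPU, the fuzzy
 * choice returns B rather than A, trading strict FIFO order within the
 * queue for a likely-warm cache.
 */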

/*
 * Find the highest priority thread on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

	return (NULL);
}

struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	if ((pri = runq_findbit_from(rq, idx)) != -1) {
		rqh = &rq->rq_queues[pri];
		td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL,
		    ("runq_choose_from: no thread on busy queue"));
		CTR4(KTR_RUNQ,
		    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
		    pri, td, td->td_rqindex, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

	return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The caller must set the thread's state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

	runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
	struct rqhead *rqh;
	u_char pri;

	KASSERT(td->td_flags & TDF_INMEM,
	    ("runq_remove_idx: thread swapped out"));
	pri = td->td_rqindex;
	KASSERT(pri < RQ_NQS, ("runq_remove_idx: invalid index %d", pri));
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
	    td, td->td_priority, pri, rqh);
	TAILQ_REMOVE(rqh, td, td_runq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}
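
/*
 * The idx update above serves callers that treat the queue array as a
 * circular list (the ULE scheduler's timeshare queue, for one): when
 * the queue the caller's index points at becomes empty, the index is
 * advanced so the next search starts at the following queue.
 */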