1139804Simp/*-
272376Sjake * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
372376Sjake * All rights reserved.
450027Speter *
550027Speter * Redistribution and use in source and binary forms, with or without
650027Speter * modification, are permitted provided that the following conditions
750027Speter * are met:
850027Speter * 1. Redistributions of source code must retain the above copyright
950027Speter *    notice, this list of conditions and the following disclaimer.
1050027Speter * 2. Redistributions in binary form must reproduce the above copyright
1150027Speter *    notice, this list of conditions and the following disclaimer in the
1250027Speter *    documentation and/or other materials provided with the distribution.
1350027Speter *
1450027Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1550027Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1650027Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1750027Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1850027Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1950027Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2050027Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2150027Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2250027Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2350027Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2450027Speter * SUCH DAMAGE.
2550027Speter */
2650027Speter
2799072Sjulian
28116182Sobrien#include <sys/cdefs.h>
29116182Sobrien__FBSDID("$FreeBSD$");
30116182Sobrien
31134591Sjulian#include "opt_sched.h"
32131481Sjhb
3350027Speter#include <sys/param.h>
3450027Speter#include <sys/systm.h>
35131927Smarcel#include <sys/kdb.h>
3650027Speter#include <sys/kernel.h>
3765557Sjasone#include <sys/ktr.h>
3874914Sjhb#include <sys/lock.h>
3967365Sjhb#include <sys/mutex.h>
4050027Speter#include <sys/proc.h>
4150027Speter#include <sys/queue.h>
42104964Sjeff#include <sys/sched.h>
43112993Speter#include <sys/smp.h>
44177435Sjeff#include <sys/sysctl.h>
4550027Speter
46170293Sjeff#include <machine/cpu.h>
47170293Sjeff
48153510Snjl/* Uncomment this to enable logging of critical_enter/exit. */
49153510Snjl#if 0
50153510Snjl#define	KTR_CRITICAL	KTR_SCHED
51153510Snjl#else
52153510Snjl#define	KTR_CRITICAL	0
53153510Snjl#endif
54153510Snjl
55134649Sscottl#ifdef FULL_PREEMPTION
56134649Sscottl#ifndef PREEMPTION
57134649Sscottl#error "The FULL_PREEMPTION option requires the PREEMPTION option"
58134649Sscottl#endif
59134649Sscottl#endif
60134591Sjulian
6197261SjakeCTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
6297261Sjake
/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
/* Read-only sysctl: reports whether the kernel was built with PREEMPTION. */
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
75143884Srwatson
/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

/*
 * Switch reasons from mi_switch().  One per-CPU long counter per SWT_*
 * reason; each is exported as its own sysctl under kern.sched.stats via
 * SCHED_STAT_DEFINE_VAR().
 */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

113170293Sjeffstatic int
114170293Sjeffsysctl_stats_reset(SYSCTL_HANDLER_ARGS)
115170293Sjeff{
116178272Sjeff	struct sysctl_oid *p;
117194936Sjeff	uintptr_t counter;
118170293Sjeff        int error;
119170293Sjeff	int val;
120194936Sjeff	int i;
121170293Sjeff
122170293Sjeff        val = 0;
123170293Sjeff        error = sysctl_handle_int(oidp, &val, 0, req);
124170293Sjeff        if (error != 0 || req->newptr == NULL)
125170293Sjeff                return (error);
126170293Sjeff        if (val == 0)
127170293Sjeff                return (0);
128178272Sjeff	/*
129178272Sjeff	 * Traverse the list of children of _kern_sched_stats and reset each
130178272Sjeff	 * to 0.  Skip the reset entry.
131178272Sjeff	 */
132178272Sjeff	SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
133178272Sjeff		if (p == oidp || p->oid_arg1 == NULL)
134178272Sjeff			continue;
135194936Sjeff		counter = (uintptr_t)p->oid_arg1;
136209059Sjhb		CPU_FOREACH(i) {
137194936Sjeff			*(long *)(dpcpu_off[i] + counter) = 0;
138194936Sjeff		}
139178272Sjeff	}
140170293Sjeff	return (0);
141170293Sjeff}
142170293Sjeff
/* kern.sched.stats.reset: write 1 to zero all scheduler switch counters. */
SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif
146170293Sjeff
14799072Sjulian/************************************************************************
14899072Sjulian * Functions that manipulate runnability from a thread perspective.	*
14999072Sjulian ************************************************************************/
15050027Speter/*
151163709Sjb * Select the thread that will be run next.
152163709Sjb */
struct thread *
choosethread(void)
{
	struct thread *td;

retry:
	/* Ask the active scheduler for the next runnable thread. */
	td = sched_choose();

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		/* Reject this choice and pick again. */
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}
17550027Speter
17699072Sjulian/*
177131481Sjhb * Kernel thread preemption implementation.  Critical sections mark
178131481Sjhb * regions of code in which preemptions are not allowed.
179244046Sattilio *
180244046Sattilio * It might seem a good idea to inline critical_enter() but, in order
181244046Sattilio * to prevent instructions reordering by the compiler, a __compiler_membar()
182244046Sattilio * would have to be used here (the same as sched_pin()).  The performance
183244046Sattilio * penalty imposed by the membar could, then, produce slower code than
184244046Sattilio * the function call itself, for most cases.
185131481Sjhb */
/*
 * Enter a critical section: bump the current thread's nesting count so
 * that preemption is deferred until the matching critical_exit().
 */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	td->td_critnest++;
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
19688088Sjhb
/*
 * Leave a critical section.  When dropping the outermost section with a
 * preemption pending (td_owepreempt), perform the deferred switch now.
 */
void
critical_exit(void)
{
	struct thread *td;
	int flags;

	td = curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));

	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		/* Skip the deferred switch while the kernel debugger is active. */
		if (td->td_owepreempt && !kdb_active) {
			/*
			 * NOTE(review): critnest is raised back to 1 before
			 * taking the thread lock and dropped again once it is
			 * held — presumably so the lock acquisition itself
			 * cannot be preempted; confirm against thread_lock()
			 * semantics before relying on this.
			 */
			td->td_critnest = 1;
			thread_lock(td);
			td->td_critnest--;
			flags = SW_INVOL | SW_PREEMPT;
			/* Record why we switched for the scheduler stats. */
			if (TD_IS_IDLETHREAD(td))
				flags |= SWT_IDLE;
			else
				flags |= SWT_OWEPREEMPT;
			mi_switch(flags, NULL);
			thread_unlock(td);
		}
	} else
		td->td_critnest--;

	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
22788088Sjhb
22899072Sjulian/************************************************************************
22999072Sjulian * SYSTEM RUN QUEUE manipulations and tests				*
23099072Sjulian ************************************************************************/
23172376Sjake/*
23299072Sjulian * Initialize a run structure.
23399072Sjulian */
23499072Sjulianvoid
23599072Sjulianrunq_init(struct runq *rq)
23699072Sjulian{
23799072Sjulian	int i;
23899072Sjulian
23999072Sjulian	bzero(rq, sizeof *rq);
24099072Sjulian	for (i = 0; i < RQ_NQS; i++)
24199072Sjulian		TAILQ_INIT(&rq->rq_queues[i]);
24299072Sjulian}
24399072Sjulian
24499072Sjulian/*
24572376Sjake * Clear the status bit of the queue corresponding to priority level pri,
24672376Sjake * indicating that it is empty.
24772376Sjake */
24872376Sjakestatic __inline void
24972376Sjakerunq_clrbit(struct runq *rq, int pri)
25072376Sjake{
25172376Sjake	struct rqbits *rqb;
25265557Sjasone
25372376Sjake	rqb = &rq->rq_status;
25472376Sjake	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
25572376Sjake	    rqb->rqb_bits[RQB_WORD(pri)],
25672376Sjake	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
25772376Sjake	    RQB_BIT(pri), RQB_WORD(pri));
25872376Sjake	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
25950027Speter}
26050027Speter
26150027Speter/*
26272376Sjake * Find the index of the first non-empty run queue.  This is done by
26372376Sjake * scanning the status bits, a set bit indicates a non-empty queue.
26450027Speter */
26572376Sjakestatic __inline int
26672376Sjakerunq_findbit(struct runq *rq)
26772376Sjake{
26872376Sjake	struct rqbits *rqb;
26972376Sjake	int pri;
27072376Sjake	int i;
27172376Sjake
27272376Sjake	rqb = &rq->rq_status;
27372376Sjake	for (i = 0; i < RQB_LEN; i++)
27472376Sjake		if (rqb->rqb_bits[i]) {
27598469Speter			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
27672376Sjake			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
27772376Sjake			    rqb->rqb_bits[i], i, pri);
27872376Sjake			return (pri);
27972376Sjake		}
28072376Sjake
28172376Sjake	return (-1);
28272376Sjake}
28372376Sjake
/*
 * Find the index of the first non-empty run queue at or after priority
 * 'pri', wrapping back to index 0 at most once so the whole bitmap is
 * scanned.  Returns -1 if all queues are empty.
 */
static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
	struct rqbits *rqb;
	rqb_word_t mask;
	int i;

	/*
	 * Set the mask for the first word so we ignore priorities before 'pri'.
	 */
	mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
	rqb = &rq->rq_status;
again:
	/* Subsequent words are scanned in full (mask reset to all ones). */
	for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
		mask = rqb->rqb_bits[i] & mask;
		if (mask == 0)
			continue;
		pri = RQB_FFS(mask) + (i << RQB_L2BPW);
		CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
		    mask, i, pri);
		return (pri);
	}
	/* A failed scan that started at 0 has covered everything. */
	if (pri == 0)
		return (-1);
	/*
	 * Wrap back around to the beginning of the list just once so we
	 * scan the whole thing.
	 */
	pri = 0;
	goto again;
}
315165761Sjeff
31672376Sjake/*
31772376Sjake * Set the status bit of the queue corresponding to priority level pri,
31872376Sjake * indicating that it is non-empty.
31972376Sjake */
32072376Sjakestatic __inline void
32172376Sjakerunq_setbit(struct runq *rq, int pri)
32272376Sjake{
32372376Sjake	struct rqbits *rqb;
32472376Sjake
32572376Sjake	rqb = &rq->rq_status;
32672376Sjake	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
32772376Sjake	    rqb->rqb_bits[RQB_WORD(pri)],
32872376Sjake	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
32972376Sjake	    RQB_BIT(pri), RQB_WORD(pri));
33072376Sjake	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
33172376Sjake}
33272376Sjake
33372376Sjake/*
334164936Sjulian * Add the thread to the queue specified by its priority, and set the
33572376Sjake * corresponding status bit.
33672376Sjake */
33750027Spetervoid
338177435Sjeffrunq_add(struct runq *rq, struct thread *td, int flags)
33950027Speter{
34072376Sjake	struct rqhead *rqh;
34172376Sjake	int pri;
34250027Speter
343177435Sjeff	pri = td->td_priority / RQ_PPQ;
344177435Sjeff	td->td_rqindex = pri;
34572376Sjake	runq_setbit(rq, pri);
34672376Sjake	rqh = &rq->rq_queues[pri];
347177435Sjeff	CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
348177435Sjeff	    td, td->td_priority, pri, rqh);
349136170Sjulian	if (flags & SRQ_PREEMPTED) {
350177435Sjeff		TAILQ_INSERT_HEAD(rqh, td, td_runq);
351136170Sjulian	} else {
352177435Sjeff		TAILQ_INSERT_TAIL(rqh, td, td_runq);
353136170Sjulian	}
35450027Speter}
35550027Speter
356165761Sjeffvoid
357177435Sjeffrunq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
358165761Sjeff{
359165761Sjeff	struct rqhead *rqh;
360165761Sjeff
361165761Sjeff	KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
362177435Sjeff	td->td_rqindex = pri;
363165761Sjeff	runq_setbit(rq, pri);
364165761Sjeff	rqh = &rq->rq_queues[pri];
365177435Sjeff	CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
366177435Sjeff	    td, td->td_priority, pri, rqh);
367165761Sjeff	if (flags & SRQ_PREEMPTED) {
368177435Sjeff		TAILQ_INSERT_HEAD(rqh, td, td_runq);
369165761Sjeff	} else {
370177435Sjeff		TAILQ_INSERT_TAIL(rqh, td, td_runq);
371165761Sjeff	}
372165761Sjeff}
37350027Speter/*
37472376Sjake * Return true if there are runnable processes of any priority on the run
37572376Sjake * queue, false otherwise.  Has no side effects, does not modify the run
37672376Sjake * queue structure.
37750027Speter */
37872376Sjakeint
37972376Sjakerunq_check(struct runq *rq)
38050027Speter{
38172376Sjake	struct rqbits *rqb;
38272376Sjake	int i;
38372376Sjake
38472376Sjake	rqb = &rq->rq_status;
38572376Sjake	for (i = 0; i < RQB_LEN; i++)
38672376Sjake		if (rqb->rqb_bits[i]) {
38772376Sjake			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
38872376Sjake			    rqb->rqb_bits[i], i);
38972376Sjake			return (1);
39072376Sjake		}
39172376Sjake	CTR0(KTR_RUNQ, "runq_check: empty");
39272376Sjake
39372376Sjake	return (0);
39450027Speter}
39550027Speter
39650027Speter/*
397104964Sjeff * Find the highest priority process on the run queue.
39850027Speter */
/*
 * Pick the highest-priority thread, preferring — within the first 'fuzz'
 * entries of the winning queue — one that last ran on the current CPU.
 * Returns NULL when every queue is empty.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
	struct rqhead *rqh;
	struct thread *td;
	int pri;

	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		/* fuzz == 1 is normal.. 0 or less are ignored */
		if (fuzz > 1) {
			/*
			 * In the first couple of entries, check if
			 * there is one for our CPU as a preference.
			 */
			int count = fuzz;
			int cpu = PCPU_GET(cpuid);
			struct thread *td2;
			td2 = td = TAILQ_FIRST(rqh);

			/* Fall back to the queue head if no match is found. */
			while (count-- && td2) {
				if (td2->td_lastcpu == cpu) {
					td = td2;
					break;
				}
				td2 = TAILQ_NEXT(td2, td_runq);
			}
		} else
			td = TAILQ_FIRST(rqh);
		KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
		return (td);
	}
	CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

	return (NULL);
}
437177419Sjeff
438177419Sjeff/*
439177419Sjeff * Find the highest priority process on the run queue.
440177419Sjeff */
441177435Sjeffstruct thread *
442177419Sjeffrunq_choose(struct runq *rq)
443177419Sjeff{
444177419Sjeff	struct rqhead *rqh;
445177435Sjeff	struct thread *td;
446177419Sjeff	int pri;
447177419Sjeff
448177419Sjeff	while ((pri = runq_findbit(rq)) != -1) {
449177419Sjeff		rqh = &rq->rq_queues[pri];
450177435Sjeff		td = TAILQ_FIRST(rqh);
451177435Sjeff		KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
45299072Sjulian		CTR3(KTR_RUNQ,
453177435Sjeff		    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
454177435Sjeff		return (td);
45550027Speter	}
456177435Sjeff	CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);
45772376Sjake
45899072Sjulian	return (NULL);
45950027Speter}
46072376Sjake
461177435Sjeffstruct thread *
462166557Sjeffrunq_choose_from(struct runq *rq, u_char idx)
463165761Sjeff{
464165761Sjeff	struct rqhead *rqh;
465177435Sjeff	struct thread *td;
466165761Sjeff	int pri;
467165761Sjeff
468165765Sjeff	if ((pri = runq_findbit_from(rq, idx)) != -1) {
469165761Sjeff		rqh = &rq->rq_queues[pri];
470177435Sjeff		td = TAILQ_FIRST(rqh);
471177435Sjeff		KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
472165761Sjeff		CTR4(KTR_RUNQ,
473177435Sjeff		    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
474177435Sjeff		    pri, td, td->td_rqindex, rqh);
475177435Sjeff		return (td);
476165761Sjeff	}
477177435Sjeff	CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);
478165761Sjeff
479165761Sjeff	return (NULL);
480165761Sjeff}
48172376Sjake/*
482164936Sjulian * Remove the thread from the queue specified by its priority, and clear the
48372376Sjake * corresponding status bit if the queue becomes empty.
484166188Sjeff * Caller must set state afterwards.
48572376Sjake */
48672376Sjakevoid
487177435Sjeffrunq_remove(struct runq *rq, struct thread *td)
48872376Sjake{
489165761Sjeff
490177435Sjeff	runq_remove_idx(rq, td, NULL);
491165761Sjeff}
492165761Sjeff
/*
 * Remove the thread from the queue recorded in its td_rqindex, clearing
 * the status bit if that queue becomes empty.  If 'idx' (a caller's scan
 * position) pointed at the now-empty queue, advance it to the next index
 * so the caller does not rescan an empty slot.
 */
void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
	struct rqhead *rqh;
	u_char pri;

	KASSERT(td->td_flags & TDF_INMEM,
		("runq_remove_idx: thread swapped out"));
	pri = td->td_rqindex;
	KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
	    td, td->td_priority, pri, rqh);
	TAILQ_REMOVE(rqh, td, td_runq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove_idx: empty");
		runq_clrbit(rq, pri);
		/* Bump the caller's index past the emptied queue. */
		if (idx != NULL && *idx == pri)
			*idx = (pri + 1) % RQ_NQS;
	}
}
514