kern_switch.c revision 103216
150027Speter/*
272376Sjake * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
372376Sjake * All rights reserved.
450027Speter *
550027Speter * Redistribution and use in source and binary forms, with or without
650027Speter * modification, are permitted provided that the following conditions
750027Speter * are met:
850027Speter * 1. Redistributions of source code must retain the above copyright
950027Speter *    notice, this list of conditions and the following disclaimer.
1050027Speter * 2. Redistributions in binary form must reproduce the above copyright
1150027Speter *    notice, this list of conditions and the following disclaimer in the
1250027Speter *    documentation and/or other materials provided with the distribution.
1350027Speter *
1450027Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1550027Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1650027Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1750027Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1850027Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1950027Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2050027Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2150027Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2250027Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2350027Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2450027Speter * SUCH DAMAGE.
2550027Speter *
2650027Speter * $FreeBSD: head/sys/kern/kern_switch.c 103216 2002-09-11 08:13:56Z julian $
2750027Speter */
2850027Speter
2999072Sjulian/***
3099072Sjulian
3199072SjulianHere is the logic..
3299072Sjulian
3399072SjulianIf there are N processors, then there are at most N KSEs (kernel
3499072Sjulianschedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
3699072Sjulianmoment in question, then there are at most M (N-X) of these KSEs on
3799072Sjulianthe run queue, as running KSEs are not on the queue.
3899072Sjulian
3999072SjulianRunnable threads are queued off the KSEGROUP in priority order.
4099072SjulianIf there are M or more threads runnable, the top M threads
4199072Sjulian(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
4299072Sjuliantheir priority from those threads and are put on the run queue.
4399072Sjulian
4499072SjulianThe last thread that had a priority high enough to have a KSE associated
4599072Sjulianwith it, AND IS ON THE RUN QUEUE is pointed to by
4699072Sjuliankg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned as all the available KSEs are actively running, or because there
4899072Sjulianare no threads queued, that pointer is NULL.
4999072Sjulian
5099072SjulianWhen a KSE is removed from the run queue to become runnable, we know
5199072Sjulianit was associated with the highest priority thread in the queue (at the head
5299072Sjulianof the queue). If it is also the last assigned we know M was 1 and must
5399072Sjuliannow be 0. Since the thread is no longer queued that pointer must be
5499072Sjulianremoved from it. Since we know there were no more KSEs available,
5599072Sjulian(M was 1 and is now 0) and since we are not FREEING our KSE
5699072Sjulianbut using it, we know there are STILL no more KSEs available, we can prove
5799072Sjulianthat the next thread in the ksegrp list will not have a KSE to assign to
5899072Sjulianit, so we can show that the pointer must be made 'invalid' (NULL).
5999072Sjulian
6099072SjulianThe pointer exists so that when a new thread is made runnable, it can
6199072Sjulianhave its priority compared with the last assigned thread to see if
6299072Sjulianit should 'steal' its KSE or not.. i.e. is it 'earlier'
6399072Sjulianon the list than that thread or later.. If it's earlier, then the KSE is
6499072Sjulianremoved from the last assigned (which is now not assigned a KSE)
6599072Sjulianand reassigned to the new thread, which is placed earlier in the list.
6699072SjulianThe pointer is then backed up to the previous thread (which may or may not
6799072Sjulianbe the new thread).
6899072Sjulian
6999072SjulianWhen a thread sleeps or is removed, the KSE becomes available and if there
7099072Sjulianare queued threads that are not assigned KSEs, the highest priority one of
7199072Sjulianthem is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
7399072Sjulianto point to it.
7499072Sjulian
7599072SjulianThe following diagram shows 2 KSEs and 3 threads from a single process.
7699072Sjulian
7799072Sjulian RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
7899072Sjulian              \    \____
7999072Sjulian               \        \
8099072Sjulian    KSEGROUP---thread--thread--thread    (queued in priority order)
8199072Sjulian        \                 /
8299072Sjulian         \_______________/
8399072Sjulian          (last_assigned)
8499072Sjulian
8599072SjulianThe result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
8799072Sjulianthreads for that KSEGROUP. If this situation changes, the KSEs are
8899072Sjulianreassigned to keep this true.
8999072Sjulian
9099072Sjulian*/
9199072Sjulian
9250027Speter#include <sys/param.h>
9350027Speter#include <sys/systm.h>
9450027Speter#include <sys/kernel.h>
9565557Sjasone#include <sys/ktr.h>
9674914Sjhb#include <sys/lock.h>
9767365Sjhb#include <sys/mutex.h>
9850027Speter#include <sys/proc.h>
9950027Speter#include <sys/queue.h>
10093607Sdillon#include <machine/critical.h>
10150027Speter
/* The status bitmap must provide exactly one bit per run queue. */
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * Global run queue.
 */
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)

static void runq_readjust(struct runq *rq, struct kse *ke);
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
11499072Sjulian
11550027Speter/*
11699072Sjulian * Select the KSE that will be run next.  From that find the thread, and x
11799072Sjulian * remove it from the KSEGRP's run queue.  If there is thread clustering,
11899072Sjulian * this will be what does it.
11950027Speter */
12083366Sjulianstruct thread *
12183366Sjulianchoosethread(void)
12250027Speter{
12399072Sjulian	struct kse *ke;
12499072Sjulian	struct thread *td;
12599072Sjulian	struct ksegrp *kg;
12699072Sjulian
127100209Sgallatinretry:
12899072Sjulian	if ((ke = runq_choose(&runq))) {
12999072Sjulian		td = ke->ke_thread;
13099072Sjulian		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
13199072Sjulian		kg = ke->ke_ksegrp;
13299072Sjulian		if (td->td_flags & TDF_UNBOUND) {
13399072Sjulian			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
13499072Sjulian			if (kg->kg_last_assigned == td)
13599072Sjulian				if (TAILQ_PREV(td, threadqueue, td_runq)
13699072Sjulian				    != NULL)
13799072Sjulian					printf("Yo MAMA!\n");
13899072Sjulian				kg->kg_last_assigned = TAILQ_PREV(td,
13999072Sjulian				    threadqueue, td_runq);
14099072Sjulian			/*
14199072Sjulian			 *  If we have started running an upcall,
14299072Sjulian			 * Then TDF_UNBOUND WAS set because the thread was
14399072Sjulian			 * created without a KSE. Now that we have one,
14499072Sjulian			 * and it is our time to run, we make sure
14599072Sjulian			 * that BOUND semantics apply for the rest of
14699072Sjulian			 * the journey to userland, and into the UTS.
14799072Sjulian			 */
14899072Sjulian#ifdef	NOTYET
14999072Sjulian			if (td->td_flags & TDF_UPCALLING)
15099072Sjulian				tdf->td_flags &= ~TDF_UNBOUND;
15199072Sjulian#endif
15299072Sjulian		}
15399072Sjulian		kg->kg_runnable--;
15499072Sjulian		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
15599072Sjulian		    td, td->td_priority);
15699072Sjulian	} else {
15799889Sjulian		/* Simulate runq_choose() having returned the idle thread */
15899072Sjulian		td = PCPU_GET(idlethread);
159102592Sjulian		ke = td->td_kse;
16099072Sjulian		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
16199072Sjulian	}
162102592Sjulian	ke->ke_flags |= KEF_DIDRUN;
163100209Sgallatin	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
164100209Sgallatin	    (td->td_flags & TDF_INPANIC) == 0))
165100209Sgallatin		goto retry;
166103216Sjulian	TD_SET_RUNNING(td);
16799072Sjulian	return (td);
16872376Sjake}
16950027Speter
17099072Sjulian/*
17199072Sjulian * Given a KSE (now surplus), either assign a new runable thread to it
17299072Sjulian * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
17399072Sjulian * Assumes the kse is not linked to any threads any more. (has been cleaned).
17499072Sjulian */
17599072Sjulianvoid
17699072Sjuliankse_reassign(struct kse *ke)
17799072Sjulian{
17899072Sjulian	struct ksegrp *kg;
17999072Sjulian	struct thread *td;
18099072Sjulian
18199072Sjulian	kg = ke->ke_ksegrp;
18299072Sjulian
18399072Sjulian	/*
18499072Sjulian	 * Find the first unassigned thread
18599072Sjulian	 * If there is a 'last assigned' then see what's next.
18699072Sjulian	 * otherwise look at what is first.
18799072Sjulian	 */
18899072Sjulian	if ((td = kg->kg_last_assigned)) {
18999072Sjulian		td = TAILQ_NEXT(td, td_runq);
19099072Sjulian	} else {
19199072Sjulian		td = TAILQ_FIRST(&kg->kg_runq);
19299072Sjulian	}
19399072Sjulian
19499072Sjulian	/*
19599072Sjulian	 * If we found one assign it the kse, otherwise idle the kse.
19699072Sjulian	 */
19799072Sjulian	if (td) {
19899072Sjulian		kg->kg_last_assigned = td;
19999072Sjulian		td->td_kse = ke;
20099072Sjulian		ke->ke_thread = td;
20199072Sjulian		runq_add(&runq, ke);
20299072Sjulian		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
20399072Sjulian	} else {
20499072Sjulian		ke->ke_state = KES_IDLE;
20599072Sjulian		ke->ke_thread = NULL;
20699072Sjulian		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
20799072Sjulian		kg->kg_idle_kses++;
20899072Sjulian		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
20999072Sjulian	}
21099072Sjulian}
21199072Sjulian
21272376Sjakeint
21399072Sjuliankserunnable(void)
21472376Sjake{
21572376Sjake	return runq_check(&runq);
21650027Speter}
21750027Speter
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
 *
 * Called with sched_lock held; the thread must be in TDS_RUNQ state.
 */
void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT ((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if ((td->td_flags & TDF_UNBOUND) == 0)  {
		/* Bring its kse with it, leave the thread attached */
		runq_remove(&runq, ke);
		ke->ke_state = KES_THREAD;
		return;
	}
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value "));
		td->td_kse = NULL;
		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
			KASSERT(td3 != td, ("td3 somehow matched td"));
			/*
			 * Give the next unassigned thread to the KSE
			 * so the number of runnable KSEs remains
			 * constant.
			 */
			td3->td_kse = ke;
			ke->ke_thread = td3;
			kg->kg_last_assigned = td3;
			/* The KSE may need to move to td3's priority. */
			runq_readjust(&runq, ke);
		} else {
			/*
			 * There is no unassigned thread.
			 * If we were the last assigned one,
			 * adjust the last assigned pointer back
			 * one, which may result in NULL.
			 */
			if (td == td2) {
				kg->kg_last_assigned =
				    TAILQ_PREV(td, threadqueue, td_runq);
			}
			/* The KSE has no thread to run; idle it. */
			runq_remove(&runq, ke);
			KASSERT((ke->ke_state != KES_IDLE),
			    ("kse already idle"));
			ke->ke_state = KES_IDLE;
			ke->ke_thread = NULL;
			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses++;
		}
	}
	/* Finally take the thread itself off the ksegrp queue. */
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
}
29172376Sjake
/*
 * Make a thread runnable: queue it on its KSEGRP's run queue in priority
 * order and, if a KSE is available (free, stolen from a lower-priority
 * assigned thread, or the thread's own), attach it to the appropriate
 * thread and place the KSE on the system run queue.
 * Called with sched_lock held.
 */
void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		KASSERT((td->td_kse != NULL),
		    ("queueing BAD thread to run queue"));
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * Ok, so we are threading with this thread.
	 * We don't have a KSE, see if we can get one..
	 */
	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		/*
		 * We will need a KSE, see if there is one..
		 * First look for a free one, before getting desperate.
		 * If we can't get one, our priority is not high enough..
		 * that's ok..
		 */
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking..
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 * (steal the KSE from the lowest-priority thread
			 * that currently has one; last_assigned backs up.)
			 */
			ke = tda->td_kse;
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
		    	    TAILQ_PREV(tda, threadqueue, td_runq);
			runq_remove(&runq, ke);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and
	 * If needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		runq_add(&runq, ke);
	}
}
40950027Speter
41099072Sjulian/************************************************************************
41199072Sjulian * Critical section marker functions					*
41299072Sjulian ************************************************************************/
41388088Sjhb/* Critical sections that prevent preemption. */
41488088Sjhbvoid
41588088Sjhbcritical_enter(void)
41688088Sjhb{
41788088Sjhb	struct thread *td;
41888088Sjhb
41988088Sjhb	td = curthread;
42088088Sjhb	if (td->td_critnest == 0)
42193264Sdillon		cpu_critical_enter();
42288088Sjhb	td->td_critnest++;
42388088Sjhb}
42488088Sjhb
42588088Sjhbvoid
42688088Sjhbcritical_exit(void)
42788088Sjhb{
42888088Sjhb	struct thread *td;
42988088Sjhb
43088088Sjhb	td = curthread;
43188088Sjhb	if (td->td_critnest == 1) {
43288088Sjhb		td->td_critnest = 0;
43393264Sdillon		cpu_critical_exit();
43493264Sdillon	} else {
43588088Sjhb		td->td_critnest--;
43693264Sdillon	}
43788088Sjhb}
43888088Sjhb
43999072Sjulian
44099072Sjulian/************************************************************************
44199072Sjulian * SYSTEM RUN QUEUE manipulations and tests				*
44299072Sjulian ************************************************************************/
44372376Sjake/*
44499072Sjulian * Initialize a run structure.
44599072Sjulian */
44699072Sjulianvoid
44799072Sjulianrunq_init(struct runq *rq)
44899072Sjulian{
44999072Sjulian	int i;
45099072Sjulian
45199072Sjulian	bzero(rq, sizeof *rq);
45299072Sjulian	for (i = 0; i < RQ_NQS; i++)
45399072Sjulian		TAILQ_INIT(&rq->rq_queues[i]);
45499072Sjulian}
45599072Sjulian
45699072Sjulian/*
45772376Sjake * Clear the status bit of the queue corresponding to priority level pri,
45872376Sjake * indicating that it is empty.
45972376Sjake */
46072376Sjakestatic __inline void
46172376Sjakerunq_clrbit(struct runq *rq, int pri)
46272376Sjake{
46372376Sjake	struct rqbits *rqb;
46465557Sjasone
46572376Sjake	rqb = &rq->rq_status;
46672376Sjake	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
46772376Sjake	    rqb->rqb_bits[RQB_WORD(pri)],
46872376Sjake	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
46972376Sjake	    RQB_BIT(pri), RQB_WORD(pri));
47072376Sjake	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
47150027Speter}
47250027Speter
47350027Speter/*
47472376Sjake * Find the index of the first non-empty run queue.  This is done by
47572376Sjake * scanning the status bits, a set bit indicates a non-empty queue.
47650027Speter */
47772376Sjakestatic __inline int
47872376Sjakerunq_findbit(struct runq *rq)
47972376Sjake{
48072376Sjake	struct rqbits *rqb;
48172376Sjake	int pri;
48272376Sjake	int i;
48372376Sjake
48472376Sjake	rqb = &rq->rq_status;
48572376Sjake	for (i = 0; i < RQB_LEN; i++)
48672376Sjake		if (rqb->rqb_bits[i]) {
48798469Speter			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
48872376Sjake			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
48972376Sjake			    rqb->rqb_bits[i], i, pri);
49072376Sjake			return (pri);
49172376Sjake		}
49272376Sjake
49372376Sjake	return (-1);
49472376Sjake}
49572376Sjake
49672376Sjake/*
49772376Sjake * Set the status bit of the queue corresponding to priority level pri,
49872376Sjake * indicating that it is non-empty.
49972376Sjake */
50072376Sjakestatic __inline void
50172376Sjakerunq_setbit(struct runq *rq, int pri)
50272376Sjake{
50372376Sjake	struct rqbits *rqb;
50472376Sjake
50572376Sjake	rqb = &rq->rq_status;
50672376Sjake	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
50772376Sjake	    rqb->rqb_bits[RQB_WORD(pri)],
50872376Sjake	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
50972376Sjake	    RQB_BIT(pri), RQB_WORD(pri));
51072376Sjake	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
51172376Sjake}
51272376Sjake
/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 * Called with sched_lock held; the KSE must have a thread attached
 * and must not already be on a run queue.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("runq_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("runq_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		("runq_add: process swapped out"));
	/* Map the thread priority onto one of the RQ_NQS queues. */
	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	/* FIFO ordering within a single priority level. */
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;
}
54250027Speter
54350027Speter/*
54472376Sjake * Return true if there are runnable processes of any priority on the run
54572376Sjake * queue, false otherwise.  Has no side effects, does not modify the run
54672376Sjake * queue structure.
54750027Speter */
54872376Sjakeint
54972376Sjakerunq_check(struct runq *rq)
55050027Speter{
55172376Sjake	struct rqbits *rqb;
55272376Sjake	int i;
55372376Sjake
55472376Sjake	rqb = &rq->rq_status;
55572376Sjake	for (i = 0; i < RQB_LEN; i++)
55672376Sjake		if (rqb->rqb_bits[i]) {
55772376Sjake			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
55872376Sjake			    rqb->rqb_bits[i], i);
55972376Sjake			return (1);
56072376Sjake		}
56172376Sjake	CTR0(KTR_RUNQ, "runq_check: empty");
56272376Sjake
56372376Sjake	return (0);
56450027Speter}
56550027Speter
/*
 * Find and remove the highest priority runnable KSE from the run queue.
 * Returns NULL if the run queue is empty (the caller, choosethread(),
 * substitutes the per-cpu idle thread in that case).
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		TAILQ_REMOVE(rqh, ke, ke_procq);
		ke->ke_ksegrp->kg_runq_kses--;
		if (TAILQ_EMPTY(rqh)) {
			CTR0(KTR_RUNQ, "runq_choose: empty");
			/* Last entry at this priority: clear its bit. */
			runq_clrbit(rq, pri);
		}

		ke->ke_state = KES_THREAD;
		KASSERT((ke->ke_thread != NULL),
		    ("runq_choose: No thread on KSE"));
		KASSERT((ke->ke_thread->td_kse != NULL),
		    ("runq_choose: No KSE on thread"));
		KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
			("runq_choose: process swapped out"));
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}
60572376Sjake
60672376Sjake/*
60799072Sjulian * Remove the KSE from the queue specified by its priority, and clear the
60872376Sjake * corresponding status bit if the queue becomes empty.
60999072Sjulian * Caller must set ke->ke_state afterwards.
61072376Sjake */
61172376Sjakevoid
61283366Sjulianrunq_remove(struct runq *rq, struct kse *ke)
61372376Sjake{
61472376Sjake	struct rqhead *rqh;
61572376Sjake	int pri;
61672376Sjake
61799072Sjulian	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
61872376Sjake	mtx_assert(&sched_lock, MA_OWNED);
619100913Stanimura	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
620100913Stanimura		("runq_remove: process swapped out"));
62183366Sjulian	pri = ke->ke_rqindex;
62272376Sjake	rqh = &rq->rq_queues[pri];
62372376Sjake	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
62490538Sjulian	    ke, ke->ke_thread->td_priority, pri, rqh);
62583366Sjulian	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
62683366Sjulian	TAILQ_REMOVE(rqh, ke, ke_procq);
62772376Sjake	if (TAILQ_EMPTY(rqh)) {
62872376Sjake		CTR0(KTR_RUNQ, "runq_remove: empty");
62972376Sjake		runq_clrbit(rq, pri);
63072376Sjake	}
63199942Sjulian	ke->ke_state = KES_THREAD;
63299072Sjulian	ke->ke_ksegrp->kg_runq_kses--;
63372376Sjake}
63499072Sjulian
63599072Sjulianstatic void
63699072Sjulianrunq_readjust(struct runq *rq, struct kse *ke)
63799072Sjulian{
63899072Sjulian
63999072Sjulian	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
64099072Sjulian		runq_remove(rq, ke);
64199072Sjulian		runq_add(rq, ke);
64299072Sjulian	}
64399072Sjulian}
64499072Sjulian
64599834Sjulian#if 0
64699072Sjulianvoid
64799072Sjulianthread_sanity_check(struct thread *td)
64899072Sjulian{
64999072Sjulian	struct proc *p;
65099072Sjulian	struct ksegrp *kg;
65199072Sjulian	struct kse *ke;
65299072Sjulian	struct thread *td2;
65399072Sjulian	unsigned int prevpri;
65499072Sjulian	int	saw_lastassigned;
65599072Sjulian	int unassigned;
65699072Sjulian	int assigned;
65799072Sjulian
65899072Sjulian	p = td->td_proc;
65999072Sjulian	kg = td->td_ksegrp;
66099072Sjulian	ke = td->td_kse;
66199072Sjulian
66299072Sjulian	if (kg != &p->p_ksegrp) {
66399072Sjulian		panic ("wrong ksegrp");
66499072Sjulian	}
66599072Sjulian
66699072Sjulian	if (ke) {
66799072Sjulian		if (ke != &p->p_kse) {
66899072Sjulian			panic("wrong kse");
66999072Sjulian		}
67099072Sjulian		if (ke->ke_thread != td) {
67199072Sjulian			panic("wrong thread");
67299072Sjulian		}
67399072Sjulian	}
67499072Sjulian
67599072Sjulian	if ((p->p_flag & P_KSES) == 0) {
67699072Sjulian		if (ke == NULL) {
67799072Sjulian			panic("non KSE thread lost kse");
67899072Sjulian		}
67999072Sjulian	} else {
68099072Sjulian		prevpri = 0;
68199072Sjulian		saw_lastassigned = 0;
68299072Sjulian		unassigned = 0;
68399072Sjulian		assigned = 0;
68499072Sjulian		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
68599072Sjulian			if (td2->td_priority < prevpri) {
68699072Sjulian				panic("thread runqueue unosorted");
68799072Sjulian			}
68899072Sjulian			prevpri = td2->td_priority;
68999072Sjulian			if (td2->td_kse) {
69099072Sjulian				assigned++;
69199072Sjulian				if (unassigned) {
69299072Sjulian					panic("unassigned before assigned");
69399072Sjulian				}
69499072Sjulian 				if  (kg->kg_last_assigned == NULL) {
69599072Sjulian					panic("lastassigned corrupt");
69699072Sjulian				}
69799072Sjulian				if (saw_lastassigned) {
69899072Sjulian					panic("last assigned not last");
69999072Sjulian				}
70099072Sjulian				if (td2->td_kse->ke_thread != td2) {
70199072Sjulian					panic("mismatched kse/thread");
70299072Sjulian				}
70399072Sjulian			} else {
70499072Sjulian				unassigned++;
70599072Sjulian			}
70699072Sjulian			if (td2 == kg->kg_last_assigned) {
70799072Sjulian				saw_lastassigned = 1;
70899072Sjulian				if (td2->td_kse == NULL) {
70999072Sjulian					panic("last assigned not assigned");
71099072Sjulian				}
71199072Sjulian			}
71299072Sjulian		}
71399072Sjulian		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
71499072Sjulian			panic("where on earth does lastassigned point?");
71599072Sjulian		}
71699072Sjulian		FOREACH_THREAD_IN_GROUP(kg, td2) {
71799072Sjulian			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
718103216Sjulian			    (TD_ON_RUNQ(td2))) {
71999072Sjulian				assigned++;
72099072Sjulian				if (td2->td_kse == NULL) {
72199072Sjulian					panic ("BOUND thread with no KSE");
72299072Sjulian				}
72399072Sjulian			}
72499072Sjulian		}
72599072Sjulian#if 0
72699072Sjulian		if ((unassigned + assigned) != kg->kg_runnable) {
72799072Sjulian			panic("wrong number in runnable");
72899072Sjulian		}
72999072Sjulian#endif
73099072Sjulian	}
73199072Sjulian}
73299834Sjulian#endif
73399072Sjulian
734