/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/***
Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (= N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued, the pointer must no
longer point at it. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available; we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, so we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not.. i.e. is it 'earlier'
on the list than that thread or later.. If it's earlier, then the KSE is
removed from the last assigned thread (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.
***/
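/*
 * A worked example of the invariant above (numbers invented for
 * illustration, not taken from the code): with N = 2 processors and
 * X = 1 KSE running, M = 1 KSE sits on the run queue.  Given queued
 * threads t1, t2, t3 in priority order (t1 best), only t1 holds the
 * queued KSE and kg->kg_last_assigned points at t1; t2 and t3 wait
 * unassigned.  If a new thread with a better priority than t1 is made
 * runnable, setrunqueue() strips t1 of its KSE, gives it to the new
 * thread at the head of the list, and backs kg_last_assigned up to it,
 * so the one queued KSE again carries the priority of the one best
 * queued thread.
 */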

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 121171 2003-10-17 20:53:04Z jeff $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#if defined(SMP) && defined(__i386__)
#include <sys/smp.h>
#endif
#include <machine/critical.h>

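/*
 * The run queue status bitmap must provide exactly one bit per queue:
 * RQB_LEN words of RQB_BPW bits each have to cover all RQ_NQS priority
 * queues.  The assertion below checks this at compile time.
 */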
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

void panc(char *string1, char *string2);

#if 0
static void runq_readjust(struct runq *rq, struct kse *ke);
#endif
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the KSE that will be run next.  From that, find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

#if defined(SMP) && defined(__i386__)
	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
		/* Shutting down, run idlethread on APs */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
		ke->ke_flags |= KEF_DIDRUN;
		TD_SET_RUNNING(td);
		return (td);
	}
#endif

retry:
	ke = sched_choose();
	if (ke) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_proc->p_flag & P_SA) {
			if (kg->kg_last_assigned == td) {
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;

	/*
	 * If we are in panic, only allow system threads,
	 * plus the one we are running in, to be run.
	 */
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* note that it is no longer on the run queue */
		TD_SET_CAN_RUN(td);
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a surplus KSE, either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes that the original thread is not runnable.
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;
	struct thread *original;

	mtx_assert(&sched_lock, MA_OWNED);
	original = ke->ke_thread;
	KASSERT(original == NULL || TD_IS_INHIBITED(original),
	    ("reassigning KSE with runnable thread"));
	kg = ke->ke_ksegrp;
	if (original)
		original->td_kse = NULL;

	/*
	 * Find the first unassigned thread
	 */
	if ((td = kg->kg_last_assigned) != NULL)
		td = TAILQ_NEXT(td, td_runq);
	else
		td = TAILQ_FIRST(&kg->kg_runq);

	/*
	 * If we found one, assign it the kse, otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		sched_add(td);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
		return;
	}

	ke->ke_state = KES_IDLE;
	ke->ke_thread = NULL;
	TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
	kg->kg_idle_kses++;
	CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
	return;
}
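/*
 * Note on the above: advancing kg_last_assigned to the newly assigned
 * thread preserves the invariant described at the top of this file:
 * the threads from the queue head through kg_last_assigned are exactly
 * the ones holding KSEs, so the M queued KSEs still carry the
 * priorities of the M best queued threads.
 */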

#if 0
/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
 */
static void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* Bring its kse with it, leave the thread attached */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		return;
	}
	td3 = TAILQ_PREV(td, threadqueue, td_runq);
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		sched_rem(td);
		ke->ke_state = KES_THREAD;
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		if (td2 == td)
			kg->kg_last_assigned = td3;
		kse_reassign(ke);
	}
}
#endif

/*
 * Change the priority of a thread that is on the run queue.
 */
void
adjustrunqueue(struct thread *td, int newpri)
{
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));

	ke = td->td_kse;
	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
	/*
	 * If it is not a threaded process, take the shortcut.
	 */
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/* We only care about the kse in the run queue. */
		td->td_priority = newpri;
		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
			sched_rem(td);
			sched_add(td);
		}
		return;
	}

	/* It is a threaded process */
	kg = td->td_ksegrp;
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if (ke) {
		if (kg->kg_last_assigned == td) {
			kg->kg_last_assigned =
			    TAILQ_PREV(td, threadqueue, td_runq);
		}
		sched_rem(td);
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	td->td_priority = newpri;
	setrunqueue(td);
}
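/*
 * Illustrative note (assuming RQ_PPQ is 4, its usual value): the
 * unthreaded shortcut above only requeues the KSE when the new priority
 * crosses a queue boundary.  Changing td_priority from 100 to 102
 * leaves ke_rqindex at 100 / 4 == 25, so the KSE stays where it is;
 * changing it to 104 moves it to queue 26.
 */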

void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_proc->p_flag & P_SA) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		sched_add(td);
		return;
	}

	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking..
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			sched_rem(tda);
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		sched_add(ke->ke_thread);
	}
}

/************************************************************************
 * Critical section marker functions					*
 ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}
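/*
 * Usage sketch (hypothetical caller, not from this file): critical
 * sections nest, and only the outermost enter/exit pair calls into the
 * MD cpu_critical_enter()/cpu_critical_exit() code, so callers may
 * simply bracket any preemption-sensitive region:
 *
 *	critical_enter();
 *	frob_per_cpu_state();	(some hypothetical per-CPU work)
 *	critical_exit();
 */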

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
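/*
 * Illustrative note (assuming 32-bit status words, i.e. RQB_BPW == 32
 * and RQB_L2BPW == 5, and that RQB_FFS yields the zero-based index of
 * the lowest set bit): if word 0 is empty and word 1 has its lowest set
 * bit at position 5, runq_findbit() returns 5 + (1 << 5) == 37, the
 * index of the first non-empty queue.
 */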

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
}
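/*
 * Illustrative note (again assuming RQ_PPQ is 4 and the stock 0..255
 * priority range): runq_add() folds the thread priority space down to
 * RQ_NQS queues, so a thread at td_priority 100 lands in rq_queues[25]
 * and shares that queue with priorities 100-103.  Insertion at the tail
 * gives round-robin order within a queue.
 */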

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find the highest priority process on the run queue.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
}

#if 0
void
panc(char *string1, char *string2)
{
	printf("%s", string1);
	Debugger(string2);
}

void
thread_sanity_check(struct thread *td, char *string)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2 = NULL;
	unsigned int prevpri;
	int saw_lastassigned = 0;
	int unassigned = 0;
	int assigned = 0;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (ke) {
		if (p != ke->ke_proc) {
			panc(string, "wrong proc");
		}
		if (ke->ke_thread != td) {
			panc(string, "wrong thread");
		}
	}

	if ((p->p_flag & P_SA) == 0) {
		if (ke == NULL) {
			panc(string, "non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panc(string, "thread runqueue unsorted");
			}
			if ((td2->td_state == TDS_RUNQ) &&
			    td2->td_kse &&
			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
				panc(string, "KSE wrong state");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panc(string, "unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panc(string, "lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panc(string, "last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panc(string, "mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panc(string, "last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panc(string, "where on earth does lastassigned point?");
		}
#if 0
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panc(string, "BOUND thread with no KSE");
				}
			}
		}
#endif
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panc(string, "wrong number in runnable");
		}
#endif
	}
	if (assigned == 12345) {
		printf("%p %p %p %p %p %d, %d",
		    td, td2, ke, kg, p, assigned, saw_lastassigned);
	}
}
#endif