/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_switch.c 99887 2002-07-12 18:34:22Z jhb $
 */

/***

Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (M = N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued that pointer must be
removed from it. Since we know there were no more KSEs available,
(M was 1 and is now 0) and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can
prove that the next thread in the ksegrp list will not have a KSE to assign
to it, so we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not; i.e. is it 'earlier'
on the list than that thread or later? If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.

*/
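
/*
 * A small worked example of the above (purely illustrative, with
 * made-up priority numbers; recall that a numerically smaller
 * td_priority is a better priority): suppose N = 2 KSEs, X = 1 of them
 * running, and three runnable threads at priorities 4, 8 and 12.
 * Then M = 1, so the priority 4 thread at the head of the ksegrp queue
 * is preassigned the one available KSE and kg_last_assigned points at
 * it.  If a thread at priority 6 now becomes runnable it does not
 * steal the KSE (4 > 6 is false); a thread at priority 2 would steal
 * it, and kg_last_assigned would be backed up to the new thread.
 */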

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <machine/critical.h>

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * Global run queue.
 */
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)

static void runq_readjust(struct runq *rq, struct kse *ke);
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/

/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

	if ((ke = runq_choose(&runq))) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_flags & TDF_UNBOUND) {
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			if (kg->kg_last_assigned == td) {
				if (TAILQ_PREV(td, threadqueue, td_runq)
				    != NULL)
					printf("Yo MAMA!\n");
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			/*
			 * If we have started running an upcall,
			 * then TDF_UNBOUND was set because the thread was
			 * created without a KSE. Now that we have one,
			 * and it is our time to run, we make sure
			 * that BOUND semantics apply for the rest of
			 * the journey to userland, and into the UTS.
			 */
#ifdef	NOTYET
			if (td->td_flags & TDF_UPCALLING)
				td->td_flags &= ~TDF_UNBOUND;
#endif
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Pretend the idle thread was on the run queue. */
		td = PCPU_GET(idlethread);
		td->td_kse->ke_state = KES_UNQUEUED;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	td->td_state = TDS_RUNNING;
	return (td);
}
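
/*
 * A minimal sketch of the expected calling context (illustrative only,
 * not code from this file; the caller is assumed to be the scheduler's
 * context-switch path, with sched_lock held as asserted throughout
 * this file):
 *
 *	mtx_lock_spin(&sched_lock);
 *	td = choosethread();	(never returns NULL)
 *	... switch to td ...
 *	mtx_unlock_spin(&sched_lock);
 */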

/*
 * Given a KSE (now surplus), either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes the kse is not linked to any threads any more (it has been cleaned).
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;

	kg = ke->ke_ksegrp;

	/*
	 * Find the first unassigned thread:
	 * if there is a 'last assigned' then see what's next;
	 * otherwise look at what is first.
	 */
	if ((td = kg->kg_last_assigned)) {
		td = TAILQ_NEXT(td, td_runq);
	} else {
		td = TAILQ_FIRST(&kg->kg_runq);
	}

	/*
	 * If we found one, assign it the kse; otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_add(&runq, ke);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
	} else {
		KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
		ke->ke_state = KES_IDLE;
		ke->ke_thread = NULL;
		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses++;
		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
	}
}
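
/*
 * An illustrative call site (a sketch; the same idiom is noted in the
 * comment inside the alternate setrunqueue() below): when a thread
 * gives up its KSE, detach it first and then let kse_reassign() find
 * the KSE a new home or idle it:
 *
 *	td->td_kse = NULL;
 *	kse_reassign(ke);
 */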

int
kserunnable(void)
{
	return runq_check(&runq);
}

/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
 */
void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td->td_state == TDS_RUNQ),
		("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	td->td_state = TDS_UNQUEUED;
	kg->kg_runnable--;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/* Bring its kse with it, leave the thread attached. */
		runq_remove(&runq, ke);
		ke->ke_state = KES_UNQUEUED;
		return;
	}
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		td->td_kse = NULL;
		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
			KASSERT(td3 != td, ("td3 somehow matched td"));
			/*
			 * Give the next unassigned thread to the KSE
			 * so the number of runnable KSEs remains
			 * constant.
			 */
			td3->td_kse = ke;
			ke->ke_thread = td3;
			kg->kg_last_assigned = td3;
			runq_readjust(&runq, ke);
		} else {
			/*
			 * There is no unassigned thread.
			 * If we were the last assigned one,
			 * adjust the last assigned pointer back
			 * one, which may result in NULL.
			 */
			if (td == td2) {
				kg->kg_last_assigned =
				    TAILQ_PREV(td, threadqueue, td_runq);
			}
			runq_remove(&runq, ke);
			KASSERT((ke->ke_state != KES_IDLE),
			    ("kse already idle"));
			ke->ke_state = KES_IDLE;
			ke->ke_thread = NULL;
			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses++;
		}
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
}

#if 1 /* use the first version */

void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
	td->td_state = TDS_RUNQ;
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		KASSERT((td->td_kse != NULL),
		    ("queueing BAD thread to run queue"));
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * Ok, so we are threading with this thread.
	 * We don't have a KSE, see if we can get one.
	 */
	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		/*
		 * We will need a KSE, see if there is one.
		 * First look for a free one, before getting desperate.
		 * If we can't get one, our priority is not high enough;
		 * that's ok.
		 */
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking.
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_UNQUEUED;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
			runq_remove(&runq, ke);
		}
	} else {
		KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
		KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in (may be us).
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		runq_add(&runq, ke);
	}
}

#else

void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
	td->td_state = TDS_RUNQ;
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * First add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * The following could be achieved by simply doing:
	 * td->td_kse = NULL; kse_reassign(ke);
	 * but I felt that I'd try to do it inline here.
	 * All this work may not be worth it.
	 */
	if ((ke = td->td_kse)) { /* XXXKSE */
		/*
		 * We have a KSE already. See whether we can keep it
		 * or if we need to give it to someone else.
		 * Either way it will need to be inserted into
		 * the runq. kse_reassign() will do this as will runq_add().
		 */
		if ((kg->kg_last_assigned) &&
		   (kg->kg_last_assigned->td_priority > td->td_priority)) {
			/*
			 * We can definitely keep the KSE
			 * as the "last assigned thread" has
			 * a lower priority than we do.
			 * The "last assigned" pointer stays the same.
			 */
			runq_add(&runq, ke);
			return;
		}
		/*
		 * Give it to the correct thread,
		 * which may be (often is) us, but may not be.
		 */
		td->td_kse = NULL;
		kse_reassign(ke);
		return;
	}
	/*
	 * There are two cases where KSE adjustment is needed.
	 * Usurpation of an already assigned KSE, and assignment
	 * of a previously IDLE KSE.
	 */
	if (kg->kg_idle_kses) {
		/*
		 * If there are unassigned KSEs then we definitely
		 * will be assigned one from the idle KSE list.
		 * If we are the last, we should get the "last
		 * assigned" pointer set to us as well.
		 */
		ke = TAILQ_FIRST(&kg->kg_iq);
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		ke->ke_state = KES_UNQUEUED;
		kg->kg_idle_kses--;
		ke->ke_thread = td;
		td->td_kse = ke;
		runq_add(&runq, ke);
		if (TAILQ_NEXT(td, td_runq) == NULL) {
			kg->kg_last_assigned = td;
		}
	} else if (kg->kg_last_assigned &&
		(kg->kg_last_assigned->td_priority > td->td_priority)) {
		/*
		 * If there were none last-assigned, all KSEs
		 * are actually out running as we speak.
		 * If there was a last assigned, but we didn't see it,
		 * we must be inserting before it, so take the KSE from
		 * the last assigned, and back it up one entry. Then,
		 * assign the KSE to the new thread and adjust its priority.
		 */
		td2 = kg->kg_last_assigned;
		ke = td2->td_kse;
		kg->kg_last_assigned =
		    TAILQ_PREV(td2, threadqueue, td_runq);
		td2->td_kse = NULL;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_readjust(&runq, ke);
	}
}
#endif

/************************************************************************
 * Critical section marker functions					*
 ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}
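
/*
 * Illustrative usage (a sketch, not code from this file): critical
 * sections nest, so each critical_enter() must be balanced by a
 * critical_exit(), and only the outermost pair touches the hardware
 * via cpu_critical_enter()/cpu_critical_exit():
 *
 *	critical_enter();
 *	... code that must not be preempted on this CPU ...
 *	critical_exit();
 */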

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}
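
/*
 * For illustration (assuming the typical parameters, which this file
 * does not itself define: RQB_BPW of 32 bits per status word, so
 * RQB_L2BPW is 5): priority level 50 maps to word RQB_WORD(50) ==
 * 50 >> 5 == 1, and bit RQB_BIT(50) == 1 << (50 & 31) == 1 << 18 ==
 * 0x00040000 within that word.
 */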

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("runq_add: No KSE on thread"));
	if (ke->ke_state == KES_ONRUNQ)
		return;
#if defined(INVARIANTS) && defined(DIAGNOSTIC)
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("runq_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
#endif
	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;
}
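
/*
 * A note on the priority-to-queue mapping above (with the assumed
 * usual values of RQ_PPQ == 4 and RQ_NQS == 64 covering the 0..255
 * priority range): priorities 0..3 share queue 0, 4..7 share queue 1,
 * and so on; runq_findbit() then locates the lowest-numbered (best)
 * non-empty queue.
 */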

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find and remove the highest priority KSE from the run queue.
 * If the queue is empty, NULL is returned; the caller (choosethread())
 * then falls back to the per-cpu idle thread.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		TAILQ_REMOVE(rqh, ke, ke_procq);
		ke->ke_ksegrp->kg_runq_kses--;
		if (TAILQ_EMPTY(rqh)) {
			CTR0(KTR_RUNQ, "runq_choose: empty");
			runq_clrbit(rq, pri);
		}

		ke->ke_state = KES_RUNNING;
		KASSERT((ke->ke_thread != NULL),
		    ("runq_choose: No thread on KSE"));
		KASSERT((ke->ke_thread->td_kse != NULL),
		    ("runq_choose: No KSE on thread"));
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The KSE is left marked KES_UNQUEUED; the caller may set ke->ke_state
 * to something else afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
	ke->ke_state = KES_UNQUEUED;
	ke->ke_ksegrp->kg_runq_kses--;
}

static void
runq_readjust(struct runq *rq, struct kse *ke)
{

	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

#if 0
void
thread_sanity_check(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2;
	unsigned int prevpri;
	int saw_lastassigned;
	int unassigned;
	int assigned;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (kg != &p->p_ksegrp) {
		panic("wrong ksegrp");
	}

	if (ke) {
		if (ke != &p->p_kse) {
			panic("wrong kse");
		}
		if (ke->ke_thread != td) {
			panic("wrong thread");
		}
	}

	if ((p->p_flag & P_KSES) == 0) {
		if (ke == NULL) {
			panic("non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panic("thread runqueue unsorted");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panic("unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panic("lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panic("last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panic("mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panic("last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panic("where on earth does lastassigned point?");
		}
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (td2->td_state == TDS_RUNQ)) {
				assigned++;
				if (td2->td_kse == NULL) {
					panic("BOUND thread with no KSE");
				}
			}
		}
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panic("wrong number in runnable");
		}
#endif
	}
}
#endif