kern_switch.c revision 104157
/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_switch.c 104157 2002-09-29 23:04:34Z julian $
 */

/***

Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (N-X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued that pointer must be
removed from it. Since we know there were no more KSEs available,
(M was 1 and is now 0) and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can
prove that the next thread in the ksegrp list will not have a KSE to assign
to it, and can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not.. i.e. is it 'earlier'
on the list than that thread or later.. If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.

*/
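
/*
 * Illustrative sketch (not compiled): the 'steal' decision described above
 * reduces to a priority comparison against kg_last_assigned.  The fragment
 * below mirrors the "commandeer" branch of setrunqueue() further down;
 * td is the newly runnable thread, tda the last assigned one.
 */
#if 0
	tda = kg->kg_last_assigned;
	if (tda != NULL && tda->td_priority > td->td_priority) {
		/* The new thread sorts earlier, so take tda's KSE for it. */
		ke = tda->td_kse;
		tda->td_kse = NULL;
		ke->ke_thread = NULL;
		/* Back the pointer up one; it may become NULL. */
		kg->kg_last_assigned = TAILQ_PREV(tda, threadqueue, td_runq);
		runq_remove(&runq, ke);
	}
#endif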

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <machine/critical.h>

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * Global run queue.
 */
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)

static void runq_readjust(struct runq *rq, struct kse *ke);
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/

/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

retry:
	if ((ke = runq_choose(&runq))) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_flags & TDF_UNBOUND) {
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			if (kg->kg_last_assigned == td) {
				if (TAILQ_PREV(td, threadqueue, td_runq)
				    != NULL)
					printf("Yo MAMA!\n");
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			/*
			 * If we have started running an upcall,
			 * then TDF_UNBOUND was set because the thread was
			 * created without a KSE. Now that we have one,
			 * and it is our time to run, we make sure
			 * that BOUND semantics apply for the rest of
			 * the journey to userland, and into the UTS.
			 */
#ifdef	NOTYET
			if (td->td_flags & TDF_UPCALLING)
				td->td_flags &= ~TDF_UNBOUND;
#endif
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0))
		goto retry;
	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a KSE (now surplus), either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes the KSE is no longer linked to any threads (it has been cleaned).
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;
	struct thread *owner;

	mtx_assert(&sched_lock, MA_OWNED);
	kg = ke->ke_ksegrp;
	owner = ke->ke_bound;
	KASSERT(!(owner && ((owner->td_kse != ke) ||
		    (owner->td_flags & TDF_UNBOUND))),
		("kse_reassign: bad thread bound state"));
	if (owner && (owner->td_inhibitors == TDI_LOAN)) {
		TD_CLR_LOAN(owner);
		ke->ke_bound = NULL;
		ke->ke_thread = owner;
		owner->td_kse = ke;
		setrunqueue(owner);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p (give back)",
			 ke, owner);
		return;
	}

	/*
	 * Find the first unassigned thread.
	 * If there is a 'last assigned' then see what's next;
	 * otherwise look at what is first.
	 */
	if ((td = kg->kg_last_assigned)) {
		td = TAILQ_NEXT(td, td_runq);
	} else {
		td = TAILQ_FIRST(&kg->kg_runq);
	}

	/*
	 * If we found one, assign it the KSE; otherwise idle the KSE.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_add(&runq, ke);
		if (owner)
			TD_SET_LOAN(owner);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
	} else if (!owner) {
		ke->ke_state = KES_IDLE;
		ke->ke_thread = NULL;
		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses++;
		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
	} else {
		TD_CLR_LOAN(owner);
		ke->ke_state = KES_THREAD;
		ke->ke_thread = owner;
		owner->td_kse = ke;
		ke->ke_flags |= KEF_ONLOANQ;
		TAILQ_INSERT_HEAD(&kg->kg_lq, ke, ke_kgrlist);
		kg->kg_loan_kses++;
		CTR1(KTR_RUNQ, "kse_reassign: ke%p is on loan queue", ke);
	}
}

int
kserunnable(void)
{
	return runq_check(&runq);
}

/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
 */
void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3, *owner;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/* Bring its kse with it, leave the thread attached */
		runq_remove(&runq, ke);
		ke->ke_state = KES_THREAD;
		return;
	}
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		td->td_kse = NULL;
		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
			KASSERT(td3 != td, ("td3 somehow matched td"));
			/*
			 * Give the next unassigned thread to the KSE
			 * so the number of runnable KSEs remains
			 * constant.
			 */
			td3->td_kse = ke;
			ke->ke_thread = td3;
			kg->kg_last_assigned = td3;
			runq_readjust(&runq, ke);
		} else {
			/*
			 * There is no unassigned thread.
			 * If we were the last assigned one,
			 * adjust the last assigned pointer back
			 * one, which may result in NULL.
			 */
			if (td == td2) {
				kg->kg_last_assigned =
				    TAILQ_PREV(td, threadqueue, td_runq);
			}
			runq_remove(&runq, ke);
			KASSERT((ke->ke_state != KES_IDLE),
			    ("kse already idle"));
			if (ke->ke_bound) {
				owner = ke->ke_bound;
				if (owner->td_inhibitors == TDI_LOAN) {
					TD_CLR_LOAN(owner);
					ke->ke_bound = NULL;
					ke->ke_thread = owner;
					owner->td_kse = ke;
					setrunqueue(owner);
					CTR2(KTR_RUNQ,
					"remrunqueue: ke%p -> td%p (give back)",
					    ke, owner);
				} else {
					TD_CLR_LOAN(owner);
					ke->ke_state = KES_THREAD;
					ke->ke_thread = owner;
					owner->td_kse = ke;
					ke->ke_flags |= KEF_ONLOANQ;
					TAILQ_INSERT_HEAD(&kg->kg_lq, ke,
						ke_kgrlist);
					kg->kg_loan_kses++;
				}
			} else {
				ke->ke_state = KES_IDLE;
				ke->ke_thread = NULL;
				TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
				kg->kg_idle_kses++;
			}
		}
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
}

void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		KASSERT((td->td_kse != NULL),
		    ("queueing BAD thread to run queue"));
		ke = td->td_kse;
		if (ke->ke_flags & KEF_ONLOANQ) {
			ke->ke_flags &= ~KEF_ONLOANQ;
			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
			kg->kg_loan_kses--;
		}
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * OK, so we are threading with this thread.
	 * We don't have a KSE; see if we can get one.
	 */
	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		/*
		 * We will need a KSE; see if there is one.
		 * First look for a free one, before getting desperate.
		 * If we can't get one, our priority is not high enough..
		 * that's ok..
		 */
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking..
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (kg->kg_loan_kses) {
			ke = TAILQ_FIRST(&kg->kg_lq);
			TAILQ_REMOVE(&kg->kg_lq, ke, ke_kgrlist);
			ke->ke_flags &= ~KEF_ONLOANQ;
			ke->ke_state = KES_THREAD;
			TD_SET_LOAN(ke->ke_bound);
			kg->kg_loan_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
			runq_remove(&runq, ke);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a KSE to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		runq_add(&runq, ke);
	}
}

/************************************************************************
 * Critical section marker functions					*
 ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}
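
/*
 * Illustrative usage sketch (not compiled): nesting is tracked in
 * td_critnest, so only the outermost enter/exit pair calls into the
 * machine-dependent cpu_critical_*() routines.
 */
#if 0
	critical_enter();	/* outermost: calls cpu_critical_enter() */
	critical_enter();	/* nested: just bumps td_critnest to 2 */
	/* ... access per-CPU state without being preempted ... */
	critical_exit();	/* nested: drops td_critnest back to 1 */
	critical_exit();	/* outermost: calls cpu_critical_exit() */
#endif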


/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("runq_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("runq_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		("runq_add: process swapped out"));
	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;
}
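
/*
 * Worked example for the mapping above (assuming RQ_PPQ == 4 and 32-bit
 * status words): a thread at td_priority 130 goes into queue index
 * 130 / 4 == 32, which is tracked by bit 0 of status word 1, i.e.
 * RQB_WORD(32) == 1 and RQB_BIT(32) == 0x1.  runq_findbit() later
 * recovers the lowest set index when picking the next queue to service.
 */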

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find and remove the highest priority process from the run queue.
 * If there are no runnable processes, NULL is returned and the caller
 * (choosethread()) substitutes the per-cpu idle thread.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		TAILQ_REMOVE(rqh, ke, ke_procq);
		ke->ke_ksegrp->kg_runq_kses--;
		if (TAILQ_EMPTY(rqh)) {
			CTR0(KTR_RUNQ, "runq_choose: empty");
			runq_clrbit(rq, pri);
		}

		ke->ke_state = KES_THREAD;
		KASSERT((ke->ke_thread != NULL),
		    ("runq_choose: No thread on KSE"));
		KASSERT((ke->ke_thread->td_kse != NULL),
		    ("runq_choose: No KSE on thread"));
		KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
			("runq_choose: process swapped out"));
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
}

static void
runq_readjust(struct runq *rq, struct kse *ke)
{

	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

#if 0
void
thread_sanity_check(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2;
	unsigned int prevpri;
	int	saw_lastassigned;
	int unassigned;
	int assigned;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;


	if (ke) {
		if (p != ke->ke_proc) {
			panic("wrong proc");
		}
		if (ke->ke_thread != td) {
			panic("wrong thread");
		}
	}

	if ((p->p_flag & P_KSES) == 0) {
		if (ke == NULL) {
			panic("non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panic("thread runqueue unsorted");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panic("unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panic("lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panic("last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panic("mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panic("last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panic("where on earth does lastassigned point?");
		}
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panic("BOUND thread with no KSE");
				}
			}
		}
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panic("wrong number in runnable");
		}
#endif
	}
}
#endif