kern_switch.c revision 103216
/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_switch.c 103216 2002-09-11 08:13:56Z julian $
 */

/***

Here is the logic:

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (= N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned, we know M was 1 and must
now be 0. Since the thread is no longer queued, that pointer must be
removed from it. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, and therefore the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, its
priority can be compared with that of the last assigned thread to see
whether it should 'steal' that thread's KSE or not, i.e. whether it sorts
'earlier' on the list than that thread or later. If it sorts earlier, the
KSE is removed from the last assigned thread (which is then left without a
KSE) and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, its KSE becomes available, and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.

*/
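
/*
 * A minimal sketch (not compiled in) of the invariant described above,
 * using only fields already referenced in this file: every thread on the
 * ksegrp run queue at or before kg_last_assigned has a KSE, and every
 * thread after it does not.  thread_sanity_check() at the bottom of this
 * file performs a more thorough version of the same walk.
 */
#if 0
static void
ksegrp_assign_invariant(struct ksegrp *kg)
{
	struct thread *td;
	int past_last;

	mtx_assert(&sched_lock, MA_OWNED);
	past_last = (kg->kg_last_assigned == NULL);
	TAILQ_FOREACH(td, &kg->kg_runq, td_runq) {
		/* Assigned threads must all precede unassigned ones. */
		KASSERT((td->td_kse != NULL) == !past_last,
		    ("kg_last_assigned invariant violated"));
		if (td == kg->kg_last_assigned)
			past_last = 1;
	}
}
#endif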

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <machine/critical.h>

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * Global run queue.
 */
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)

static void runq_readjust(struct runq *rq, struct kse *ke);

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/

/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

retry:
	if ((ke = runq_choose(&runq))) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_flags & TDF_UNBOUND) {
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			if (kg->kg_last_assigned == td) {
				if (TAILQ_PREV(td, threadqueue, td_runq)
				    != NULL)
					printf("Yo MAMA!\n");
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			/*
			 * If we have started running an upcall,
			 * then TDF_UNBOUND was set because the thread was
			 * created without a KSE. Now that we have one,
			 * and it is our time to run, we make sure
			 * that BOUND semantics apply for the rest of
			 * the journey to userland, and into the UTS.
			 */
#ifdef	NOTYET
			if (td->td_flags & TDF_UPCALLING)
				td->td_flags &= ~TDF_UNBOUND;
#endif
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		ke = td->td_kse;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	ke->ke_flags |= KEF_DIDRUN;
	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0))
		goto retry;
	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Given a KSE (now surplus), either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes the kse is not linked to any threads any more (it has been cleaned).
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;

	kg = ke->ke_ksegrp;

	/*
	 * Find the first unassigned thread: if there is a 'last assigned',
	 * look at what follows it; otherwise look at the head of the queue.
	 */
	if ((td = kg->kg_last_assigned)) {
		td = TAILQ_NEXT(td, td_runq);
	} else {
		td = TAILQ_FIRST(&kg->kg_runq);
	}

	/*
	 * If we found one, assign it the kse; otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_add(&runq, ke);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
	} else {
		ke->ke_state = KES_IDLE;
		ke->ke_thread = NULL;
		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses++;
		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
	}
}

int
kserunnable(void)
{
	return (runq_check(&runq));
}

/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
 */
void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	kg->kg_runnable--;
	TD_SET_CAN_RUN(td);
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/* Bring its kse with it, leave the thread attached */
		runq_remove(&runq, ke);
		ke->ke_state = KES_THREAD;
		return;
	}
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		td->td_kse = NULL;
		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
			KASSERT(td3 != td, ("td3 somehow matched td"));
			/*
			 * Give the next unassigned thread to the KSE
			 * so the number of runnable KSEs remains
			 * constant.
			 */
			td3->td_kse = ke;
			ke->ke_thread = td3;
			kg->kg_last_assigned = td3;
			runq_readjust(&runq, ke);
		} else {
			/*
			 * There is no unassigned thread.
			 * If we were the last assigned one,
			 * adjust the last assigned pointer back
			 * one, which may result in NULL.
			 */
			if (td == td2) {
				kg->kg_last_assigned =
				    TAILQ_PREV(td, threadqueue, td_runq);
			}
			runq_remove(&runq, ke);
			KASSERT((ke->ke_state != KES_IDLE),
			    ("kse already idle"));
			ke->ke_state = KES_IDLE;
			ke->ke_thread = NULL;
			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses++;
		}
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
}

void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
	    ("setrunqueue: bad thread state"));
	TD_SET_RUNQ(td);
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		KASSERT((td->td_kse != NULL),
		    ("queueing BAD thread to run queue"));
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * Ok, so we are threading with this thread.
	 * We don't have a KSE, see if we can get one..
	 */
	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		/*
		 * We will need a KSE, see if there is one..
		 * First look for a free one, before getting desperate.
		 * If we can't get one, our priority is not high enough..
		 * that's ok..
		 */
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking..
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_THREAD;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
			runq_remove(&runq, ke);
		}
	} else {
		/*
		 * Temporarily disassociate so it looks like the other cases.
		 */
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in.. (maybe us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		runq_add(&runq, ke);
	}
}

/************************************************************************
 * Critical section marker functions					*
 ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}
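
/*
 * Usage sketch (illustrative only, not compiled in): a critical section
 * keeps the current thread on its CPU and blocks preemption for the
 * duration, which is the usual way to read or update per-CPU state
 * consistently.
 */
#if 0
	struct thread *idle;

	critical_enter();
	/* Code here cannot be preempted and stays on the current CPU. */
	idle = PCPU_GET(idlethread);	/* e.g. a consistent per-CPU read */
	critical_exit();
#endif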


/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
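
/*
 * Sketch (not compiled in) of the bitmap arithmetic used by runq_findbit()
 * above and by runq_setbit()/runq_clrbit(), assuming the stock definitions
 * in <sys/runq.h>: a queue index is split into a status word and a bit
 * within that word, and the first set bit of a non-empty word maps back to
 * a queue index.
 */
#if 0
	word = RQB_WORD(pri);		/* which rqb_bits[] word holds pri */
	bit = RQB_BIT(pri);		/* pri's mask within that word */
	pri = RQB_FFS(rqb->rqb_bits[word]) + (word << RQB_L2BPW);
#endif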

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("runq_add: No KSE on thread"));
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("runq_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("runq_add: process swapped out"));
	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;
}
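
/*
 * Worked example for the mapping above (a sketch; RQ_PPQ comes from
 * <sys/runq.h>, where priorities are grouped RQ_PPQ to a queue): a KSE that
 * inherited priority 130 from its thread lands on rq_queues[130 / RQ_PPQ],
 * and runq_setbit() marks that index in the status bitmap so runq_findbit()
 * can locate it without walking empty queues.
 */
#if 0
	pri = ke->ke_thread->td_priority / RQ_PPQ; /* e.g. 130 -> 32 if RQ_PPQ == 4 */
	rqh = &rq->rq_queues[pri];
#endif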

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find and remove the highest priority KSE from the run queue.
 * If there are no runnable KSEs, NULL is returned; choosethread()
 * substitutes the per-cpu idle thread in that case.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		TAILQ_REMOVE(rqh, ke, ke_procq);
		ke->ke_ksegrp->kg_runq_kses--;
		if (TAILQ_EMPTY(rqh)) {
			CTR0(KTR_RUNQ, "runq_choose: empty");
			runq_clrbit(rq, pri);
		}

		ke->ke_state = KES_THREAD;
		KASSERT((ke->ke_thread != NULL),
		    ("runq_choose: No thread on KSE"));
		KASSERT((ke->ke_thread->td_kse != NULL),
		    ("runq_choose: No KSE on thread"));
		KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
		    ("runq_choose: process swapped out"));
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The KSE is left in KES_THREAD state; callers that want a different
 * state (e.g. KES_IDLE) must set ke->ke_state afterwards.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
	    ("runq_remove: process swapped out"));
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
	ke->ke_state = KES_THREAD;
	ke->ke_ksegrp->kg_runq_kses--;
}

/*
 * Requeue a KSE whose thread's priority has changed so that it sits on
 * the run queue matching that priority.
 */
static void
runq_readjust(struct runq *rq, struct kse *ke)
{

	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

#if 0
void
thread_sanity_check(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2;
	unsigned int prevpri;
	int	saw_lastassigned;
	int unassigned;
	int assigned;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (kg != &p->p_ksegrp) {
		panic("wrong ksegrp");
	}

	if (ke) {
		if (ke != &p->p_kse) {
			panic("wrong kse");
		}
		if (ke->ke_thread != td) {
			panic("wrong thread");
		}
	}

	if ((p->p_flag & P_KSES) == 0) {
		if (ke == NULL) {
			panic("non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panic("thread runqueue unsorted");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panic("unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panic("lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panic("last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panic("mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panic("last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panic("where on earth does lastassigned point?");
		}
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (TD_ON_RUNQ(td2))) {
				assigned++;
				if (td2->td_kse == NULL) {
					panic("BOUND thread with no KSE");
				}
			}
		}
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panic("wrong number in runnable");
		}
#endif
	}
}
#endif