/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_switch.c 99889 2002-07-12 20:16:46Z julian $
 */

/***

Here is the logic..

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If X of these KSEs are actually running at the
moment in question, then there are at most M (= N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is removed from the run queue to be run, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued, that pointer must be
removed from it. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available. We can
therefore prove that the next thread in the ksegrp list will not have a
KSE to assign to it, so the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not.. i.e. is it 'earlier'
on the list than that thread or later.. If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.

*/
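
/*
 * A concrete (hypothetical) snapshot of the above accounting: on a
 * 4-CPU system (N = 4) with one of this group's KSEs currently
 * running (X = 1), at most M = 3 of the group's KSEs can sit on the
 * run queue, carrying the priorities of the group's three
 * highest-priority runnable threads; kg_last_assigned then points at
 * the third of those threads.
 */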

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <machine/critical.h>

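/*
 * Compile-time sanity check: the run-queue status bitmap (RQB_LEN
 * words of RQB_BPW bits each) must supply exactly one bit per run
 * queue (RQ_NQS of them).
 */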
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * Global run queue.
 */
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)

static void runq_readjust(struct runq *rq, struct kse *ke);
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/

/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

	if ((ke = runq_choose(&runq))) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_flags & TDF_UNBOUND) {
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			if (kg->kg_last_assigned == td) {
				if (TAILQ_PREV(td, threadqueue, td_runq)
				    != NULL)
					printf("Yo MAMA!\n");
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			/*
			 * If we have started running an upcall,
			 * then TDF_UNBOUND was set because the thread was
			 * created without a KSE. Now that we have one,
			 * and it is our time to run, we make sure
			 * that BOUND semantics apply for the rest of
			 * the journey to userland, and into the UTS.
			 */
#ifdef	NOTYET
			if (td->td_flags & TDF_UPCALLING)
				td->td_flags &= ~TDF_UNBOUND;
#endif
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Simulate runq_choose() having returned the idle thread */
		td = PCPU_GET(idlethread);
		td->td_kse->ke_state = KES_RUNNING;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	td->td_state = TDS_RUNNING;
	return (td);
}

/*
 * Given a KSE (now surplus), either assign a new runnable thread to it
 * (and put it in the run queue) or put it on the ksegrp's idle KSE list.
 * Assumes the KSE is no longer linked to any thread (it has been cleaned).
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;

	kg = ke->ke_ksegrp;

	/*
	 * Find the first unassigned thread:
	 * if there is a 'last assigned' then see what's next,
	 * otherwise look at what is first.
	 */
	if ((td = kg->kg_last_assigned)) {
		td = TAILQ_NEXT(td, td_runq);
	} else {
		td = TAILQ_FIRST(&kg->kg_runq);
	}

	/*
	 * If we found one, assign it the kse, otherwise idle the kse.
	 */
	if (td) {
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_add(&runq, ke);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
	} else {
		KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
		ke->ke_state = KES_IDLE;
		ke->ke_thread = NULL;
		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses++;
		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
	}
}

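/*
 * Return nonzero if the system run queue holds anything runnable.
 * A thin wrapper around runq_check() on the global queue.
 */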
int
kserunnable(void)
{
	return (runq_check(&runq));
}

/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
 */
void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td->td_state == TDS_RUNQ),
		("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	td->td_state = TDS_UNQUEUED;
	kg->kg_runnable--;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/* Bring its kse with it, leave the thread attached. */
		runq_remove(&runq, ke);
		ke->ke_state = KES_UNQUEUED;
		return;
	}
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		td->td_kse = NULL;
		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
			KASSERT(td3 != td, ("td3 somehow matched td"));
			/*
			 * Give the next unassigned thread to the KSE
			 * so the number of runnable KSEs remains
			 * constant.
			 */
			td3->td_kse = ke;
			ke->ke_thread = td3;
			kg->kg_last_assigned = td3;
			runq_readjust(&runq, ke);
		} else {
			/*
			 * There is no unassigned thread.
			 * If we were the last assigned one,
			 * adjust the last assigned pointer back
			 * one, which may result in NULL.
			 */
			if (td == td2) {
				kg->kg_last_assigned =
				    TAILQ_PREV(td, threadqueue, td_runq);
			}
			runq_remove(&runq, ke);
			KASSERT((ke->ke_state != KES_IDLE),
			    ("kse already idle"));
			ke->ke_state = KES_IDLE;
			ke->ke_thread = NULL;
			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses++;
		}
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
}


#if 1 /* use the first version */

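/*
 * Add a thread to the run queue.
 * For a bound thread this reduces to queueing its KSE with runq_add().
 * An unbound thread must first find a KSE: a free one from the idle
 * list if possible, else one commandeered from the lowest-priority
 * assigned thread.  The thread is then placed in priority order on the
 * ksegrp run queue and the KSE handed to whichever queued thread now
 * deserves it, with kg_last_assigned updated to match.
 */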
void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
	td->td_state = TDS_RUNQ;
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		KASSERT((td->td_kse != NULL),
		    ("queueing BAD thread to run queue"));
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * Ok, so we are threading with this thread.
	 * We don't have a KSE, see if we can get one..
	 */
	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		/*
		 * We will need a KSE, see if there is one..
		 * First look for a free one, before getting desperate.
		 * If we can't get one, our priority is not high enough..
		 * that's ok..
		 */
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking..
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_UNQUEUED;
			kg->kg_idle_kses--;
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
			runq_remove(&runq, ke);
		}
	} else {
		KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
		KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a ke to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned, so whoever is first
			 * gets the KSE we brought in.. (may be us)
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		runq_add(&runq, ke);
	}
}

#else

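/*
 * Alternate (currently compiled-out) version of setrunqueue() that
 * leaves most of the KSE juggling to kse_reassign().
 */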
void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
	td->td_state = TDS_RUNQ;
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * First add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * The following could be achieved by simply doing:
	 * td->td_kse = NULL; kse_reassign(ke);
	 * but I felt I'd try doing it inline here.
	 * All this work may not be worth it.
	 */
	if ((ke = td->td_kse)) { /* XXXKSE */
		/*
		 * We have a KSE already. See whether we can keep it
		 * or if we need to give it to someone else.
		 * Either way it will need to be inserted into
		 * the runq. kse_reassign() will do this, as will runq_add().
		 */
		if ((kg->kg_last_assigned) &&
		   (kg->kg_last_assigned->td_priority > td->td_priority)) {
			/*
			 * We can definitely keep the KSE,
			 * as the "last assigned thread" has
			 * a lower priority than we do.
			 * The "last assigned" pointer stays the same.
			 */
			runq_add(&runq, ke);
			return;
		}
		/*
		 * Give it to the correct thread,
		 * which may be (often is) us, but may not be.
		 */
		td->td_kse = NULL;
		kse_reassign(ke);
		return;
	}
	/*
	 * There are two cases where KSE adjustment is needed:
	 * usurpation of an already assigned KSE, and assignment
	 * of a previously IDLE KSE.
	 */
	if (kg->kg_idle_kses) {
		/*
		 * If there are unassigned KSEs then we definitely
		 * will be assigned one from the idle KSE list.
		 * If we are the last, we should get the "last
		 * assigned" pointer set to us as well.
		 */
		ke = TAILQ_FIRST(&kg->kg_iq);
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		ke->ke_state = KES_UNQUEUED;
		kg->kg_idle_kses--;
		ke->ke_thread = td;
		td->td_kse = ke;
		runq_add(&runq, ke);
		if (TAILQ_NEXT(td, td_runq) == NULL) {
			kg->kg_last_assigned = td;
		}
	} else if (kg->kg_last_assigned &&
		(kg->kg_last_assigned->td_priority > td->td_priority)) {
		/*
		 * If there were none last-assigned, all KSEs
		 * are actually out running as we speak.
		 * If there was a last assigned, but we didn't see it,
		 * we must be inserting before it, so take the KSE from
		 * the last assigned, and back it up one entry. Then,
		 * assign the KSE to the new thread and adjust its priority.
		 */
		td2 = kg->kg_last_assigned;
		ke = td2->td_kse;
		kg->kg_last_assigned =
		    TAILQ_PREV(td2, threadqueue, td_runq);
		td2->td_kse = NULL;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_readjust(&runq, ke);
	}
}
#endif

/************************************************************************
 * Critical section marker functions					*
 ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}
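
/*
 * Typical usage (a sketch) is a balanced pair around code that must
 * not be preempted on this CPU:
 *
 *	critical_enter();
 *	... touch per-CPU state ...
 *	critical_exit();
 *
 * Sections nest via td_critnest; only the outermost enter/exit pair
 * actually toggles the cpu_critical state.
 */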


/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
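
/*
 * Worked example (assuming 32-bit status words, RQB_BPW == 32): queue
 * index 37 lives in word 1, bit 5.  RQB_FFS() yields the index of the
 * lowest set bit within a word, and (i << RQB_L2BPW) adds back the
 * 32 * i bits covered by the preceding words, so 5 + 32 gives 37.
 */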

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL), ("runq_add: No KSE on thread"));
	if (ke->ke_state == KES_ONRUNQ)
		return;
#if defined(INVARIANTS) && defined(DIAGNOSTIC)
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("runq_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
#endif
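	/*
	 * Priorities are grouped RQ_PPQ per queue; with the stock
	 * RQ_PPQ of 4, for example, a td_priority of 130 maps to
	 * queue index 32.
	 */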
	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;
}

/*
 * Return true if there are any runnable KSEs on the run queue, false
 * otherwise.  Has no side effects and does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find and remove the highest priority KSE from the run queue.
 * If the run queue is empty, return NULL; the caller (choosethread())
 * substitutes the per-cpu idle thread in that case.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		TAILQ_REMOVE(rqh, ke, ke_procq);
		ke->ke_ksegrp->kg_runq_kses--;
		if (TAILQ_EMPTY(rqh)) {
			CTR0(KTR_RUNQ, "runq_choose: empty");
			runq_clrbit(rq, pri);
		}

		ke->ke_state = KES_RUNNING;
		KASSERT((ke->ke_thread != NULL),
		    ("runq_choose: No thread on KSE"));
		KASSERT((ke->ke_thread->td_kse != NULL),
		    ("runq_choose: No KSE on thread"));
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.  The KSE is left
 * in KES_UNQUEUED state; the caller sets its final state.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
	ke->ke_state = KES_UNQUEUED;
	ke->ke_ksegrp->kg_runq_kses--;
}

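/*
 * If the priority of the KSE's thread has changed since the KSE was
 * queued, move the KSE to the run queue matching the new priority.
 */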
static void
runq_readjust(struct runq *rq, struct kse *ke)
{

	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}

#if 0
void
thread_sanity_check(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2;
	unsigned int prevpri;
	int	saw_lastassigned;
	int unassigned;
	int assigned;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (kg != &p->p_ksegrp) {
		panic("wrong ksegrp");
	}

	if (ke) {
		if (ke != &p->p_kse) {
			panic("wrong kse");
		}
		if (ke->ke_thread != td) {
			panic("wrong thread");
		}
	}

	if ((p->p_flag & P_KSES) == 0) {
		if (ke == NULL) {
			panic("non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panic("thread runqueue unsorted");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panic("unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panic("lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panic("last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panic("mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panic("last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panic("where on earth does lastassigned point?");
		}
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (td2->td_state == TDS_RUNQ)) {
				assigned++;
				if (td2->td_kse == NULL) {
					panic("BOUND thread with no KSE");
				}
			}
		}
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panic("wrong number in runnable");
		}
#endif
	}
}
#endif