kern_switch.c revision 131508
1/*
2 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27/***
28Here is the logic:
29
30If there are N processors, then there are at most N KSEs (kernel
31schedulable entities) working to process threads that belong to a
32KSEGROUP (kg). If X of these KSEs are actually running at the
33moment in question, then at most M = N - X of these KSEs can be on
34the run queue, as running KSEs are not on the queue.
35
36Runnable threads are queued off the KSEGROUP in priority order.
37If there are M or more threads runnable, the top M threads
38(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
39their priority from those threads and are put on the run queue.
40
41The last thread that had a priority high enough to have a KSE associated
42with it, AND IS ON THE RUN QUEUE, is pointed to by
43kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
44assigned, either because all the available KSEs are actively running or
45because there are no threads queued, that pointer is NULL.
46
47When a KSE is removed from the run queue in order to run, we know
48it was associated with the highest priority thread in the queue (at the head
49of the queue). If it is also the last assigned, we know M was 1 and must
50now be 0. Since the thread is no longer queued, that pointer must be
51removed from it. Since we know there were no more KSEs available
52(M was 1 and is now 0), and since we are not FREEING our KSE
53but using it, there are STILL no KSEs available, so the next thread in
54the ksegrp list cannot have a KSE assigned to it, and the pointer must
55be made 'invalid' (NULL).
56
57The pointer exists so that when a new thread is made runnable, it can
58have its priority compared with that of the last assigned thread to see
59whether it should 'steal' that thread's KSE or not, i.e. whether it belongs
60'earlier' on the list than that thread or later. If it belongs earlier, the KSE
61is removed from the last assigned thread (which is now not assigned a KSE)
62and reassigned to the new thread, which is placed earlier in the list.
63The pointer is then backed up to the previous thread (which may or may not
64be the new thread).
65
66When a thread sleeps or is removed, the KSE becomes available and if there
67are queued threads that are not assigned KSEs, the highest priority one of
68them is assigned the KSE, which is then placed back on the run queue at
69the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
70to point to it.
71
72The following diagram shows 2 KSEs and 3 threads from a single process.
73
74 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
75              \    \____
76               \        \
77    KSEGROUP---thread--thread--thread    (queued in priority order)
78        \                 /
79         \_______________/
80          (last_assigned)
81
82The result of this scheme is that the M available KSEs are always
83queued at the priorities they have inherited from the M highest priority
84threads for that KSEGROUP. If this situation changes, the KSEs are
85reassigned to keep this true.
86***/
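
/*
 * The following fragment is a minimal, illustrative userland sketch of the
 * "top M threads own the M available KSEs" rule described above.  It is
 * kept under #if 0 (the convention used elsewhere in this file for
 * non-compiled code); struct demo_thread and demo_assign() are invented
 * for illustration only and are not kernel interfaces.
 */
#if 0
struct demo_thread {
	int	pri;		/* numerically lower value == better priority */
	int	has_kse;	/* 1 if a KSE is currently assigned */
};

/*
 * Given 'n' runnable threads sorted by priority and 'm' KSEs that are not
 * currently running, hand the KSEs to the first 'm' threads and return the
 * index of the last thread that received one (the analogue of
 * kg->kg_last_assigned), or -1 if nothing could be assigned.
 */
static int
demo_assign(struct demo_thread *tds, int n, int m)
{
	int i;

	for (i = 0; i < n && i < m; i++)
		tds[i].has_kse = 1;
	return (i - 1);
}
#endif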
87
88#include <sys/cdefs.h>
89__FBSDID("$FreeBSD: head/sys/kern/kern_switch.c 131508 2004-07-03 00:57:43Z marcel $");
90
91#include "opt_full_preemption.h"
92
93#include <sys/param.h>
94#include <sys/systm.h>
95#include <sys/kernel.h>
96#include <sys/ktr.h>
97#include <sys/lock.h>
98#include <sys/mutex.h>
99#include <sys/proc.h>
100#include <sys/queue.h>
101#include <sys/sched.h>
102#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
103#include <sys/smp.h>
104#endif
105#include <machine/critical.h>
106
107CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
108
109void panc(char *string1, char *string2);
110
111#if 0
112static void runq_readjust(struct runq *rq, struct kse *ke);
113#endif
114/************************************************************************
115 * Functions that manipulate runnability from a thread perspective.	*
116 ************************************************************************/
117/*
118 * Select the KSE that will be run next.  From that find the thread, and
119 * remove it from the KSEGRP's run queue.  If there is thread clustering,
120 * this will be what does it.
121 */
122struct thread *
123choosethread(void)
124{
125	struct kse *ke;
126	struct thread *td;
127	struct ksegrp *kg;
128
129#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
130	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
131		/* Shutting down, run idlethread on AP's */
132		td = PCPU_GET(idlethread);
133		ke = td->td_kse;
134		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
135		ke->ke_flags |= KEF_DIDRUN;
136		TD_SET_RUNNING(td);
137		return (td);
138	}
139#endif
140
141retry:
142	ke = sched_choose();
143	if (ke) {
144		td = ke->ke_thread;
145		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
146		kg = ke->ke_ksegrp;
147		if (td->td_proc->p_flag & P_SA) {
148			if (kg->kg_last_assigned == td) {
149				kg->kg_last_assigned = TAILQ_PREV(td,
150				    threadqueue, td_runq);
151			}
152			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
153		}
154		kg->kg_runnable--;
155		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
156		    td, td->td_priority);
157	} else {
158		/* Simulate runq_choose() having returned the idle thread */
159		td = PCPU_GET(idlethread);
160		ke = td->td_kse;
161		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
162	}
163	ke->ke_flags |= KEF_DIDRUN;
164
165	/*
166	 * If we are in panic, only allow system threads,
167	 * plus the one we are running in, to be run.
168	 */
169	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
170	    (td->td_flags & TDF_INPANIC) == 0)) {
171		/* note that it is no longer on the run queue */
172		TD_SET_CAN_RUN(td);
173		goto retry;
174	}
175
176	TD_SET_RUNNING(td);
177	return (td);
178}
179
180/*
181 * Given a surplus KSE, either assign a new runnable thread to it
182 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
183 * Assumes that the original thread is not runnable.
184 */
185void
186kse_reassign(struct kse *ke)
187{
188	struct ksegrp *kg;
189	struct thread *td;
190	struct thread *original;
191
192	mtx_assert(&sched_lock, MA_OWNED);
193	original = ke->ke_thread;
194	KASSERT(original == NULL || TD_IS_INHIBITED(original),
195    	    ("reassigning KSE with runnable thread"));
196	kg = ke->ke_ksegrp;
197	if (original)
198		original->td_kse = NULL;
199
200	/*
201	 * Find the first unassigned thread
202	 */
203	if ((td = kg->kg_last_assigned) != NULL)
204		td = TAILQ_NEXT(td, td_runq);
205	else
206		td = TAILQ_FIRST(&kg->kg_runq);
207
208	/*
209	 * If we found one, assign it the kse, otherwise idle the kse.
210	 */
211	if (td) {
212		kg->kg_last_assigned = td;
213		td->td_kse = ke;
214		ke->ke_thread = td;
215		sched_add(td);
216		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
217		return;
218	}
219
220	ke->ke_state = KES_IDLE;
221	ke->ke_thread = NULL;
222	TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
223	kg->kg_idle_kses++;
224	CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
225	return;
226}
227
228#if 0
229/*
230 * Remove a thread from its KSEGRP's run queue.
231 * This in turn may remove it from a KSE if it was already assigned
232 * to one, possibly causing a new thread to be assigned to the KSE
233 * and the KSE getting a new priority.
234 */
235static void
236remrunqueue(struct thread *td)
237{
238	struct thread *td2, *td3;
239	struct ksegrp *kg;
240	struct kse *ke;
241
242	mtx_assert(&sched_lock, MA_OWNED);
243	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
244	kg = td->td_ksegrp;
245	ke = td->td_kse;
246	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
247	kg->kg_runnable--;
248	TD_SET_CAN_RUN(td);
249	/*
250	 * If it is not a threaded process, take the shortcut.
251	 */
252	if ((td->td_proc->p_flag & P_SA) == 0) {
253		/* Bring its kse with it, leave the thread attached */
254		sched_rem(td);
255		ke->ke_state = KES_THREAD;
256		return;
257	}
258   	td3 = TAILQ_PREV(td, threadqueue, td_runq);
259	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
260	if (ke) {
261		/*
262		 * This thread has been assigned to a KSE.
263		 * We need to dissociate it and try to assign the
264		 * KSE to the next available thread. Then, we should
265		 * see if we need to move the KSE in the run queues.
266		 */
267		sched_rem(td);
268		ke->ke_state = KES_THREAD;
269		td2 = kg->kg_last_assigned;
270		KASSERT((td2 != NULL), ("last assigned has wrong value"));
271		if (td2 == td)
272			kg->kg_last_assigned = td3;
273		kse_reassign(ke);
274	}
275}
276#endif
277
278/*
279 * Change the priority of a thread that is on the run queue.
280 */
281void
282adjustrunqueue(struct thread *td, int newpri)
283{
284	struct ksegrp *kg;
285	struct kse *ke;
286
287	mtx_assert(&sched_lock, MA_OWNED);
288	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
289
290	ke = td->td_kse;
291	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
292	/*
293	 * If it is not a threaded process, take the shortcut.
294	 */
295	if ((td->td_proc->p_flag & P_SA) == 0) {
296		/* We only care about the kse in the run queue. */
297		td->td_priority = newpri;
298		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
299			sched_rem(td);
300			sched_add(td);
301		}
302		return;
303	}
304
305	/* It is a threaded process */
306	kg = td->td_ksegrp;
307	kg->kg_runnable--;
308	TD_SET_CAN_RUN(td);
309	if (ke) {
310		if (kg->kg_last_assigned == td) {
311			kg->kg_last_assigned =
312			    TAILQ_PREV(td, threadqueue, td_runq);
313		}
314		sched_rem(td);
315	}
316	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
317	td->td_priority = newpri;
318	setrunqueue(td);
319}
320
321void
322setrunqueue(struct thread *td)
323{
324	struct kse *ke;
325	struct ksegrp *kg;
326	struct thread *td2;
327	struct thread *tda;
328
329	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
330	mtx_assert(&sched_lock, MA_OWNED);
331	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
332	    ("setrunqueue: bad thread state"));
333	TD_SET_RUNQ(td);
334	kg = td->td_ksegrp;
335	kg->kg_runnable++;
336	if ((td->td_proc->p_flag & P_SA) == 0) {
337		/*
338		 * Common path optimisation: Only one of everything
339		 * and the KSE is always already attached.
340		 * Totally ignore the ksegrp run queue.
341		 */
342		sched_add(td);
343		return;
344	}
345
346	tda = kg->kg_last_assigned;
347	if ((ke = td->td_kse) == NULL) {
348		if (kg->kg_idle_kses) {
349			/*
350			 * There is a free one so it's ours for the asking.
351			 */
352			ke = TAILQ_FIRST(&kg->kg_iq);
353			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
354			ke->ke_state = KES_THREAD;
355			kg->kg_idle_kses--;
356		} else if (tda && (tda->td_priority > td->td_priority)) {
357			/*
358			 * None free, but there is one we can commandeer.
359			 */
360			ke = tda->td_kse;
361			sched_rem(tda);
362			tda->td_kse = NULL;
363			ke->ke_thread = NULL;
364			tda = kg->kg_last_assigned =
365		    	    TAILQ_PREV(tda, threadqueue, td_runq);
366		}
367	} else {
368		/*
369		 * Temporarily disassociate so it looks like the other cases.
370		 */
371		ke->ke_thread = NULL;
372		td->td_kse = NULL;
373	}
374
375	/*
376	 * Add the thread to the ksegrp's run queue at
377	 * the appropriate place.
378	 */
379	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
380		if (td2->td_priority > td->td_priority) {
381			TAILQ_INSERT_BEFORE(td2, td, td_runq);
382			break;
383		}
384	}
385	if (td2 == NULL) {
386		/* We ran off the end of the TAILQ or it was empty. */
387		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
388	}
389
390	/*
391	 * If we have a ke to use, then put it on the run queue and,
392	 * if needed, readjust the last_assigned pointer.
393	 */
394	if (ke) {
395		if (tda == NULL) {
396			/*
397			 * No pre-existing last assigned so whoever is first
398			 * gets the KSE we brought in (maybe us).
399			 */
400			td2 = TAILQ_FIRST(&kg->kg_runq);
401			KASSERT((td2->td_kse == NULL),
402			    ("unexpected ke present"));
403			td2->td_kse = ke;
404			ke->ke_thread = td2;
405			kg->kg_last_assigned = td2;
406		} else if (tda->td_priority > td->td_priority) {
407			/*
408			 * It's ours, grab it, but last_assigned is past us
409			 * so don't change it.
410			 */
411			td->td_kse = ke;
412			ke->ke_thread = td;
413		} else {
414			/*
415			 * We are past last_assigned, so
416			 * put the new kse on whatever is next,
417			 * which may or may not be us.
418			 */
419			td2 = TAILQ_NEXT(tda, td_runq);
420			kg->kg_last_assigned = td2;
421			td2->td_kse = ke;
422			ke->ke_thread = td2;
423		}
424		sched_add(ke->ke_thread);
425	}
426}
427
428/*
429 * Kernel thread preemption implementation.  Critical sections mark
430 * regions of code in which preemptions are not allowed.
431 */
432void
433critical_enter(void)
434{
435	struct thread *td;
436
437	td = curthread;
438	if (td->td_critnest == 0)
439		cpu_critical_enter();
440	td->td_critnest++;
441}
442
443void
444critical_exit(void)
445{
446	struct thread *td;
447
448	td = curthread;
449	KASSERT(td->td_critnest != 0,
450	    ("critical_exit: td_critnest == 0"));
451	if (td->td_critnest == 1) {
452#ifdef PREEMPTION
453		if (td->td_flags & TDF_OWEPREEMPT) {
454			mtx_lock_spin(&sched_lock);
455			mi_switch(SW_INVOL, NULL);
456			mtx_unlock_spin(&sched_lock);
457		}
458#endif
459		td->td_critnest = 0;
460		cpu_critical_exit();
461	} else {
462		td->td_critnest--;
463	}
464}
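
/*
 * Typical usage sketch of the pair above: wrap a short piece of per-CPU
 * work so that it cannot be preempted (and the CPU switched away from it)
 * in the middle.  This fragment is illustrative only and kept under #if 0;
 * do_percpu_work() is a placeholder, not an existing kernel routine.
 */
#if 0
static void
example_percpu_update(void)
{
	critical_enter();
	do_percpu_work(PCPU_GET(cpuid));	/* placeholder for real work */
	critical_exit();
}
#endif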
465
466/*
467 * This function is called when a thread is about to be put on a run queue
468 * because it has been made runnable or its priority has been adjusted.  It
469 * determines whether we should immediately preempt to the new thread.  If
470 * so, it switches to it and eventually returns true.  If not, it returns
471 * false so that the caller may place the thread on an appropriate run queue.
472 */
473int
474maybe_preempt(struct thread *td)
475{
476#ifdef PREEMPTION
477	struct thread *ctd;
478	int cpri, pri;
479#endif
480
481	mtx_assert(&sched_lock, MA_OWNED);
482#ifdef PREEMPTION
483	/*
484	 * The new thread should not preempt the current thread if any of the
485	 * following conditions are true:
486	 *
487	 *  - The current thread has a higher (numerically lower) priority.
488	 *  - It is too early in the boot for context switches (cold is set).
489	 *  - The current thread has an inhibitor set or is in the process of
490	 *    exiting.  In this case, the current thread is about to switch
491	 *    out anyway, so there's no point in preempting.  If we did,
492	 *    the current thread would not be properly resumed either, so
493	 *    just avoid that whole landmine.
494	 *  - The new thread's priority is not an interrupt thread priority,
495	 *    the current thread's priority is not an idle priority, and
496	 *    FULL_PREEMPTION is disabled.
497	 *
498	 * If all of these conditions are false, but the current thread is in
499	 * a nested critical section, then we have to defer the preemption
500	 * until we exit the critical section.  Otherwise, switch immediately
501	 * to the new thread.
502	 */
503	ctd = curthread;
504	pri = td->td_priority;
505	cpri = ctd->td_priority;
506	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
507	    td->td_kse->ke_state != KES_THREAD)
508		return (0);
509#ifndef FULL_PREEMPTION
510	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
511	    !(cpri >= PRI_MIN_IDLE))
512		return (0);
513#endif
514	if (ctd->td_critnest > 1) {
515		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
516		    ctd->td_critnest);
517		ctd->td_flags |= TDF_OWEPREEMPT;
518		return (0);
519	}
520
521	/*
522	 * Our thread state says that we are already on a run queue, so
523	 * update our state as if we had been dequeued by choosethread().
524	 */
525	MPASS(TD_ON_RUNQ(td));
526	TD_SET_RUNNING(td);
527	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
528	    td->td_proc->p_pid, td->td_proc->p_comm);
529	mi_switch(SW_INVOL, td);
530	return (1);
531#else
532	return (0);
533#endif
534}
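
/*
 * Example of the default (!FULL_PREEMPTION) policy implemented above: an
 * interrupt thread that becomes runnable preempts a running time-sharing
 * thread right away (unless we are inside a critical section, in which case
 * TDF_OWEPREEMPT defers the switch to critical_exit()), while a time-sharing
 * thread that becomes runnable at a better priority than the running one
 * does not preempt; maybe_preempt() returns 0 and the caller simply queues
 * it normally.
 */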
535
536#ifndef PREEMPTION
537/* XXX: There should be a non-static version of this. */
538static void
539printf_caddr_t(void *data)
540{
541	printf("%s", (char *)data);
542}
543static char preempt_warning[] =
544    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
545SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
546    preempt_warning)
547#endif
548
549/************************************************************************
550 * SYSTEM RUN QUEUE manipulations and tests				*
551 ************************************************************************/
552/*
553 * Initialize a run structure.
554 */
555void
556runq_init(struct runq *rq)
557{
558	int i;
559
560	bzero(rq, sizeof *rq);
561	for (i = 0; i < RQ_NQS; i++)
562		TAILQ_INIT(&rq->rq_queues[i]);
563}
564
565/*
566 * Clear the status bit of the queue corresponding to priority level pri,
567 * indicating that it is empty.
568 */
569static __inline void
570runq_clrbit(struct runq *rq, int pri)
571{
572	struct rqbits *rqb;
573
574	rqb = &rq->rq_status;
575	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
576	    rqb->rqb_bits[RQB_WORD(pri)],
577	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
578	    RQB_BIT(pri), RQB_WORD(pri));
579	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
580}
581
582/*
583 * Find the index of the first non-empty run queue.  This is done by
584 * scanning the status bits; a set bit indicates a non-empty queue.
585 */
586static __inline int
587runq_findbit(struct runq *rq)
588{
589	struct rqbits *rqb;
590	int pri;
591	int i;
592
593	rqb = &rq->rq_status;
594	for (i = 0; i < RQB_LEN; i++)
595		if (rqb->rqb_bits[i]) {
596			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
597			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
598			    rqb->rqb_bits[i], i, pri);
599			return (pri);
600		}
601
602	return (-1);
603}
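
/*
 * Worked example for the status-bit routines, assuming the usual 32-bit
 * status words (RQB_BPW == 32, RQB_L2BPW == 5): a KSE queued at priority
 * level 38 sets bit 38 & 31 == 6 of word 38 >> 5 == 1, i.e. RQB_WORD(38)
 * is 1 and RQB_BIT(38) is 0x40.  If that is the first set bit found,
 * runq_findbit() returns 6 + (1 << 5) == 38, the lowest-numbered (and
 * therefore best-priority) non-empty queue index.
 */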
604
605/*
606 * Set the status bit of the queue corresponding to priority level pri,
607 * indicating that it is non-empty.
608 */
609static __inline void
610runq_setbit(struct runq *rq, int pri)
611{
612	struct rqbits *rqb;
613
614	rqb = &rq->rq_status;
615	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
616	    rqb->rqb_bits[RQB_WORD(pri)],
617	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
618	    RQB_BIT(pri), RQB_WORD(pri));
619	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
620}
621
622/*
623 * Add the KSE to the queue specified by its priority, and set the
624 * corresponding status bit.
625 */
626void
627runq_add(struct runq *rq, struct kse *ke)
628{
629	struct rqhead *rqh;
630	int pri;
631
632	pri = ke->ke_thread->td_priority / RQ_PPQ;
633	ke->ke_rqindex = pri;
634	runq_setbit(rq, pri);
635	rqh = &rq->rq_queues[pri];
636	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
637	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
638	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
639}
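
/*
 * Worked example for runq_add(), assuming the usual RQ_PPQ == 4 and
 * RQ_NQS == 64: a thread at priority 130 is queued at index 130 / 4 == 32,
 * and priorities 128 through 131 all share that queue.  This is also why
 * adjustrunqueue() above only requeues a KSE when newpri / RQ_PPQ changes
 * the index.
 */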
640
641/*
642 * Return true if there are runnable processes of any priority on the run
643 * queue, false otherwise.  Has no side effects, does not modify the run
644 * queue structure.
645 */
646int
647runq_check(struct runq *rq)
648{
649	struct rqbits *rqb;
650	int i;
651
652	rqb = &rq->rq_status;
653	for (i = 0; i < RQB_LEN; i++)
654		if (rqb->rqb_bits[i]) {
655			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
656			    rqb->rqb_bits[i], i);
657			return (1);
658		}
659	CTR0(KTR_RUNQ, "runq_check: empty");
660
661	return (0);
662}
663
664/*
665 * Find the highest priority process on the run queue.
666 */
667struct kse *
668runq_choose(struct runq *rq)
669{
670	struct rqhead *rqh;
671	struct kse *ke;
672	int pri;
673
674	mtx_assert(&sched_lock, MA_OWNED);
675	while ((pri = runq_findbit(rq)) != -1) {
676		rqh = &rq->rq_queues[pri];
677		ke = TAILQ_FIRST(rqh);
678		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
679		CTR3(KTR_RUNQ,
680		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
681		return (ke);
682	}
683	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
684
685	return (NULL);
686}
687
688/*
689 * Remove the KSE from the queue specified by its priority, and clear the
690 * corresponding status bit if the queue becomes empty.
691 * Caller must set ke->ke_state afterwards.
692 */
693void
694runq_remove(struct runq *rq, struct kse *ke)
695{
696	struct rqhead *rqh;
697	int pri;
698
699	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
700		("runq_remove: process swapped out"));
701	pri = ke->ke_rqindex;
702	rqh = &rq->rq_queues[pri];
703	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
704	    ke, ke->ke_thread->td_priority, pri, rqh);
705	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
706	TAILQ_REMOVE(rqh, ke, ke_procq);
707	if (TAILQ_EMPTY(rqh)) {
708		CTR0(KTR_RUNQ, "runq_remove: empty");
709		runq_clrbit(rq, pri);
710	}
711}
712
713#if 0
714void
715panc(char *string1, char *string2)
716{
717	printf("%s", string1);
718	Debugger(string2);
719}
720
721void
722thread_sanity_check(struct thread *td, char *string)
723{
724	struct proc *p;
725	struct ksegrp *kg;
726	struct kse *ke;
727	struct thread *td2 = NULL;
728	unsigned int prevpri;
729	int	saw_lastassigned = 0;
730	int unassigned = 0;
731	int assigned = 0;
732
733	p = td->td_proc;
734	kg = td->td_ksegrp;
735	ke = td->td_kse;
736
737
738	if (ke) {
739		if (p != ke->ke_proc) {
740			panc(string, "wrong proc");
741		}
742		if (ke->ke_thread != td) {
743			panc(string, "wrong thread");
744		}
745	}
746
747	if ((p->p_flag & P_SA) == 0) {
748		if (ke == NULL) {
749			panc(string, "non KSE thread lost kse");
750		}
751	} else {
752		prevpri = 0;
753		saw_lastassigned = 0;
754		unassigned = 0;
755		assigned = 0;
756		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
757			if (td2->td_priority < prevpri) {
758				panc(string, "thread runqueue unsorted");
759			}
760			if ((td2->td_state == TDS_RUNQ) &&
761			    td2->td_kse &&
762			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
763				panc(string, "KSE wrong state");
764			}
765			prevpri = td2->td_priority;
766			if (td2->td_kse) {
767				assigned++;
768				if (unassigned) {
769					panc(string, "unassigned before assigned");
770				}
771 				if  (kg->kg_last_assigned == NULL) {
772					panc(string, "lastassigned corrupt");
773				}
774				if (saw_lastassigned) {
775					panc(string, "last assigned not last");
776				}
777				if (td2->td_kse->ke_thread != td2) {
778					panc(string, "mismatched kse/thread");
779				}
780			} else {
781				unassigned++;
782			}
783			if (td2 == kg->kg_last_assigned) {
784				saw_lastassigned = 1;
785				if (td2->td_kse == NULL) {
786					panc(string, "last assigned not assigned");
787				}
788			}
789		}
790		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
791			panc(string, "where on earth does lastassigned point?");
792		}
793#if 0
794		FOREACH_THREAD_IN_GROUP(kg, td2) {
795			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
796			    (TD_ON_RUNQ(td2))) {
797				assigned++;
798				if (td2->td_kse == NULL) {
799					panc(string, "BOUND thread with no KSE");
800				}
801			}
802		}
803#endif
804#if 0
805		if ((unassigned + assigned) != kg->kg_runnable) {
806			panc(string, "wrong number in runnable");
807		}
808#endif
809	}
810	if (assigned == 12345) {
811		printf("%p %p %p %p %p %d, %d",
812		    td, td2, ke, kg, p, assigned, saw_lastassigned);
813	}
814}
815#endif
816
817