/*
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/sys/kern/kern_switch.c 99072 2002-06-29 17:26:22Z julian $
 */

/***

Here is the logic:

If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M (= N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.

Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.

The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.

When a KSE is taken off the run queue to be run, we know it was
associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued, that pointer must be
removed from it. Since we know there were no more KSEs available
(M was 1 and is now 0), and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available; we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, so we can show that the pointer must be made 'invalid' (NULL).

The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not, i.e. whether it is 'earlier'
on the list than that thread or later. If it is earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
be the new thread).

When a thread sleeps or is removed, the KSE becomes available and, if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
to point to it.

The following diagram shows 2 KSEs and 3 threads from a single process.

 RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
              \    \____
               \        \
    KSEGROUP---thread--thread--thread    (queued in priority order)
        \                 /
         \_______________/
          (last_assigned)

The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.  An illustrative sketch of this invariant
follows immediately after this comment.

*/
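/*
 * Purely illustrative sketch (kept out of compilation): it restates the
 * invariant described above by walking a ksegrp's thread run queue and
 * checking that every thread up to and including kg_last_assigned carries
 * a KSE while every thread after it does not.  It only uses fields that
 * this file already manipulates; thread_sanity_check() below performs a
 * more thorough version of the same test.
 */
#if 0
static void
ksegrp_assign_invariant(struct ksegrp *kg)
{
	struct thread *td;
	int assigned_region;

	/* If nothing is assigned, no thread on the queue may have a KSE. */
	assigned_region = (kg->kg_last_assigned != NULL);
	TAILQ_FOREACH(td, &kg->kg_runq, td_runq) {
		if (assigned_region)
			KASSERT((td->td_kse != NULL),
			    ("thread before last_assigned has no KSE"));
		else
			KASSERT((td->td_kse == NULL),
			    ("thread after last_assigned has a KSE"));
		if (td == kg->kg_last_assigned)
			assigned_region = 0;	/* Everything after is bare. */
	}
}
#endif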

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <machine/critical.h>

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * Global run queue.
 */
static struct runq runq;
SYSINIT(runq, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, runq_init, &runq)

static void runq_readjust(struct runq *rq, struct kse *ke);
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/

/*
 * Select the KSE that will be run next.  From that find the thread, and
 * remove it from the KSEGRP's run queue.  If there is thread clustering,
 * this will be what does it.
 */
struct thread *
choosethread(void)
{
	struct kse *ke;
	struct thread *td;
	struct ksegrp *kg;

	if ((ke = runq_choose(&runq))) {
		td = ke->ke_thread;
		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
		kg = ke->ke_ksegrp;
		if (td->td_flags & TDF_UNBOUND) {
			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
			if (kg->kg_last_assigned == td) {
				if (TAILQ_PREV(td, threadqueue, td_runq)
				    != NULL)
					printf("Yo MAMA!\n");
				kg->kg_last_assigned = TAILQ_PREV(td,
				    threadqueue, td_runq);
			}
			/*
			 * If we have started running an upcall,
			 * then TDF_UNBOUND was set because the thread was
			 * created without a KSE. Now that we have one,
			 * and it is our time to run, we make sure
			 * that BOUND semantics apply for the rest of
			 * the journey to userland, and into the UTS.
			 */
#ifdef	NOTYET
			if (td->td_flags & TDF_UPCALLING)
				td->td_flags &= ~TDF_UNBOUND;
#endif
		}
		kg->kg_runnable--;
		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
		    td, td->td_priority);
	} else {
		/* Pretend the idle thread was on the run queue. */
		td = PCPU_GET(idlethread);
		/* Simulate that it was on the run queue */
		td->td_state = TDS_RUNQ;
		td->td_kse->ke_state = KES_UNQUEUED;
		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
	}
	thread_sanity_check(td);
	return (td);
}

/*
 * Given a KSE (now surplus), either assign a new runnable thread to it
 * (and put it in the run queue) or put it in the ksegrp's idle KSE list.
 * Assumes the KSE is not linked to any threads any more (has been cleaned).
 */
void
kse_reassign(struct kse *ke)
{
	struct ksegrp *kg;
	struct thread *td;

	kg = ke->ke_ksegrp;

	KASSERT((ke->ke_state != KES_ONRUNQ), ("kse_reassigning non-free kse"));
	/*
	 * Find the first unassigned thread.
	 * If there is a 'last assigned' then see what's next,
	 * otherwise look at what is first.
	 */
	if ((td = kg->kg_last_assigned)) {
		td = TAILQ_NEXT(td, td_runq);
	} else {
		td = TAILQ_FIRST(&kg->kg_runq);
	}

	/*
	 * If we found one, assign it the KSE; otherwise idle the KSE.
	 */
	if (td) {
		thread_sanity_check(td);
		kg->kg_last_assigned = td;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_add(&runq, ke);
		CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td);
	} else {
		KASSERT((ke->ke_state != KES_IDLE), ("kse already idle"));
		KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
		ke->ke_state = KES_IDLE;
		ke->ke_thread = NULL;
		TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
		kg->kg_idle_kses++;
		CTR1(KTR_RUNQ, "kse_reassign: ke%p idled", ke);
		KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self2!"));
	}
}
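
/*
 * Hypothetical caller sketch (not compiled): a thread that is giving up
 * its KSE, for example because it is blocking, detaches both sides of the
 * pairing and hands the now-surplus KSE to kse_reassign(), which either
 * queues it behind the next unassigned runnable thread or parks it on the
 * ksegrp's idle list.  The second setrunqueue() implementation below notes
 * the same idiom ("td->td_kse = NULL; kse_reassign(ke);").
 */
#if 0
	ke = td->td_kse;
	td->td_kse = NULL;
	ke->ke_thread = NULL;
	kse_reassign(ke);
#endif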

int
kserunnable(void)
{
	return runq_check(&runq);
}

/*
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may dissociate it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority (unless it's a BOUND thread/KSE pair).
 */
void
remrunqueue(struct thread *td)
{
	struct thread *td2, *td3;
	struct ksegrp *kg;
	struct kse *ke;

	mtx_assert(&sched_lock, MA_OWNED);
	thread_sanity_check(td);
	KASSERT((td->td_state == TDS_RUNQ),
		("remrunqueue: Bad state on run queue"));
	kg = td->td_ksegrp;
	ke = td->td_kse;
	/*
	 * If it's a bound thread/KSE pair, take the shortcut. All non-KSE
	 * threads are BOUND.
	 */
	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
	td->td_state = TDS_UNQUEUED;
	kg->kg_runnable--;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/* Bring its kse with it, leave the thread attached */
		runq_remove(&runq, ke);
		ke->ke_state = KES_UNQUEUED;
		return;
	}
	if (ke) {
		/*
		 * This thread has been assigned to a KSE.
		 * We need to dissociate it and try to assign the
		 * KSE to the next available thread. Then, we should
		 * see if we need to move the KSE in the run queues.
		 */
		td2 = kg->kg_last_assigned;
		KASSERT((td2 != NULL), ("last assigned has wrong value"));
		td->td_kse = NULL;
		if ((td3 = TAILQ_NEXT(td2, td_runq))) {
			KASSERT(td3 != td, ("td3 somehow matched td"));
			/*
			 * Give the next unassigned thread to the KSE
			 * so the number of runnable KSEs remains
			 * constant.
			 */
			td3->td_kse = ke;
			ke->ke_thread = td3;
			kg->kg_last_assigned = td3;
			runq_readjust(&runq, ke);
		} else {
			/*
			 * There is no unassigned thread.
			 * If we were the last assigned one,
			 * adjust the last assigned pointer back
			 * one, which may result in NULL.
			 */
			if (td == td2) {
				kg->kg_last_assigned =
				    TAILQ_PREV(td, threadqueue, td_runq);
			}
			runq_remove(&runq, ke);
			KASSERT((ke->ke_kgrlist.tqe_next != ke),
			    ("linked to self!"));
			KASSERT((ke->ke_state != KES_IDLE),
			    ("kse already idle"));
			ke->ke_state = KES_IDLE;
			ke->ke_thread = NULL;
			KASSERT((TAILQ_FIRST(&kg->kg_iq) != ke),
			    ("really bad screwup"));
			TAILQ_INSERT_HEAD(&kg->kg_iq, ke, ke_kgrlist);
			kg->kg_idle_kses++;
			KASSERT((ke->ke_kgrlist.tqe_next != ke),
			    ("linked to self2!"));
		}
	}
	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
	thread_sanity_check(td);
}
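
/*
 * Worked example (illustrative values only): suppose kg_runq holds
 * C (pri 2, has a KSE), A (pri 4, has a KSE) and B (pri 8, no KSE), with
 * kg_last_assigned == A.  remrunqueue(A) dissociates A's KSE, finds the
 * next unassigned thread B, hands the KSE to B, advances kg_last_assigned
 * to B and requeues the KSE at B's priority via runq_readjust().  Had
 * there been no thread after A, the KSE would instead have been idled on
 * kg_iq and kg_last_assigned backed up to C.
 */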

#if 1 /* use the first version */

void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;
	struct thread *tda;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	mtx_assert(&sched_lock, MA_OWNED);
	thread_sanity_check(td);
	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
	td->td_state = TDS_RUNQ;
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		KASSERT((td->td_kse != NULL),
		    ("queueing BAD thread to run queue"));
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * Ok, so we are threading with this thread.
	 * We don't have a KSE, see if we can get one..
	 */
	tda = kg->kg_last_assigned;
	if ((ke = td->td_kse) == NULL) {
		/*
		 * We will need a KSE, see if there is one..
		 * First look for a free one, before getting desperate.
		 * If we can't get one, our priority is not high enough..
		 * that's ok..
		 */
		if (kg->kg_idle_kses) {
			/*
			 * There is a free one so it's ours for the asking..
			 */
			ke = TAILQ_FIRST(&kg->kg_iq);
			KASSERT((ke->ke_kgrlist.tqe_next != ke),
			    ("linked to self3!"));
			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
			ke->ke_state = KES_UNQUEUED;
			kg->kg_idle_kses--;
			KASSERT((ke->ke_kgrlist.tqe_next != ke),
			    ("linked to self4!"));
		} else if (tda && (tda->td_priority > td->td_priority)) {
			/*
			 * None free, but there is one we can commandeer.
			 */
			ke = tda->td_kse;
			tda->td_kse = NULL;
			ke->ke_thread = NULL;
			tda = kg->kg_last_assigned =
			    TAILQ_PREV(tda, threadqueue, td_runq);
			runq_remove(&runq, ke);
			KASSERT((ke->ke_kgrlist.tqe_next != ke),
			    ("linked to self5!"));
		}
	} else {
		KASSERT(ke->ke_thread == td, ("KSE/thread mismatch"));
		KASSERT(ke->ke_state != KES_IDLE, ("KSE unexpectedly idle"));
		ke->ke_thread = NULL;
		td->td_kse = NULL;
	}

	/*
	 * Add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * If we have a KSE to use, then put it on the run queue and,
	 * if needed, readjust the last_assigned pointer.
	 */
	if (ke) {
		if (tda == NULL) {
			/*
			 * No pre-existing last assigned so whoever is first
			 * gets the KSE we brought in (it may be us).
			 */
			td2 = TAILQ_FIRST(&kg->kg_runq);
			KASSERT((td2->td_kse == NULL),
			    ("unexpected ke present"));
			td2->td_kse = ke;
			ke->ke_thread = td2;
			kg->kg_last_assigned = td2;
		} else if (tda->td_priority > td->td_priority) {
			/*
			 * It's ours, grab it, but last_assigned is past us
			 * so don't change it.
			 */
			td->td_kse = ke;
			ke->ke_thread = td;
		} else {
			/*
			 * We are past last_assigned, so
			 * put the new kse on whatever is next,
			 * which may or may not be us.
			 */
			td2 = TAILQ_NEXT(tda, td_runq);
			kg->kg_last_assigned = td2;
			td2->td_kse = ke;
			ke->ke_thread = td2;
		}
		runq_add(&runq, ke);
	}
	thread_sanity_check(td);
}
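
/*
 * Worked example of the assignment cases above (illustrative values only):
 * suppose kg_runq holds A (pri 4) and B (pri 8), both with KSEs, so
 * kg_last_assigned == B, and no KSEs are idle.  An unbound thread C with
 * pri 2 is now set runnable: B's KSE is commandeered, kg_last_assigned
 * backs up to A, C is inserted at the head of the queue, and since A (the
 * new last assigned) has lower priority than C, C keeps the commandeered
 * KSE while kg_last_assigned stays at A.  Had C instead had pri 10 with an
 * idle KSE available, the idle KSE would have been taken and, because C
 * lands after kg_last_assigned, handed to the thread following B (C
 * itself), moving kg_last_assigned forward to C.
 */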

#else

void
setrunqueue(struct thread *td)
{
	struct kse *ke;
	struct ksegrp *kg;
	struct thread *td2;

	CTR1(KTR_RUNQ, "setrunqueue: td%p", td);
	KASSERT((td->td_state != TDS_RUNQ), ("setrunqueue: bad thread state"));
	td->td_state = TDS_RUNQ;
	kg = td->td_ksegrp;
	kg->kg_runnable++;
	if ((td->td_flags & TDF_UNBOUND) == 0) {
		/*
		 * Common path optimisation: Only one of everything
		 * and the KSE is always already attached.
		 * Totally ignore the ksegrp run queue.
		 */
		runq_add(&runq, td->td_kse);
		return;
	}
	/*
	 * First add the thread to the ksegrp's run queue at
	 * the appropriate place.
	 */
	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
		if (td2->td_priority > td->td_priority) {
			TAILQ_INSERT_BEFORE(td2, td, td_runq);
			break;
		}
	}
	if (td2 == NULL) {
		/* We ran off the end of the TAILQ or it was empty. */
		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
	}

	/*
	 * The following could be achieved by simply doing:
	 * td->td_kse = NULL; kse_reassign(ke);
	 * but I felt that I'd try to do it inline here.
	 * All this work may not be worth it.
	 */
	if ((ke = td->td_kse)) { /* XXXKSE */
		/*
		 * We have a KSE already. See whether we can keep it
		 * or if we need to give it to someone else.
		 * Either way it will need to be inserted into
		 * the runq. kse_reassign() will do this as will runq_add().
		 */
		if ((kg->kg_last_assigned) &&
		   (kg->kg_last_assigned->td_priority > td->td_priority)) {
			/*
			 * We can definitely keep the KSE
			 * as the "last assigned thread" has
			 * lower priority than we do.
			 * The "last assigned" pointer stays the same.
			 */
			runq_add(&runq, ke);
			return;

		}
		/*
		 * Give it to the correct thread,
		 * which may be (often is) us, but may not be.
		 */
		td->td_kse = NULL;
		kse_reassign(ke);
		return;
	}
	/*
	 * There are two cases where KSE adjustment is needed.
	 * Usurpation of an already assigned KSE, and assignment
	 * of a previously IDLE KSE.
	 */
	if (kg->kg_idle_kses) {
		/*
		 * If there are unassigned KSEs then we definitely
		 * will be assigned one from the idle KSE list.
		 * If we are the last, we should get the "last
		 * assigned" pointer set to us as well.
		 */
		ke = TAILQ_FIRST(&kg->kg_iq);
		KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
		TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
		ke->ke_state = KES_UNQUEUED;
		kg->kg_idle_kses--;
		KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
		ke->ke_thread = td;
		td->td_kse = ke;
		runq_add(&runq, ke);
		KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
		if (TAILQ_NEXT(td, td_runq) == NULL) {
			kg->kg_last_assigned = td;
		}
	} else if (kg->kg_last_assigned &&
		(kg->kg_last_assigned->td_priority > td->td_priority)) {
		/*
		 * If there were none last-assigned, all KSEs
		 * are actually out running as we speak.
		 * If there was a last assigned, but we didn't see it,
		 * we must be inserting before it, so take the KSE from
		 * the last assigned, and back it up one entry. Then,
		 * assign the KSE to the new thread and adjust its priority.
		 */
		td2 = kg->kg_last_assigned;
		ke = td2->td_kse;
		KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
		kg->kg_last_assigned =
		    TAILQ_PREV(td2, threadqueue, td_runq);
		td2->td_kse = NULL;
		td->td_kse = ke;
		ke->ke_thread = td;
		runq_readjust(&runq, ke);
		KASSERT((ke->ke_kgrlist.tqe_next != ke), ("linked to self!"));
	}
}
#endif

/************************************************************************
 * Critical section marker functions					*
 ************************************************************************/
/* Critical sections that prevent preemption. */
void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();
	} else {
		td->td_critnest--;
	}
}
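
/*
 * Usage sketch (illustrative, not compiled): critical sections nest, so
 * only the outermost critical_enter()/critical_exit() pair touches the
 * hardware state via cpu_critical_enter()/cpu_critical_exit().
 */
#if 0
	critical_enter();	/* td_critnest 0 -> 1, enters the section */
	critical_enter();	/* nested, 1 -> 2, no cpu_critical_enter() */
	/* ... access per-CPU data without fear of preemption ... */
	critical_exit();	/* 2 -> 1, still inside the section */
	critical_exit();	/* 1 -> 0, cpu_critical_exit() is called */
#endif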


/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof *rq);
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicating a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
	struct rqbits *rqb;
	int pri;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
			CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
			    rqb->rqb_bits[i], i, pri);
			return (pri);
		}

	return (-1);
}
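
/*
 * Worked example (assuming 32-bit status words, i.e. RQB_BPW == 32 and
 * RQB_L2BPW == 5): a KSE at priority level 37 lives in word RQB_WORD(37)
 * == 1 under bit RQB_BIT(37) == 1 << 5.  If no lower level is occupied,
 * runq_findbit() recovers the level as RQB_FFS(rqb_bits[1]) +
 * (1 << RQB_L2BPW) == 5 + 32 == 37.
 */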

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
	struct rqbits *rqb;

	rqb = &rq->rq_status;
	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
	    rqb->rqb_bits[RQB_WORD(pri)],
	    rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
	    RQB_BIT(pri), RQB_WORD(pri));
	rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	KASSERT((ke->ke_thread != NULL), ("runq_add: No thread on KSE"));
	KASSERT((ke->ke_thread->td_kse != NULL),
	    ("runq_add: No KSE on thread"));
	if (ke->ke_state == KES_ONRUNQ)
		return;
#if defined(INVARIANTS) && defined(DIAGNOSTIC)
	KASSERT(ke->ke_state != KES_ONRUNQ,
	    ("runq_add: kse %p (%s) already in run queue", ke,
	    ke->ke_proc->p_comm));
#endif
	pri = ke->ke_thread->td_priority / RQ_PPQ;
	ke->ke_rqindex = pri;
	runq_setbit(rq, pri);
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_add: p=%p pri=%d %d rqh=%p",
	    ke->ke_proc, ke->ke_thread->td_priority, pri, rqh);
	TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
	ke->ke_ksegrp->kg_runq_kses++;
	ke->ke_state = KES_ONRUNQ;
}
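
/*
 * Worked example (assuming the usual RQ_PPQ == 4, i.e. four priority
 * levels per queue): a thread at td_priority 123 is placed on
 * rq_queues[123 / 4] == rq_queues[30], which it shares with priorities
 * 120 through 123.  KSEs within one queue are serviced first-in,
 * first-out, since runq_add() appends at the tail and runq_choose()
 * takes from the head.
 */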

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
	struct rqbits *rqb;
	int i;

	rqb = &rq->rq_status;
	for (i = 0; i < RQB_LEN; i++)
		if (rqb->rqb_bits[i]) {
			CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
			    rqb->rqb_bits[i], i);
			return (1);
		}
	CTR0(KTR_RUNQ, "runq_check: empty");

	return (0);
}

/*
 * Find and remove the highest priority KSE from the run queue.
 * If there are no runnable KSEs, NULL is returned; the caller
 * (choosethread()) then falls back to the per-cpu idle thread.
 */
struct kse *
runq_choose(struct runq *rq)
{
	struct rqhead *rqh;
	struct kse *ke;
	int pri;

	mtx_assert(&sched_lock, MA_OWNED);
	while ((pri = runq_findbit(rq)) != -1) {
		rqh = &rq->rq_queues[pri];
		ke = TAILQ_FIRST(rqh);
		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
		CTR3(KTR_RUNQ,
		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
		KASSERT(ke->ke_procq.tqe_prev != NULL, ("no prev"));
		if (ke->ke_procq.tqe_next)
			KASSERT(ke->ke_procq.tqe_next->ke_procq.tqe_prev !=
			    NULL, ("no next"));
		TAILQ_REMOVE(rqh, ke, ke_procq);
		ke->ke_ksegrp->kg_runq_kses--;
		if (TAILQ_EMPTY(rqh)) {
			CTR0(KTR_RUNQ, "runq_choose: empty");
			runq_clrbit(rq, pri);
		}

		ke->ke_state = KES_RUNNING;
		KASSERT((ke->ke_thread != NULL),
		    ("runq_choose: No thread on KSE"));
		KASSERT((ke->ke_thread->td_kse != NULL),
		    ("runq_choose: No KSE on thread"));
		return (ke);
	}
	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);

	return (NULL);
}

/*
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The KSE is left in KES_UNQUEUED state; the caller sets any new state.
 */
void
runq_remove(struct runq *rq, struct kse *ke)
{
	struct rqhead *rqh;
	int pri;

	KASSERT((ke->ke_state == KES_ONRUNQ), ("KSE not on run queue"));
	mtx_assert(&sched_lock, MA_OWNED);
	pri = ke->ke_rqindex;
	rqh = &rq->rq_queues[pri];
	CTR4(KTR_RUNQ, "runq_remove: p=%p pri=%d %d rqh=%p",
	    ke, ke->ke_thread->td_priority, pri, rqh);
	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
	TAILQ_REMOVE(rqh, ke, ke_procq);
	if (TAILQ_EMPTY(rqh)) {
		CTR0(KTR_RUNQ, "runq_remove: empty");
		runq_clrbit(rq, pri);
	}
	ke->ke_state = KES_UNQUEUED;
	ke->ke_ksegrp->kg_runq_kses--;
}

static void
runq_readjust(struct runq *rq, struct kse *ke)
{

	if (ke->ke_rqindex != (ke->ke_thread->td_priority / RQ_PPQ)) {
		runq_remove(rq, ke);
		runq_add(rq, ke);
	}
}
777
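/*
 * Consistency checks on the thread/KSE/ksegrp linkage; panics on the
 * first inconsistency found.  Called liberally from the routines above.
 */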
void
thread_sanity_check(struct thread *td)
{
	struct proc *p;
	struct ksegrp *kg;
	struct kse *ke;
	struct thread *td2;
	unsigned int prevpri;
	int saw_lastassigned;
	int unassigned;
	int assigned;

	p = td->td_proc;
	kg = td->td_ksegrp;
	ke = td->td_kse;

	if (kg != &p->p_ksegrp) {
		panic("wrong ksegrp");
	}

	if (ke) {
		if (ke != &p->p_kse) {
			panic("wrong kse");
		}
		if (ke->ke_thread != td) {
			panic("wrong thread");
		}
	}

	if ((p->p_flag & P_KSES) == 0) {
		if (ke == NULL) {
			panic("non KSE thread lost kse");
		}
	} else {
		prevpri = 0;
		saw_lastassigned = 0;
		unassigned = 0;
		assigned = 0;
		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
			if (td2->td_priority < prevpri) {
				panic("thread runqueue unsorted");
			}
			prevpri = td2->td_priority;
			if (td2->td_kse) {
				assigned++;
				if (unassigned) {
					panic("unassigned before assigned");
				}
				if (kg->kg_last_assigned == NULL) {
					panic("lastassigned corrupt");
				}
				if (saw_lastassigned) {
					panic("last assigned not last");
				}
				if (td2->td_kse->ke_thread != td2) {
					panic("mismatched kse/thread");
				}
			} else {
				unassigned++;
			}
			if (td2 == kg->kg_last_assigned) {
				saw_lastassigned = 1;
				if (td2->td_kse == NULL) {
					panic("last assigned not assigned");
				}
			}
		}
		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
			panic("where on earth does lastassigned point?");
		}
		FOREACH_THREAD_IN_GROUP(kg, td2) {
			if (((td2->td_flags & TDF_UNBOUND) == 0) &&
			    (td2->td_state == TDS_RUNQ)) {
				assigned++;
				if (td2->td_kse == NULL) {
					panic("BOUND thread with no KSE");
				}
			}
		}
#if 0
		if ((unassigned + assigned) != kg->kg_runnable) {
			panic("wrong number in runnable");
		}
#endif
	}
}
