kern_switch.c revision 67365
150027Speter/*
250027Speter * Copyright (c) 1999 Peter Wemm <peter@FreeBSD.org>
350027Speter * All rights reserved.
450027Speter *
550027Speter * Redistribution and use in source and binary forms, with or without
650027Speter * modification, are permitted provided that the following conditions
750027Speter * are met:
850027Speter * 1. Redistributions of source code must retain the above copyright
950027Speter *    notice, this list of conditions and the following disclaimer.
1050027Speter * 2. Redistributions in binary form must reproduce the above copyright
1150027Speter *    notice, this list of conditions and the following disclaimer in the
1250027Speter *    documentation and/or other materials provided with the distribution.
1350027Speter *
1450027Speter * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1550027Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1650027Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1750027Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1850027Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1950027Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2050027Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2150027Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2250027Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2350027Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2450027Speter * SUCH DAMAGE.
2550027Speter *
2650027Speter * $FreeBSD: head/sys/kern/kern_switch.c 67365 2000-10-20 07:58:15Z jhb $
2750027Speter */
2850027Speter
2950027Speter#include <sys/param.h>
3050027Speter#include <sys/systm.h>
3150027Speter#include <sys/kernel.h>
3265557Sjasone#include <sys/ktr.h>
3367365Sjhb#include <sys/mutex.h>
3450027Speter#include <sys/proc.h>
3550027Speter#include <sys/rtprio.h>
3650027Speter#include <sys/queue.h>
3750027Speter
/*
 * We have NQS (32) run queues per scheduling class.  For the normal
 * class, there are 128 priorities scaled onto these 32 queues.  New
 * processes are added to the last entry in each queue, and processes
 * are selected for running by taking them from the head and maintaining
 * a simple FIFO arrangement.
 *
 * Interrupt, real time and idle priority processes have an explicit
 * 0-31 priority which maps directly onto their class queue index.
 * When a queue has something in it, the corresponding bit is set in
 * the queuebits variable, allowing a single read to determine the
 * state of all 32 queues and then a ffs() to find the first busy
 * queue.
 *
 * XXX This needs fixing.  First, we only have one idle process, so we
 * hardly need 32 queues for it.  Secondly, the number of classes
 * makes things unwieldy.  We should be able to merge them into a
 * single 96 or 128 entry queue.
 */
struct rq itqueues[NQS];		/* interrupt threads */
struct rq rtqueues[NQS];		/* real time processes */
struct rq queues[NQS];			/* time sharing processes */
struct rq idqueues[NQS];		/* idle process */
u_int32_t itqueuebits;			/* bit i set <=> itqueues[i] non-empty */
u_int32_t rtqueuebits;			/* bit i set <=> rtqueues[i] non-empty */
u_int32_t queuebits;			/* bit i set <=> queues[i] non-empty */
u_int32_t idqueuebits;			/* bit i set <=> idqueues[i] non-empty */
6550027Speter
6650027Speter/*
6750027Speter * Initialize the run queues at boot time.
6850027Speter */
6950027Speterstatic void
7050027Speterrqinit(void *dummy)
7150027Speter{
7250027Speter	int i;
7350027Speter
7450027Speter	for (i = 0; i < NQS; i++) {
7565557Sjasone		TAILQ_INIT(&itqueues[i]);
7665557Sjasone		TAILQ_INIT(&rtqueues[i]);
7750027Speter		TAILQ_INIT(&queues[i]);
7850027Speter		TAILQ_INIT(&idqueues[i]);
7950027Speter	}
8050027Speter}
8150027SpeterSYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL)
8250027Speter
/*
 * setrunqueue() examines a process priority and class and inserts it on
 * the tail of its appropriate run queue (based on class and priority).
 * This sets the queue busy bit.
 * The process must be runnable.
 * This must be called at splhigh().
 */
9050027Spetervoid
9150027Spetersetrunqueue(struct proc *p)
9250027Speter{
9350027Speter	struct rq *q;
9450027Speter	u_int8_t pri;
9550027Speter
9665557Sjasone	mtx_assert(&sched_lock, MA_OWNED);
9765557Sjasone	KASSERT(p->p_stat == SRUN, ("setrunqueue: proc %p (%s) not SRUN", p, \
9865557Sjasone	    p->p_comm));
9965557Sjasone
10065557Sjasone	/*
10165557Sjasone	 * Decide which class we want to run.  We now have four
10265557Sjasone	 * queues, and this is becoming ugly.  We should be able to
10365557Sjasone	 * collapse the first three classes into a single contiguous
10465557Sjasone	 * queue.  XXX FIXME.
10565557Sjasone	 */
10665764Sjhb	CTR4(KTR_PROC, "setrunqueue: proc %p (pid %d, %s), schedlock %lx",
10765764Sjhb		p, p->p_pid, p->p_comm, (long)sched_lock.mtx_lock);
10865557Sjasone	if (p->p_rtprio.type == RTP_PRIO_ITHREAD) {	/* interrupt thread */
10965557Sjasone		pri = p->p_rtprio.prio;
11065557Sjasone		q = &itqueues[pri];
11165557Sjasone		itqueuebits |= 1 << pri;
11265557Sjasone	} else if (p->p_rtprio.type == RTP_PRIO_REALTIME || /* real time */
11350056Speter		   p->p_rtprio.type == RTP_PRIO_FIFO) {
11450027Speter		pri = p->p_rtprio.prio;
11550027Speter		q = &rtqueues[pri];
11650027Speter		rtqueuebits |= 1 << pri;
11765557Sjasone	} else if (p->p_rtprio.type == RTP_PRIO_NORMAL) {   /* time sharing */
11865557Sjasone		pri = p->p_priority >> 2;
11965557Sjasone		q = &queues[pri];
12065557Sjasone		queuebits |= 1 << pri;
12165557Sjasone	} else if (p->p_rtprio.type == RTP_PRIO_IDLE) {	    /* idle proc */
12250027Speter		pri = p->p_rtprio.prio;
12350027Speter		q = &idqueues[pri];
12450027Speter		idqueuebits |= 1 << pri;
12550027Speter	} else {
12665557Sjasone		panic("setrunqueue: invalid rtprio type %d", p->p_rtprio.type);
12750027Speter	}
12850027Speter	p->p_rqindex = pri;		/* remember the queue index */
12950027Speter	TAILQ_INSERT_TAIL(q, p, p_procq);
13050027Speter}
13150027Speter
13250027Speter/*
13350027Speter * remrunqueue() removes a given process from the run queue that it is on,
13450027Speter * clearing the queue busy bit if it becomes empty.
13550027Speter * This must be called at splhigh().
13650027Speter */
13750027Spetervoid
13850027Speterremrunqueue(struct proc *p)
13950027Speter{
14050027Speter	struct rq *q;
14150027Speter	u_int32_t *which;
14250027Speter	u_int8_t pri;
14350027Speter
14465764Sjhb	CTR4(KTR_PROC, "remrunqueue: proc %p (pid %d, %s), schedlock %lx",
14565764Sjhb		p, p->p_pid, p->p_comm, (long)sched_lock.mtx_lock);
14665557Sjasone	mtx_assert(&sched_lock, MA_OWNED);
14750027Speter	pri = p->p_rqindex;
14865557Sjasone	if (p->p_rtprio.type == RTP_PRIO_ITHREAD) {
14965557Sjasone		q = &itqueues[pri];
15065557Sjasone		which = &itqueuebits;
15150056Speter	} else if (p->p_rtprio.type == RTP_PRIO_REALTIME ||
15250056Speter		   p->p_rtprio.type == RTP_PRIO_FIFO) {
15350027Speter		q = &rtqueues[pri];
15450027Speter		which = &rtqueuebits;
15565557Sjasone	} else if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
15665557Sjasone		q = &queues[pri];
15765557Sjasone		which = &queuebits;
15850056Speter	} else if (p->p_rtprio.type == RTP_PRIO_IDLE) {
15950027Speter		q = &idqueues[pri];
16050027Speter		which = &idqueuebits;
16150027Speter	} else {
16250027Speter		panic("remrunqueue: invalid rtprio type");
16350027Speter	}
16450027Speter	TAILQ_REMOVE(q, p, p_procq);
16550027Speter	if (TAILQ_EMPTY(q)) {
16650027Speter		KASSERT((*which & (1 << pri)) != 0,
16750027Speter			("remrunqueue: remove from empty queue"));
16850027Speter		*which &= ~(1 << pri);
16950027Speter	}
17050027Speter}
17150027Speter
17250027Speter/*
17350027Speter * procrunnable() returns a boolean true (non-zero) value if there are
17450027Speter * any runnable processes.  This is intended to be called from the idle
17550027Speter * loop to avoid the more expensive (and destructive) chooseproc().
17658717Sdillon *
17758717Sdillon * MP SAFE.  CALLED WITHOUT THE MP LOCK
17865557Sjasone *
17965557Sjasone * XXX I doubt this.  It's possibly fail-safe, but there's obviously
18065557Sjasone * the case here where one of the bits words gets loaded, the
18165557Sjasone * processor gets preempted, and by the time it returns from this
18265557Sjasone * function, some other processor has picked the runnable process.
18365557Sjasone * What am I missing?  (grog, 23 July 2000).
18450027Speter */
18550027Speteru_int32_t
18650027Speterprocrunnable(void)
18750027Speter{
18865557Sjasone	return (itqueuebits || rtqueuebits || queuebits || idqueuebits);
18950027Speter}
19050027Speter
/*
 * chooseproc() selects the next process to run.  Ideally, cpu_switch()
 * would have determined that there is a process available before calling
 * this, but it is not a requirement.  The selected process is removed
 * from its queue, and the queue busy bit is cleared if it becomes empty.
 * This must be called at splhigh().
 *
 * For SMP, trivial affinity is implemented by locating the first process
 * on the queue that has a matching lastcpu id.  Since normal priorities
 * are mapped four priority levels per queue, this may allow the cpu to
 * choose a slightly lower priority process in order to preserve the cpu
 * caches.
 */
20450027Speterstruct proc *
20550027Speterchooseproc(void)
20650027Speter{
20750027Speter	struct proc *p;
20850027Speter	struct rq *q;
20950027Speter	u_int32_t *which;
21050027Speter	u_int32_t pri;
21150027Speter#ifdef SMP
21250027Speter	u_char id;
21350027Speter#endif
21450027Speter
21565557Sjasone	mtx_assert(&sched_lock, MA_OWNED);
21665557Sjasone	if (itqueuebits) {
21765557Sjasone		pri = ffs(itqueuebits) - 1;
21865557Sjasone		q = &itqueues[pri];
21965557Sjasone		which = &itqueuebits;
22065557Sjasone	} else if (rtqueuebits) {
22150027Speter		pri = ffs(rtqueuebits) - 1;
22250027Speter		q = &rtqueues[pri];
22350027Speter		which = &rtqueuebits;
22450027Speter	} else if (queuebits) {
22550027Speter		pri = ffs(queuebits) - 1;
22650027Speter		q = &queues[pri];
22750027Speter		which = &queuebits;
22850027Speter	} else if (idqueuebits) {
22950027Speter		pri = ffs(idqueuebits) - 1;
23050027Speter		q = &idqueues[pri];
23150027Speter		which = &idqueuebits;
23250027Speter	} else {
23365764Sjhb		CTR1(KTR_PROC, "chooseproc: idleproc, schedlock %lx",
23465764Sjhb			(long)sched_lock.mtx_lock);
23565557Sjasone		return idleproc;
23650027Speter	}
23750027Speter	p = TAILQ_FIRST(q);
23850027Speter#ifdef SMP
23950027Speter	/* wander down the current run queue for this pri level for a match */
24050027Speter	id = cpuid;
24150027Speter	while (p->p_lastcpu != id) {
24250027Speter		p = TAILQ_NEXT(p, p_procq);
24350027Speter		if (p == NULL) {
24450027Speter			p = TAILQ_FIRST(q);
24550027Speter			break;
24650027Speter		}
24750027Speter	}
24850027Speter#endif
24965764Sjhb	CTR4(KTR_PROC, "chooseproc: proc %p (pid %d, %s), schedlock %lx",
25065764Sjhb		p, p->p_pid, p->p_comm, (long)sched_lock.mtx_lock);
25165557Sjasone	KASSERT(p, ("chooseproc: no proc on busy queue"));
25250027Speter	TAILQ_REMOVE(q, p, p_procq);
25350027Speter	if (TAILQ_EMPTY(q))
25450027Speter		*which &= ~(1 << pri);
25550027Speter	return p;
25650027Speter}
257