sched_4bsd.c revision 160039
1104964Sjeff/*-
2104964Sjeff * Copyright (c) 1982, 1986, 1990, 1991, 1993
3104964Sjeff *	The Regents of the University of California.  All rights reserved.
4104964Sjeff * (c) UNIX System Laboratories, Inc.
5104964Sjeff * All or some portions of this file are derived from material licensed
6104964Sjeff * to the University of California by American Telephone and Telegraph
7104964Sjeff * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8104964Sjeff * the permission of UNIX System Laboratories, Inc.
9104964Sjeff *
10104964Sjeff * Redistribution and use in source and binary forms, with or without
11104964Sjeff * modification, are permitted provided that the following conditions
12104964Sjeff * are met:
13104964Sjeff * 1. Redistributions of source code must retain the above copyright
14104964Sjeff *    notice, this list of conditions and the following disclaimer.
15104964Sjeff * 2. Redistributions in binary form must reproduce the above copyright
16104964Sjeff *    notice, this list of conditions and the following disclaimer in the
17104964Sjeff *    documentation and/or other materials provided with the distribution.
18104964Sjeff * 4. Neither the name of the University nor the names of its contributors
19104964Sjeff *    may be used to endorse or promote products derived from this software
20104964Sjeff *    without specific prior written permission.
21104964Sjeff *
22104964Sjeff * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23104964Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24104964Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25104964Sjeff * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26104964Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27104964Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28104964Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29104964Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30104964Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31104964Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32104964Sjeff * SUCH DAMAGE.
33104964Sjeff */
34104964Sjeff
35116182Sobrien#include <sys/cdefs.h>
36116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sched_4bsd.c 160039 2006-06-29 19:37:31Z obrien $");
37116182Sobrien
38147565Speter#include "opt_hwpmc_hooks.h"
39147565Speter
40134791Sjulian#define kse td_sched
41134791Sjulian
42104964Sjeff#include <sys/param.h>
43104964Sjeff#include <sys/systm.h>
44104964Sjeff#include <sys/kernel.h>
45104964Sjeff#include <sys/ktr.h>
46104964Sjeff#include <sys/lock.h>
47123871Sjhb#include <sys/kthread.h>
48104964Sjeff#include <sys/mutex.h>
49104964Sjeff#include <sys/proc.h>
50104964Sjeff#include <sys/resourcevar.h>
51104964Sjeff#include <sys/sched.h>
52104964Sjeff#include <sys/smp.h>
53104964Sjeff#include <sys/sysctl.h>
54104964Sjeff#include <sys/sx.h>
55139453Sjhb#include <sys/turnstile.h>
56160039Sobrien#include <machine/pcb.h>
57134689Sjulian#include <machine/smp.h>
58104964Sjeff
59145256Sjkoshy#ifdef HWPMC_HOOKS
60145256Sjkoshy#include <sys/pmckern.h>
61145256Sjkoshy#endif
62145256Sjkoshy
63107135Sjeff/*
64107135Sjeff * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
65107135Sjeff * the range 100-256 Hz (approximately).
66107135Sjeff */
67107135Sjeff#define	ESTCPULIM(e) \
68107135Sjeff    min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
69107135Sjeff    RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
70122355Sbde#ifdef SMP
71122355Sbde#define	INVERSE_ESTCPU_WEIGHT	(8 * smp_cpus)
72122355Sbde#else
73107135Sjeff#define	INVERSE_ESTCPU_WEIGHT	8	/* 1 / (priorities per estcpu level). */
74122355Sbde#endif
75107135Sjeff#define	NICE_WEIGHT		1	/* Priorities per nice level. */
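/*
 * For a sense of scale (stock values PRIO_MIN = -20, PRIO_MAX = 20 and
 * RQ_PPQ = 4 assumed): on a UP kernel ESTCPULIM(e) works out to
 *	min(e, 8 * (1 * 40 - 4) + 8 - 1) = min(e, 295)
 * so kg_estcpu saturates at 295 and the estcpu term alone can never push
 * the user priority more than 295 / 8 = 36 slots past PUSER.
 */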
76107135Sjeff
77134791Sjulian/*
78134791Sjulian * The schedulable entity that can be given a context to run.
79134791Sjulian * A process may have several of these. Probably one per processor
80134791Sjulian * but possibly a few more. In this universe they are grouped
81134791Sjulian * with a KSEGRP that contains the priority and niceness
82134791Sjulian * for the group.
83134791Sjulian */
84134791Sjulianstruct kse {
85134791Sjulian	TAILQ_ENTRY(kse) ke_procq;	/* (j/z) Run queue. */
86134791Sjulian	struct thread	*ke_thread;	/* (*) Active associated thread. */
87134791Sjulian	fixpt_t		ke_pctcpu;	/* (j) %cpu during p_swtime. */
88159337Sdavidxu	u_char		ke_rqindex;	/* (j) Run queue index. */
89134791Sjulian	enum {
90134791Sjulian		KES_THREAD = 0x0,	/* slaved to thread state */
91134791Sjulian		KES_ONRUNQ
92134791Sjulian	} ke_state;			/* (j) KSE status. */
93134791Sjulian	int		ke_cpticks;	/* (j) Ticks of cpu time. */
94134791Sjulian	struct runq	*ke_runq;	/* runq the kse is currently on */
95109145Sjeff};
96109145Sjeff
97134791Sjulian#define ke_proc		ke_thread->td_proc
98134791Sjulian#define ke_ksegrp	ke_thread->td_ksegrp
99134791Sjulian
100134791Sjulian#define td_kse td_sched
101134791Sjulian
102134791Sjulian/* flags kept in td_flags */
103134791Sjulian#define TDF_DIDRUN	TDF_SCHED0	/* KSE actually ran. */
104134791Sjulian#define TDF_EXIT	TDF_SCHED1	/* KSE is being killed. */
105134791Sjulian#define TDF_BOUND	TDF_SCHED2
106134791Sjulian
107134791Sjulian#define ke_flags	ke_thread->td_flags
108134791Sjulian#define KEF_DIDRUN	TDF_DIDRUN /* KSE actually ran. */
109134791Sjulian#define KEF_EXIT	TDF_EXIT /* KSE is being killed. */
110134791Sjulian#define KEF_BOUND	TDF_BOUND /* stuck to one CPU */
111134791Sjulian
112124955Sjeff#define SKE_RUNQ_PCPU(ke)						\
113124955Sjeff    ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq)
114124955Sjeff
115134791Sjulianstruct kg_sched {
116134791Sjulian	struct thread	*skg_last_assigned; /* (j) Last thread assigned to */
117134791Sjulian					   /* the system scheduler. */
118134791Sjulian	int	skg_avail_opennings;	/* (j) Num unfilled slots in group. */
119134791Sjulian	int	skg_concurrency;	/* (j) Num KSEs requested in group. */
120134791Sjulian};
121134791Sjulian#define kg_last_assigned	kg_sched->skg_last_assigned
122134791Sjulian#define kg_avail_opennings	kg_sched->skg_avail_opennings
123134791Sjulian#define kg_concurrency		kg_sched->skg_concurrency
124134791Sjulian
125136167Sjulian#define SLOT_RELEASE(kg)						\
126136167Sjuliando {									\
127136167Sjulian	kg->kg_avail_opennings++; 					\
128136167Sjulian	CTR3(KTR_RUNQ, "kg %p(%d) Slot released (->%d)",		\
129136167Sjulian	kg,								\
130136167Sjulian	kg->kg_concurrency,						\
131136167Sjulian	 kg->kg_avail_opennings);					\
132136167Sjulian/*	KASSERT((kg->kg_avail_opennings <= kg->kg_concurrency),		\
133136167Sjulian	    ("slots out of whack"));*/					\
134136167Sjulian} while (0)
135136167Sjulian
136136167Sjulian#define SLOT_USE(kg)							\
137136167Sjuliando {									\
138136167Sjulian	kg->kg_avail_opennings--; 					\
139136167Sjulian	CTR3(KTR_RUNQ, "kg %p(%d) Slot used (->%d)",			\
140136167Sjulian	kg,								\
141136167Sjulian	kg->kg_concurrency,						\
142136167Sjulian	 kg->kg_avail_opennings);					\
143136167Sjulian/*	KASSERT((kg->kg_avail_opennings >= 0),				\
144136167Sjulian	    ("slots out of whack"));*/					\
145136167Sjulian} while (0)
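/*
 * Rough picture of the slot accounting: kg_avail_opennings counts the
 * concurrency slots not currently consumed by a runnable or running
 * thread.  SLOT_USE() pairs with putting a thread on a run queue or
 * handing it the CPU, SLOT_RELEASE() with taking it back off, so in
 * principle 0 <= kg_avail_opennings <= kg_concurrency (the disabled
 * KASSERTs above express the same invariant).
 */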
146136167Sjulian
147124955Sjeff/*
148124955Sjeff * KSE_CAN_MIGRATE macro returns true if the kse can migrate between
149125295Sjeff * cpus.
150124955Sjeff */
151124955Sjeff#define KSE_CAN_MIGRATE(ke)						\
152135076Sscottl    ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
153109145Sjeff
154134791Sjulianstatic struct kse kse0;
155134791Sjulianstatic struct kg_sched kg_sched0;
156104964Sjeff
157125288Sjeffstatic int	sched_tdcnt;	/* Total runnable threads in the system. */
158104964Sjeffstatic int	sched_quantum;	/* Roundrobin scheduling quantum in ticks. */
159112535Smux#define	SCHED_QUANTUM	(hz / 10)	/* Default sched quantum */
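/*
 * For illustration: hz / 10 ticks is 100 ms of wall time whatever hz is,
 * so the default quantum is 10 ticks at hz = 100 and 100 ticks at
 * hz = 1000, matching the 100 ms round-robin interval described in the
 * comment above roundrobin() below.
 */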
160104964Sjeff
161104964Sjeffstatic struct callout roundrobin_callout;
162104964Sjeff
163134791Sjulianstatic void	slot_fill(struct ksegrp *kg);
164134791Sjulianstatic struct kse *sched_choose(void);		/* XXX Should be thread * */
165134791Sjulian
166124955Sjeffstatic void	setup_runqs(void);
167104964Sjeffstatic void	roundrobin(void *arg);
168123871Sjhbstatic void	schedcpu(void);
169124955Sjeffstatic void	schedcpu_thread(void);
170139453Sjhbstatic void	sched_priority(struct thread *td, u_char prio);
171104964Sjeffstatic void	sched_setup(void *dummy);
172104964Sjeffstatic void	maybe_resched(struct thread *td);
173104964Sjeffstatic void	updatepri(struct ksegrp *kg);
174104964Sjeffstatic void	resetpriority(struct ksegrp *kg);
175139453Sjhbstatic void	resetpriority_thread(struct thread *td, struct ksegrp *kg);
176134694Sjulian#ifdef SMP
177134688Sjulianstatic int	forward_wakeup(int  cpunum);
178134694Sjulian#endif
179104964Sjeff
180124955Sjeffstatic struct kproc_desc sched_kp = {
181124955Sjeff        "schedcpu",
182124955Sjeff        schedcpu_thread,
183124955Sjeff        NULL
184124955Sjeff};
185124955SjeffSYSINIT(schedcpu, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start, &sched_kp)
186124955SjeffSYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)
187104964Sjeff
188104964Sjeff/*
189104964Sjeff * Global run queue.
190104964Sjeff */
191104964Sjeffstatic struct runq runq;
192104964Sjeff
193124955Sjeff#ifdef SMP
194124955Sjeff/*
195124955Sjeff * Per-CPU run queues
196124955Sjeff */
197124955Sjeffstatic struct runq runq_pcpu[MAXCPU];
198124955Sjeff#endif
199124955Sjeff
200124955Sjeffstatic void
201124955Sjeffsetup_runqs(void)
202124955Sjeff{
203124955Sjeff#ifdef SMP
204124955Sjeff	int i;
205124955Sjeff
206124955Sjeff	for (i = 0; i < MAXCPU; ++i)
207124955Sjeff		runq_init(&runq_pcpu[i]);
208124955Sjeff#endif
209124955Sjeff
210124955Sjeff	runq_init(&runq);
211124955Sjeff}
212124955Sjeff
213104964Sjeffstatic int
214104964Sjeffsysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
215104964Sjeff{
216104964Sjeff	int error, new_val;
217104964Sjeff
218104964Sjeff	new_val = sched_quantum * tick;
219104964Sjeff	error = sysctl_handle_int(oidp, &new_val, 0, req);
220104964Sjeff        if (error != 0 || req->newptr == NULL)
221104964Sjeff		return (error);
222104964Sjeff	if (new_val < tick)
223104964Sjeff		return (EINVAL);
224104964Sjeff	sched_quantum = new_val / tick;
225104964Sjeff	hogticks = 2 * sched_quantum;
226104964Sjeff	return (0);
227104964Sjeff}
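/*
 * For illustration (assuming the usual tick = 1000000 / hz microseconds):
 * the quantum is exported in microseconds, so at hz = 100 the default
 * sched_quantum of hz / 10 = 10 ticks reads back as 100000 us, and
 * writing e.g. 50000 stores 50000 / 10000 = 5 ticks.
 */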
228104964Sjeff
229132589SscottlSYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler");
230130881Sscottl
231132589SscottlSYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
232132589Sscottl    "Scheduler name");
233130881Sscottl
234132589SscottlSYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
235132589Sscottl    0, sizeof sched_quantum, sysctl_kern_quantum, "I",
236132589Sscottl    "Roundrobin scheduling quantum in microseconds");
237104964Sjeff
238134693Sjulian#ifdef SMP
239134688Sjulian/* Enable forwarding of wakeups to all other cpus */
240134688SjulianSYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");
241134688Sjulian
242134792Sjulianstatic int forward_wakeup_enabled = 1;
243134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
244134688Sjulian	   &forward_wakeup_enabled, 0,
245134688Sjulian	   "Forwarding of wakeup to idle CPUs");
246134688Sjulian
247134688Sjulianstatic int forward_wakeups_requested = 0;
248134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
249134688Sjulian	   &forward_wakeups_requested, 0,
250134688Sjulian	   "Requests for Forwarding of wakeup to idle CPUs");
251134688Sjulian
252134688Sjulianstatic int forward_wakeups_delivered = 0;
253134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
254134688Sjulian	   &forward_wakeups_delivered, 0,
255134688Sjulian	   "Completed Forwarding of wakeup to idle CPUs");
256134688Sjulian
257134792Sjulianstatic int forward_wakeup_use_mask = 1;
258134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
259134688Sjulian	   &forward_wakeup_use_mask, 0,
260134688Sjulian	   "Use the mask of idle cpus");
261134688Sjulian
262134688Sjulianstatic int forward_wakeup_use_loop = 0;
263134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
264134688Sjulian	   &forward_wakeup_use_loop, 0,
265134688Sjulian	   "Use a loop to find idle cpus");
266134688Sjulian
267134688Sjulianstatic int forward_wakeup_use_single = 0;
268134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
269134688Sjulian	   &forward_wakeup_use_single, 0,
270134688Sjulian	   "Only signal one idle cpu");
271134688Sjulian
272134688Sjulianstatic int forward_wakeup_use_htt = 0;
273134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
274134688Sjulian	   &forward_wakeup_use_htt, 0,
275134688Sjulian	   "account for htt");
276135051Sjulian
277134693Sjulian#endif
278135051Sjulianstatic int sched_followon = 0;
279135051SjulianSYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
280135051Sjulian	   &sched_followon, 0,
281135051Sjulian	   "allow threads to share a quantum");
282134688Sjulian
283135051Sjulianstatic int sched_pfollowons = 0;
284135051SjulianSYSCTL_INT(_kern_sched, OID_AUTO, pfollowons, CTLFLAG_RD,
285135051Sjulian	   &sched_pfollowons, 0,
286135051Sjulian	   "number of followons done to a different ksegrp");
287135051Sjulian
288135051Sjulianstatic int sched_kgfollowons = 0;
289135051SjulianSYSCTL_INT(_kern_sched, OID_AUTO, kgfollowons, CTLFLAG_RD,
290135051Sjulian	   &sched_kgfollowons, 0,
291135051Sjulian	   "number of followons done in a ksegrp");
292135051Sjulian
293139317Sjeffstatic __inline void
294139317Sjeffsched_load_add(void)
295139317Sjeff{
296139317Sjeff	sched_tdcnt++;
297139317Sjeff	CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
298139317Sjeff}
299139317Sjeff
300139317Sjeffstatic __inline void
301139317Sjeffsched_load_rem(void)
302139317Sjeff{
303139317Sjeff	sched_tdcnt--;
304139317Sjeff	CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
305139317Sjeff}
306104964Sjeff/*
307104964Sjeff * Arrange to reschedule if necessary, taking the priorities and
308104964Sjeff * schedulers into account.
309104964Sjeff */
310104964Sjeffstatic void
311104964Sjeffmaybe_resched(struct thread *td)
312104964Sjeff{
313104964Sjeff
314104964Sjeff	mtx_assert(&sched_lock, MA_OWNED);
315134791Sjulian	if (td->td_priority < curthread->td_priority)
316111032Sjulian		curthread->td_flags |= TDF_NEEDRESCHED;
317104964Sjeff}
318104964Sjeff
319104964Sjeff/*
320104964Sjeff * Force switch among equal priority processes every 100ms.
321104964Sjeff * We don't actually need to force a context switch of the current process.
322104964Sjeff * The act of firing the event triggers a context switch to softclock() and
323104964Sjeff * then switching back out again which is equivalent to a preemption, thus
324104964Sjeff * no further work is needed on the local CPU.
325104964Sjeff */
326104964Sjeff/* ARGSUSED */
327104964Sjeffstatic void
328104964Sjeffroundrobin(void *arg)
329104964Sjeff{
330104964Sjeff
331104964Sjeff#ifdef SMP
332104964Sjeff	mtx_lock_spin(&sched_lock);
333104964Sjeff	forward_roundrobin();
334104964Sjeff	mtx_unlock_spin(&sched_lock);
335104964Sjeff#endif
336104964Sjeff
337104964Sjeff	callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL);
338104964Sjeff}
339104964Sjeff
340104964Sjeff/*
341104964Sjeff * Constants for digital decay and forget:
342118972Sjhb *	90% of (kg_estcpu) usage in 5 * loadav time
343118972Sjhb *	95% of (ke_pctcpu) usage in 60 seconds (load insensitive)
344104964Sjeff *          Note that, as ps(1) mentions, this can let percentages
345104964Sjeff *          total over 100% (I've seen 137.9% for 3 processes).
346104964Sjeff *
347118972Sjhb * Note that schedclock() updates kg_estcpu and p_cpticks asynchronously.
348104964Sjeff *
349118972Sjhb * We wish to decay away 90% of kg_estcpu in (5 * loadavg) seconds.
350104964Sjeff * That is, the system wants to compute a value of decay such
351104964Sjeff * that the following for loop:
352104964Sjeff * 	for (i = 0; i < (5 * loadavg); i++)
353118972Sjhb * 		kg_estcpu *= decay;
354104964Sjeff * will compute
355118972Sjhb * 	kg_estcpu *= 0.1;
356104964Sjeff * for all values of loadavg:
357104964Sjeff *
358104964Sjeff * Mathematically this loop can be expressed by saying:
359104964Sjeff * 	decay ** (5 * loadavg) ~= .1
360104964Sjeff *
361104964Sjeff * The system computes decay as:
362104964Sjeff * 	decay = (2 * loadavg) / (2 * loadavg + 1)
363104964Sjeff *
364104964Sjeff * We wish to prove that the system's computation of decay
365104964Sjeff * will always fulfill the equation:
366104964Sjeff * 	decay ** (5 * loadavg) ~= .1
367104964Sjeff *
368104964Sjeff * If we compute b as:
369104964Sjeff * 	b = 2 * loadavg
370104964Sjeff * then
371104964Sjeff * 	decay = b / (b + 1)
372104964Sjeff *
373104964Sjeff * We now need to prove two things:
374104964Sjeff *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
375104964Sjeff *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
376104964Sjeff *
377104964Sjeff * Facts:
378104964Sjeff *         For x close to zero, exp(x) =~ 1 + x, since
379104964Sjeff *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
380104964Sjeff *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
381104964Sjeff *         For x close to zero, ln(1+x) =~ x, since
382104964Sjeff *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
383104964Sjeff *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
384104964Sjeff *         ln(.1) =~ -2.30
385104964Sjeff *
386104964Sjeff * Proof of (1):
387104964Sjeff *    Solve (factor)**(power) =~ .1 given power (5*loadav):
388104964Sjeff *	solving for factor,
389104964Sjeff *      ln(factor) =~ (-2.30/5*loadav), or
390104964Sjeff *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
391104964Sjeff *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
392104964Sjeff *
393104964Sjeff * Proof of (2):
394104964Sjeff *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
395104964Sjeff *	solving for power,
396104964Sjeff *      power*ln(b/(b+1)) =~ -2.30, or
397104964Sjeff *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
398104964Sjeff *
399104964Sjeff * Actual power values for the implemented algorithm are as follows:
400104964Sjeff *      loadav: 1       2       3       4
401104964Sjeff *      power:  5.68    10.32   14.94   19.55
402104964Sjeff */
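/*
 * A quick numeric check of the table above: for loadav = 2 the computed
 * decay is (2 * 2) / (2 * 2 + 1) = 0.8, and 0.8 ** 10.32 =~ 0.1, i.e. 90%
 * of kg_estcpu really is forgotten after 10.32 iterations, close to the
 * 5 * loadav = 10 that the approximation aims for.
 */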
403104964Sjeff
404104964Sjeff/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
405104964Sjeff#define	loadfactor(loadav)	(2 * (loadav))
406104964Sjeff#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
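/*
 * Note that averunnable.ldavg[] is FSCALE-scaled, so for a load average of
 * exactly 1.00 loadfactor() yields 2 * FSCALE and decay_cpu() multiplies
 * by 2 * FSCALE / (2 * FSCALE + FSCALE) = 2/3, using integer arithmetic
 * that rounds down.
 */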
407104964Sjeff
408118972Sjhb/* decay 95% of `ke_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
409104964Sjeffstatic fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
410158082SjmgSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
411104964Sjeff
412104964Sjeff/*
413104964Sjeff * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
414104964Sjeff * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
415104964Sjeff * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
416104964Sjeff *
417104964Sjeff * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
418104964Sjeff *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
419104964Sjeff *
420104964Sjeff * If you don't want to bother with the faster/more-accurate formula, you
421104964Sjeff * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
422104964Sjeff * (more general) method of calculating the %age of CPU used by a process.
423104964Sjeff */
424104964Sjeff#define	CCPU_SHIFT	11
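/*
 * Sanity check of the 95%-in-60-seconds figure above: schedcpu() scales
 * ke_pctcpu by ccpu = exp(-1/20) once a second, and exp(-60/20) = exp(-3)
 * =~ 0.05, so only about 5% of the old value survives a minute of
 * idleness.  With the stock FSHIFT of 11, FSHIFT >= CCPU_SHIFT and the
 * faster formula below is the one actually compiled in.
 */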
425104964Sjeff
426104964Sjeff/*
427104964Sjeff * Recompute process priorities, every hz ticks.
428104964Sjeff * MP-safe, called without the Giant mutex.
429104964Sjeff */
430104964Sjeff/* ARGSUSED */
431104964Sjeffstatic void
432123871Sjhbschedcpu(void)
433104964Sjeff{
434104964Sjeff	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
435104964Sjeff	struct thread *td;
436104964Sjeff	struct proc *p;
437104964Sjeff	struct kse *ke;
438104964Sjeff	struct ksegrp *kg;
439118972Sjhb	int awake, realstathz;
440104964Sjeff
441104964Sjeff	realstathz = stathz ? stathz : hz;
442104964Sjeff	sx_slock(&allproc_lock);
443104964Sjeff	FOREACH_PROC_IN_SYSTEM(p) {
444118972Sjhb		/*
445118972Sjhb		 * Prevent state changes and protect run queue.
446118972Sjhb		 */
447104964Sjeff		mtx_lock_spin(&sched_lock);
448118972Sjhb		/*
449118972Sjhb		 * Increment time in/out of memory.  We ignore overflow; with
450118972Sjhb		 * 16-bit int's (remember them?) overflow takes 45 days.
451118972Sjhb		 */
452104964Sjeff		p->p_swtime++;
453104964Sjeff		FOREACH_KSEGRP_IN_PROC(p, kg) {
454104964Sjeff			awake = 0;
455134791Sjulian			FOREACH_THREAD_IN_GROUP(kg, td) {
456134791Sjulian				ke = td->td_kse;
457104964Sjeff				/*
458118972Sjhb				 * Increment sleep time (if sleeping).  We
459118972Sjhb				 * ignore overflow, as above.
460104964Sjeff				 */
461104964Sjeff				/*
462104964Sjeff				 * The kse slptimes are not touched in wakeup
463104964Sjeff				 * because the thread may not HAVE a KSE.
464104964Sjeff				 */
465104964Sjeff				if (ke->ke_state == KES_ONRUNQ) {
466104964Sjeff					awake = 1;
467104964Sjeff					ke->ke_flags &= ~KEF_DIDRUN;
468104964Sjeff				} else if ((ke->ke_state == KES_THREAD) &&
469134791Sjulian				    (TD_IS_RUNNING(td))) {
470104964Sjeff					awake = 1;
471104964Sjeff					/* Do not clear KEF_DIDRUN */
472104964Sjeff				} else if (ke->ke_flags & KEF_DIDRUN) {
473104964Sjeff					awake = 1;
474104964Sjeff					ke->ke_flags &= ~KEF_DIDRUN;
475104964Sjeff				}
476104964Sjeff
477104964Sjeff				/*
478118972Sjhb				 * ke_pctcpu is only for ps and ttyinfo().
479118972Sjhb				 * Do it per kse, and add them up at the end?
480104964Sjeff				 * XXXKSE
481104964Sjeff				 */
482118972Sjhb				ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >>
483109145Sjeff				    FSHIFT;
484104964Sjeff				/*
485104964Sjeff				 * If the kse has been idle the entire second,
486104964Sjeff				 * stop recalculating its priority until
487104964Sjeff				 * it wakes up.
488104964Sjeff				 */
489134145Sjulian				if (ke->ke_cpticks == 0)
490104964Sjeff					continue;
491104964Sjeff#if	(FSHIFT >= CCPU_SHIFT)
492109157Sjeff				ke->ke_pctcpu += (realstathz == 100)
493134145Sjulian				    ? ((fixpt_t) ke->ke_cpticks) <<
494104964Sjeff				    (FSHIFT - CCPU_SHIFT) :
495134145Sjulian				    100 * (((fixpt_t) ke->ke_cpticks)
496109145Sjeff				    << (FSHIFT - CCPU_SHIFT)) / realstathz;
497104964Sjeff#else
498109157Sjeff				ke->ke_pctcpu += ((FSCALE - ccpu) *
499134145Sjulian				    (ke->ke_cpticks *
500109145Sjeff				    FSCALE / realstathz)) >> FSHIFT;
501104964Sjeff#endif
502134145Sjulian				ke->ke_cpticks = 0;
503104964Sjeff			} /* end of kse loop */
504104964Sjeff			/*
505104964Sjeff			 * If there are ANY running threads in this KSEGRP,
506104964Sjeff			 * then don't count it as sleeping.
507104964Sjeff			 */
508104964Sjeff			if (awake) {
509104964Sjeff				if (kg->kg_slptime > 1) {
510104964Sjeff					/*
511104964Sjeff					 * In an ideal world, this should not
512104964Sjeff					 * happen, because whoever woke us
513104964Sjeff					 * up from the long sleep should have
514104964Sjeff					 * unwound the slptime and reset our
515104964Sjeff					 * priority before we run at the stale
516104964Sjeff					 * priority.  Should KASSERT at some
517104964Sjeff					 * point when all the cases are fixed.
518104964Sjeff					 */
519104964Sjeff					updatepri(kg);
520104964Sjeff				}
521104964Sjeff				kg->kg_slptime = 0;
522118972Sjhb			} else
523104964Sjeff				kg->kg_slptime++;
524104964Sjeff			if (kg->kg_slptime > 1)
525104964Sjeff				continue;
526104964Sjeff			kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
527104964Sjeff			resetpriority(kg);
528104964Sjeff			FOREACH_THREAD_IN_GROUP(kg, td) {
529139453Sjhb				resetpriority_thread(td, kg);
530104964Sjeff			}
531104964Sjeff		} /* end of ksegrp loop */
532104964Sjeff		mtx_unlock_spin(&sched_lock);
533104964Sjeff	} /* end of process loop */
534104964Sjeff	sx_sunlock(&allproc_lock);
535104964Sjeff}
536104964Sjeff
537104964Sjeff/*
538123871Sjhb * Main loop for a kthread that executes schedcpu once a second.
539123871Sjhb */
540123871Sjhbstatic void
541124955Sjeffschedcpu_thread(void)
542123871Sjhb{
543123871Sjhb	int nowake;
544123871Sjhb
545123871Sjhb	for (;;) {
546123871Sjhb		schedcpu();
547157815Sjhb		tsleep(&nowake, 0, "-", hz);
548123871Sjhb	}
549123871Sjhb}
550123871Sjhb
551123871Sjhb/*
552104964Sjeff * Recalculate the priority of a process after it has slept for a while.
553118972Sjhb * For all load averages >= 1 and max kg_estcpu of 255, sleeping for at
554118972Sjhb * least six times the loadfactor will decay kg_estcpu to zero.
555104964Sjeff */
556104964Sjeffstatic void
557104964Sjeffupdatepri(struct ksegrp *kg)
558104964Sjeff{
559118972Sjhb	register fixpt_t loadfac;
560104964Sjeff	register unsigned int newcpu;
561104964Sjeff
562118972Sjhb	loadfac = loadfactor(averunnable.ldavg[0]);
563104964Sjeff	if (kg->kg_slptime > 5 * loadfac)
564104964Sjeff		kg->kg_estcpu = 0;
565104964Sjeff	else {
566118972Sjhb		newcpu = kg->kg_estcpu;
567118972Sjhb		kg->kg_slptime--;	/* was incremented in schedcpu() */
568104964Sjeff		while (newcpu && --kg->kg_slptime)
569104964Sjeff			newcpu = decay_cpu(loadfac, newcpu);
570104964Sjeff		kg->kg_estcpu = newcpu;
571104964Sjeff	}
572104964Sjeff}
573104964Sjeff
574104964Sjeff/*
575104964Sjeff * Compute the priority of a process when running in user mode.
576104964Sjeff * Arrange to reschedule if the resulting priority is better
577104964Sjeff * than that of the current process.
578104964Sjeff */
579104964Sjeffstatic void
580104964Sjeffresetpriority(struct ksegrp *kg)
581104964Sjeff{
582104964Sjeff	register unsigned int newpriority;
583104964Sjeff
584104964Sjeff	if (kg->kg_pri_class == PRI_TIMESHARE) {
585104964Sjeff		newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT +
586130551Sjulian		    NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN);
587104964Sjeff		newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
588104964Sjeff		    PRI_MAX_TIMESHARE);
589104964Sjeff		kg->kg_user_pri = newpriority;
590104964Sjeff	}
591104964Sjeff}
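/*
 * For a feel of the numbers (stock PRIO_MIN = -20 and a UP kernel
 * assumed): with nice = 0 the nice term is NICE_WEIGHT * (0 - PRIO_MIN) =
 * 20 priorities, and every INVERSE_ESTCPU_WEIGHT = 8 points of kg_estcpu
 * cost one more, so a thread that has accumulated 64 estcpu is assigned
 * PUSER + 8 + 20 before the clamp to the timesharing range is applied.
 */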
592104964Sjeff
593139453Sjhb/*
594139453Sjhb * Update the thread's priority when the associated ksegroup's user
595139453Sjhb * priority changes.
596139453Sjhb */
597139453Sjhbstatic void
598139453Sjhbresetpriority_thread(struct thread *td, struct ksegrp *kg)
599139453Sjhb{
600139453Sjhb
601139453Sjhb	/* Only change threads with a time sharing user priority. */
602139453Sjhb	if (td->td_priority < PRI_MIN_TIMESHARE ||
603139453Sjhb	    td->td_priority > PRI_MAX_TIMESHARE)
604139453Sjhb		return;
605139453Sjhb
606139453Sjhb	/* XXX the whole needresched thing is broken, but not silly. */
607139453Sjhb	maybe_resched(td);
608139453Sjhb
609139453Sjhb	sched_prio(td, kg->kg_user_pri);
610139453Sjhb}
611139453Sjhb
612104964Sjeff/* ARGSUSED */
613104964Sjeffstatic void
614104964Sjeffsched_setup(void *dummy)
615104964Sjeff{
616124955Sjeff	setup_runqs();
617118972Sjhb
618104964Sjeff	if (sched_quantum == 0)
619104964Sjeff		sched_quantum = SCHED_QUANTUM;
620104964Sjeff	hogticks = 2 * sched_quantum;
621104964Sjeff
622126665Srwatson	callout_init(&roundrobin_callout, CALLOUT_MPSAFE);
623104964Sjeff
624104964Sjeff	/* Kick off timeout driven events by calling first time. */
625104964Sjeff	roundrobin(NULL);
626125288Sjeff
627125288Sjeff	/* Account for thread0. */
628139317Sjeff	sched_load_add();
629104964Sjeff}
630104964Sjeff
631104964Sjeff/* External interfaces start here */
632134791Sjulian/*
633134791Sjulian * Very early in the boot, some setup of scheduler-specific
634145109Smaxim * parts of proc0 and of some scheduler resources needs to be done.
635134791Sjulian * Called from:
636134791Sjulian *  proc0_init()
637134791Sjulian */
638134791Sjulianvoid
639134791Sjulianschedinit(void)
640134791Sjulian{
641134791Sjulian	/*
642134791Sjulian	 * Set up the scheduler specific parts of proc0.
643134791Sjulian	 */
644134791Sjulian	proc0.p_sched = NULL; /* XXX */
645134791Sjulian	ksegrp0.kg_sched = &kg_sched0;
646134791Sjulian	thread0.td_sched = &kse0;
647134791Sjulian	kse0.ke_thread = &thread0;
648134791Sjulian	kse0.ke_state = KES_THREAD;
649134791Sjulian	kg_sched0.skg_concurrency = 1;
650134791Sjulian	kg_sched0.skg_avail_opennings = 0; /* we are already running */
651134791Sjulian}
652134791Sjulian
653104964Sjeffint
654104964Sjeffsched_runnable(void)
655104964Sjeff{
656124955Sjeff#ifdef SMP
657124955Sjeff	return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
658124955Sjeff#else
659124955Sjeff	return runq_check(&runq);
660124955Sjeff#endif
661104964Sjeff}
662104964Sjeff
663104964Sjeffint
664104964Sjeffsched_rr_interval(void)
665104964Sjeff{
666104964Sjeff	if (sched_quantum == 0)
667104964Sjeff		sched_quantum = SCHED_QUANTUM;
668104964Sjeff	return (sched_quantum);
669104964Sjeff}
670104964Sjeff
671104964Sjeff/*
672104964Sjeff * We adjust the priority of the current process.  The priority of
673104964Sjeff * a process gets worse as it accumulates CPU time.  The cpu usage
674118972Sjhb * estimator (kg_estcpu) is increased here.  resetpriority() will
675118972Sjhb * compute a different priority each time kg_estcpu increases by
676104964Sjeff * INVERSE_ESTCPU_WEIGHT
677104964Sjeff * (until MAXPRI is reached).  The cpu usage estimator ramps up
678104964Sjeff * quite quickly when the process is running (linearly), and decays
679104964Sjeff * away exponentially, at a rate which is proportionally slower when
680104964Sjeff * the system is busy.  The basic principle is that the system will
681104964Sjeff * 90% forget that the process used a lot of CPU time in 5 * loadav
682104964Sjeff * seconds.  This causes the system to favor processes which haven't
683104964Sjeff * run much recently, and to round-robin among other processes.
684104964Sjeff */
685104964Sjeffvoid
686121127Sjeffsched_clock(struct thread *td)
687104964Sjeff{
688104964Sjeff	struct ksegrp *kg;
689121127Sjeff	struct kse *ke;
690104964Sjeff
691113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
692121127Sjeff	kg = td->td_ksegrp;
693121127Sjeff	ke = td->td_kse;
694113356Sjeff
695134145Sjulian	ke->ke_cpticks++;
696104964Sjeff	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1);
697104964Sjeff	if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
698104964Sjeff		resetpriority(kg);
699139453Sjhb		resetpriority_thread(td, kg);
700104964Sjeff	}
701104964Sjeff}
702118972Sjhb
703104964Sjeff/*
704104964Sjeff * Charge child's scheduling cpu usage to parent.
705104964Sjeff *
706104964Sjeff * XXXKSE assume only one thread & kse & ksegrp keep estcpu in each ksegrp.
707104964Sjeff * Charge it to the ksegrp that did the wait since process estcpu is the sum
708104964Sjeff * of all ksegrps; this is strictly as expected.  Assume that the child process
709104964Sjeff * aggregated all the estcpu into the 'built-in' ksegrp.
710104964Sjeff */
711104964Sjeffvoid
712132372Sjuliansched_exit(struct proc *p, struct thread *td)
713104964Sjeff{
714132372Sjulian	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
715132372Sjulian	sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
716113356Sjeff}
717113356Sjeff
718113356Sjeffvoid
719132372Sjuliansched_exit_ksegrp(struct ksegrp *kg, struct thread *childtd)
720113356Sjeff{
721113923Sjhb
722113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
723132372Sjulian	kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + childtd->td_ksegrp->kg_estcpu);
724104964Sjeff}
725104964Sjeff
726104964Sjeffvoid
727113356Sjeffsched_exit_thread(struct thread *td, struct thread *child)
728104964Sjeff{
729139317Sjeff	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
730139317Sjeff	    child, child->td_proc->p_comm, child->td_priority);
731127894Sdfr	if ((child->td_proc->p_flag & P_NOLOAD) == 0)
732139317Sjeff		sched_load_rem();
733113356Sjeff}
734109145Sjeff
735113356Sjeffvoid
736134791Sjuliansched_fork(struct thread *td, struct thread *childtd)
737113356Sjeff{
738134791Sjulian	sched_fork_ksegrp(td, childtd->td_ksegrp);
739134791Sjulian	sched_fork_thread(td, childtd);
740113356Sjeff}
741113356Sjeff
742113356Sjeffvoid
743132372Sjuliansched_fork_ksegrp(struct thread *td, struct ksegrp *child)
744113356Sjeff{
745113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
746132372Sjulian	child->kg_estcpu = td->td_ksegrp->kg_estcpu;
747113356Sjeff}
748109145Sjeff
749113356Sjeffvoid
750134791Sjuliansched_fork_thread(struct thread *td, struct thread *childtd)
751113356Sjeff{
752134791Sjulian	sched_newthread(childtd);
753104964Sjeff}
754104964Sjeff
755104964Sjeffvoid
756130551Sjuliansched_nice(struct proc *p, int nice)
757104964Sjeff{
758130551Sjulian	struct ksegrp *kg;
759139453Sjhb	struct thread *td;
760113873Sjhb
761130551Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
762113873Sjhb	mtx_assert(&sched_lock, MA_OWNED);
763130551Sjulian	p->p_nice = nice;
764130551Sjulian	FOREACH_KSEGRP_IN_PROC(p, kg) {
765130551Sjulian		resetpriority(kg);
766139453Sjhb		FOREACH_THREAD_IN_GROUP(kg, td) {
767139453Sjhb			resetpriority_thread(td, kg);
768139453Sjhb		}
769130551Sjulian	}
770104964Sjeff}
771104964Sjeff
772113356Sjeffvoid
773113356Sjeffsched_class(struct ksegrp *kg, int class)
774113356Sjeff{
775113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
776113356Sjeff	kg->kg_pri_class = class;
777113356Sjeff}
778113356Sjeff
779105127Sjulian/*
780105127Sjulian * Adjust the priority of a thread.
781105127Sjulian * This may include moving the thread within the KSEGRP,
782105127Sjulian * changing the assignment of a kse to the thread,
783105127Sjulian * and moving a KSE in the system run queue.
784105127Sjulian */
785139453Sjhbstatic void
786139453Sjhbsched_priority(struct thread *td, u_char prio)
787104964Sjeff{
788139317Sjeff	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
789139317Sjeff	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
790139317Sjeff	    curthread->td_proc->p_comm);
791104964Sjeff
792113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
793139453Sjhb	if (td->td_priority == prio)
794139453Sjhb		return;
795104964Sjeff	if (TD_ON_RUNQ(td)) {
796105127Sjulian		adjustrunqueue(td, prio);
797105127Sjulian	} else {
798105127Sjulian		td->td_priority = prio;
799104964Sjeff	}
800104964Sjeff}
801104964Sjeff
802139453Sjhb/*
803139453Sjhb * Update a thread's priority when it is lent another thread's
804139453Sjhb * priority.
805139453Sjhb */
806104964Sjeffvoid
807139453Sjhbsched_lend_prio(struct thread *td, u_char prio)
808139453Sjhb{
809139453Sjhb
810139453Sjhb	td->td_flags |= TDF_BORROWING;
811139453Sjhb	sched_priority(td, prio);
812139453Sjhb}
813139453Sjhb
814139453Sjhb/*
815139453Sjhb * Restore a thread's priority when priority propagation is
816139453Sjhb * over.  The prio argument is the minimum priority the thread
817139453Sjhb * needs to have to satisfy other possible priority lending
818139453Sjhb * requests.  If the thread's regular priority is less
819139453Sjhb * important than prio, the thread will keep a priority boost
820139453Sjhb * of prio.
821139453Sjhb */
822139453Sjhbvoid
823139453Sjhbsched_unlend_prio(struct thread *td, u_char prio)
824139453Sjhb{
825139453Sjhb	u_char base_pri;
826139453Sjhb
827139453Sjhb	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
828139453Sjhb	    td->td_base_pri <= PRI_MAX_TIMESHARE)
829139453Sjhb		base_pri = td->td_ksegrp->kg_user_pri;
830139453Sjhb	else
831139453Sjhb		base_pri = td->td_base_pri;
832139453Sjhb	if (prio >= base_pri) {
833139453Sjhb		td->td_flags &= ~TDF_BORROWING;
834139453Sjhb		sched_prio(td, base_pri);
835139453Sjhb	} else
836139453Sjhb		sched_lend_prio(td, prio);
837139453Sjhb}
838139453Sjhb
839139453Sjhbvoid
840139453Sjhbsched_prio(struct thread *td, u_char prio)
841139453Sjhb{
842139453Sjhb	u_char oldprio;
843139453Sjhb
844139453Sjhb	/* First, update the base priority. */
845139453Sjhb	td->td_base_pri = prio;
846139453Sjhb
847139453Sjhb	/*
848139453Sjhb	 * If the thread is borrowing another thread's priority, don't ever
849139453Sjhb	 * lower the priority.
850139453Sjhb	 */
851139453Sjhb	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
852139453Sjhb		return;
853139453Sjhb
854139453Sjhb	/* Change the real priority. */
855139453Sjhb	oldprio = td->td_priority;
856139453Sjhb	sched_priority(td, prio);
857139453Sjhb
858139453Sjhb	/*
859139453Sjhb	 * If the thread is on a turnstile, then let the turnstile update
860139453Sjhb	 * its state.
861139453Sjhb	 */
862139453Sjhb	if (TD_ON_LOCK(td) && oldprio != prio)
863139453Sjhb		turnstile_adjust(td, oldprio);
864139453Sjhb}
865139453Sjhb
866139453Sjhbvoid
867126326Sjhbsched_sleep(struct thread *td)
868104964Sjeff{
869113923Sjhb
870113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
871104964Sjeff	td->td_ksegrp->kg_slptime = 0;
872104964Sjeff}
873104964Sjeff
874135051Sjulianstatic void remrunqueue(struct thread *td);
875135051Sjulian
876104964Sjeffvoid
877135051Sjuliansched_switch(struct thread *td, struct thread *newtd, int flags)
878104964Sjeff{
879104964Sjeff	struct kse *ke;
880135051Sjulian	struct ksegrp *kg;
881104964Sjeff	struct proc *p;
882104964Sjeff
883104964Sjeff	ke = td->td_kse;
884104964Sjeff	p = td->td_proc;
885104964Sjeff
886113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
887104964Sjeff
888125295Sjeff	if ((p->p_flag & P_NOLOAD) == 0)
889139317Sjeff		sched_load_rem();
890134791Sjulian	/*
891135051Sjulian	 * We are volunteering to switch out so we get to nominate
892135051Sjulian	 * a successor for the rest of our quantum.
893135051Sjulian	 * First try another thread in our ksegrp, and then look for
894135051Sjulian	 * other ksegrps in our process.
895135051Sjulian	 */
896135051Sjulian	if (sched_followon &&
897135051Sjulian	    (p->p_flag & P_HADTHREADS) &&
898135051Sjulian	    (flags & SW_VOL) &&
899135051Sjulian	    newtd == NULL) {
900135051Sjulian		/* lets schedule another thread from this process */
901135051Sjulian		 kg = td->td_ksegrp;
902135051Sjulian		 if ((newtd = TAILQ_FIRST(&kg->kg_runq))) {
903135051Sjulian			remrunqueue(newtd);
904135051Sjulian			sched_kgfollowons++;
905135051Sjulian		 } else {
906135051Sjulian			FOREACH_KSEGRP_IN_PROC(p, kg) {
907135051Sjulian				if ((newtd = TAILQ_FIRST(&kg->kg_runq))) {
908135051Sjulian					sched_pfollowons++;
909135051Sjulian					remrunqueue(newtd);
910135051Sjulian					break;
911135051Sjulian				}
912135051Sjulian			}
913135051Sjulian		}
914135051Sjulian	}
915135051Sjulian
916138527Sups	if (newtd)
917138527Sups		newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED);
918138527Sups
919113339Sjulian	td->td_lastcpu = td->td_oncpu;
920132266Sjhb	td->td_flags &= ~TDF_NEEDRESCHED;
921144777Sups	td->td_owepreempt = 0;
922113339Sjulian	td->td_oncpu = NOCPU;
923104964Sjeff	/*
924104964Sjeff	 * At the last moment, if this thread is still marked RUNNING,
925104964Sjeff	 * then put it back on the run queue as it has not been suspended
926131473Sjhb	 * or stopped or anything else similar.  We never put the idle
927131473Sjhb	 * threads on the run queue, however.
928104964Sjeff	 */
929131473Sjhb	if (td == PCPU_GET(idlethread))
930131473Sjhb		TD_SET_CAN_RUN(td);
931134791Sjulian	else {
932136170Sjulian		SLOT_RELEASE(td->td_ksegrp);
933134791Sjulian		if (TD_IS_RUNNING(td)) {
934134791Sjulian			/* Put us back on the run queue (kse and all). */
935136170Sjulian			setrunqueue(td, (flags & SW_PREEMPT) ?
936136170Sjulian			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
937136170Sjulian			    SRQ_OURSELF|SRQ_YIELDING);
938134791Sjulian		} else if (p->p_flag & P_HADTHREADS) {
939134791Sjulian			/*
940134791Sjulian			 * We will not be on the run queue. So we must be
941134791Sjulian			 * sleeping or similar. As it's available,
942134791Sjulian			 * someone else can use the KSE if they need it.
943136170Sjulian			 * It's NOT available if we are about to need it.
944134791Sjulian			 */
945136170Sjulian			if (newtd == NULL || newtd->td_ksegrp != td->td_ksegrp)
946136170Sjulian				slot_fill(td->td_ksegrp);
947134791Sjulian		}
948104964Sjeff	}
949136170Sjulian	if (newtd) {
950136170Sjulian		/*
951136170Sjulian		 * The thread we are about to run needs to be counted
952136170Sjulian		 * as if it had been added to the run queue and selected.
953136170Sjulian		 * It came from:
954136170Sjulian		 * * A preemption
955136170Sjulian		 * * An upcall
956136170Sjulian		 * * A followon
957136170Sjulian		 */
958136170Sjulian		KASSERT((newtd->td_inhibitors == 0),
959136170Sjulian			("trying to run inhibited thread"));
960136170Sjulian		SLOT_USE(newtd->td_ksegrp);
961136170Sjulian		newtd->td_kse->ke_flags |= KEF_DIDRUN;
962136170Sjulian        	TD_SET_RUNNING(newtd);
963136170Sjulian		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
964139317Sjeff			sched_load_add();
965136170Sjulian	} else {
966131473Sjhb		newtd = choosethread();
967136170Sjulian	}
968136170Sjulian
969145256Sjkoshy	if (td != newtd) {
970145256Sjkoshy#ifdef	HWPMC_HOOKS
971145256Sjkoshy		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
972145256Sjkoshy			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
973145256Sjkoshy#endif
974121128Sjeff		cpu_switch(td, newtd);
975145256Sjkoshy#ifdef	HWPMC_HOOKS
976145256Sjkoshy		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
977145256Sjkoshy			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
978145256Sjkoshy#endif
979145256Sjkoshy	}
980145256Sjkoshy
981121128Sjeff	sched_lock.mtx_lock = (uintptr_t)td;
982121128Sjeff	td->td_oncpu = PCPU_GET(cpuid);
983104964Sjeff}
984104964Sjeff
985104964Sjeffvoid
986104964Sjeffsched_wakeup(struct thread *td)
987104964Sjeff{
988104964Sjeff	struct ksegrp *kg;
989104964Sjeff
990113923Sjhb	mtx_assert(&sched_lock, MA_OWNED);
991104964Sjeff	kg = td->td_ksegrp;
992139453Sjhb	if (kg->kg_slptime > 1) {
993104964Sjeff		updatepri(kg);
994139453Sjhb		resetpriority(kg);
995139453Sjhb	}
996104964Sjeff	kg->kg_slptime = 0;
997134586Sjulian	setrunqueue(td, SRQ_BORING);
998104964Sjeff}
999104964Sjeff
1000134693Sjulian#ifdef SMP
1001134688Sjulian/* enable HTT_2 if you have a 2-way HTT cpu.*/
1002134688Sjulianstatic int
1003134688Sjulianforward_wakeup(int  cpunum)
1004134688Sjulian{
1005134688Sjulian	cpumask_t map, me, dontuse;
1006134688Sjulian	cpumask_t map2;
1007134688Sjulian	struct pcpu *pc;
1008134688Sjulian	cpumask_t id, map3;
1009134688Sjulian
1010134688Sjulian	mtx_assert(&sched_lock, MA_OWNED);
1011134688Sjulian
1012134791Sjulian	CTR0(KTR_RUNQ, "forward_wakeup()");
1013134688Sjulian
1014134688Sjulian	if ((!forward_wakeup_enabled) ||
1015134688Sjulian	     (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
1016134688Sjulian		return (0);
1017134688Sjulian	if (!smp_started || cold || panicstr)
1018134688Sjulian		return (0);
1019134688Sjulian
1020134688Sjulian	forward_wakeups_requested++;
1021134688Sjulian
1022134688Sjulian/*
1023134688Sjulian * check the idle mask we received against what we calculated before
1024134688Sjulian * in the old version.
1025134688Sjulian */
1026134688Sjulian	me = PCPU_GET(cpumask);
1027134688Sjulian	/*
1028134688Sjulian	 * don't bother if we should be doing it ourselves.
1029134688Sjulian	 */
1030134688Sjulian	if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
1031134688Sjulian		return (0);
1032134688Sjulian
1033134688Sjulian	dontuse = me | stopped_cpus | hlt_cpus_mask;
1034134688Sjulian	map3 = 0;
1035134688Sjulian	if (forward_wakeup_use_loop) {
1036134688Sjulian		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
1037134688Sjulian			id = pc->pc_cpumask;
1038134688Sjulian			if ( (id & dontuse) == 0 &&
1039134688Sjulian			    pc->pc_curthread == pc->pc_idlethread) {
1040134688Sjulian				map3 |= id;
1041134688Sjulian			}
1042134688Sjulian		}
1043134688Sjulian	}
1044134688Sjulian
1045134688Sjulian	if (forward_wakeup_use_mask) {
1046134688Sjulian		map = 0;
1047134688Sjulian		map = idle_cpus_mask & ~dontuse;
1048134688Sjulian
1049134688Sjulian		/* If they are both on, compare and use loop if different */
1050134688Sjulian		if (forward_wakeup_use_loop) {
1051134688Sjulian			if (map != map3) {
1052134688Sjulian				printf("map (%02X) != map3 (%02X)\n",
1053134688Sjulian						map, map3);
1054134688Sjulian				map = map3;
1055134688Sjulian			}
1056134688Sjulian		}
1057134688Sjulian	} else {
1058134688Sjulian		map = map3;
1059134688Sjulian	}
1060134688Sjulian	/* If we only allow a specific CPU, then mask off all the others */
1061134688Sjulian	if (cpunum != NOCPU) {
1062134688Sjulian		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
1063134688Sjulian		map &= (1 << cpunum);
1064134688Sjulian	} else {
1065134688Sjulian		/* Try choose an idle die. */
1066134688Sjulian		if (forward_wakeup_use_htt) {
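			/*
			 * (map & (map >> 1)) & 0x5555 keeps only the even
			 * numbered cpus whose odd numbered sibling is also
			 * idle, i.e. 2-way HTT pairs that are entirely idle,
			 * assuming siblings are numbered N and N + 1.
			 */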
1067134688Sjulian			map2 =  (map & (map >> 1)) & 0x5555;
1068134688Sjulian			if (map2) {
1069134688Sjulian				map = map2;
1070134688Sjulian			}
1071134688Sjulian		}
1072134688Sjulian
1073134688Sjulian		/* set only one bit */
1074134688Sjulian		if (forward_wakeup_use_single) {
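			/*
			 * map & (~map + 1) is the usual two's complement
			 * trick for isolating the lowest set bit, so at
			 * most one cpu is sent the IPI.
			 */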
1075134688Sjulian			map = map & ((~map) + 1);
1076134688Sjulian		}
1077134688Sjulian	}
1078134688Sjulian	if (map) {
1079134688Sjulian		forward_wakeups_delivered++;
1080134688Sjulian		ipi_selected(map, IPI_AST);
1081134688Sjulian		return (1);
1082134688Sjulian	}
1083134688Sjulian	if (cpunum == NOCPU)
1084134688Sjulian		printf("forward_wakeup: Idle processor not found\n");
1085134688Sjulian	return (0);
1086134688Sjulian}
1087134693Sjulian#endif
1088134688Sjulian
1089147182Sups#ifdef SMP
1090147190Supsstatic void kick_other_cpu(int pri,int cpuid);
1091147182Sups
1092147182Supsstatic void
1093147182Supskick_other_cpu(int pri, int cpuid)
1094147182Sups{
1095147182Sups	struct pcpu * pcpu = pcpu_find(cpuid);
1096147182Sups	int cpri = pcpu->pc_curthread->td_priority;
1097147182Sups
1098147182Sups	if (idle_cpus_mask & pcpu->pc_cpumask) {
1099147182Sups		forward_wakeups_delivered++;
1100147182Sups		ipi_selected(pcpu->pc_cpumask, IPI_AST);
1101147182Sups		return;
1102147182Sups	}
1103147182Sups
1104147182Sups	if (pri >= cpri)
1105147182Sups		return;
1106147182Sups
1107147182Sups#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
1108147182Sups#if !defined(FULL_PREEMPTION)
1109147182Sups	if (pri <= PRI_MAX_ITHD)
1110147182Sups#endif /* ! FULL_PREEMPTION */
1111147182Sups	{
1112147182Sups		ipi_selected(pcpu->pc_cpumask, IPI_PREEMPT);
1113147182Sups		return;
1114147182Sups	}
1115147182Sups#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
1116147182Sups
1117147182Sups	pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
1118147182Sups	ipi_selected(pcpu->pc_cpumask, IPI_AST);
1119147182Sups	return;
1120147182Sups}
1121147182Sups#endif /* SMP */
1122147182Sups
1123104964Sjeffvoid
1124134586Sjuliansched_add(struct thread *td, int flags)
1125147182Sups#ifdef SMP
1126104964Sjeff{
1127121127Sjeff	struct kse *ke;
1128134591Sjulian	int forwarded = 0;
1129134591Sjulian	int cpu;
1130147182Sups	int single_cpu = 0;
1131121127Sjeff
1132121127Sjeff	ke = td->td_kse;
1133104964Sjeff	mtx_assert(&sched_lock, MA_OWNED);
1134104964Sjeff	KASSERT(ke->ke_state != KES_ONRUNQ,
1135124957Sjeff	    ("sched_add: kse %p (%s) already in run queue", ke,
1136104964Sjeff	    ke->ke_proc->p_comm));
1137104964Sjeff	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
1138124957Sjeff	    ("sched_add: process swapped out"));
1139139317Sjeff	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
1140139317Sjeff	    td, td->td_proc->p_comm, td->td_priority, curthread,
1141139317Sjeff	    curthread->td_proc->p_comm);
1142131481Sjhb
1143147182Sups
1144147182Sups	if (td->td_pinned != 0) {
1145147182Sups		cpu = td->td_lastcpu;
1146147182Sups		ke->ke_runq = &runq_pcpu[cpu];
1147147182Sups		single_cpu = 1;
1148147182Sups		CTR3(KTR_RUNQ,
1149147182Sups		    "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
1150147182Sups	} else if ((ke)->ke_flags & KEF_BOUND) {
1151147182Sups		/* Find CPU from bound runq */
1152147182Sups		KASSERT(SKE_RUNQ_PCPU(ke),("sched_add: bound kse not on cpu runq"));
1153147190Sups		cpu = ke->ke_runq - &runq_pcpu[0];
1154147182Sups		single_cpu = 1;
1155147182Sups		CTR3(KTR_RUNQ,
1156147182Sups		    "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu);
1157147182Sups	} else {
1158134591Sjulian		CTR2(KTR_RUNQ,
1159134591Sjulian		    "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td);
1160134591Sjulian		cpu = NOCPU;
1161124955Sjeff		ke->ke_runq = &runq;
1162147182Sups	}
1163147182Sups
1164147190Sups	if (single_cpu && (cpu != PCPU_GET(cpuid))) {
1165147182Sups		kick_other_cpu(td->td_priority, cpu);
1166124955Sjeff	} else {
1167147182Sups
1168147190Sups		if (!single_cpu) {
1169147182Sups			cpumask_t me = PCPU_GET(cpumask);
1170147182Sups			int idle = idle_cpus_mask & me;
1171147182Sups
1172147190Sups			if (!idle && ((flags & SRQ_INTR) == 0) &&
1173147190Sups			    (idle_cpus_mask & ~(hlt_cpus_mask | me)))
1174147182Sups				forwarded = forward_wakeup(cpu);
1175147182Sups		}
1176147182Sups
1177147182Sups		if (!forwarded) {
1178147190Sups			if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td))
1179147182Sups				return;
1180147182Sups			else
1181147182Sups				maybe_resched(td);
1182147182Sups		}
1183124955Sjeff	}
1184147182Sups
1185147182Sups	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
1186147182Sups		sched_load_add();
1187147182Sups	SLOT_USE(td->td_ksegrp);
1188147182Sups	runq_add(ke->ke_runq, ke, flags);
1189147182Sups	ke->ke_state = KES_ONRUNQ;
1190147182Sups}
1191147182Sups#else /* SMP */
1192147182Sups{
1193147182Sups	struct kse *ke;
1194147182Sups	ke = td->td_kse;
1195147182Sups	mtx_assert(&sched_lock, MA_OWNED);
1196147182Sups	KASSERT(ke->ke_state != KES_ONRUNQ,
1197147182Sups	    ("sched_add: kse %p (%s) already in run queue", ke,
1198147182Sups	    ke->ke_proc->p_comm));
1199147182Sups	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
1200147182Sups	    ("sched_add: process swapped out"));
1201147182Sups	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
1202147182Sups	    td, td->td_proc->p_comm, td->td_priority, curthread,
1203147182Sups	    curthread->td_proc->p_comm);
1204133396Sjulian	CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td);
1205124955Sjeff	ke->ke_runq = &runq;
1206134591Sjulian
1207134591Sjulian	/*
1208134591Sjulian	 * If we are yielding (on the way out anyhow)
1209134591Sjulian	 * or the thread being saved is US,
1210134591Sjulian	 * then don't try be smart about preemption
1211134591Sjulian	 * or kicking off another CPU
1212134591Sjulian	 * as it won't help and may hinder.
1213134591Sjulian	 * In the YIELDING case, we are about to run whoever is
1214134591Sjulian	 * being put in the queue anyhow, and in the
1215134591Sjulian	 * OURSELF case, we are putting ourselves on the run queue
1216134591Sjulian	 * which also only happens when we are about to yield.
1217134591Sjulian	 */
1218134591Sjulian	if ((flags & SRQ_YIELDING) == 0) {
1219147182Sups		if (maybe_preempt(td))
1220147182Sups			return;
1221147182Sups	}
1222125295Sjeff	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
1223139317Sjeff		sched_load_add();
1224136170Sjulian	SLOT_USE(td->td_ksegrp);
1225136170Sjulian	runq_add(ke->ke_runq, ke, flags);
1226133520Sjulian	ke->ke_state = KES_ONRUNQ;
1227132118Sjhb	maybe_resched(td);
1228104964Sjeff}
1229147182Sups#endif /* SMP */
1230147182Sups
1231104964Sjeffvoid
1232121127Sjeffsched_rem(struct thread *td)
1233104964Sjeff{
1234121127Sjeff	struct kse *ke;
1235121127Sjeff
1236121127Sjeff	ke = td->td_kse;
1237104964Sjeff	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
1238124957Sjeff	    ("sched_rem: process swapped out"));
1239124957Sjeff	KASSERT((ke->ke_state == KES_ONRUNQ),
1240124957Sjeff	    ("sched_rem: KSE not on run queue"));
1241104964Sjeff	mtx_assert(&sched_lock, MA_OWNED);
1242139317Sjeff	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
1243139317Sjeff	    td, td->td_proc->p_comm, td->td_priority, curthread,
1244139317Sjeff	    curthread->td_proc->p_comm);
1245104964Sjeff
1246125295Sjeff	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
1247139317Sjeff		sched_load_rem();
1248136167Sjulian	SLOT_RELEASE(td->td_ksegrp);
1249134145Sjulian	runq_remove(ke->ke_runq, ke);
1250124955Sjeff
1251104964Sjeff	ke->ke_state = KES_THREAD;
1252104964Sjeff}
1253104964Sjeff
1254135295Sjulian/*
1255135295Sjulian * Select threads to run.
1256135295Sjulian * Notice that the running threads still consume a slot.
1257135295Sjulian */
1258104964Sjeffstruct kse *
1259104964Sjeffsched_choose(void)
1260104964Sjeff{
1261104964Sjeff	struct kse *ke;
1262124955Sjeff	struct runq *rq;
1263104964Sjeff
1264124955Sjeff#ifdef SMP
1265124955Sjeff	struct kse *kecpu;
1266124955Sjeff
1267124955Sjeff	rq = &runq;
1268104964Sjeff	ke = runq_choose(&runq);
1269124955Sjeff	kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);
1270104964Sjeff
1271124955Sjeff	if (ke == NULL ||
1272124955Sjeff	    (kecpu != NULL &&
1273124955Sjeff	     kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) {
1274133396Sjulian		CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu,
1275124955Sjeff		     PCPU_GET(cpuid));
1276124955Sjeff		ke = kecpu;
1277124955Sjeff		rq = &runq_pcpu[PCPU_GET(cpuid)];
1278124955Sjeff	} else {
1279133396Sjulian		CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke);
1280124955Sjeff	}
1281124955Sjeff
1282124955Sjeff#else
1283124955Sjeff	rq = &runq;
1284124955Sjeff	ke = runq_choose(&runq);
1285124955Sjeff#endif
1286124955Sjeff
1287104964Sjeff	if (ke != NULL) {
1288124955Sjeff		runq_remove(rq, ke);
1289104964Sjeff		ke->ke_state = KES_THREAD;
1290104964Sjeff
1291104964Sjeff		KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
1292124957Sjeff		    ("sched_choose: process swapped out"));
1293104964Sjeff	}
1294104964Sjeff	return (ke);
1295104964Sjeff}
1296104964Sjeff
1297104964Sjeffvoid
1298104964Sjeffsched_userret(struct thread *td)
1299104964Sjeff{
1300104964Sjeff	struct ksegrp *kg;
1301104964Sjeff	/*
1302104964Sjeff	 * XXX we cheat slightly on the locking here to avoid locking in
1303104964Sjeff	 * the usual case.  Setting td_priority here is essentially an
1304104964Sjeff	 * incomplete workaround for not setting it properly elsewhere.
1305104964Sjeff	 * Now that some interrupt handlers are threads, not setting it
1306104964Sjeff	 * properly elsewhere can clobber it in the window between setting
1307104964Sjeff	 * it here and returning to user mode, so don't waste time setting
1308104964Sjeff	 * it perfectly here.
1309104964Sjeff	 */
1310139453Sjhb	KASSERT((td->td_flags & TDF_BORROWING) == 0,
1311139453Sjhb	    ("thread with borrowed priority returning to userland"));
1312104964Sjeff	kg = td->td_ksegrp;
1313104964Sjeff	if (td->td_priority != kg->kg_user_pri) {
1314104964Sjeff		mtx_lock_spin(&sched_lock);
1315104964Sjeff		td->td_priority = kg->kg_user_pri;
1316139453Sjhb		td->td_base_pri = kg->kg_user_pri;
1317104964Sjeff		mtx_unlock_spin(&sched_lock);
1318104964Sjeff	}
1319104964Sjeff}
1320107126Sjeff
1321124955Sjeffvoid
1322124955Sjeffsched_bind(struct thread *td, int cpu)
1323124955Sjeff{
1324124955Sjeff	struct kse *ke;
1325124955Sjeff
1326124955Sjeff	mtx_assert(&sched_lock, MA_OWNED);
1327124955Sjeff	KASSERT(TD_IS_RUNNING(td),
1328124955Sjeff	    ("sched_bind: cannot bind non-running thread"));
1329124955Sjeff
1330124955Sjeff	ke = td->td_kse;
1331124955Sjeff
1332124955Sjeff	ke->ke_flags |= KEF_BOUND;
1333124955Sjeff#ifdef SMP
1334124955Sjeff	ke->ke_runq = &runq_pcpu[cpu];
1335124955Sjeff	if (PCPU_GET(cpuid) == cpu)
1336124955Sjeff		return;
1337124955Sjeff
1338124955Sjeff	ke->ke_state = KES_THREAD;
1339124955Sjeff
1340131473Sjhb	mi_switch(SW_VOL, NULL);
1341124955Sjeff#endif
1342124955Sjeff}
1343124955Sjeff
1344124955Sjeffvoid
1345124955Sjeffsched_unbind(struct thread* td)
1346124955Sjeff{
1347124955Sjeff	mtx_assert(&sched_lock, MA_OWNED);
1348124955Sjeff	td->td_kse->ke_flags &= ~KEF_BOUND;
1349124955Sjeff}
1350124955Sjeff
1351107126Sjeffint
1352145256Sjkoshysched_is_bound(struct thread *td)
1353145256Sjkoshy{
1354145256Sjkoshy	mtx_assert(&sched_lock, MA_OWNED);
1355145256Sjkoshy	return (td->td_kse->ke_flags & KEF_BOUND);
1356145256Sjkoshy}
1357145256Sjkoshy
1358159630Sdavidxuvoid
1359159630Sdavidxusched_relinquish(struct thread *td)
1360159630Sdavidxu{
1361159630Sdavidxu	struct ksegrp *kg;
1362159630Sdavidxu
1363159630Sdavidxu	kg = td->td_ksegrp;
1364159630Sdavidxu	mtx_lock_spin(&sched_lock);
1365159630Sdavidxu	if (kg->kg_pri_class == PRI_TIMESHARE)
1366159630Sdavidxu		sched_prio(td, PRI_MAX_TIMESHARE);
1367159630Sdavidxu	mi_switch(SW_VOL, NULL);
1368159630Sdavidxu	mtx_unlock_spin(&sched_lock);
1369159630Sdavidxu}
1370159630Sdavidxu
1371145256Sjkoshyint
1372125288Sjeffsched_load(void)
1373125288Sjeff{
1374125288Sjeff	return (sched_tdcnt);
1375125288Sjeff}
1376125288Sjeff
1377125288Sjeffint
1378107126Sjeffsched_sizeof_ksegrp(void)
1379107126Sjeff{
1380134791Sjulian	return (sizeof(struct ksegrp) + sizeof(struct kg_sched));
1381107126Sjeff}
1382159630Sdavidxu
1383107126Sjeffint
1384107126Sjeffsched_sizeof_proc(void)
1385107126Sjeff{
1386107126Sjeff	return (sizeof(struct proc));
1387107126Sjeff}
1388159630Sdavidxu
1389107126Sjeffint
1390107126Sjeffsched_sizeof_thread(void)
1391107126Sjeff{
1392134791Sjulian	return (sizeof(struct thread) + sizeof(struct kse));
1393107126Sjeff}
1394107137Sjeff
1395107137Sjefffixpt_t
1396121127Sjeffsched_pctcpu(struct thread *td)
1397107137Sjeff{
1398121147Sjeff	struct kse *ke;
1399121147Sjeff
1400121147Sjeff	ke = td->td_kse;
1401134791Sjulian	return (ke->ke_pctcpu);
1404107137Sjeff}
1405159570Sdavidxu
1406159570Sdavidxuvoid
1407159570Sdavidxusched_tick(void)
1408159570Sdavidxu{
1409159570Sdavidxu}
1410134791Sjulian#define KERN_SWITCH_INCLUDE 1
1411134791Sjulian#include "kern/kern_switch.c"
1412