sched_4bsd.c revision 177253
1104964Sjeff/*-
2104964Sjeff * Copyright (c) 1982, 1986, 1990, 1991, 1993
3104964Sjeff *	The Regents of the University of California.  All rights reserved.
4104964Sjeff * (c) UNIX System Laboratories, Inc.
5104964Sjeff * All or some portions of this file are derived from material licensed
6104964Sjeff * to the University of California by American Telephone and Telegraph
7104964Sjeff * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8104964Sjeff * the permission of UNIX System Laboratories, Inc.
9104964Sjeff *
10104964Sjeff * Redistribution and use in source and binary forms, with or without
11104964Sjeff * modification, are permitted provided that the following conditions
12104964Sjeff * are met:
13104964Sjeff * 1. Redistributions of source code must retain the above copyright
14104964Sjeff *    notice, this list of conditions and the following disclaimer.
15104964Sjeff * 2. Redistributions in binary form must reproduce the above copyright
16104964Sjeff *    notice, this list of conditions and the following disclaimer in the
17104964Sjeff *    documentation and/or other materials provided with the distribution.
18104964Sjeff * 4. Neither the name of the University nor the names of its contributors
19104964Sjeff *    may be used to endorse or promote products derived from this software
20104964Sjeff *    without specific prior written permission.
21104964Sjeff *
22104964Sjeff * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23104964Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24104964Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25104964Sjeff * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26104964Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27104964Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28104964Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29104964Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30104964Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31104964Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32104964Sjeff * SUCH DAMAGE.
33104964Sjeff */
34104964Sjeff
35116182Sobrien#include <sys/cdefs.h>
36116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sched_4bsd.c 177253 2008-03-16 10:58:09Z rwatson $");
37116182Sobrien
38147565Speter#include "opt_hwpmc_hooks.h"
39147565Speter
40104964Sjeff#include <sys/param.h>
41104964Sjeff#include <sys/systm.h>
42176750Smarcel#include <sys/cpuset.h>
43104964Sjeff#include <sys/kernel.h>
44104964Sjeff#include <sys/ktr.h>
45104964Sjeff#include <sys/lock.h>
46123871Sjhb#include <sys/kthread.h>
47104964Sjeff#include <sys/mutex.h>
48104964Sjeff#include <sys/proc.h>
49104964Sjeff#include <sys/resourcevar.h>
50104964Sjeff#include <sys/sched.h>
51104964Sjeff#include <sys/smp.h>
52104964Sjeff#include <sys/sysctl.h>
53104964Sjeff#include <sys/sx.h>
54139453Sjhb#include <sys/turnstile.h>
55161599Sdavidxu#include <sys/umtx.h>
56160039Sobrien#include <machine/pcb.h>
57134689Sjulian#include <machine/smp.h>
58104964Sjeff
59145256Sjkoshy#ifdef HWPMC_HOOKS
60145256Sjkoshy#include <sys/pmckern.h>
61145256Sjkoshy#endif
62145256Sjkoshy
63107135Sjeff/*
64107135Sjeff * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
65107135Sjeff * the range 100-256 Hz (approximately).
66107135Sjeff */
67107135Sjeff#define	ESTCPULIM(e) \
68107135Sjeff    min((e), INVERSE_ESTCPU_WEIGHT * (NICE_WEIGHT * (PRIO_MAX - PRIO_MIN) - \
69107135Sjeff    RQ_PPQ) + INVERSE_ESTCPU_WEIGHT - 1)
70122355Sbde#ifdef SMP
71122355Sbde#define	INVERSE_ESTCPU_WEIGHT	(8 * smp_cpus)
72122355Sbde#else
73107135Sjeff#define	INVERSE_ESTCPU_WEIGHT	8	/* 1 / (priorities per estcpu level). */
74122355Sbde#endif
75107135Sjeff#define	NICE_WEIGHT		1	/* Priorities per nice level. */
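
/*
 * Worked example, illustrative only (assumes the stock PRIO_MIN/PRIO_MAX
 * range of 40 and RQ_PPQ == 4): on a UP kernel INVERSE_ESTCPU_WEIGHT is 8,
 * so the limit above works out to
 *	ESTCPULIM(e) = min(e, 8 * (1 * 40 - 4) + 8 - 1) = min(e, 295),
 * which means td_estcpu can never contribute more than about 36 priority
 * steps (295 / 8) when resetpriority() computes a timeshare priority.
 */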
76107135Sjeff
77134791Sjulian/*
78163709Sjb * The schedulable entity that runs a context.
79164936Sjulian * This is an extension to the thread structure and is tailored to
80164936Sjulian * the requirements of this scheduler.
81163709Sjb */
82164936Sjulianstruct td_sched {
83164936Sjulian	TAILQ_ENTRY(td_sched) ts_procq;	/* (j/z) Run queue. */
84164936Sjulian	struct thread	*ts_thread;	/* (*) Active associated thread. */
85164936Sjulian	fixpt_t		ts_pctcpu;	/* (j) %cpu during p_swtime. */
86164936Sjulian	u_char		ts_rqindex;	/* (j) Run queue index. */
87164936Sjulian	int		ts_cpticks;	/* (j) Ticks of cpu time. */
88172264Sjeff	int		ts_slptime;	/* (j) Seconds !RUNNING. */
89164936Sjulian	struct runq	*ts_runq;	/* runq the thread is currently on */
90109145Sjeff};
91109145Sjeff
92134791Sjulian/* flags kept in td_flags */
93164936Sjulian#define TDF_DIDRUN	TDF_SCHED0	/* thread actually ran. */
94164936Sjulian#define TDF_EXIT	TDF_SCHED1	/* thread is being killed. */
95134791Sjulian#define TDF_BOUND	TDF_SCHED2
96134791Sjulian
97164936Sjulian#define ts_flags	ts_thread->td_flags
98164936Sjulian#define TSF_DIDRUN	TDF_DIDRUN /* thread actually ran. */
99164936Sjulian#define TSF_EXIT	TDF_EXIT /* thread is being killed. */
100164936Sjulian#define TSF_BOUND	TDF_BOUND /* stuck to one CPU */
101134791Sjulian
102164936Sjulian#define SKE_RUNQ_PCPU(ts)						\
103164936Sjulian    ((ts)->ts_runq != 0 && (ts)->ts_runq != &runq)
104124955Sjeff
105164936Sjulianstatic struct td_sched td_sched0;
106171488Sjeffstruct mtx sched_lock;
107134791Sjulian
108125288Sjeffstatic int	sched_tdcnt;	/* Total runnable threads in the system. */
109104964Sjeffstatic int	sched_quantum;	/* Roundrobin scheduling quantum in ticks. */
110112535Smux#define	SCHED_QUANTUM	(hz / 10)	/* Default sched quantum */
111104964Sjeff
112124955Sjeffstatic void	setup_runqs(void);
113123871Sjhbstatic void	schedcpu(void);
114124955Sjeffstatic void	schedcpu_thread(void);
115139453Sjhbstatic void	sched_priority(struct thread *td, u_char prio);
116104964Sjeffstatic void	sched_setup(void *dummy);
117104964Sjeffstatic void	maybe_resched(struct thread *td);
118163709Sjbstatic void	updatepri(struct thread *td);
119163709Sjbstatic void	resetpriority(struct thread *td);
120163709Sjbstatic void	resetpriority_thread(struct thread *td);
121134694Sjulian#ifdef SMP
122134688Sjulianstatic int	forward_wakeup(int  cpunum);
123134694Sjulian#endif
124104964Sjeff
125124955Sjeffstatic struct kproc_desc sched_kp = {
126124955Sjeff        "schedcpu",
127124955Sjeff        schedcpu_thread,
128124955Sjeff        NULL
129124955Sjeff};
130177253SrwatsonSYSINIT(schedcpu, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, kproc_start,
131177253Srwatson    &sched_kp);
132177253SrwatsonSYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
133104964Sjeff
134104964Sjeff/*
135104964Sjeff * Global run queue.
136104964Sjeff */
137104964Sjeffstatic struct runq runq;
138104964Sjeff
139124955Sjeff#ifdef SMP
140124955Sjeff/*
141124955Sjeff * Per-CPU run queues
142124955Sjeff */
143124955Sjeffstatic struct runq runq_pcpu[MAXCPU];
144124955Sjeff#endif
145124955Sjeff
146124955Sjeffstatic void
147124955Sjeffsetup_runqs(void)
148124955Sjeff{
149124955Sjeff#ifdef SMP
150124955Sjeff	int i;
151124955Sjeff
152124955Sjeff	for (i = 0; i < MAXCPU; ++i)
153124955Sjeff		runq_init(&runq_pcpu[i]);
154124955Sjeff#endif
155124955Sjeff
156124955Sjeff	runq_init(&runq);
157124955Sjeff}
158124955Sjeff
159104964Sjeffstatic int
160104964Sjeffsysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
161104964Sjeff{
162104964Sjeff	int error, new_val;
163104964Sjeff
164104964Sjeff	new_val = sched_quantum * tick;
165104964Sjeff	error = sysctl_handle_int(oidp, &new_val, 0, req);
166104964Sjeff        if (error != 0 || req->newptr == NULL)
167104964Sjeff		return (error);
168104964Sjeff	if (new_val < tick)
169104964Sjeff		return (EINVAL);
170104964Sjeff	sched_quantum = new_val / tick;
171104964Sjeff	hogticks = 2 * sched_quantum;
172104964Sjeff	return (0);
173104964Sjeff}
174104964Sjeff
175132589SscottlSYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD, 0, "Scheduler");
176130881Sscottl
177132589SscottlSYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
178132589Sscottl    "Scheduler name");
179130881Sscottl
180132589SscottlSYSCTL_PROC(_kern_sched, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW,
181132589Sscottl    0, sizeof sched_quantum, sysctl_kern_quantum, "I",
182132589Sscottl    "Roundrobin scheduling quantum in microseconds");
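
/*
 * Illustrative numbers for the sysctl handler above (assumes hz == 1000,
 * so tick == 1000 microseconds; other values of hz scale accordingly):
 * the default quantum is SCHED_QUANTUM = hz / 10 = 100 ticks, reported
 * as 100000 us.  Writing 50000 to kern.sched.quantum gives
 * sched_quantum = 50000 / 1000 = 50 ticks (a 50 ms quantum) and
 * hogticks = 100; anything below 1000 us is rejected with EINVAL.
 */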
183104964Sjeff
184134693Sjulian#ifdef SMP
185134688Sjulian/* Enable forwarding of wakeups to all other cpus */
186134688SjulianSYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");
187134688Sjulian
188134792Sjulianstatic int forward_wakeup_enabled = 1;
189134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
190134688Sjulian	   &forward_wakeup_enabled, 0,
191134688Sjulian	   "Forwarding of wakeup to idle CPUs");
192134688Sjulian
193134688Sjulianstatic int forward_wakeups_requested = 0;
194134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
195134688Sjulian	   &forward_wakeups_requested, 0,
196134688Sjulian	   "Requests for Forwarding of wakeup to idle CPUs");
197134688Sjulian
198134688Sjulianstatic int forward_wakeups_delivered = 0;
199134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
200134688Sjulian	   &forward_wakeups_delivered, 0,
201134688Sjulian	   "Completed Forwarding of wakeup to idle CPUs");
202134688Sjulian
203134792Sjulianstatic int forward_wakeup_use_mask = 1;
204134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
205134688Sjulian	   &forward_wakeup_use_mask, 0,
206134688Sjulian	   "Use the mask of idle cpus");
207134688Sjulian
208134688Sjulianstatic int forward_wakeup_use_loop = 0;
209134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
210134688Sjulian	   &forward_wakeup_use_loop, 0,
211134688Sjulian	   "Use a loop to find idle cpus");
212134688Sjulian
213134688Sjulianstatic int forward_wakeup_use_single = 0;
214134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
215134688Sjulian	   &forward_wakeup_use_single, 0,
216134688Sjulian	   "Only signal one idle cpu");
217134688Sjulian
218134688Sjulianstatic int forward_wakeup_use_htt = 0;
219134688SjulianSYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
220134688Sjulian	   &forward_wakeup_use_htt, 0,
221134688Sjulian	   "account for htt");
222135051Sjulian
223134693Sjulian#endif
224164936Sjulian#if 0
225135051Sjulianstatic int sched_followon = 0;
226135051SjulianSYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
227135051Sjulian	   &sched_followon, 0,
228135051Sjulian	   "allow threads to share a quantum");
229163709Sjb#endif
230135051Sjulian
231139317Sjeffstatic __inline void
232139317Sjeffsched_load_add(void)
233139317Sjeff{
234139317Sjeff	sched_tdcnt++;
235139317Sjeff	CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
236139317Sjeff}
237139317Sjeff
238139317Sjeffstatic __inline void
239139317Sjeffsched_load_rem(void)
240139317Sjeff{
241139317Sjeff	sched_tdcnt--;
242139317Sjeff	CTR1(KTR_SCHED, "global load: %d", sched_tdcnt);
243139317Sjeff}
244104964Sjeff/*
245104964Sjeff * Arrange to reschedule if necessary, taking the priorities and
246104964Sjeff * schedulers into account.
247104964Sjeff */
248104964Sjeffstatic void
249104964Sjeffmaybe_resched(struct thread *td)
250104964Sjeff{
251104964Sjeff
252170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
253134791Sjulian	if (td->td_priority < curthread->td_priority)
254111032Sjulian		curthread->td_flags |= TDF_NEEDRESCHED;
255104964Sjeff}
256104964Sjeff
257104964Sjeff/*
258104964Sjeff * Constants for digital decay and forget:
259163709Sjb *	90% of (td_estcpu) usage in 5 * loadav time
260164936Sjulian *	95% of (ts_pctcpu) usage in 60 seconds (load insensitive)
261104964Sjeff *          Note that, as ps(1) mentions, this can let percentages
262104964Sjeff *          total over 100% (I've seen 137.9% for 3 processes).
263104964Sjeff *
264163709Sjb * Note that sched_clock() updates td_estcpu and ts_cpticks asynchronously.
265104964Sjeff *
266163709Sjb * We wish to decay away 90% of td_estcpu in (5 * loadavg) seconds.
267104964Sjeff * That is, the system wants to compute a value of decay such
268104964Sjeff * that the following for loop:
269104964Sjeff * 	for (i = 0; i < (5 * loadavg); i++)
270163709Sjb * 		td_estcpu *= decay;
271104964Sjeff * will compute
272163709Sjb * 	td_estcpu *= 0.1;
273104964Sjeff * for all values of loadavg:
274104964Sjeff *
275104964Sjeff * Mathematically this loop can be expressed by saying:
276104964Sjeff * 	decay ** (5 * loadavg) ~= .1
277104964Sjeff *
278104964Sjeff * The system computes decay as:
279104964Sjeff * 	decay = (2 * loadavg) / (2 * loadavg + 1)
280104964Sjeff *
281104964Sjeff * We wish to prove that the system's computation of decay
282104964Sjeff * will always fulfill the equation:
283104964Sjeff * 	decay ** (5 * loadavg) ~= .1
284104964Sjeff *
285104964Sjeff * If we compute b as:
286104964Sjeff * 	b = 2 * loadavg
287104964Sjeff * then
288104964Sjeff * 	decay = b / (b + 1)
289104964Sjeff *
290104964Sjeff * We now need to prove two things:
291104964Sjeff *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
292104964Sjeff *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
293104964Sjeff *
294104964Sjeff * Facts:
295104964Sjeff *         For x close to zero, exp(x) =~ 1 + x, since
296104964Sjeff *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
297104964Sjeff *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
298104964Sjeff *         For x close to zero, ln(1+x) =~ x, since
299104964Sjeff *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
300104964Sjeff *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
301104964Sjeff *         ln(.1) =~ -2.30
302104964Sjeff *
303104964Sjeff * Proof of (1):
304104964Sjeff *    Solve (factor)**(power) =~ .1 given power (5*loadav):
305104964Sjeff *	solving for factor,
306104964Sjeff *      ln(factor) =~ (-2.30/5*loadav), or
307104964Sjeff *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
308104964Sjeff *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
309104964Sjeff *
310104964Sjeff * Proof of (2):
311104964Sjeff *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
312104964Sjeff *	solving for power,
313104964Sjeff *      power*ln(b/(b+1)) =~ -2.30, or
314104964Sjeff *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
315104964Sjeff *
316104964Sjeff * Actual power values for the implemented algorithm are as follows:
317104964Sjeff *      loadav: 1       2       3       4
318104964Sjeff *      power:  5.68    10.32   14.94   19.55
319104964Sjeff */
320104964Sjeff
321104964Sjeff/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
322104964Sjeff#define	loadfactor(loadav)	(2 * (loadav))
323104964Sjeff#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
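
/*
 * Illustrative sketch, not part of the scheduler and not compiled: apply
 * the per-second decay "times" times to an estcpu value.  With a load
 * average of 1.0 the factor is 2 / 3, so decay_cpu_n(FSCALE, 255, 6)
 * returns about 22, i.e. roughly a tenth of the original value, matching
 * the power table above.
 */
#if 0
static u_int
decay_cpu_n(fixpt_t loadav, u_int estcpu, int times)
{
	fixpt_t loadfac;

	loadfac = loadfactor(loadav);
	while (times-- > 0)
		estcpu = decay_cpu(loadfac, estcpu);
	return (estcpu);
}
#endif
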
324104964Sjeff
325164936Sjulian/* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
326104964Sjeffstatic fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */
327158082SjmgSYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
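
/*
 * Illustrative check of the constant above: schedcpu() multiplies
 * ts_pctcpu by ccpu once per second, so after 60 seconds of idleness
 * the remaining weight is exp(-60/20) = exp(-3) ~= 0.05, i.e. about
 * 95% of the old %cpu figure has been forgotten, as advertised.
 */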
328104964Sjeff
329104964Sjeff/*
330104964Sjeff * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
331104964Sjeff * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
332104964Sjeff * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
333104964Sjeff *
334104964Sjeff * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
335104964Sjeff *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
336104964Sjeff *
337104964Sjeff * If you don't want to bother with the faster/more-accurate formula, you
338104964Sjeff * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
339104964Sjeff * (more general) method of calculating the %age of CPU used by a process.
340104964Sjeff */
341104964Sjeff#define	CCPU_SHIFT	11
342104964Sjeff
343104964Sjeff/*
344104964Sjeff * Recompute process priorities, every hz ticks.
345104964Sjeff * MP-safe, called without the Giant mutex.
346104964Sjeff */
347104964Sjeff/* ARGSUSED */
348104964Sjeffstatic void
349123871Sjhbschedcpu(void)
350104964Sjeff{
351104964Sjeff	register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
352104964Sjeff	struct thread *td;
353104964Sjeff	struct proc *p;
354164936Sjulian	struct td_sched *ts;
355118972Sjhb	int awake, realstathz;
356104964Sjeff
357104964Sjeff	realstathz = stathz ? stathz : hz;
358104964Sjeff	sx_slock(&allproc_lock);
359104964Sjeff	FOREACH_PROC_IN_SYSTEM(p) {
360170293Sjeff		PROC_SLOCK(p);
361163709Sjb		FOREACH_THREAD_IN_PROC(p, td) {
362104964Sjeff			awake = 0;
363170293Sjeff			thread_lock(td);
364164936Sjulian			ts = td->td_sched;
365163709Sjb			/*
366163709Sjb			 * Increment sleep time (if sleeping).  We
367163709Sjb			 * ignore overflow, as above.
368163709Sjb			 */
369163709Sjb			/*
370164936Sjulian			 * The td_sched slptimes are not touched in wakeup
371164936Sjulian			 * because the thread may not HAVE everything in
372164936Sjulian			 * memory? XXX I think this is out of date.
373163709Sjb			 */
374166188Sjeff			if (TD_ON_RUNQ(td)) {
375163709Sjb				awake = 1;
376164936Sjulian				ts->ts_flags &= ~TSF_DIDRUN;
377166188Sjeff			} else if (TD_IS_RUNNING(td)) {
378163709Sjb				awake = 1;
379164936Sjulian				/* Do not clear TSF_DIDRUN */
380164936Sjulian			} else if (ts->ts_flags & TSF_DIDRUN) {
381163709Sjb				awake = 1;
382164936Sjulian				ts->ts_flags &= ~TSF_DIDRUN;
383163709Sjb			}
384163709Sjb
385163709Sjb			/*
386164936Sjulian			 * ts_pctcpu is only for ps and ttyinfo().
387163709Sjb			 */
388164936Sjulian			ts->ts_pctcpu = (ts->ts_pctcpu * ccpu) >> FSHIFT;
389163709Sjb			/*
390164936Sjulian			 * If the td_sched has been idle the entire second,
391163709Sjb			 * stop recalculating its priority until
392163709Sjb			 * it wakes up.
393163709Sjb			 */
394164936Sjulian			if (ts->ts_cpticks != 0) {
395163709Sjb#if	(FSHIFT >= CCPU_SHIFT)
396164936Sjulian				ts->ts_pctcpu += (realstathz == 100)
397164936Sjulian				    ? ((fixpt_t) ts->ts_cpticks) <<
398164936Sjulian				    (FSHIFT - CCPU_SHIFT) :
399164936Sjulian				    100 * (((fixpt_t) ts->ts_cpticks)
400164936Sjulian				    << (FSHIFT - CCPU_SHIFT)) / realstathz;
401163709Sjb#else
402164936Sjulian				ts->ts_pctcpu += ((FSCALE - ccpu) *
403164936Sjulian				    (ts->ts_cpticks *
404164936Sjulian				    FSCALE / realstathz)) >> FSHIFT;
405163709Sjb#endif
406164936Sjulian				ts->ts_cpticks = 0;
407164267Sdavidxu			}
408104964Sjeff			/*
409163709Sjb			 * If there are ANY running threads in this process,
410104964Sjeff			 * then don't count it as sleeping.
411164936Sjulian			 * XXX: this is broken.
413104964Sjeff			 */
414104964Sjeff			if (awake) {
415172264Sjeff				if (ts->ts_slptime > 1) {
416104964Sjeff					/*
417104964Sjeff					 * In an ideal world, this should not
418104964Sjeff					 * happen, because whoever woke us
419104964Sjeff					 * up from the long sleep should have
420104964Sjeff					 * unwound the slptime and reset our
421104964Sjeff					 * priority before we run at the stale
422104964Sjeff					 * priority.  Should KASSERT at some
423104964Sjeff					 * point when all the cases are fixed.
424104964Sjeff					 */
425163709Sjb					updatepri(td);
426163709Sjb				}
427172264Sjeff				ts->ts_slptime = 0;
428163709Sjb			} else
429172264Sjeff				ts->ts_slptime++;
430172264Sjeff			if (ts->ts_slptime > 1) {
431170293Sjeff				thread_unlock(td);
432163709Sjb				continue;
433170293Sjeff			}
434163709Sjb			td->td_estcpu = decay_cpu(loadfac, td->td_estcpu);
435163709Sjb		      	resetpriority(td);
436163709Sjb			resetpriority_thread(td);
437170293Sjeff			thread_unlock(td);
438163709Sjb		} /* end of thread loop */
439170293Sjeff		PROC_SUNLOCK(p);
440104964Sjeff	} /* end of process loop */
441104964Sjeff	sx_sunlock(&allproc_lock);
442104964Sjeff}
443104964Sjeff
444104964Sjeff/*
445123871Sjhb * Main loop for a kthread that executes schedcpu once a second.
446123871Sjhb */
447123871Sjhbstatic void
448124955Sjeffschedcpu_thread(void)
449123871Sjhb{
450123871Sjhb
451123871Sjhb	for (;;) {
452123871Sjhb		schedcpu();
453167086Sjhb		pause("-", hz);
454123871Sjhb	}
455123871Sjhb}
456123871Sjhb
457123871Sjhb/*
458104964Sjeff * Recalculate the priority of a process after it has slept for a while.
459163709Sjb * For all load averages >= 1 and max td_estcpu of 255, sleeping for at
460163709Sjb * least six times the loadfactor will decay td_estcpu to zero.
461104964Sjeff */
462104964Sjeffstatic void
463163709Sjbupdatepri(struct thread *td)
464104964Sjeff{
465172264Sjeff	struct td_sched *ts;
466172264Sjeff	fixpt_t loadfac;
467172264Sjeff	unsigned int newcpu;
468104964Sjeff
469172264Sjeff	ts = td->td_sched;
470118972Sjhb	loadfac = loadfactor(averunnable.ldavg[0]);
471172264Sjeff	if (ts->ts_slptime > 5 * loadfac)
472163709Sjb		td->td_estcpu = 0;
473104964Sjeff	else {
474163709Sjb		newcpu = td->td_estcpu;
475172264Sjeff		ts->ts_slptime--;	/* was incremented in schedcpu() */
476172264Sjeff		while (newcpu && --ts->ts_slptime)
477104964Sjeff			newcpu = decay_cpu(loadfac, newcpu);
478163709Sjb		td->td_estcpu = newcpu;
479104964Sjeff	}
480104964Sjeff}
481104964Sjeff
482104964Sjeff/*
483104964Sjeff * Compute the priority of a process when running in user mode.
484104964Sjeff * Arrange to reschedule if the resulting priority is better
485104964Sjeff * than that of the current process.
486104964Sjeff */
487104964Sjeffstatic void
488163709Sjbresetpriority(struct thread *td)
489104964Sjeff{
490104964Sjeff	register unsigned int newpriority;
491104964Sjeff
492163709Sjb	if (td->td_pri_class == PRI_TIMESHARE) {
493163709Sjb		newpriority = PUSER + td->td_estcpu / INVERSE_ESTCPU_WEIGHT +
494163709Sjb		    NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN);
495104964Sjeff		newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
496104964Sjeff		    PRI_MAX_TIMESHARE);
497163709Sjb		sched_user_prio(td, newpriority);
498104964Sjeff	}
499104964Sjeff}
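
/*
 * Worked example of the formula above, illustrative only (assumes the
 * stock priority layout where PUSER == PRI_MIN_TIMESHARE == 160,
 * PRIO_MIN == -20 and the UP INVERSE_ESTCPU_WEIGHT of 8): a nice-0
 * thread that has accumulated td_estcpu == 80 is assigned
 *	160 + 80 / 8 + 1 * (0 - (-20)) = 190,
 * which is still inside the timeshare range, so no clamping applies.
 */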
500104964Sjeff
501139453Sjhb/*
502164936Sjulian * Update the thread's priority when the associated process's user
503139453Sjhb * priority changes.
504139453Sjhb */
505139453Sjhbstatic void
506163709Sjbresetpriority_thread(struct thread *td)
507139453Sjhb{
508139453Sjhb
509139453Sjhb	/* Only change threads with a time sharing user priority. */
510139453Sjhb	if (td->td_priority < PRI_MIN_TIMESHARE ||
511139453Sjhb	    td->td_priority > PRI_MAX_TIMESHARE)
512139453Sjhb		return;
513139453Sjhb
514139453Sjhb	/* XXX the whole needresched thing is broken, but not silly. */
515139453Sjhb	maybe_resched(td);
516139453Sjhb
517163709Sjb	sched_prio(td, td->td_user_pri);
518139453Sjhb}
519139453Sjhb
520104964Sjeff/* ARGSUSED */
521104964Sjeffstatic void
522104964Sjeffsched_setup(void *dummy)
523104964Sjeff{
524124955Sjeff	setup_runqs();
525118972Sjhb
526104964Sjeff	if (sched_quantum == 0)
527104964Sjeff		sched_quantum = SCHED_QUANTUM;
528104964Sjeff	hogticks = 2 * sched_quantum;
529104964Sjeff
530125288Sjeff	/* Account for thread0. */
531139317Sjeff	sched_load_add();
532104964Sjeff}
533104964Sjeff
534104964Sjeff/* External interfaces start here */
535134791Sjulian/*
536134791Sjulian * Very early in the boot some setup of scheduler-specific
537145109Smaxim * parts of proc0 and of some scheduler resources needs to be done.
538134791Sjulian * Called from:
539134791Sjulian *  proc0_init()
540134791Sjulian */
541134791Sjulianvoid
542134791Sjulianschedinit(void)
543134791Sjulian{
544134791Sjulian	/*
545134791Sjulian	 * Set up the scheduler specific parts of proc0.
546134791Sjulian	 */
547134791Sjulian	proc0.p_sched = NULL; /* XXX */
548164936Sjulian	thread0.td_sched = &td_sched0;
549170293Sjeff	thread0.td_lock = &sched_lock;
550164936Sjulian	td_sched0.ts_thread = &thread0;
551171488Sjeff	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
552134791Sjulian}
553134791Sjulian
554104964Sjeffint
555104964Sjeffsched_runnable(void)
556104964Sjeff{
557124955Sjeff#ifdef SMP
558124955Sjeff	return runq_check(&runq) + runq_check(&runq_pcpu[PCPU_GET(cpuid)]);
559124955Sjeff#else
560124955Sjeff	return runq_check(&runq);
561124955Sjeff#endif
562104964Sjeff}
563104964Sjeff
564104964Sjeffint
565104964Sjeffsched_rr_interval(void)
566104964Sjeff{
567104964Sjeff	if (sched_quantum == 0)
568104964Sjeff		sched_quantum = SCHED_QUANTUM;
569104964Sjeff	return (sched_quantum);
570104964Sjeff}
571104964Sjeff
572104964Sjeff/*
573104964Sjeff * We adjust the priority of the current process.  The priority of
574104964Sjeff * a process gets worse as it accumulates CPU time.  The cpu usage
575163709Sjb * estimator (td_estcpu) is increased here.  resetpriority() will
576163709Sjb * compute a different priority each time td_estcpu increases by
577104964Sjeff * INVERSE_ESTCPU_WEIGHT
578104964Sjeff * (until MAXPRI is reached).  The cpu usage estimator ramps up
579104964Sjeff * quite quickly when the process is running (linearly), and decays
580104964Sjeff * away exponentially, at a rate which is proportionally slower when
581104964Sjeff * the system is busy.  The basic principle is that the system will
582104964Sjeff * 90% forget that the process used a lot of CPU time in 5 * loadav
583104964Sjeff * seconds.  This causes the system to favor processes which haven't
584104964Sjeff * run much recently, and to round-robin among other processes.
585104964Sjeff */
586104964Sjeffvoid
587121127Sjeffsched_clock(struct thread *td)
588104964Sjeff{
589164936Sjulian	struct td_sched *ts;
590104964Sjeff
591170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
592164936Sjulian	ts = td->td_sched;
593113356Sjeff
594164936Sjulian	ts->ts_cpticks++;
595163709Sjb	td->td_estcpu = ESTCPULIM(td->td_estcpu + 1);
596163709Sjb	if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
597163709Sjb		resetpriority(td);
598163709Sjb		resetpriority_thread(td);
599104964Sjeff	}
600173081Sjhb
601173081Sjhb	/*
602173081Sjhb	 * Force a context switch if the current thread has used up a full
603173081Sjhb	 * quantum (default quantum is 100ms).
604173081Sjhb	 */
605173081Sjhb	if (!TD_IS_IDLETHREAD(td) &&
606173081Sjhb	    ticks - PCPU_GET(switchticks) >= sched_quantum)
607173081Sjhb		td->td_flags |= TDF_NEEDRESCHED;
608104964Sjeff}
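
/*
 * Illustrative numbers for sched_clock() above (assumes the common
 * stathz of 128): a thread that stays on cpu gains about 128 td_estcpu
 * per second, and resetpriority() runs on every 8th increment, so its
 * user priority degrades by roughly 16 steps per second until
 * ESTCPULIM() caps td_estcpu and the once-a-second decay in schedcpu()
 * balances the growth.
 */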
609118972Sjhb
610104964Sjeff/*
611104964Sjeff * Charge the child's scheduling CPU usage to the parent.
612104964Sjeff */
613104964Sjeffvoid
614132372Sjuliansched_exit(struct proc *p, struct thread *td)
615104964Sjeff{
616163709Sjb
617163709Sjb	CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d",
618173600Sjulian	    td, td->td_name, td->td_priority);
619170293Sjeff	PROC_SLOCK_ASSERT(p, MA_OWNED);
620164936Sjulian	sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
621113356Sjeff}
622113356Sjeff
623113356Sjeffvoid
624164936Sjuliansched_exit_thread(struct thread *td, struct thread *child)
625113356Sjeff{
626113923Sjhb
627139317Sjeff	CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d",
628173600Sjulian	    child, child->td_name, child->td_priority);
629170293Sjeff	thread_lock(td);
630164936Sjulian	td->td_estcpu = ESTCPULIM(td->td_estcpu + child->td_estcpu);
631170293Sjeff	thread_unlock(td);
632170293Sjeff	mtx_lock_spin(&sched_lock);
633127894Sdfr	if ((child->td_proc->p_flag & P_NOLOAD) == 0)
634139317Sjeff		sched_load_rem();
635170293Sjeff	mtx_unlock_spin(&sched_lock);
636113356Sjeff}
637109145Sjeff
638113356Sjeffvoid
639134791Sjuliansched_fork(struct thread *td, struct thread *childtd)
640113356Sjeff{
641134791Sjulian	sched_fork_thread(td, childtd);
642113356Sjeff}
643113356Sjeff
644113356Sjeffvoid
645134791Sjuliansched_fork_thread(struct thread *td, struct thread *childtd)
646113356Sjeff{
647164936Sjulian	childtd->td_estcpu = td->td_estcpu;
648170293Sjeff	childtd->td_lock = &sched_lock;
649176750Smarcel	childtd->td_cpuset = cpuset_ref(td->td_cpuset);
650134791Sjulian	sched_newthread(childtd);
651104964Sjeff}
652104964Sjeff
653104964Sjeffvoid
654130551Sjuliansched_nice(struct proc *p, int nice)
655104964Sjeff{
656139453Sjhb	struct thread *td;
657113873Sjhb
658130551Sjulian	PROC_LOCK_ASSERT(p, MA_OWNED);
659170293Sjeff	PROC_SLOCK_ASSERT(p, MA_OWNED);
660130551Sjulian	p->p_nice = nice;
661163709Sjb	FOREACH_THREAD_IN_PROC(p, td) {
662170293Sjeff		thread_lock(td);
663163709Sjb		resetpriority(td);
664163709Sjb		resetpriority_thread(td);
665170293Sjeff		thread_unlock(td);
666163709Sjb	}
667104964Sjeff}
668104964Sjeff
669113356Sjeffvoid
670163709Sjbsched_class(struct thread *td, int class)
671113356Sjeff{
672170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
673163709Sjb	td->td_pri_class = class;
674113356Sjeff}
675113356Sjeff
676105127Sjulian/*
677105127Sjulian * Adjust the priority of a thread.
678105127Sjulian */
679139453Sjhbstatic void
680139453Sjhbsched_priority(struct thread *td, u_char prio)
681104964Sjeff{
682139317Sjeff	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
683173600Sjulian	    td, td->td_name, td->td_priority, prio, curthread,
684173600Sjulian	    curthread->td_name);
685104964Sjeff
686170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
687139453Sjhb	if (td->td_priority == prio)
688139453Sjhb		return;
689166188Sjeff	td->td_priority = prio;
690166188Sjeff	if (TD_ON_RUNQ(td) &&
691166188Sjeff	    td->td_sched->ts_rqindex != (prio / RQ_PPQ)) {
692166188Sjeff		sched_rem(td);
693166188Sjeff		sched_add(td, SRQ_BORING);
694104964Sjeff	}
695104964Sjeff}
696104964Sjeff
697139453Sjhb/*
698139453Sjhb * Update a thread's priority when it is lent another thread's
699139453Sjhb * priority.
700139453Sjhb */
701104964Sjeffvoid
702139453Sjhbsched_lend_prio(struct thread *td, u_char prio)
703139453Sjhb{
704139453Sjhb
705139453Sjhb	td->td_flags |= TDF_BORROWING;
706139453Sjhb	sched_priority(td, prio);
707139453Sjhb}
708139453Sjhb
709139453Sjhb/*
710139453Sjhb * Restore a thread's priority when priority propagation is
711139453Sjhb * over.  The prio argument is the minimum priority the thread
712139453Sjhb * needs to have to satisfy other possible priority lending
713139453Sjhb * requests.  If the thread's regular priority is less
714139453Sjhb * important than prio, the thread will keep a priority boost
715139453Sjhb * of prio.
716139453Sjhb */
717139453Sjhbvoid
718139453Sjhbsched_unlend_prio(struct thread *td, u_char prio)
719139453Sjhb{
720139453Sjhb	u_char base_pri;
721139453Sjhb
722139453Sjhb	if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
723139453Sjhb	    td->td_base_pri <= PRI_MAX_TIMESHARE)
724163709Sjb		base_pri = td->td_user_pri;
725139453Sjhb	else
726139453Sjhb		base_pri = td->td_base_pri;
727139453Sjhb	if (prio >= base_pri) {
728139453Sjhb		td->td_flags &= ~TDF_BORROWING;
729139453Sjhb		sched_prio(td, base_pri);
730139453Sjhb	} else
731139453Sjhb		sched_lend_prio(td, prio);
732139453Sjhb}
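
/*
 * Illustrative use of the two functions above (all priority values
 * hypothetical): a timesharing thread whose td_user_pri is 180 is lent
 * priority 84 by sched_lend_prio() while it holds a contested lock.
 * If priority propagation later requires only 96 of it,
 * sched_unlend_prio(td, 96) compares 96 against 180, keeps the boost
 * and re-lends 96; if the remaining requirement were 200 instead,
 * TDF_BORROWING would be cleared and the thread would drop back to
 * priority 180.
 */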
733139453Sjhb
734139453Sjhbvoid
735139453Sjhbsched_prio(struct thread *td, u_char prio)
736139453Sjhb{
737139453Sjhb	u_char oldprio;
738139453Sjhb
739139453Sjhb	/* First, update the base priority. */
740139453Sjhb	td->td_base_pri = prio;
741139453Sjhb
742139453Sjhb	/*
743139453Sjhb	 * If the thread is borrowing another thread's priority, don't ever
744139453Sjhb	 * lower the priority.
745139453Sjhb	 */
746139453Sjhb	if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
747139453Sjhb		return;
748139453Sjhb
749139453Sjhb	/* Change the real priority. */
750139453Sjhb	oldprio = td->td_priority;
751139453Sjhb	sched_priority(td, prio);
752139453Sjhb
753139453Sjhb	/*
754139453Sjhb	 * If the thread is on a turnstile, then let the turnstile update
755139453Sjhb	 * its state.
756139453Sjhb	 */
757139453Sjhb	if (TD_ON_LOCK(td) && oldprio != prio)
758139453Sjhb		turnstile_adjust(td, oldprio);
759139453Sjhb}
760139453Sjhb
761139453Sjhbvoid
762163709Sjbsched_user_prio(struct thread *td, u_char prio)
763161599Sdavidxu{
764161599Sdavidxu	u_char oldprio;
765161599Sdavidxu
766174536Sdavidxu	THREAD_LOCK_ASSERT(td, MA_OWNED);
767163709Sjb	td->td_base_user_pri = prio;
768164177Sdavidxu	if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
769164177Sdavidxu		return;
770163709Sjb	oldprio = td->td_user_pri;
771163709Sjb	td->td_user_pri = prio;
772161599Sdavidxu}
773161599Sdavidxu
774161599Sdavidxuvoid
775161599Sdavidxusched_lend_user_prio(struct thread *td, u_char prio)
776161599Sdavidxu{
777161599Sdavidxu	u_char oldprio;
778161599Sdavidxu
779174536Sdavidxu	THREAD_LOCK_ASSERT(td, MA_OWNED);
780161599Sdavidxu	td->td_flags |= TDF_UBORROWING;
781163709Sjb	oldprio = td->td_user_pri;
782163709Sjb	td->td_user_pri = prio;
783161599Sdavidxu}
784161599Sdavidxu
785161599Sdavidxuvoid
786161599Sdavidxusched_unlend_user_prio(struct thread *td, u_char prio)
787161599Sdavidxu{
788161599Sdavidxu	u_char base_pri;
789161599Sdavidxu
790174536Sdavidxu	THREAD_LOCK_ASSERT(td, MA_OWNED);
791163709Sjb	base_pri = td->td_base_user_pri;
792161599Sdavidxu	if (prio >= base_pri) {
793161599Sdavidxu		td->td_flags &= ~TDF_UBORROWING;
794163709Sjb		sched_user_prio(td, base_pri);
795174536Sdavidxu	} else {
796161599Sdavidxu		sched_lend_user_prio(td, prio);
797174536Sdavidxu	}
798161599Sdavidxu}
799161599Sdavidxu
800161599Sdavidxuvoid
801177085Sjeffsched_sleep(struct thread *td, int pri)
802104964Sjeff{
803113923Sjhb
804170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
805172264Sjeff	td->td_slptick = ticks;
806172264Sjeff	td->td_sched->ts_slptime = 0;
807177085Sjeff	if (pri)
808177085Sjeff		sched_prio(td, pri);
809177085Sjeff	if (TD_IS_SUSPENDED(td) || pri <= PSOCK)
810177085Sjeff		td->td_flags |= TDF_CANSWAP;
811104964Sjeff}
812104964Sjeff
813104964Sjeffvoid
814135051Sjuliansched_switch(struct thread *td, struct thread *newtd, int flags)
815104964Sjeff{
816164936Sjulian	struct td_sched *ts;
817104964Sjeff	struct proc *p;
818104964Sjeff
819164936Sjulian	ts = td->td_sched;
820104964Sjeff	p = td->td_proc;
821104964Sjeff
822170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
823170293Sjeff	/*
824170293Sjeff	 * Switch to the sched lock to fix things up and pick
825170293Sjeff	 * a new thread.
826170293Sjeff	 */
827170293Sjeff	if (td->td_lock != &sched_lock) {
828170293Sjeff		mtx_lock_spin(&sched_lock);
829170293Sjeff		thread_unlock(td);
830170293Sjeff	}
831104964Sjeff
832125295Sjeff	if ((p->p_flag & P_NOLOAD) == 0)
833139317Sjeff		sched_load_rem();
834135051Sjulian
835138527Sups	if (newtd)
836138527Sups		newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED);
837138527Sups
838113339Sjulian	td->td_lastcpu = td->td_oncpu;
839132266Sjhb	td->td_flags &= ~TDF_NEEDRESCHED;
840144777Sups	td->td_owepreempt = 0;
841113339Sjulian	td->td_oncpu = NOCPU;
842104964Sjeff	/*
843104964Sjeff	 * At the last moment, if this thread is still marked RUNNING,
844104964Sjeff	 * then put it back on the run queue as it has not been suspended
845131473Sjhb	 * or stopped or anything else similar.  We never put the idle
846131473Sjhb	 * threads on the run queue, however.
847104964Sjeff	 */
848166415Sjulian	if (td->td_flags & TDF_IDLETD) {
849131473Sjhb		TD_SET_CAN_RUN(td);
850166415Sjulian#ifdef SMP
851166415Sjulian		idle_cpus_mask &= ~PCPU_GET(cpumask);
852166415Sjulian#endif
853166415Sjulian	} else {
854134791Sjulian		if (TD_IS_RUNNING(td)) {
855164936Sjulian			/* Put us back on the run queue. */
856166188Sjeff			sched_add(td, (flags & SW_PREEMPT) ?
857136170Sjulian			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
858136170Sjulian			    SRQ_OURSELF|SRQ_YIELDING);
859134791Sjulian		}
860104964Sjeff	}
861136170Sjulian	if (newtd) {
862136170Sjulian		/*
863136170Sjulian		 * The thread we are about to run needs to be counted
864136170Sjulian		 * as if it had been added to the run queue and selected.
865136170Sjulian		 * It came from:
866136170Sjulian		 * * A preemption
867136170Sjulian		 * * An upcall
868136170Sjulian		 * * A followon
869136170Sjulian		 */
870136170Sjulian		KASSERT((newtd->td_inhibitors == 0),
871165693Srwatson			("trying to run inhibited thread"));
872164936Sjulian		newtd->td_sched->ts_flags |= TSF_DIDRUN;
873136170Sjulian        	TD_SET_RUNNING(newtd);
874136170Sjulian		if ((newtd->td_proc->p_flag & P_NOLOAD) == 0)
875139317Sjeff			sched_load_add();
876136170Sjulian	} else {
877131473Sjhb		newtd = choosethread();
878136170Sjulian	}
879170293Sjeff	MPASS(newtd->td_lock == &sched_lock);
880136170Sjulian
881145256Sjkoshy	if (td != newtd) {
882145256Sjkoshy#ifdef	HWPMC_HOOKS
883145256Sjkoshy		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
884145256Sjkoshy			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
885145256Sjkoshy#endif
886166415Sjulian                /* I feel sleepy */
887174629Sjeff		lock_profile_release_lock(&sched_lock.lock_object);
888170358Sjeff		cpu_switch(td, newtd, td->td_lock);
889174629Sjeff		lock_profile_obtain_lock_success(&sched_lock.lock_object,
890174629Sjeff		    0, 0, __FILE__, __LINE__);
891166415Sjulian		/*
892166415Sjulian		 * Where am I?  What year is it?
893166415Sjulian		 * We are in the same thread that went to sleep above,
894166415Sjulian		 * but any amount of time may have passed. All our context
895166415Sjulian		 * will still be available as will local variables.
896166415Sjulian		 * PCPU values however may have changed as we may have
897166415Sjulian		 * changed CPU so don't trust cached values of them.
898166415Sjulian		 * New threads will go to fork_exit() instead of here
899166415Sjulian		 * so if you change things here you may need to change
900166415Sjulian		 * things there too.
901166415Sjulian		 * If the thread above was exiting it will never wake
902166415Sjulian		 * up again here, so either it has saved everything it
903166415Sjulian		 * needed to, or the thread_wait() or wait() will
904166415Sjulian		 * need to reap it.
905166415Sjulian		 */
906145256Sjkoshy#ifdef	HWPMC_HOOKS
907145256Sjkoshy		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
908145256Sjkoshy			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
909145256Sjkoshy#endif
910145256Sjkoshy	}
911145256Sjkoshy
912166415Sjulian#ifdef SMP
913166415Sjulian	if (td->td_flags & TDF_IDLETD)
914166415Sjulian		idle_cpus_mask |= PCPU_GET(cpumask);
915166415Sjulian#endif
916121128Sjeff	sched_lock.mtx_lock = (uintptr_t)td;
917121128Sjeff	td->td_oncpu = PCPU_GET(cpuid);
918170293Sjeff	MPASS(td->td_lock == &sched_lock);
919104964Sjeff}
920104964Sjeff
921104964Sjeffvoid
922104964Sjeffsched_wakeup(struct thread *td)
923104964Sjeff{
924172264Sjeff	struct td_sched *ts;
925172264Sjeff
926170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
927172264Sjeff	ts = td->td_sched;
928177085Sjeff	td->td_flags &= ~TDF_CANSWAP;
929172264Sjeff	if (ts->ts_slptime > 1) {
930163709Sjb		updatepri(td);
931163709Sjb		resetpriority(td);
932163709Sjb	}
933172264Sjeff	td->td_slptick = ticks;
934172264Sjeff	ts->ts_slptime = 0;
935166188Sjeff	sched_add(td, SRQ_BORING);
936104964Sjeff}
937104964Sjeff
938134693Sjulian#ifdef SMP
939134688Sjulian/* enable HTT_2 if you have a 2-way HTT cpu.*/
940134688Sjulianstatic int
941134688Sjulianforward_wakeup(int  cpunum)
942134688Sjulian{
943134688Sjulian	cpumask_t map, me, dontuse;
944134688Sjulian	cpumask_t map2;
945134688Sjulian	struct pcpu *pc;
946134688Sjulian	cpumask_t id, map3;
947134688Sjulian
948134688Sjulian	mtx_assert(&sched_lock, MA_OWNED);
949134688Sjulian
950134791Sjulian	CTR0(KTR_RUNQ, "forward_wakeup()");
951134688Sjulian
952134688Sjulian	if ((!forward_wakeup_enabled) ||
953134688Sjulian	     (forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
954134688Sjulian		return (0);
955134688Sjulian	if (!smp_started || cold || panicstr)
956134688Sjulian		return (0);
957134688Sjulian
958134688Sjulian	forward_wakeups_requested++;
959134688Sjulian
960134688Sjulian	/*
961134688Sjulian	 * Check the idle mask we received against what we calculated
962134688Sjulian	 * before in the old version.
963134688Sjulian	 */
964134688Sjulian	me = PCPU_GET(cpumask);
965134688Sjulian	/*
966134688Sjulian	 * Don't bother if we should be doing it ourselves.
967134688Sjulian	 */
968134688Sjulian	if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
969134688Sjulian		return (0);
970134688Sjulian
971134688Sjulian	dontuse = me | stopped_cpus | hlt_cpus_mask;
972134688Sjulian	map3 = 0;
973134688Sjulian	if (forward_wakeup_use_loop) {
974134688Sjulian		SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
975134688Sjulian			id = pc->pc_cpumask;
976134688Sjulian			if ( (id & dontuse) == 0 &&
977134688Sjulian			    pc->pc_curthread == pc->pc_idlethread) {
978134688Sjulian				map3 |= id;
979134688Sjulian			}
980134688Sjulian		}
981134688Sjulian	}
982134688Sjulian
983134688Sjulian	if (forward_wakeup_use_mask) {
984134688Sjulian		map = 0;
985134688Sjulian		map = idle_cpus_mask & ~dontuse;
986134688Sjulian
987134688Sjulian		/* If they are both on, compare and use loop if different */
988134688Sjulian		if (forward_wakeup_use_loop) {
989134688Sjulian			if (map != map3) {
990134688Sjulian				printf("map (%02X) != map3 (%02X)\n",
991134688Sjulian						map, map3);
992134688Sjulian				map = map3;
993134688Sjulian			}
994134688Sjulian		}
995134688Sjulian	} else {
996134688Sjulian		map = map3;
997134688Sjulian	}
998134688Sjulian	/* If we only allow a specific CPU, then mask off all the others */
999134688Sjulian	if (cpunum != NOCPU) {
1000134688Sjulian		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
1001134688Sjulian		map &= (1 << cpunum);
1002134688Sjulian	} else {
1003134688Sjulian		/* Try to choose an idle die. */
1004134688Sjulian		if (forward_wakeup_use_htt) {
1005134688Sjulian			map2 =  (map & (map >> 1)) & 0x5555;
1006134688Sjulian			if (map2) {
1007134688Sjulian				map = map2;
1008134688Sjulian			}
1009134688Sjulian		}
1010134688Sjulian
1011134688Sjulian		/* set only one bit */
1012134688Sjulian		if (forward_wakeup_use_single) {
1013134688Sjulian			map = map & ((~map) + 1);
1014134688Sjulian		}
1015134688Sjulian	}
1016134688Sjulian	if (map) {
1017134688Sjulian		forward_wakeups_delivered++;
1018134688Sjulian		ipi_selected(map, IPI_AST);
1019134688Sjulian		return (1);
1020134688Sjulian	}
1021134688Sjulian	if (cpunum == NOCPU)
1022134688Sjulian		printf("forward_wakeup: Idle processor not found\n");
1023134688Sjulian	return (0);
1024134688Sjulian}
1025134693Sjulian#endif
1026134688Sjulian
1027147182Sups#ifdef SMP
1028147190Supsstatic void kick_other_cpu(int pri,int cpuid);
1029147182Sups
1030147182Supsstatic void
1031147182Supskick_other_cpu(int pri,int cpuid)
1032147182Sups{
1033147182Sups	struct pcpu * pcpu = pcpu_find(cpuid);
1034147182Sups	int cpri = pcpu->pc_curthread->td_priority;
1035147182Sups
1036147182Sups	if (idle_cpus_mask & pcpu->pc_cpumask) {
1037147182Sups		forward_wakeups_delivered++;
1038147182Sups		ipi_selected(pcpu->pc_cpumask, IPI_AST);
1039147182Sups		return;
1040147182Sups	}
1041147182Sups
1042147182Sups	if (pri >= cpri)
1043147182Sups		return;
1044147182Sups
1045147182Sups#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
1046147182Sups#if !defined(FULL_PREEMPTION)
1047147182Sups	if (pri <= PRI_MAX_ITHD)
1048147182Sups#endif /* ! FULL_PREEMPTION */
1049147182Sups	{
1050147182Sups		ipi_selected(pcpu->pc_cpumask, IPI_PREEMPT);
1051147182Sups		return;
1052147182Sups	}
1053147182Sups#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
1054147182Sups
1055147182Sups	pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
1056147182Sups	ipi_selected( pcpu->pc_cpumask , IPI_AST);
1057147182Sups	return;
1058147182Sups}
1059147182Sups#endif /* SMP */
1060147182Sups
1061104964Sjeffvoid
1062134586Sjuliansched_add(struct thread *td, int flags)
1063147182Sups#ifdef SMP
1064104964Sjeff{
1065164936Sjulian	struct td_sched *ts;
1066134591Sjulian	int forwarded = 0;
1067134591Sjulian	int cpu;
1068147182Sups	int single_cpu = 0;
1069121127Sjeff
1070164936Sjulian	ts = td->td_sched;
1071170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
1072166188Sjeff	KASSERT((td->td_inhibitors == 0),
1073166188Sjeff	    ("sched_add: trying to run inhibited thread"));
1074166188Sjeff	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
1075166188Sjeff	    ("sched_add: bad thread state"));
1076172207Sjeff	KASSERT(td->td_flags & TDF_INMEM,
1077172207Sjeff	    ("sched_add: thread swapped out"));
1078139317Sjeff	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
1079173600Sjulian	    td, td->td_name, td->td_priority, curthread,
1080173600Sjulian	    curthread->td_name);
1081170293Sjeff	/*
1082170293Sjeff	 * Now that the thread is moving to the run-queue, set the lock
1083170293Sjeff	 * to the scheduler's lock.
1084170293Sjeff	 */
1085170293Sjeff	if (td->td_lock != &sched_lock) {
1086170293Sjeff		mtx_lock_spin(&sched_lock);
1087170293Sjeff		thread_lock_set(td, &sched_lock);
1088170293Sjeff	}
1089166188Sjeff	TD_SET_RUNQ(td);
1090131481Sjhb
1091147182Sups	if (td->td_pinned != 0) {
1092147182Sups		cpu = td->td_lastcpu;
1093164936Sjulian		ts->ts_runq = &runq_pcpu[cpu];
1094147182Sups		single_cpu = 1;
1095147182Sups		CTR3(KTR_RUNQ,
1096164936Sjulian		    "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td, cpu);
1097164936Sjulian	} else if ((ts)->ts_flags & TSF_BOUND) {
1098147182Sups		/* Find CPU from bound runq */
1099164936Sjulian		KASSERT(SKE_RUNQ_PCPU(ts),("sched_add: bound td_sched not on cpu runq"));
1100164936Sjulian		cpu = ts->ts_runq - &runq_pcpu[0];
1101147182Sups		single_cpu = 1;
1102147182Sups		CTR3(KTR_RUNQ,
1103164936Sjulian		    "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td, cpu);
1104147182Sups	} else {
1105134591Sjulian		CTR2(KTR_RUNQ,
1106164936Sjulian		    "sched_add: adding td_sched:%p (td:%p) to gbl runq", ts, td);
1107134591Sjulian		cpu = NOCPU;
1108164936Sjulian		ts->ts_runq = &runq;
1109147182Sups	}
1110147182Sups
1111147190Sups	if (single_cpu && (cpu != PCPU_GET(cpuid))) {
1112147182Sups	        kick_other_cpu(td->td_priority,cpu);
1113124955Sjeff	} else {
1114147182Sups
1115147190Sups		if (!single_cpu) {
1116147182Sups			cpumask_t me = PCPU_GET(cpumask);
1117147182Sups			int idle = idle_cpus_mask & me;
1118147182Sups
1119147190Sups			if (!idle && ((flags & SRQ_INTR) == 0) &&
1120147190Sups			    (idle_cpus_mask & ~(hlt_cpus_mask | me)))
1121147182Sups				forwarded = forward_wakeup(cpu);
1122147182Sups		}
1123147182Sups
1124147182Sups		if (!forwarded) {
1125147190Sups			if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td))
1126147182Sups				return;
1127147182Sups			else
1128147182Sups				maybe_resched(td);
1129147182Sups		}
1130124955Sjeff	}
1131147182Sups
1132147182Sups	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
1133147182Sups		sched_load_add();
1134164936Sjulian	runq_add(ts->ts_runq, ts, flags);
1135147182Sups}
1136147182Sups#else /* SMP */
1137147182Sups{
1138164936Sjulian	struct td_sched *ts;
1139164936Sjulian	ts = td->td_sched;
1140170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
1141166188Sjeff	KASSERT((td->td_inhibitors == 0),
1142166188Sjeff	    ("sched_add: trying to run inhibited thread"));
1143166188Sjeff	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
1144166188Sjeff	    ("sched_add: bad thread state"));
1145172207Sjeff	KASSERT(td->td_flags & TDF_INMEM,
1146172207Sjeff	    ("sched_add: thread swapped out"));
1147147182Sups	CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
1148173600Sjulian	    td, td->td_name, td->td_priority, curthread,
1149173600Sjulian	    curthread->td_name);
1150170293Sjeff	/*
1151170293Sjeff	 * Now that the thread is moving to the run-queue, set the lock
1152170293Sjeff	 * to the scheduler's lock.
1153170293Sjeff	 */
1154170293Sjeff	if (td->td_lock != &sched_lock) {
1155170293Sjeff		mtx_lock_spin(&sched_lock);
1156170293Sjeff		thread_lock_set(td, &sched_lock);
1157170293Sjeff	}
1158166188Sjeff	TD_SET_RUNQ(td);
1159164936Sjulian	CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
1160164936Sjulian	ts->ts_runq = &runq;
1161134591Sjulian
1162134591Sjulian	/*
1163134591Sjulian	 * If we are yielding (on the way out anyhow)
1164134591Sjulian	 * or the thread being saved is US,
1165134591Sjulian	 * then don't try to be smart about preemption
1166134591Sjulian	 * or kicking off another CPU
1167134591Sjulian	 * as it won't help and may hinder.
1168134591Sjulian	 * In the YIELDING case, we are about to run whoever is
1169134591Sjulian	 * being put in the queue anyhow, and in the
1170134591Sjulian	 * OURSELF case, we are putting ourselves on the run queue
1171134591Sjulian	 * which also only happens when we are about to yield.
1172134591Sjulian	 */
1173134591Sjulian	if((flags & SRQ_YIELDING) == 0) {
1174147182Sups		if (maybe_preempt(td))
1175147182Sups			return;
1176147182Sups	}
1177125295Sjeff	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
1178139317Sjeff		sched_load_add();
1179164936Sjulian	runq_add(ts->ts_runq, ts, flags);
1180132118Sjhb	maybe_resched(td);
1181104964Sjeff}
1182147182Sups#endif /* SMP */
1183147182Sups
1184104964Sjeffvoid
1185121127Sjeffsched_rem(struct thread *td)
1186104964Sjeff{
1187164936Sjulian	struct td_sched *ts;
1188121127Sjeff
1189164936Sjulian	ts = td->td_sched;
1190172207Sjeff	KASSERT(td->td_flags & TDF_INMEM,
1191172207Sjeff	    ("sched_rem: thread swapped out"));
1192166188Sjeff	KASSERT(TD_ON_RUNQ(td),
1193164936Sjulian	    ("sched_rem: thread not on run queue"));
1194104964Sjeff	mtx_assert(&sched_lock, MA_OWNED);
1195139317Sjeff	CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)",
1196173600Sjulian	    td, td->td_name, td->td_priority, curthread,
1197173600Sjulian	    curthread->td_name);
1198104964Sjeff
1199125295Sjeff	if ((td->td_proc->p_flag & P_NOLOAD) == 0)
1200139317Sjeff		sched_load_rem();
1201164936Sjulian	runq_remove(ts->ts_runq, ts);
1202166188Sjeff	TD_SET_CAN_RUN(td);
1203104964Sjeff}
1204104964Sjeff
1205135295Sjulian/*
1206135295Sjulian * Select threads to run.
1207135295Sjulian * Notice that the running threads still consume a slot.
1208135295Sjulian */
1209166188Sjeffstruct thread *
1210104964Sjeffsched_choose(void)
1211104964Sjeff{
1212164936Sjulian	struct td_sched *ts;
1213124955Sjeff	struct runq *rq;
1214104964Sjeff
1215170293Sjeff	mtx_assert(&sched_lock,  MA_OWNED);
1216124955Sjeff#ifdef SMP
1217164936Sjulian	struct td_sched *kecpu;
1218124955Sjeff
1219124955Sjeff	rq = &runq;
1220164936Sjulian	ts = runq_choose(&runq);
1221124955Sjeff	kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]);
1222104964Sjeff
1223164936Sjulian	if (ts == NULL ||
1224124955Sjeff	    (kecpu != NULL &&
1225164936Sjulian	     kecpu->ts_thread->td_priority < ts->ts_thread->td_priority)) {
1226164936Sjulian		CTR2(KTR_RUNQ, "choosing td_sched %p from pcpu runq %d", kecpu,
1227124955Sjeff		     PCPU_GET(cpuid));
1228164936Sjulian		ts = kecpu;
1229124955Sjeff		rq = &runq_pcpu[PCPU_GET(cpuid)];
1230124955Sjeff	} else {
1231164936Sjulian		CTR1(KTR_RUNQ, "choosing td_sched %p from main runq", ts);
1232124955Sjeff	}
1233124955Sjeff
1234124955Sjeff#else
1235124955Sjeff	rq = &runq;
1236164936Sjulian	ts = runq_choose(&runq);
1237124955Sjeff#endif
1238124955Sjeff
1239164936Sjulian	if (ts) {
1240164936Sjulian		runq_remove(rq, ts);
1241166188Sjeff		ts->ts_flags |= TSF_DIDRUN;
1242104964Sjeff
1243172207Sjeff		KASSERT(ts->ts_thread->td_flags & TDF_INMEM,
1244172207Sjeff		    ("sched_choose: thread swapped out"));
1245166188Sjeff		return (ts->ts_thread);
1246166188Sjeff	}
1247166188Sjeff	return (PCPU_GET(idlethread));
1248104964Sjeff}
1249104964Sjeff
1250104964Sjeffvoid
1251177004Sjeffsched_preempt(struct thread *td)
1252177004Sjeff{
1253177004Sjeff	thread_lock(td);
1254177004Sjeff	if (td->td_critnest > 1)
1255177004Sjeff		td->td_owepreempt = 1;
1256177004Sjeff	else
1257177004Sjeff		mi_switch(SW_INVOL | SW_PREEMPT, NULL);
1258177004Sjeff	thread_unlock(td);
1259177004Sjeff}
1260177004Sjeff
1261177004Sjeffvoid
1262104964Sjeffsched_userret(struct thread *td)
1263104964Sjeff{
1264104964Sjeff	/*
1265104964Sjeff	 * XXX we cheat slightly on the locking here to avoid locking in
1266104964Sjeff	 * the usual case.  Setting td_priority here is essentially an
1267104964Sjeff	 * incomplete workaround for not setting it properly elsewhere.
1268104964Sjeff	 * Now that some interrupt handlers are threads, not setting it
1269104964Sjeff	 * properly elsewhere can clobber it in the window between setting
1270104964Sjeff	 * it here and returning to user mode, so don't waste time setting
1271104964Sjeff	 * it perfectly here.
1272104964Sjeff	 */
1273139453Sjhb	KASSERT((td->td_flags & TDF_BORROWING) == 0,
1274139453Sjhb	    ("thread with borrowed priority returning to userland"));
1275163709Sjb	if (td->td_priority != td->td_user_pri) {
1276170293Sjeff		thread_lock(td);
1277163709Sjb		td->td_priority = td->td_user_pri;
1278163709Sjb		td->td_base_pri = td->td_user_pri;
1279170293Sjeff		thread_unlock(td);
1280163709Sjb	}
1281104964Sjeff}
1282107126Sjeff
1283124955Sjeffvoid
1284124955Sjeffsched_bind(struct thread *td, int cpu)
1285124955Sjeff{
1286164936Sjulian	struct td_sched *ts;
1287124955Sjeff
1288170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
1289124955Sjeff	KASSERT(TD_IS_RUNNING(td),
1290124955Sjeff	    ("sched_bind: cannot bind non-running thread"));
1291124955Sjeff
1292164936Sjulian	ts = td->td_sched;
1293124955Sjeff
1294164936Sjulian	ts->ts_flags |= TSF_BOUND;
1295124955Sjeff#ifdef SMP
1296164936Sjulian	ts->ts_runq = &runq_pcpu[cpu];
1297124955Sjeff	if (PCPU_GET(cpuid) == cpu)
1298124955Sjeff		return;
1299124955Sjeff
1300131473Sjhb	mi_switch(SW_VOL, NULL);
1301124955Sjeff#endif
1302124955Sjeff}
1303124955Sjeff
1304124955Sjeffvoid
1305124955Sjeffsched_unbind(struct thread* td)
1306124955Sjeff{
1307170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
1308164936Sjulian	td->td_sched->ts_flags &= ~TSF_BOUND;
1309124955Sjeff}
1310124955Sjeff
1311107126Sjeffint
1312145256Sjkoshysched_is_bound(struct thread *td)
1313145256Sjkoshy{
1314170293Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
1315164936Sjulian	return (td->td_sched->ts_flags & TSF_BOUND);
1316145256Sjkoshy}
1317145256Sjkoshy
1318159630Sdavidxuvoid
1319159630Sdavidxusched_relinquish(struct thread *td)
1320159630Sdavidxu{
1321170293Sjeff	thread_lock(td);
1322170293Sjeff	SCHED_STAT_INC(switch_relinquish);
1323159630Sdavidxu	mi_switch(SW_VOL, NULL);
1324170293Sjeff	thread_unlock(td);
1325159630Sdavidxu}
1326159630Sdavidxu
1327145256Sjkoshyint
1328125288Sjeffsched_load(void)
1329125288Sjeff{
1330125288Sjeff	return (sched_tdcnt);
1331125288Sjeff}
1332125288Sjeff
1333125288Sjeffint
1334107126Sjeffsched_sizeof_proc(void)
1335107126Sjeff{
1336107126Sjeff	return (sizeof(struct proc));
1337107126Sjeff}
1338159630Sdavidxu
1339107126Sjeffint
1340107126Sjeffsched_sizeof_thread(void)
1341107126Sjeff{
1342164936Sjulian	return (sizeof(struct thread) + sizeof(struct td_sched));
1343107126Sjeff}
1344107137Sjeff
1345107137Sjefffixpt_t
1346121127Sjeffsched_pctcpu(struct thread *td)
1347107137Sjeff{
1348164936Sjulian	struct td_sched *ts;
1349121147Sjeff
1350164936Sjulian	ts = td->td_sched;
1351164936Sjulian	return (ts->ts_pctcpu);
1352107137Sjeff}
1353159570Sdavidxu
1354159570Sdavidxuvoid
1355159570Sdavidxusched_tick(void)
1356159570Sdavidxu{
1357159570Sdavidxu}
1358166188Sjeff
1359166188Sjeff/*
1360166188Sjeff * The actual idle process.
1361166188Sjeff */
1362166188Sjeffvoid
1363166188Sjeffsched_idletd(void *dummy)
1364166188Sjeff{
1365166188Sjeff
1366166188Sjeff	for (;;) {
1367166188Sjeff		mtx_assert(&Giant, MA_NOTOWNED);
1368166188Sjeff
1369166188Sjeff		while (sched_runnable() == 0)
1370166188Sjeff			cpu_idle();
1371166188Sjeff
1372166188Sjeff		mtx_lock_spin(&sched_lock);
1373166188Sjeff		mi_switch(SW_VOL, NULL);
1374166188Sjeff		mtx_unlock_spin(&sched_lock);
1375166188Sjeff	}
1376166188Sjeff}
1377166188Sjeff
1378170293Sjeff/*
1379170293Sjeff * A CPU is entering for the first time or a thread is exiting.
1380170293Sjeff */
1381170293Sjeffvoid
1382170293Sjeffsched_throw(struct thread *td)
1383170293Sjeff{
1384170293Sjeff	/*
1385170293Sjeff	 * Correct spinlock nesting.  The idle thread context that we are
1386170293Sjeff	 * borrowing was created so that it would start out with a single
1387170293Sjeff	 * spin lock (sched_lock) held in fork_trampoline().  Since we've
1388170293Sjeff	 * explicitly acquired locks in this function, the nesting count
1389170293Sjeff	 * is now 2 rather than 1.  Since we are nested, calling
1390170293Sjeff	 * spinlock_exit() will simply adjust the counts without allowing
1391170293Sjeff	 * spin lock using code to interrupt us.
1392170293Sjeff	 */
1393170293Sjeff	if (td == NULL) {
1394170293Sjeff		mtx_lock_spin(&sched_lock);
1395170293Sjeff		spinlock_exit();
1396170293Sjeff	} else {
1397174629Sjeff		lock_profile_release_lock(&sched_lock.lock_object);
1398170293Sjeff		MPASS(td->td_lock == &sched_lock);
1399170293Sjeff	}
1400170293Sjeff	mtx_assert(&sched_lock, MA_OWNED);
1401170293Sjeff	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
1402170293Sjeff	PCPU_SET(switchtime, cpu_ticks());
1403170293Sjeff	PCPU_SET(switchticks, ticks);
1404170293Sjeff	cpu_throw(td, choosethread());	/* doesn't return */
1405170293Sjeff}
1406170293Sjeff
1407170293Sjeffvoid
1408170600Sjeffsched_fork_exit(struct thread *td)
1409170293Sjeff{
1410170293Sjeff
1411170293Sjeff	/*
1412170293Sjeff	 * Finish setting up thread glue so that it begins execution in a
1413170293Sjeff	 * non-nested critical section with sched_lock held but not recursed.
1414170293Sjeff	 */
1415170600Sjeff	td->td_oncpu = PCPU_GET(cpuid);
1416170600Sjeff	sched_lock.mtx_lock = (uintptr_t)td;
1417174629Sjeff	lock_profile_obtain_lock_success(&sched_lock.lock_object,
1418174629Sjeff	    0, 0, __FILE__, __LINE__);
1419170600Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
1420170293Sjeff}
1421170293Sjeff
1422176729Sjeffvoid
1423176729Sjeffsched_affinity(struct thread *td)
1424176729Sjeff{
1425176729Sjeff}
1426176729Sjeff
1427134791Sjulian#define KERN_SWITCH_INCLUDE 1
1428134791Sjulian#include "kern/kern_switch.c"