kern_timeout.c revision 115810
11541Srgrimes/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1991, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
51541Srgrimes * All or some portions of this file are derived from material licensed
61541Srgrimes * to the University of California by American Telephone and Telegraph
71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
81541Srgrimes * the permission of UNIX System Laboratories, Inc.
91541Srgrimes *
101541Srgrimes * Redistribution and use in source and binary forms, with or without
111541Srgrimes * modification, are permitted provided that the following conditions
121541Srgrimes * are met:
131541Srgrimes * 1. Redistributions of source code must retain the above copyright
141541Srgrimes *    notice, this list of conditions and the following disclaimer.
151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
161541Srgrimes *    notice, this list of conditions and the following disclaimer in the
171541Srgrimes *    documentation and/or other materials provided with the distribution.
181541Srgrimes * 3. All advertising materials mentioning features or use of this software
191541Srgrimes *    must display the following acknowledgement:
201541Srgrimes *	This product includes software developed by the University of
211541Srgrimes *	California, Berkeley and its contributors.
221541Srgrimes * 4. Neither the name of the University nor the names of its contributors
231541Srgrimes *    may be used to endorse or promote products derived from this software
241541Srgrimes *    without specific prior written permission.
251541Srgrimes *
261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
291541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
361541Srgrimes * SUCH DAMAGE.
371541Srgrimes *
3844510Swollman *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
3950477Speter * $FreeBSD: head/sys/kern/kern_timeout.c 115810 2003-06-04 05:25:58Z phk $
401541Srgrimes */
411541Srgrimes
421541Srgrimes#include <sys/param.h>
431541Srgrimes#include <sys/systm.h>
4433392Sphk#include <sys/callout.h>
451541Srgrimes#include <sys/kernel.h>
4674914Sjhb#include <sys/lock.h>
4768840Sjhb#include <sys/mutex.h>
48115810Sphk#include <sys/sysctl.h>
491541Srgrimes
/* Smoothed softclock() statistics, exported read-only via sysctl. */
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/* Exported to machdep.c and/or kern_clock.c.  */
struct callout *callout;	/* static pool carved out in callwheel_alloc() */
struct callout_list callfree;	/* free list for timeout()-managed callouts */
int callwheelsize, callwheelbits, callwheelmask;	/* wheel geometry */
struct callout_tailq *callwheel;	/* hashed timing-wheel buckets */
int softticks;			/* Like ticks, but for softclock(). */
struct mtx callout_lock;	/* spin lock protecting wheel and free list */

static struct callout *nextsoftcheck;	/* Next callout to be checked. */
731541Srgrimes
741541Srgrimes/*
7582127Sdillon * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
7682127Sdillon *
7782127Sdillon *	This code is called very early in the kernel initialization sequence,
7882127Sdillon *	and may be called more then once.
7982127Sdillon */
8082127Sdilloncaddr_t
8182127Sdillonkern_timeout_callwheel_alloc(caddr_t v)
8282127Sdillon{
8382127Sdillon	/*
8482127Sdillon	 * Calculate callout wheel size
8582127Sdillon	 */
8682127Sdillon	for (callwheelsize = 1, callwheelbits = 0;
8782127Sdillon	     callwheelsize < ncallout;
8882127Sdillon	     callwheelsize <<= 1, ++callwheelbits)
8982127Sdillon		;
9082127Sdillon	callwheelmask = callwheelsize - 1;
9182127Sdillon
9282127Sdillon	callout = (struct callout *)v;
9382127Sdillon	v = (caddr_t)(callout + ncallout);
9482127Sdillon	callwheel = (struct callout_tailq *)v;
9582127Sdillon	v = (caddr_t)(callwheel + callwheelsize);
9682127Sdillon	return(v);
9782127Sdillon}
9882127Sdillon
9982127Sdillon/*
10082127Sdillon * kern_timeout_callwheel_init() - initialize previously reserved callwheel
10182127Sdillon *				   space.
10282127Sdillon *
10382127Sdillon *	This code is called just once, after the space reserved for the
10482127Sdillon *	callout wheel has been finalized.
10582127Sdillon */
void
kern_timeout_callwheel_init(void)
{
	int i;

	/* Put every statically allocated callout on the free list. */
	SLIST_INIT(&callfree);
	for (i = 0; i < ncallout; i++) {
		callout_init(&callout[i], 0);
		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
	}
	/* Start with every wheel bucket empty. */
	for (i = 0; i < callwheelsize; i++) {
		TAILQ_INIT(&callwheel[i]);
	}
	/*
	 * NOTE(review): MTX_RECURSE implies the lock may be re-taken by
	 * the holder (e.g. callout_reset() calling callout_stop() with
	 * the lock held) — confirm against callers.
	 */
	mtx_init(&callout_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
}
12282127Sdillon
12382127Sdillon/*
12429680Sgibbs * The callout mechanism is based on the work of Adam M. Costello and
12529680Sgibbs * George Varghese, published in a technical report entitled "Redesigning
12629680Sgibbs * the BSD Callout and Timer Facilities" and modified slightly for inclusion
12729680Sgibbs * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
12829680Sgibbs * used in this implementation was published by G.Varghese and A. Lauck in
12929680Sgibbs * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
13029680Sgibbs * the Efficient Implementation of a Timer Facility" in the Proceedings of
13129680Sgibbs * the 11th ACM Annual Symposium on Operating Systems Principles,
13229680Sgibbs * Austin, Texas Nov 1987.
13329680Sgibbs */
13432388Sphk
13529680Sgibbs/*
1361541Srgrimes * Software (low priority) clock interrupt.
1371541Srgrimes * Run periodic events from timeout queue.
1381541Srgrimes */
/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(void *dummy)
{
	struct callout *c;
	struct callout_tailq *bucket;
	int curticks;
	int steps;	/* #steps since we last allowed interrupts */
	int depth;	/* callouts examined during this invocation */
	int mpcalls;	/* MPSAFE handlers run (no Giant) */
	int gcalls;	/* handlers run under Giant */
#ifdef DIAGNOSTIC
	struct bintime bt1, bt2;
	struct timespec ts2;
	static uint64_t maxdt = 18446744073709551LL;	/* 1 msec */
#endif

#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif /* MAX_SOFTCLOCK_STEPS */

	mpcalls = 0;
	gcalls = 0;
	depth = 0;
	steps = 0;
	mtx_lock_spin(&callout_lock);
	/* Catch softticks up to ticks, one wheel bucket per tick. */
	while (softticks != ticks) {
		softticks++;
		/*
		 * softticks may be modified by hard clock, so cache
		 * it while we work on a given bucket.
		 */
		curticks = softticks;
		bucket = &callwheel[curticks & callwheelmask];
		c = TAILQ_FIRST(bucket);
		while (c) {
			depth++;
			if (c->c_time != curticks) {
				/* Hashed into this bucket for a later tick; skip. */
				c = TAILQ_NEXT(c, c_links.tqe);
				++steps;
				if (steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					mtx_unlock_spin(&callout_lock);
					;	/* nothing */
					mtx_lock_spin(&callout_lock);
					/*
					 * Re-read the cursor: callout_stop()
					 * may have advanced nextsoftcheck
					 * while the lock was dropped.
					 */
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				void (*c_func)(void *);
				void *c_arg;
				int c_flags;

				/* Expired: unlink the entry and run it. */
				nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(bucket, c, c_links.tqe);
				c_func = c->c_func;
				c_arg = c->c_arg;
				c_flags = c->c_flags;
				c->c_func = NULL;
				if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
					/* timeout()-style callout: recycle it. */
					c->c_flags = CALLOUT_LOCAL_ALLOC;
					SLIST_INSERT_HEAD(&callfree, c,
							  c_links.sle);
				} else {
					c->c_flags =
					    (c->c_flags & ~CALLOUT_PENDING);
				}
				/* Drop the spin lock while the handler runs. */
				mtx_unlock_spin(&callout_lock);
				if (!(c_flags & CALLOUT_MPSAFE)) {
					/* Non-MPSAFE handlers run under Giant. */
					mtx_lock(&Giant);
					gcalls++;
				} else {
					mpcalls++;
				}
#ifdef DIAGNOSTIC
				binuptime(&bt1);
#endif
				c_func(c_arg);
#ifdef DIAGNOSTIC
				binuptime(&bt2);
				bintime_sub(&bt2, &bt1);
				if (bt2.frac > maxdt) {
					/* New worst-case handler runtime. */
					maxdt = bt2.frac;
					bintime2timespec(&bt2, &ts2);
					printf(
			"Expensive timeout(9) function: %p(%p) %d.%09ld s\n",
					c_func, c_arg,
					ts2.tv_sec, ts2.tv_nsec);
				}
#endif
				if (!(c_flags & CALLOUT_MPSAFE))
					mtx_unlock(&Giant);
				mtx_lock_spin(&callout_lock);
				steps = 0;
				c = nextsoftcheck;
			}
		}
	}
	/* Fold this pass into the moving averages (scaled by 1000). */
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
	nextsoftcheck = NULL;
	mtx_unlock_spin(&callout_lock);
}
2431541Srgrimes
2441541Srgrimes/*
2451541Srgrimes * timeout --
2461541Srgrimes *	Execute a function after a specified length of time.
2471541Srgrimes *
2481541Srgrimes * untimeout --
2491541Srgrimes *	Cancel previous timeout function call.
2501541Srgrimes *
25129680Sgibbs * callout_handle_init --
25229680Sgibbs *	Initialize a handle so that using it with untimeout is benign.
25329680Sgibbs *
2541541Srgrimes *	See AT&T BCI Driver Reference Manual for specification.  This
25529680Sgibbs *	implementation differs from that one in that although an
25629680Sgibbs *	identification value is returned from timeout, the original
25729680Sgibbs *	arguments to timeout as well as the identifier are used to
25829680Sgibbs *	identify entries for untimeout.
2591541Srgrimes */
26029680Sgibbsstruct callout_handle
26129680Sgibbstimeout(ftn, arg, to_ticks)
26233824Sbde	timeout_t *ftn;
2631541Srgrimes	void *arg;
26469147Sjlemon	int to_ticks;
2651541Srgrimes{
26629680Sgibbs	struct callout *new;
26729680Sgibbs	struct callout_handle handle;
2681541Srgrimes
26972200Sbmilekic	mtx_lock_spin(&callout_lock);
2701541Srgrimes
2711541Srgrimes	/* Fill in the next free callout structure. */
27229680Sgibbs	new = SLIST_FIRST(&callfree);
27329680Sgibbs	if (new == NULL)
27429680Sgibbs		/* XXX Attempt to malloc first */
2751541Srgrimes		panic("timeout table full");
27629680Sgibbs	SLIST_REMOVE_HEAD(&callfree, c_links.sle);
27744510Swollman
27844510Swollman	callout_reset(new, to_ticks, ftn, arg);
2791541Srgrimes
28044510Swollman	handle.callout = new;
28172200Sbmilekic	mtx_unlock_spin(&callout_lock);
28229680Sgibbs	return (handle);
2831541Srgrimes}
2841541Srgrimes
2851541Srgrimesvoid
28629680Sgibbsuntimeout(ftn, arg, handle)
28733824Sbde	timeout_t *ftn;
2881541Srgrimes	void *arg;
28929680Sgibbs	struct callout_handle handle;
2901541Srgrimes{
2911541Srgrimes
29229680Sgibbs	/*
29329680Sgibbs	 * Check for a handle that was initialized
29429680Sgibbs	 * by callout_handle_init, but never used
29529680Sgibbs	 * for a real timeout.
29629680Sgibbs	 */
29729680Sgibbs	if (handle.callout == NULL)
29829680Sgibbs		return;
29929680Sgibbs
30072200Sbmilekic	mtx_lock_spin(&callout_lock);
30144510Swollman	if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
30244510Swollman		callout_stop(handle.callout);
30372200Sbmilekic	mtx_unlock_spin(&callout_lock);
3041541Srgrimes}
3051541Srgrimes
30624101Sbdevoid
30729680Sgibbscallout_handle_init(struct callout_handle *handle)
30829680Sgibbs{
30929680Sgibbs	handle->callout = NULL;
31029680Sgibbs}
31129680Sgibbs
31244510Swollman/*
31344510Swollman * New interface; clients allocate their own callout structures.
31444510Swollman *
31544510Swollman * callout_reset() - establish or change a timeout
31644510Swollman * callout_stop() - disestablish a timeout
31744510Swollman * callout_init() - initialize a callout structure so that it can
31844510Swollman *	safely be passed to callout_reset() and callout_stop()
31944510Swollman *
32050673Sjlemon * <sys/callout.h> defines three convenience macros:
32144510Swollman *
32250673Sjlemon * callout_active() - returns truth if callout has not been serviced
32350673Sjlemon * callout_pending() - returns truth if callout is still waiting for timeout
32450673Sjlemon * callout_deactivate() - marks the callout as having been serviced
32544510Swollman */
32644510Swollmanvoid
32769147Sjlemoncallout_reset(c, to_ticks, ftn, arg)
32844510Swollman	struct	callout *c;
32944510Swollman	int	to_ticks;
33092723Salfred	void	(*ftn)(void *);
33144510Swollman	void	*arg;
33244510Swollman{
33344510Swollman
33472200Sbmilekic	mtx_lock_spin(&callout_lock);
33544510Swollman	if (c->c_flags & CALLOUT_PENDING)
33644510Swollman		callout_stop(c);
33744510Swollman
33844510Swollman	/*
33981370Sjhb	 * We could unlock callout_lock here and lock it again before the
34081370Sjhb	 * TAILQ_INSERT_TAIL, but there's no point since doing this setup
34181370Sjhb	 * doesn't take much time.
34244510Swollman	 */
34344510Swollman	if (to_ticks <= 0)
34444510Swollman		to_ticks = 1;
34544510Swollman
34644510Swollman	c->c_arg = arg;
34769147Sjlemon	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
34844510Swollman	c->c_func = ftn;
34944510Swollman	c->c_time = ticks + to_ticks;
35044510Swollman	TAILQ_INSERT_TAIL(&callwheel[c->c_time & callwheelmask],
35144510Swollman			  c, c_links.tqe);
35272200Sbmilekic	mtx_unlock_spin(&callout_lock);
35344510Swollman}
35444510Swollman
35581481Sjhbint
35644510Swollmancallout_stop(c)
35744510Swollman	struct	callout *c;
35844510Swollman{
35944510Swollman
36072200Sbmilekic	mtx_lock_spin(&callout_lock);
36144510Swollman	/*
36244510Swollman	 * Don't attempt to delete a callout that's not on the queue.
36344510Swollman	 */
36444510Swollman	if (!(c->c_flags & CALLOUT_PENDING)) {
36550673Sjlemon		c->c_flags &= ~CALLOUT_ACTIVE;
36672200Sbmilekic		mtx_unlock_spin(&callout_lock);
36781481Sjhb		return (0);
36844510Swollman	}
36950673Sjlemon	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
37044510Swollman
37144510Swollman	if (nextsoftcheck == c) {
37244510Swollman		nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
37344510Swollman	}
37444510Swollman	TAILQ_REMOVE(&callwheel[c->c_time & callwheelmask], c, c_links.tqe);
37544510Swollman	c->c_func = NULL;
37644510Swollman
37744510Swollman	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
37844510Swollman		SLIST_INSERT_HEAD(&callfree, c, c_links.sle);
37944510Swollman	}
38072200Sbmilekic	mtx_unlock_spin(&callout_lock);
38181481Sjhb	return (1);
38244510Swollman}
38344510Swollman
38444510Swollmanvoid
38569147Sjlemoncallout_init(c, mpsafe)
38644510Swollman	struct	callout *c;
38769147Sjlemon	int mpsafe;
38844510Swollman{
38944527Swollman	bzero(c, sizeof *c);
39069147Sjlemon	if (mpsafe)
39169147Sjlemon		c->c_flags |= CALLOUT_MPSAFE;
39244510Swollman}
39344510Swollman
#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent on whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
 */
void
adjust_timeout_calltodo(time_change)
    struct timeval *time_change;
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		/* Small enough to convert via microseconds w/o overflow. */
		delta_ticks = (time_change->tv_sec * 1000000 +
			       time_change->tv_usec + (tick - 1)) / tick + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
			      (time_change->tv_usec + (tick - 1)) / tick + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 *
	 * NOTE(review): the take-back of unused ticks below implies
	 * calltodo is a differential list (each c_time is relative to
	 * its predecessor) — confirm against the calltodo definition.
	 */

	/* don't collide with softclock() */
	mtx_lock_spin(&callout_lock);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	mtx_unlock_spin(&callout_lock);

	return;
}
#endif /* APM_FIXUP_CALLTODO */
459