kern_timeout.c revision 128630
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_timeout.c 128630 2004-04-25 04:10:17Z hmp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/* Exported to machdep.c and/or kern_clock.c.  */
struct callout *callout;
struct callout_list callfree;
int callwheelsize, callwheelbits, callwheelmask;
struct callout_tailq *callwheel;
int softticks;			/* Like ticks, but for softclock(). */
struct mtx callout_lock;
#ifdef DIAGNOSTIC
struct mtx dont_sleep_in_callout;
#endif

static struct callout *nextsoftcheck;	/* Next callout to be checked. */

/*-
 * Locked by callout_lock:
 *   curr_callout    - If a callout is in progress, it is curr_callout.
 *                     If curr_callout is non-NULL, threads waiting on
 *                     callout_wait will be woken up as soon as the
 *                     relevant callout completes.
 *   wakeup_ctr      - Incremented every time a thread wants to wait
 *                     for a callout to complete.  Modified only when
 *                     curr_callout is non-NULL.
 *   wakeup_needed   - If a thread is waiting on callout_wait, then
 *                     wakeup_needed is nonzero.  Set only when
 *                     curr_callout is non-NULL.
 */
static struct callout *curr_callout;
static int wakeup_ctr;
static int wakeup_needed;

/*-
 * Locked by callout_wait_lock:
 *   callout_wait    - If wakeup_needed is set, callout_wait will be
 *                     triggered after the current callout finishes.
 *   wakeup_done_ctr - Set to the current value of wakeup_ctr after
 *                     callout_wait is triggered.
 */
static struct mtx callout_wait_lock;
static struct cv callout_wait;
static int wakeup_done_ctr;

/*
 * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
 *
 *	This code is called very early in the kernel initialization sequence,
 *	and may be called more than once.
 */
caddr_t
kern_timeout_callwheel_alloc(caddr_t v)
{
	/*
	 * Calculate callout wheel size
	 */
	for (callwheelsize = 1, callwheelbits = 0;
	     callwheelsize < ncallout;
	     callwheelsize <<= 1, ++callwheelbits)
		;
	callwheelmask = callwheelsize - 1;
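	/*
	 * For example (hypothetical numbers): ncallout = 1000 yields the
	 * smallest power of two that fits, callwheelsize = 1024, with
	 * callwheelbits = 10 and callwheelmask = 0x3ff.
	 */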

	callout = (struct callout *)v;
	v = (caddr_t)(callout + ncallout);
	callwheel = (struct callout_tailq *)v;
	v = (caddr_t)(callwheel + callwheelsize);
	return(v);
}

/*
 * kern_timeout_callwheel_init() - initialize previously reserved callwheel
 *				   space.
 *
 *	This code is called just once, after the space reserved for the
 *	callout wheel has been finalized.
 */
void
kern_timeout_callwheel_init(void)
{
	int i;

	SLIST_INIT(&callfree);
	for (i = 0; i < ncallout; i++) {
		callout_init(&callout[i], 0);
		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
	}
	for (i = 0; i < callwheelsize; i++) {
		TAILQ_INIT(&callwheel[i]);
	}
	mtx_init(&callout_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
#ifdef DIAGNOSTIC
	mtx_init(&dont_sleep_in_callout, "dont_sleep_in_callout", NULL, MTX_DEF);
#endif
	mtx_init(&callout_wait_lock, "callout_wait_lock", NULL, MTX_DEF);
	cv_init(&callout_wait, "callout_wait");
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */
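
/*
 * Sketch of the hashed-wheel idea used below (hypothetical numbers): with
 * callwheelsize = 256 and callwheelmask = 0xff, a callout scheduled at
 * ticks = 1000 for 300 ticks in the future gets c_time = 1300 and is
 * appended to callwheel[1300 & 0xff] = callwheel[20].  Each softclock()
 * pass walks only the bucket for the current tick and skips entries whose
 * c_time does not match, so insertion and removal stay O(1) and expiry
 * processing touches one short list per tick.
 */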

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(void *dummy)
{
	struct callout *c;
	struct callout_tailq *bucket;
	int curticks;
	int steps;	/* #steps since we last allowed interrupts */
	int depth;
	int mpcalls;
	int gcalls;
	int wakeup_cookie;
#ifdef DIAGNOSTIC
	struct bintime bt1, bt2;
	struct timespec ts2;
	static uint64_t maxdt = 36893488147419102LL;	/* 2 msec */
	static timeout_t *lastfunc;
#endif

#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif /* MAX_SOFTCLOCK_STEPS */

	mpcalls = 0;
	gcalls = 0;
	depth = 0;
	steps = 0;
	mtx_lock_spin(&callout_lock);
	while (softticks != ticks) {
		softticks++;
		/*
		 * softticks may be modified by hard clock, so cache
		 * it while we work on a given bucket.
		 */
		curticks = softticks;
		bucket = &callwheel[curticks & callwheelmask];
		c = TAILQ_FIRST(bucket);
		while (c) {
			depth++;
			if (c->c_time != curticks) {
				c = TAILQ_NEXT(c, c_links.tqe);
				++steps;
				if (steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					mtx_unlock_spin(&callout_lock);
					;	/* nothing */
					mtx_lock_spin(&callout_lock);
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				void (*c_func)(void *);
				void *c_arg;
				int c_flags;

				nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(bucket, c, c_links.tqe);
				c_func = c->c_func;
				c_arg = c->c_arg;
				c_flags = c->c_flags;
				c->c_func = NULL;
				if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
					c->c_flags = CALLOUT_LOCAL_ALLOC;
					SLIST_INSERT_HEAD(&callfree, c,
							  c_links.sle);
				} else {
					c->c_flags =
					    (c->c_flags & ~CALLOUT_PENDING);
				}
				curr_callout = c;
				mtx_unlock_spin(&callout_lock);
				if (!(c_flags & CALLOUT_MPSAFE)) {
					mtx_lock(&Giant);
					gcalls++;
				} else {
					mpcalls++;
				}
#ifdef DIAGNOSTIC
				binuptime(&bt1);
				mtx_lock(&dont_sleep_in_callout);
#endif
				c_func(c_arg);
#ifdef DIAGNOSTIC
				mtx_unlock(&dont_sleep_in_callout);
				binuptime(&bt2);
				bintime_sub(&bt2, &bt1);
				if (bt2.frac > maxdt) {
					if (lastfunc != c_func ||
					    bt2.frac > maxdt * 2) {
						bintime2timespec(&bt2, &ts2);
						printf(
			"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
						    c_func, c_arg,
						    (intmax_t)ts2.tv_sec,
						    ts2.tv_nsec);
					}
					maxdt = bt2.frac;
					lastfunc = c_func;
				}
#endif
				if (!(c_flags & CALLOUT_MPSAFE))
					mtx_unlock(&Giant);
				mtx_lock_spin(&callout_lock);
				curr_callout = NULL;
				if (wakeup_needed) {
					/*
					 * There might be someone waiting
					 * for the callout to complete.
					 */
					wakeup_cookie = wakeup_ctr;
					mtx_unlock_spin(&callout_lock);
					mtx_lock(&callout_wait_lock);
					cv_broadcast(&callout_wait);
					wakeup_done_ctr = wakeup_cookie;
					mtx_unlock(&callout_wait_lock);
					mtx_lock_spin(&callout_lock);
					wakeup_needed = 0;
				}
				steps = 0;
				c = nextsoftcheck;
			}
		}
	}
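	/*
	 * The statistics exported via the sysctls above are kept as
	 * exponentially decaying averages with a gain of 1/256 per
	 * softclock() invocation, scaled by 1000; e.g. a steady depth of
	 * one callout per call converges toward avg_depth == 1000.
	 */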
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
	nextsoftcheck = NULL;
	mtx_unlock_spin(&callout_lock);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that although an
 *	identification value is returned from timeout, the original
 *	arguments to timeout as well as the identifier are used to
 *	identify entries for untimeout.
 */
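
/*
 * Hypothetical usage sketch (illustration only, not part of this file):
 * a driver arms a one-shot timeout and later cancels it using the same
 * function/argument pair together with the handle returned by timeout().
 * "foo_expire" and "sc" are made-up names; the calls mirror the functions
 * defined below.
 *
 *	static struct callout_handle foo_handle;
 *
 *	static void
 *	foo_expire(void *arg)
 *	{
 *		...					runs later, from softclock()
 *	}
 *
 *	callout_handle_init(&foo_handle);		benign to untimeout() now
 *	foo_handle = timeout(foo_expire, sc, hz);	fires roughly one second later
 *	...
 *	untimeout(foo_expire, sc, foo_handle);		cancel if still pending
 */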
struct callout_handle
timeout(ftn, arg, to_ticks)
	timeout_t *ftn;
	void *arg;
	int to_ticks;
{
	struct callout *new;
	struct callout_handle handle;

	mtx_lock_spin(&callout_lock);

	/* Fill in the next free callout structure. */
	new = SLIST_FIRST(&callfree);
	if (new == NULL)
		/* XXX Attempt to malloc first */
		panic("timeout table full");
	SLIST_REMOVE_HEAD(&callfree, c_links.sle);

	callout_reset(new, to_ticks, ftn, arg);

	handle.callout = new;
	mtx_unlock_spin(&callout_lock);
	return (handle);
}

void
untimeout(ftn, arg, handle)
	timeout_t *ftn;
	void *arg;
	struct callout_handle handle;
{

	/*
	 * Check for a handle that was initialized
	 * by callout_handle_init, but never used
	 * for a real timeout.
	 */
	if (handle.callout == NULL)
		return;

	mtx_lock_spin(&callout_lock);
	if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
		callout_stop(handle.callout);
	mtx_unlock_spin(&callout_lock);
}

void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}

/*
 * New interface; clients allocate their own callout structures.
 *
 * callout_reset() - establish or change a timeout
 * callout_stop() - disestablish a timeout
 * callout_init() - initialize a callout structure so that it can
 *	safely be passed to callout_reset() and callout_stop()
 *
 * <sys/callout.h> defines three convenience macros:
 *
 * callout_active() - returns truth if callout has not been serviced
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
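
/*
 * Hypothetical usage sketch (illustration only, not part of this file):
 * a periodic MP-safe callout embedded in a driver softc that rearms
 * itself until it is torn down with callout_drain(), the wait-for-
 * completion form of callout_stop() (see _callout_stop_safe() below).
 * "bar_softc", "bar_tick" and "sc" are made-up names.
 *
 *	struct bar_softc {
 *		struct callout	tick_callout;
 *		...
 *	};
 *
 *	static void
 *	bar_tick(void *arg)
 *	{
 *		struct bar_softc *sc = arg;
 *
 *		...					do periodic work
 *		callout_reset(&sc->tick_callout, hz, bar_tick, sc);
 *	}
 *
 *	callout_init(&sc->tick_callout, 1);		nonzero marks it MPSAFE
 *	callout_reset(&sc->tick_callout, hz, bar_tick, sc);
 *	...
 *	callout_drain(&sc->tick_callout);		at detach time
 */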
void
callout_reset(c, to_ticks, ftn, arg)
	struct	callout *c;
	int	to_ticks;
	void	(*ftn)(void *);
	void	*arg;
{

	mtx_lock_spin(&callout_lock);
	if (c == curr_callout && wakeup_needed) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress, and someone has called
		 * callout_drain to kill that callout.  Don't reschedule.
		 */
		mtx_unlock_spin(&callout_lock);
		return;
	}
	if (c->c_flags & CALLOUT_PENDING)
		callout_stop(c);

	/*
	 * We could unlock callout_lock here and lock it again before the
	 * TAILQ_INSERT_TAIL, but there's no point since doing this setup
	 * doesn't take much time.
	 */
	if (to_ticks <= 0)
		to_ticks = 1;

	c->c_arg = arg;
	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
	c->c_func = ftn;
	c->c_time = ticks + to_ticks;
	TAILQ_INSERT_TAIL(&callwheel[c->c_time & callwheelmask],
			  c, c_links.tqe);
	mtx_unlock_spin(&callout_lock);
}

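/*
 * _callout_stop_safe() is the common implementation behind the
 * callout_stop(9) and callout_drain(9) entry points (expected to be thin
 * wrappers in <sys/callout.h>): "safe" == 0 only removes a pending
 * callout, while "safe" != 0 additionally waits for a callout that is
 * already running to finish.
 */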
int
_callout_stop_safe(c, safe)
	struct	callout *c;
	int	safe;
{
	int wakeup_cookie;

	mtx_lock_spin(&callout_lock);
	/*
	 * Don't attempt to delete a callout that's not on the queue.
	 */
	if (!(c->c_flags & CALLOUT_PENDING)) {
		c->c_flags &= ~CALLOUT_ACTIVE;
		if (c == curr_callout && safe) {
			/* We need to wait until the callout is finished. */
			wakeup_needed = 1;
			wakeup_cookie = wakeup_ctr++;
			mtx_unlock_spin(&callout_lock);
			mtx_lock(&callout_wait_lock);

			/*
			 * Check to make sure that softclock() didn't
			 * do the wakeup in between our dropping
			 * callout_lock and picking up callout_wait_lock
			 */
			if (wakeup_cookie - wakeup_done_ctr > 0)
				cv_wait(&callout_wait, &callout_wait_lock);

			mtx_unlock(&callout_wait_lock);
		} else
			mtx_unlock_spin(&callout_lock);
		return (0);
	}
	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);

	if (nextsoftcheck == c) {
		nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
	}
	TAILQ_REMOVE(&callwheel[c->c_time & callwheelmask], c, c_links.tqe);
	c->c_func = NULL;

	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
		SLIST_INSERT_HEAD(&callfree, c, c_links.sle);
	}
	mtx_unlock_spin(&callout_lock);
	return (1);
}

void
callout_init(c, mpsafe)
	struct	callout *c;
	int mpsafe;
{
	bzero(c, sizeof *c);
	if (mpsafe)
		c->c_flags |= CALLOUT_MPSAFE;
}

#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent of whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
 */
void
adjust_timeout_calltodo(time_change)
    struct timeval *time_change;
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = (time_change->tv_sec * 1000000 +
			       time_change->tv_usec + (tick - 1)) / tick + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
			      (time_change->tv_usec + (tick - 1)) / tick + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

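	/*
	 * Worked example (hypothetical numbers): with hz = 100 the tick
	 * length is tick = 10000 us.  Sleeping for two hours
	 * (tv_sec = 7200, tv_usec = 0) takes the first branch above:
	 * delta_ticks = (7200 * 1000000 + 0 + 9999) / 10000 + 1 = 720001,
	 * i.e. the elapsed time rounded up to whole ticks, plus one.
	 */
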
	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	mtx_lock_spin(&callout_lock);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	mtx_unlock_spin(&callout_lock);

	return;
}
#endif /* APM_FIXUP_CALLTODO */
