kern_timeout.c revision 140489
/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_timeout.c 140489 2005-01-19 20:34:46Z cperciva $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/* Exported to machdep.c and/or kern_clock.c.  */
struct callout *callout;
struct callout_list callfree;
int callwheelsize, callwheelbits, callwheelmask;
struct callout_tailq *callwheel;
int softticks;			/* Like ticks, but for softclock(). */
struct mtx callout_lock;
#ifdef DIAGNOSTIC
struct mtx dont_sleep_in_callout;
#endif

static struct callout *nextsoftcheck;	/* Next callout to be checked. */

/**
 * Locked by callout_lock:
 *   curr_callout    - If a callout is in progress, it is curr_callout.
 *                     If curr_callout is non-NULL, threads waiting on
 *                     callout_wait will be woken up as soon as the
 *                     relevant callout completes.
 *   wakeup_ctr      - Incremented every time a thread wants to wait
 *                     for a callout to complete.  Modified only when
 *                     curr_callout is non-NULL.
 *   wakeup_needed   - If a thread is waiting on callout_wait, then
 *                     wakeup_needed is nonzero.  Set only when
 *                     curr_callout is non-NULL.
 */
static struct callout *curr_callout;
static int wakeup_ctr;
static int wakeup_needed;

/**
 * Locked by callout_wait_lock:
 *   callout_wait    - If wakeup_needed is set, callout_wait will be
 *                     triggered after the current callout finishes.
 *   wakeup_done_ctr - Set to the current value of wakeup_ctr after
 *                     callout_wait is triggered.
 */
static struct mtx callout_wait_lock;
static struct cv callout_wait;
static int wakeup_done_ctr;
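/*
 * Sketch of the handshake implemented below: a thread in
 * _callout_stop_safe() that must wait for the running callout sets
 * wakeup_needed, takes a cookie from wakeup_ctr, drops callout_lock and
 * sleeps on callout_wait unless wakeup_done_ctr shows that softclock()
 * has already issued the wakeup.  softclock() in turn broadcasts
 * callout_wait once the handler returns and records the cookie in
 * wakeup_done_ctr, so a late-arriving waiter is not left sleeping on a
 * wakeup that has already happened.
 */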

/*
 * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
 *
 *	This code is called very early in the kernel initialization sequence,
 *	and may be called more than once.
 */
caddr_t
kern_timeout_callwheel_alloc(caddr_t v)
{
	/*
	 * Calculate callout wheel size
	 */
	for (callwheelsize = 1, callwheelbits = 0;
	     callwheelsize < ncallout;
	     callwheelsize <<= 1, ++callwheelbits)
		;
	callwheelmask = callwheelsize - 1;

	callout = (struct callout *)v;
	v = (caddr_t)(callout + ncallout);
	callwheel = (struct callout_tailq *)v;
	v = (caddr_t)(callwheel + callwheelsize);
	return(v);
}
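/*
 * Illustrative sizing example (the ncallout value is just an assumed
 * figure, tuned elsewhere): with ncallout = 300, the loop above picks
 * the smallest power of two that is >= 300, giving callwheelsize = 512,
 * callwheelbits = 9 and callwheelmask = 0x1ff, so a callout due at tick
 * T always lands in bucket callwheel[T & 0x1ff].
 */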

/*
 * kern_timeout_callwheel_init() - initialize previously reserved callwheel
 *				   space.
 *
 *	This code is called just once, after the space reserved for the
 *	callout wheel has been finalized.
 */
void
kern_timeout_callwheel_init(void)
{
	int i;

	SLIST_INIT(&callfree);
	for (i = 0; i < ncallout; i++) {
		callout_init(&callout[i], 0);
		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
	}
	for (i = 0; i < callwheelsize; i++) {
		TAILQ_INIT(&callwheel[i]);
	}
	mtx_init(&callout_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
#ifdef DIAGNOSTIC
	mtx_init(&dont_sleep_in_callout, "dont_sleep_in_callout", NULL, MTX_DEF);
#endif
	mtx_init(&callout_wait_lock, "callout_wait_lock", NULL, MTX_DEF);
	cv_init(&callout_wait, "callout_wait");
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */
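/*
 * Informal sketch of how the wheel is used in this file: callout_reset()
 * hashes a callout into callwheel[c->c_time & callwheelmask], and each
 * pass of softclock() below walks exactly one bucket per tick.  Entries
 * whose c_time does not match the current tick are skipped; they belong
 * to a later lap of the wheel and fire only when softticks comes back
 * around to their bucket with a matching c_time.
 */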

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(void *dummy)
{
	struct callout *c;
	struct callout_tailq *bucket;
	int curticks;
	int steps;	/* #steps since we last allowed interrupts */
	int depth;
	int mpcalls;
	int gcalls;
	int wakeup_cookie;
#ifdef DIAGNOSTIC
	struct bintime bt1, bt2;
	struct timespec ts2;
	static uint64_t maxdt = 36893488147419102LL;	/* 2 msec */
	static timeout_t *lastfunc;
#endif

#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
#endif /* MAX_SOFTCLOCK_STEPS */

	mpcalls = 0;
	gcalls = 0;
	depth = 0;
	steps = 0;
	mtx_lock_spin(&callout_lock);
	while (softticks != ticks) {
		softticks++;
		/*
		 * softticks may be modified by hard clock, so cache
		 * it while we work on a given bucket.
		 */
		curticks = softticks;
		bucket = &callwheel[curticks & callwheelmask];
		c = TAILQ_FIRST(bucket);
		while (c) {
			depth++;
			if (c->c_time != curticks) {
				c = TAILQ_NEXT(c, c_links.tqe);
				++steps;
				if (steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					mtx_unlock_spin(&callout_lock);
					;	/* nothing */
					mtx_lock_spin(&callout_lock);
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				void (*c_func)(void *);
				void *c_arg;
				int c_flags;

				nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
				TAILQ_REMOVE(bucket, c, c_links.tqe);
				c_func = c->c_func;
				c_arg = c->c_arg;
				c_flags = c->c_flags;
				if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
					c->c_func = NULL;
					c->c_flags = CALLOUT_LOCAL_ALLOC;
					SLIST_INSERT_HEAD(&callfree, c,
							  c_links.sle);
				} else {
					c->c_flags =
					    (c->c_flags & ~CALLOUT_PENDING);
				}
				curr_callout = c;
				mtx_unlock_spin(&callout_lock);
				if (!(c_flags & CALLOUT_MPSAFE)) {
					mtx_lock(&Giant);
					gcalls++;
					CTR1(KTR_CALLOUT, "callout %p", c_func);
				} else {
					mpcalls++;
					CTR1(KTR_CALLOUT, "callout mpsafe %p",
					    c_func);
				}
#ifdef DIAGNOSTIC
				binuptime(&bt1);
				mtx_lock(&dont_sleep_in_callout);
#endif
				c_func(c_arg);
#ifdef DIAGNOSTIC
				mtx_unlock(&dont_sleep_in_callout);
				binuptime(&bt2);
				bintime_sub(&bt2, &bt1);
				if (bt2.frac > maxdt) {
					if (lastfunc != c_func ||
					    bt2.frac > maxdt * 2) {
						bintime2timespec(&bt2, &ts2);
						printf(
			"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
						    c_func, c_arg,
						    (intmax_t)ts2.tv_sec,
						    ts2.tv_nsec);
					}
					maxdt = bt2.frac;
					lastfunc = c_func;
				}
#endif
				if (!(c_flags & CALLOUT_MPSAFE))
					mtx_unlock(&Giant);
				mtx_lock_spin(&callout_lock);
				curr_callout = NULL;
				if (wakeup_needed) {
					/*
					 * There might be someone waiting
					 * for the callout to complete.
					 */
					wakeup_cookie = wakeup_ctr;
					mtx_unlock_spin(&callout_lock);
					mtx_lock(&callout_wait_lock);
					cv_broadcast(&callout_wait);
					wakeup_done_ctr = wakeup_cookie;
					mtx_unlock(&callout_wait_lock);
					mtx_lock_spin(&callout_lock);
					wakeup_needed = 0;
				}
				steps = 0;
				c = nextsoftcheck;
			}
		}
	}
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
	nextsoftcheck = NULL;
	mtx_unlock_spin(&callout_lock);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that although an
 *	identification value is returned from timeout, the original
 *	arguments to timeout as well as the identifier are used to
 *	identify entries for untimeout.
 */
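/*
 * Minimal usage sketch of this legacy interface (illustrative only; the
 * mt_handle, mt and mything_expire() names are hypothetical, not part of
 * this file):
 *
 *	struct callout_handle mt_handle;
 *
 *	callout_handle_init(&mt_handle);
 *	mt_handle = timeout(mything_expire, mt, 2 * hz);
 *	...
 *	untimeout(mything_expire, mt, mt_handle);
 *
 * Note that untimeout() must be given the same function and argument
 * that were passed to timeout(), in addition to the handle itself.
 */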
struct callout_handle
timeout(ftn, arg, to_ticks)
	timeout_t *ftn;
	void *arg;
	int to_ticks;
{
	struct callout *new;
	struct callout_handle handle;

	mtx_lock_spin(&callout_lock);

	/* Fill in the next free callout structure. */
	new = SLIST_FIRST(&callfree);
	if (new == NULL)
		/* XXX Attempt to malloc first */
		panic("timeout table full");
	SLIST_REMOVE_HEAD(&callfree, c_links.sle);

	callout_reset(new, to_ticks, ftn, arg);

	handle.callout = new;
	mtx_unlock_spin(&callout_lock);
	return (handle);
}

void
untimeout(ftn, arg, handle)
	timeout_t *ftn;
	void *arg;
	struct callout_handle handle;
{

	/*
	 * Check for a handle that was initialized
	 * by callout_handle_init, but never used
	 * for a real timeout.
	 */
	if (handle.callout == NULL)
		return;

	mtx_lock_spin(&callout_lock);
	if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
		callout_stop(handle.callout);
	mtx_unlock_spin(&callout_lock);
}

void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}

/*
 * New interface; clients allocate their own callout structures.
 *
 * callout_reset() - establish or change a timeout
 * callout_stop() - disestablish a timeout
 * callout_init() - initialize a callout structure so that it can
 *	safely be passed to callout_reset() and callout_stop()
 *
 * <sys/callout.h> defines three convenience macros:
 *
 * callout_active() - returns truth if callout has not been stopped,
 *	drained, or deactivated since the last time the callout was
 *	reset.
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
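/*
 * Minimal usage sketch of this interface (illustrative only; the
 * mything_callout, mything_tick() and sc names are hypothetical):
 *
 *	static struct callout mything_callout;
 *
 *	callout_init(&mything_callout, CALLOUT_MPSAFE);
 *	callout_reset(&mything_callout, 5 * hz, mything_tick, sc);
 *	...
 *	callout_stop(&mything_callout);
 *	callout_drain(&mything_callout);
 *
 * callout_stop() only removes a pending callout; callout_drain() (see
 * _callout_stop_safe() below) additionally waits for a handler that is
 * already running to finish.  Pass 0 instead of CALLOUT_MPSAFE to
 * callout_init() if the handler still expects to run with Giant held.
 */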
void
callout_reset(c, to_ticks, ftn, arg)
	struct	callout *c;
	int	to_ticks;
	void	(*ftn)(void *);
	void	*arg;
{

	mtx_lock_spin(&callout_lock);
	if (c == curr_callout && wakeup_needed) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress, and someone has called
		 * callout_drain to kill that callout.  Don't reschedule.
		 */
		mtx_unlock_spin(&callout_lock);
		return;
	}
	if (c->c_flags & CALLOUT_PENDING) {
		if (nextsoftcheck == c) {
			nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
		}
		TAILQ_REMOVE(&callwheel[c->c_time & callwheelmask], c,
		    c_links.tqe);

		/*
		 * Part of the normal "stop a pending callout" process
		 * is to clear the CALLOUT_ACTIVE and CALLOUT_PENDING
		 * flags.  We're not going to bother doing that here,
		 * because we're going to be setting those flags ten lines
		 * after this point, and we're holding callout_lock
		 * between now and then.
		 */
	}

	/*
	 * We could unlock callout_lock here and lock it again before the
	 * TAILQ_INSERT_TAIL, but there's no point since doing this setup
	 * doesn't take much time.
	 */
	if (to_ticks <= 0)
		to_ticks = 1;

	c->c_arg = arg;
	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
	c->c_func = ftn;
	c->c_time = ticks + to_ticks;
	TAILQ_INSERT_TAIL(&callwheel[c->c_time & callwheelmask],
			  c, c_links.tqe);
	mtx_unlock_spin(&callout_lock);
}
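/*
 * A common pattern (illustrative sketch; mything_tick() and the softc
 * are hypothetical) is a periodic timer that reschedules itself from
 * inside its own handler.  The check at the top of callout_reset()
 * above is what lets callout_drain() shut such a loop down cleanly:
 *
 *	static void
 *	mything_tick(void *arg)
 *	{
 *		struct mything_softc *sc = arg;
 *
 *		... periodic work ...
 *
 *		callout_reset(&sc->sc_callout, hz, mything_tick, sc);
 *	}
 */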

int
_callout_stop_safe(c, safe)
	struct	callout *c;
	int	safe;
{
	int wakeup_cookie;

	mtx_lock_spin(&callout_lock);
	/*
	 * Don't attempt to delete a callout that's not on the queue.
	 */
	if (!(c->c_flags & CALLOUT_PENDING)) {
		c->c_flags &= ~CALLOUT_ACTIVE;
		if (c == curr_callout && safe) {
			/* We need to wait until the callout is finished. */
			wakeup_needed = 1;
			wakeup_cookie = wakeup_ctr++;
			mtx_unlock_spin(&callout_lock);
			mtx_lock(&callout_wait_lock);

			/*
			 * Check to make sure that softclock() didn't
			 * do the wakeup in between our dropping
			 * callout_lock and picking up callout_wait_lock
			 */
			if (wakeup_cookie - wakeup_done_ctr > 0)
				cv_wait(&callout_wait, &callout_wait_lock);

			mtx_unlock(&callout_wait_lock);
		} else
			mtx_unlock_spin(&callout_lock);
		return (0);
	}
	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);

	if (nextsoftcheck == c) {
		nextsoftcheck = TAILQ_NEXT(c, c_links.tqe);
	}
	TAILQ_REMOVE(&callwheel[c->c_time & callwheelmask], c, c_links.tqe);
	c->c_func = NULL;

	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
		SLIST_INSERT_HEAD(&callfree, c, c_links.sle);
	}
	mtx_unlock_spin(&callout_lock);
	return (1);
}
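/*
 * _callout_stop_safe() returns 1 if it removed a callout that was still
 * pending and 0 otherwise.  In this version of the tree, callout_stop()
 * and callout_drain() are expected to be thin wrappers around this
 * function in <sys/callout.h>, with safe == 0 and safe == 1
 * respectively; only the safe == 1 case will sleep waiting for a
 * handler that is already running.
 */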

void
callout_init(c, mpsafe)
	struct	callout *c;
	int mpsafe;
{
	bzero(c, sizeof *c);
	if (mpsafe)
		c->c_flags |= CALLOUT_MPSAFE;
}

#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of hz's we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend fire upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent of whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
 */
void
adjust_timeout_calltodo(time_change)
    struct timeval *time_change;
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = (time_change->tv_sec * 1000000 +
			       time_change->tv_usec + (tick - 1)) / tick + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
			      (time_change->tv_usec + (tick - 1)) / tick + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	mtx_lock_spin(&callout_lock);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	mtx_unlock_spin(&callout_lock);

	return;
}
#endif /* APM_FIXUP_CALLTODO */