callout.c revision 3642:9280dc401622
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/callo.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>

/*
 * Callout tables.  See timeout(9F) for details.
 */
static int cpr_stop_callout;	/* nonzero during a CPR checkpoint; see callout_cpr_callb() */
static int callout_fanout;	/* table sets in use: MIN(CALLOUT_FANOUT, max_ncpus) */
static int ncallout;		/* callout_t structures allocated so far (never freed) */
static callout_table_t *callout_table[CALLOUT_TABLES];

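/*
 * Every active callout sits on two doubly-linked hash chains at once:
 * ct_idhash[], keyed by callout ID, and ct_lbhash[], keyed by the lbolt
 * runtime.  The INSERT/DELETE macros below splice a callout into or out of
 * one such chain; CALLOUT_HASH_UPDATE applies the operation to both chains
 * and must be invoked with ct_lock held.
 */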
#define	CALLOUT_HASH_INSERT(cthead, cp, cnext, cprev)	\
{							\
	callout_t **headpp = &cthead;			\
	callout_t *headp = *headpp;			\
	cp->cnext = headp;				\
	cp->cprev = NULL;				\
	if (headp != NULL)				\
		headp->cprev = cp;			\
	*headpp = cp;					\
}

#define	CALLOUT_HASH_DELETE(cthead, cp, cnext, cprev)	\
{							\
	callout_t *nextp = cp->cnext;			\
	callout_t *prevp = cp->cprev;			\
	if (nextp != NULL)				\
		nextp->cprev = prevp;			\
	if (prevp != NULL)				\
		prevp->cnext = nextp;			\
	else						\
		cthead = nextp;				\
}

#define	CALLOUT_HASH_UPDATE(INSDEL, ct, cp, id, runtime)		\
	ASSERT(MUTEX_HELD(&ct->ct_lock));				\
	ASSERT(cp->c_xid == id && cp->c_runtime == runtime);		\
	CALLOUT_HASH_##INSDEL(ct->ct_idhash[CALLOUT_IDHASH(id)],	\
	cp, c_idnext, c_idprev)						\
	CALLOUT_HASH_##INSDEL(ct->ct_lbhash[CALLOUT_LBHASH(runtime)],	\
	cp, c_lbnext, c_lbprev)

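/*
 * Independently of the hash chains, every callout is threaded onto
 * ct_hresq, a doubly-linked list kept sorted by ascending c_hresms, the
 * wall-clock expiration time in milliseconds.  callout_schedule_1() and
 * callout_execute() use this queue to fire timeouts whose hres deadline
 * has passed even when lbolt lags wall-clock time (for example, after
 * the system clock is stepped forward).
 */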
#define	CALLOUT_HRES_INSERT(ct, cp, cnext, cprev, hresms)		\
{									\
	callout_t *nextp = ct->ct_hresq;				\
	callout_t *prevp;						\
									\
	if (nextp == NULL || hresms <= nextp->c_hresms) {		\
		cp->cnext = ct->ct_hresq;				\
		ct->ct_hresq = cp;					\
		cp->cprev = NULL;					\
		if (cp->cnext != NULL)					\
			cp->cnext->cprev = cp;				\
	} else {							\
		do {							\
			prevp = nextp;					\
			nextp = nextp->cnext;				\
		} while (nextp != NULL && hresms > nextp->c_hresms);	\
		prevp->cnext = cp;					\
		cp->cprev = prevp;					\
		cp->cnext = nextp;					\
		if (nextp != NULL)					\
			nextp->cprev = cp;				\
	}								\
}

#define	CALLOUT_HRES_DELETE(ct, cp, cnext, cprev, hresms)	\
{								\
	if (cp == ct->ct_hresq) {				\
		ct->ct_hresq = cp->cnext;			\
		if (cp->cnext != NULL)				\
			cp->cnext->cprev = NULL;		\
	} else {						\
		cp->cprev->cnext = cp->cnext;			\
		if (cp->cnext != NULL)				\
			cp->cnext->cprev = cp->cprev;		\
	}							\
}

#define	CALLOUT_HRES_UPDATE(INSDEL, ct, cp, id, hresms)		\
	ASSERT(MUTEX_HELD(&ct->ct_lock));			\
	ASSERT(cp->c_xid == id);				\
	CALLOUT_HRES_##INSDEL(ct, cp, c_hrnext,			\
	c_hrprev, hresms)

/*
 * Allocate a callout structure.  We try quite hard because we
 * can't sleep, and if we can't do the allocation, we're toast.
 * Failing all else, we fall back to a KM_PANIC allocation.
 */
static callout_t *
callout_alloc(callout_table_t *ct)
{
	size_t size = 0;
	callout_t *cp = NULL;

	mutex_exit(&ct->ct_lock);
	cp = kmem_alloc_tryhard(sizeof (callout_t), &size,
	    KM_NOSLEEP | KM_PANIC);
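	/*
	 * KM_PANIC means kmem_alloc_tryhard() either succeeds or brings
	 * the system down, so cp is never NULL here.  ct_lock was dropped
	 * above so the table stays usable while we allocate.
	 */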
	bzero(cp, sizeof (callout_t));
	ncallout++;
	mutex_enter(&ct->ct_lock);
	return (cp);
}

/*
 * Arrange that func(arg) be called after delta clock ticks.
 */
static timeout_id_t
timeout_common(void (*func)(void *), void *arg, clock_t delta,
    callout_table_t *ct)
{
	callout_t	*cp;
	callout_id_t	id;
	clock_t		runtime;
	timestruc_t	now;
	int64_t		hresms;

	gethrestime(&now);

	mutex_enter(&ct->ct_lock);

	if ((cp = ct->ct_freelist) == NULL)
		cp = callout_alloc(ct);
	else
		ct->ct_freelist = cp->c_idnext;

	cp->c_func = func;
	cp->c_arg = arg;

	/*
	 * Make sure the callout runs at least 1 tick in the future.
	 */
	if (delta <= 0)
		delta = 1;
	cp->c_runtime = runtime = lbolt + delta;

	/* Calculate the future expiration time in milliseconds */
	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC +
	    TICK_TO_MSEC(delta);
	cp->c_hresms = hresms;

	/*
	 * Assign an ID to this callout.  The low bits of an ID identify
	 * the callout table it came from (seeded into ct_short_id and
	 * ct_long_id in callout_init()); the bits above them hold a
	 * generation counter.  Subtracting CALLOUT_COUNTER_LOW decrements
	 * that counter, and or'ing CALLOUT_COUNTER_HIGH back in keeps the
	 * top bit set even across counter wraparound, which is what the
	 * sanity check in untimeout() relies on.  Long-term and short-term
	 * timeouts draw from separate counters, which keeps fast-recycling
	 * short-term IDs from colliding with IDs of timeouts that stay
	 * outstanding a long time.
	 */
	if (delta > CALLOUT_LONGTERM_TICKS)
		ct->ct_long_id = id = (ct->ct_long_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;
	else
		ct->ct_short_id = id = (ct->ct_short_id - CALLOUT_COUNTER_LOW) |
		    CALLOUT_COUNTER_HIGH;

	cp->c_xid = id;

	CALLOUT_HASH_UPDATE(INSERT, ct, cp, id, runtime);
	CALLOUT_HRES_UPDATE(INSERT, ct, cp, id, hresms);

	mutex_exit(&ct->ct_lock);

	TRACE_4(TR_FAC_CALLOUT, TR_TIMEOUT,
	    "timeout:%K(%p) in %ld ticks, cp %p",
	    func, arg, delta, cp);

	return ((timeout_id_t)id);
}

timeout_id_t
timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_NORMAL, CPU->cpu_seqid)]));
}

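/*
 * realtime_timeout() differs from timeout() only in the table it picks:
 * realtime tables are created without a taskq (see callout_init()), so
 * callout_schedule_1() pushes their expiry work through softcall(), in
 * soft interrupt context, rather than handing it to a taskq thread.
 */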
timeout_id_t
realtime_timeout(void (*func)(void *), void *arg, clock_t delta)
{
	return (timeout_common(func, arg, delta,
	    callout_table[CALLOUT_TABLE(CALLOUT_REALTIME, CPU->cpu_seqid)]));
}

clock_t
untimeout(timeout_id_t id_arg)
{
	callout_id_t id = (callout_id_t)id_arg;
	callout_table_t *ct;
	callout_t *cp;
	callout_id_t xid;

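	/*
	 * The low bits of every callout ID encode the table that issued it
	 * (see callout_init()), so the owning table can be found without
	 * searching.
	 */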
	ct = callout_table[id & CALLOUT_TABLE_MASK];

	mutex_enter(&ct->ct_lock);

	for (cp = ct->ct_idhash[CALLOUT_IDHASH(id)]; cp; cp = cp->c_idnext) {
		if ((xid = cp->c_xid) == id) {
			clock_t runtime = cp->c_runtime;
			clock_t time_left = runtime - lbolt;

			CALLOUT_HASH_UPDATE(DELETE, ct, cp, id, runtime);
			CALLOUT_HRES_UPDATE(DELETE, ct, cp, id, 0);
			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			mutex_exit(&ct->ct_lock);
			TRACE_2(TR_FAC_CALLOUT, TR_UNTIMEOUT,
			    "untimeout:ID %lx ticks_left %ld", id, time_left);
			return (time_left < 0 ? 0 : time_left);
		}

		if (xid != (id | CALLOUT_EXECUTING))
			continue;

		/*
		 * The callout we want to delete is currently executing.
		 * The DDI states that we must wait until the callout
		 * completes before returning, so we block on c_done until
		 * the callout ID changes (to zero if it's on the freelist,
		 * or to a new callout ID if it's in use).  This implicitly
		 * assumes that callout structures are persistent (they are).
		 */
		if (cp->c_executor == curthread) {
			/*
			 * The timeout handler called untimeout() on itself.
			 * Stupid, but legal.  We can't wait for the timeout
			 * to complete without deadlocking, so we just return.
			 */
			mutex_exit(&ct->ct_lock);
			TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_SELF,
			    "untimeout_self:ID %x", id);
			return (-1);
		}
		while (cp->c_xid == xid)
			cv_wait(&cp->c_done, &ct->ct_lock);
		mutex_exit(&ct->ct_lock);
		TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_EXECUTING,
		    "untimeout_executing:ID %lx", id);
		return (-1);
	}

	mutex_exit(&ct->ct_lock);
	TRACE_1(TR_FAC_CALLOUT, TR_UNTIMEOUT_BOGUS_ID,
	    "untimeout_bogus_id:ID %lx", id);

	/*
	 * We didn't find the specified callout ID.  This means either
	 * (1) the callout already fired, or (2) the caller passed us
	 * a bogus value.  Perform a sanity check to detect case (2).
	 */
	if (id != 0 && (id & (CALLOUT_COUNTER_HIGH | CALLOUT_EXECUTING)) !=
	    CALLOUT_COUNTER_HIGH)
		panic("untimeout: impossible timeout id %lx", id);

	return (-1);
}
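
/*
 * Typical client usage is a sketch like the following (my_handler and
 * my_state are hypothetical; drv_usectohz() converts microseconds to
 * ticks):
 *
 *	timeout_id_t tid;
 *	clock_t left;
 *
 *	tid = timeout(my_handler, my_state, drv_usectohz(1000000));
 *	...
 *	left = untimeout(tid);
 *
 * If "left" is >= 0, the callout was cancelled with that many ticks to
 * spare (0 if it was already overdue); -1 means the handler already ran
 * or is executing right now.
 */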

/*
 * Do the actual work of executing callouts.  This routine is called either
 * by a taskq_thread (normal case), or by softcall (realtime case).
 */
static void
callout_execute(callout_table_t *ct)
{
	callout_t	*cp;
	callout_id_t	xid;
	clock_t		runtime;
	timestruc_t	now;
	int64_t		hresms;

	mutex_enter(&ct->ct_lock);

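	/*
	 * First pass: run everything that is due by tick count.  ct_runtime
	 * is the next tick value awaiting service, so drain each pending
	 * tick's lbolt hash chain until we catch up with ct_curtime.
	 */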
	while (((runtime = ct->ct_runtime) - ct->ct_curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			xid = cp->c_xid;
			if (cp->c_runtime != runtime ||
			    (xid & CALLOUT_EXECUTING))
				continue;
			cp->c_executor = curthread;
			cp->c_xid = xid |= CALLOUT_EXECUTING;
			mutex_exit(&ct->ct_lock);
			DTRACE_PROBE1(callout__start, callout_t *, cp);
			(*cp->c_func)(cp->c_arg);
			DTRACE_PROBE1(callout__end, callout_t *, cp);
			mutex_enter(&ct->ct_lock);

			/*
			 * Delete the callout from both hash tables and the
			 * hres queue, return it to the freelist, and tell
			 * anyone who cares that we're done.
			 * Even though we dropped and reacquired ct->ct_lock,
			 * it's OK to pick up where we left off because only
			 * newly-created timeouts can precede cp on ct_lbhash,
			 * and those timeouts cannot be due on this tick.
			 */
			CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
			CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
			cp->c_idnext = ct->ct_freelist;
			ct->ct_freelist = cp;
			cp->c_xid = 0;	/* Indicate completion for c_done */
			cv_broadcast(&cp->c_done);
		}
		/*
		 * We have completed all callouts that were scheduled to
		 * run at "runtime".  If the global run time still matches
		 * our local copy, then we advance the global run time;
		 * otherwise, another callout thread must have already done so.
		 */
		if (ct->ct_runtime == runtime)
			ct->ct_runtime = runtime + 1;
	}

	gethrestime(&now);

	/* Calculate the current time in milliseconds */
	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;

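	/*
	 * Second pass: run anything whose wall-clock deadline has arrived,
	 * even if its tick-based runtime has not.  The queue is sorted by
	 * c_hresms, so we can stop at the first entry still in the future.
	 */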
	cp = ct->ct_hresq;
	while (cp != NULL && hresms >= cp->c_hresms) {
		xid = cp->c_xid;
		if (xid & CALLOUT_EXECUTING) {
			cp = cp->c_hrnext;
			continue;
		}
		cp->c_executor = curthread;
		cp->c_xid = xid |= CALLOUT_EXECUTING;
		runtime = cp->c_runtime;
		mutex_exit(&ct->ct_lock);
		DTRACE_PROBE1(callout__start, callout_t *, cp);
		(*cp->c_func)(cp->c_arg);
		DTRACE_PROBE1(callout__end, callout_t *, cp);
		mutex_enter(&ct->ct_lock);

		/*
		 * See comments above.
		 */
		CALLOUT_HASH_UPDATE(DELETE, ct, cp, xid, runtime);
		CALLOUT_HRES_UPDATE(DELETE, ct, cp, xid, hresms);
		cp->c_idnext = ct->ct_freelist;
		ct->ct_freelist = cp;
		cp->c_xid = 0;	/* Indicate completion for c_done */
		cv_broadcast(&cp->c_done);

		/*
		 * Start over from the head of the list: we dropped ct_lock
		 * while the handler ran, so our place in the queue is stale,
		 * and a timeout bearing an earlier hres time may have been
		 * inserted in the meantime.
		 */
		cp = ct->ct_hresq;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule any callouts that are due on or before this tick.
 */
static void
callout_schedule_1(callout_table_t *ct)
{
	callout_t	*cp;
	clock_t		curtime, runtime;
	timestruc_t	now;
	int64_t		hresms;

	mutex_enter(&ct->ct_lock);
	ct->ct_curtime = curtime = lbolt;
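	/*
	 * If anything is due by tick count or by wall-clock time (below),
	 * hand the whole table off to a worker: softcall() for tables with
	 * no taskq (the realtime case), taskq_dispatch() otherwise.  A
	 * single dispatch suffices, since callout_execute() drains every
	 * expired callout before returning.
	 */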
	while (((runtime = ct->ct_runtime) - curtime) <= 0) {
		for (cp = ct->ct_lbhash[CALLOUT_LBHASH(runtime)];
		    cp != NULL; cp = cp->c_lbnext) {
			if (cp->c_runtime != runtime ||
			    (cp->c_xid & CALLOUT_EXECUTING))
				continue;
			mutex_exit(&ct->ct_lock);
			if (ct->ct_taskq == NULL)
				softcall((void (*)(void *))callout_execute, ct);
			else
				(void) taskq_dispatch(ct->ct_taskq,
				    (task_func_t *)callout_execute, ct,
				    KM_NOSLEEP);
			return;
		}
		ct->ct_runtime++;
	}

	gethrestime(&now);

	/* Calculate the current time in milliseconds */
	hresms = now.tv_sec * MILLISEC + now.tv_nsec / MICROSEC;

	cp = ct->ct_hresq;
	while (cp != NULL && hresms >= cp->c_hresms) {
		if (cp->c_xid & CALLOUT_EXECUTING) {
			cp = cp->c_hrnext;
			continue;
		}
		mutex_exit(&ct->ct_lock);
		if (ct->ct_taskq == NULL)
			softcall((void (*)(void *))callout_execute, ct);
		else
			(void) taskq_dispatch(ct->ct_taskq,
			    (task_func_t *)callout_execute, ct, KM_NOSLEEP);
		return;
	}
	mutex_exit(&ct->ct_lock);
}

/*
 * Schedule callouts for all callout tables.  Called by clock() on each tick.
 */
void
callout_schedule(void)
{
	int f, t;

	if (cpr_stop_callout)
		return;

	for (t = 0; t < CALLOUT_NTYPES; t++)
		for (f = 0; f < callout_fanout; f++)
			callout_schedule_1(callout_table[CALLOUT_TABLE(t, f)]);
}

/*
 * Callback handler used by CPR to stop and resume callouts.
 */
/*ARGSUSED*/
static boolean_t
callout_cpr_callb(void *arg, int code)
{
	cpr_stop_callout = (code == CB_CODE_CPR_CHKPT);
	return (B_TRUE);
}

/*
 * Initialize all callout tables.  Called at boot time just before clkstart().
 */
void
callout_init(void)
{
	int f, t;
	int table_id;
	callout_table_t *ct;

	callout_fanout = MIN(CALLOUT_FANOUT, max_ncpus);

	for (t = 0; t < CALLOUT_NTYPES; t++) {
		for (f = 0; f < CALLOUT_FANOUT; f++) {
			table_id = CALLOUT_TABLE(t, f);
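			/*
			 * Slots beyond the actual fanout are aliased back
			 * onto the tables created for the first
			 * callout_fanout values of f, so CALLOUT_TABLE()
			 * resolves to a real table for any cpu_seqid.
			 */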
			if (f >= callout_fanout) {
				callout_table[table_id] =
				    callout_table[table_id - callout_fanout];
				continue;
			}
			ct = kmem_zalloc(sizeof (callout_table_t), KM_SLEEP);
			callout_table[table_id] = ct;
			ct->ct_short_id = (callout_id_t)table_id |
			    CALLOUT_COUNTER_HIGH;
			ct->ct_long_id = ct->ct_short_id | CALLOUT_LONGTERM;
			ct->ct_curtime = ct->ct_runtime = lbolt;
			if (t == CALLOUT_NORMAL) {
				/*
				 * Each callout thread consumes exactly one
				 * task structure while active.  Therefore,
				 * prepopulating with 2 * CALLOUT_THREADS tasks
				 * ensures that there's at least one task per
				 * thread that's either scheduled or on the
				 * freelist.  In turn, this guarantees that
				 * taskq_dispatch() will always either succeed
				 * (because there's a free task structure) or
				 * be unnecessary (because "callout_execute(ct)"
				 * has already been scheduled).
				 */
				ct->ct_taskq =
				    taskq_create_instance("callout_taskq", f,
				    CALLOUT_THREADS, maxclsyspri,
				    2 * CALLOUT_THREADS, 2 * CALLOUT_THREADS,
				    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
			}
		}
	}
	(void) callb_add(callout_cpr_callb, 0, CB_CL_CPR_CALLOUT, "callout");
}
519