/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_callout_profiling.h"
#include "opt_kdtrace.h"
#include "opt_ddb.h"
#if defined(__arm__)
#include "opt_timer.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/callout.h>
#include <sys/file.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/smp.h>

#ifdef DDB
#include <ddb/ddb.h>
#include <machine/_inttypes.h>
#endif

#ifdef SMP
#include <machine/cpu.h>
#endif

#ifndef NO_EVENTTIMERS
DPCPU_DECLARE(sbintime_t, hardclocktime);
#endif

SDT_PROVIDER_DEFINE(callout_execute);
SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");

#ifdef CALLOUT_PROFILING
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
    "Average number of items examined per softclock call. Units = 1/1000");
static int avg_gcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
    "Average number of Giant callouts made per softclock call. Units = 1/1000");
static int avg_lockcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
    "Average number of lock callouts made per softclock call. Units = 1/1000");
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
    "Average number of MP callouts made per softclock call. Units = 1/1000");
static int avg_depth_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
    "Average number of direct callouts examined per callout_process call. "
    "Units = 1/1000");
static int avg_lockcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
    &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
    "callout_process call. Units = 1/1000");
static int avg_mpcalls_dir;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
    0, "Average number of MP direct callouts made per callout_process call. "
    "Units = 1/1000");
#endif

static int ncallout;
SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN, &ncallout, 0,
    "Number of entries in callwheel and size of timeout() preallocation");

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */
u_int callwheelsize, callwheelmask;

/*
 * The callout cpu exec entities represent the information necessary to
 * describe the state of callouts currently running on the CPU and the
 * state needed to migrate callouts to a new callout cpu. In particular,
 * the first entry of the cc_exec_entity array holds the information for
 * a callout running in SWI thread context, while the second one holds
 * the information for a callout running directly from hardware interrupt
 * context. The cached information is essential for deferring migration
 * when the migrating callout is already running.
 */
struct cc_exec {
	struct callout		*cc_curr;
#ifdef SMP
	void			(*ce_migration_func)(void *);
	void			*ce_migration_arg;
	int			ce_migration_cpu;
	sbintime_t		ce_migration_time;
	sbintime_t		ce_migration_prec;
#endif
	bool			cc_cancel;
	bool			cc_waiting;
};

/*
 * There is one struct callout_cpu per cpu, holding all relevant
 * state for the callout processing thread on the individual CPU.
 */
struct callout_cpu {
	struct mtx_padalign	cc_lock;
	struct cc_exec 		cc_exec_entity[2];
	struct callout		*cc_next;
	struct callout		*cc_callout;
	struct callout_list	*cc_callwheel;
	struct callout_tailq	cc_expireq;
	struct callout_slist	cc_callfree;
	sbintime_t		cc_firstevent;
	sbintime_t		cc_lastscan;
	void			*cc_cookie;
	u_int			cc_bucket;
	u_int			cc_inited;
	char			cc_ktr_event_name[20];
};

#define	callout_migrating(c)	((c)->c_iflags & CALLOUT_DFRMIGRATION)

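/*
 * Accessors for the per-CPU execution state.  The 'dir' argument selects
 * which cc_exec_entity entry is used: 0 for callouts run from the
 * softclock() SWI thread, 1 for callouts run directly from hardware
 * interrupt context (C_DIRECT_EXEC).
 */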
#define	cc_exec_curr(cc, dir)		cc->cc_exec_entity[dir].cc_curr
#define	cc_exec_next(cc)		cc->cc_next
#define	cc_exec_cancel(cc, dir)		cc->cc_exec_entity[dir].cc_cancel
#define	cc_exec_waiting(cc, dir)	cc->cc_exec_entity[dir].cc_waiting
#ifdef SMP
#define	cc_migration_func(cc, dir)	cc->cc_exec_entity[dir].ce_migration_func
#define	cc_migration_arg(cc, dir)	cc->cc_exec_entity[dir].ce_migration_arg
#define	cc_migration_cpu(cc, dir)	cc->cc_exec_entity[dir].ce_migration_cpu
#define	cc_migration_time(cc, dir)	cc->cc_exec_entity[dir].ce_migration_time
#define	cc_migration_prec(cc, dir)	cc->cc_exec_entity[dir].ce_migration_prec

struct callout_cpu cc_cpu[MAXCPU];
#define	CPUBLOCK	MAXCPU
#define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
#define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
#else
struct callout_cpu cc_cpu;
#define	CC_CPU(cpu)	&cc_cpu
#define	CC_SELF()	&cc_cpu
#endif
#define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
#define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)

static int timeout_cpu;

static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#ifdef CALLOUT_PROFILING
		    int *mpcalls, int *lockcalls, int *gcalls,
#endif
		    int direct);

static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");

/**
 * Locked by cc_lock:
 *   cc_curr         - If a callout is in progress, it is cc_curr.
 *                     If cc_curr is non-NULL, threads waiting in
 *                     callout_drain() will be woken up as soon as the
 *                     relevant callout completes.
 *   cc_cancel       - Changing to true with both c_lock and cc_lock held
 *                     guarantees that the current callout will not run.
 *                     softclock_call_cc() sets this to false before it
 *                     drops cc_lock to acquire c_lock, and it calls
 *                     the handler only if cc_cancel is still false after
 *                     c_lock is successfully acquired.
 *   cc_waiting      - If a thread is waiting in callout_drain(), then
 *                     cc_waiting is true.  Set only when
 *                     cc_curr is non-NULL.
 */

/*
 * Resets the execution entity tied to a specific callout cpu.
 */
static void
cc_cce_cleanup(struct callout_cpu *cc, int direct)
{

	cc_exec_curr(cc, direct) = NULL;
	cc_exec_cancel(cc, direct) = false;
	cc_exec_waiting(cc, direct) = false;
#ifdef SMP
	cc_migration_cpu(cc, direct) = CPUBLOCK;
	cc_migration_time(cc, direct) = 0;
	cc_migration_prec(cc, direct) = 0;
	cc_migration_func(cc, direct) = NULL;
	cc_migration_arg(cc, direct) = NULL;
#endif
}

/*
 * Checks if migration is requested by a specific callout cpu.
 */
static int
cc_cce_migrating(struct callout_cpu *cc, int direct)
{

#ifdef SMP
	return (cc_migration_cpu(cc, direct) != CPUBLOCK);
#else
	return (0);
#endif
}

/*
 * Kernel low level callwheel initialization
 * called on cpu0 during kernel startup.
 */
static void
callout_callwheel_init(void *dummy)
{
	struct callout_cpu *cc;

	/*
	 * Calculate the size of the callout wheel and the preallocated
	 * timeout() structures.
	 * XXX: Clip ncallout to the value the previous maxusers-based
	 * formula yields at its cap of maxusers = 384.  This is still
	 * huge, but acceptable.
	 */
	memset(CC_CPU(0), 0, sizeof(cc_cpu));
	ncallout = imin(16 + maxproc + maxfiles, 18508);
	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);

	/*
	 * Calculate callout wheel size, should be next power of two higher
	 * than 'ncallout'.
	 */
	callwheelsize = 1 << fls(ncallout);
	callwheelmask = callwheelsize - 1;

	/*
	 * Only cpu0 handles timeout(9) and receives a preallocation.
	 *
	 * XXX: Once all timeout(9) consumers are converted this can
	 * be removed.
	 */
	timeout_cpu = PCPU_GET(cpuid);
	cc = CC_CPU(timeout_cpu);
	cc->cc_callout = malloc(ncallout * sizeof(struct callout),
	    M_CALLOUT, M_WAITOK);
	callout_cpu_init(cc, timeout_cpu);
}
SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);

/*
 * Initialize the per-cpu callout structures.
 */
static void
callout_cpu_init(struct callout_cpu *cc, int cpu)
{
	struct callout *c;
	int i;

	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
	SLIST_INIT(&cc->cc_callfree);
	cc->cc_inited = 1;
	cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
	    M_CALLOUT, M_WAITOK);
	for (i = 0; i < callwheelsize; i++)
		LIST_INIT(&cc->cc_callwheel[i]);
	TAILQ_INIT(&cc->cc_expireq);
	cc->cc_firstevent = SBT_MAX;
	for (i = 0; i < 2; i++)
		cc_cce_cleanup(cc, i);
	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
	    "callwheel cpu %d", cpu);
	if (cc->cc_callout == NULL)	/* Only cpu0 handles timeout(9) */
		return;
	for (i = 0; i < ncallout; i++) {
		c = &cc->cc_callout[i];
		callout_init(c, 0);
		c->c_iflags = CALLOUT_LOCAL_ALLOC;
		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
	}
}

#ifdef SMP
/*
 * Switches the cpu tied to a specific callout.
 * The function expects a locked incoming callout cpu and returns with the
 * outgoing callout cpu locked.
 */
static struct callout_cpu *
callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
{
	struct callout_cpu *new_cc;

	MPASS(c != NULL && cc != NULL);
	CC_LOCK_ASSERT(cc);

	/*
	 * Avoid interrupts and preemption firing after the callout cpu
	 * is blocked in order to avoid deadlocks, as the new thread
	 * may need to acquire the callout cpu lock.
	 */
	c->c_cpu = CPUBLOCK;
	spinlock_enter();
	CC_UNLOCK(cc);
	new_cc = CC_CPU(new_cpu);
	CC_LOCK(new_cc);
	spinlock_exit();
	c->c_cpu = new_cpu;
	return (new_cc);
}
#endif

/*
 * Start standard softclock thread.
 */
static void
start_softclock(void *dummy)
{
	struct callout_cpu *cc;
#ifdef SMP
	int cpu;
#endif

	cc = CC_CPU(timeout_cpu);
	if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK,
	    INTR_MPSAFE, &cc->cc_cookie))
		panic("died while creating standard software ithreads");
#ifdef SMP
	CPU_FOREACH(cpu) {
		if (cpu == timeout_cpu)
			continue;
		cc = CC_CPU(cpu);
		cc->cc_callout = NULL;	/* Only cpu0 handles timeout(9). */
		callout_cpu_init(cc, cpu);
		if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
		    INTR_MPSAFE, &cc->cc_cookie))
			panic("died while creating standard software ithreads");
	}
#endif
}
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);

#define	CC_HASH_SHIFT	8

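/*
 * The callwheel hash is taken from the upper bits of the 32.32 fixed-point
 * sbintime_t expiration time, so consecutive buckets are
 * 1/(1 << CC_HASH_SHIFT) of a second apart (1/256 s with the current
 * CC_HASH_SHIFT of 8).
 */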
static inline u_int
callout_hash(sbintime_t sbt)
{

	return (sbt >> (32 - CC_HASH_SHIFT));
}

static inline u_int
callout_get_bucket(sbintime_t sbt)
{

	return (callout_hash(sbt) & callwheelmask);
}

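/*
 * Process expired callouts; expected to be called from the hardware timer
 * interrupt code (see kern_clocksource.c).  Buckets between the last scan
 * time and 'now' are walked: C_DIRECT_EXEC callouts are run in place and
 * everything else is queued on cc_expireq for the softclock() SWI.  The
 * routine also computes how soon the next timer interrupt is needed.
 */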
void
callout_process(sbintime_t now)
{
	struct callout *tmp, *tmpn;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t first, last, max, tmp_max;
	uint32_t lookahead;
	u_int firstb, lastb, nowb;
#ifdef CALLOUT_PROFILING
	int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
#endif

	cc = CC_SELF();
	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);

	/* Compute the buckets of the last scan and present times. */
	firstb = callout_hash(cc->cc_lastscan);
	cc->cc_lastscan = now;
	nowb = callout_hash(now);

	/* Compute the last bucket and minimum time of the bucket after it. */
	if (nowb == firstb)
		lookahead = (SBT_1S / 16);
	else if (nowb - firstb == 1)
		lookahead = (SBT_1S / 8);
	else
		lookahead = (SBT_1S / 2);
	first = last = now;
	first += (lookahead / 2);
	last += lookahead;
	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
	lastb = callout_hash(last) - 1;
	max = last;

	/*
	 * Check if we wrapped around the entire wheel from the last scan.
	 * If so, we need to scan the entire wheel for pending callouts.
	 */
	if (lastb - firstb >= callwheelsize) {
		lastb = firstb + callwheelsize - 1;
		if (nowb - firstb >= callwheelsize)
			nowb = lastb;
	}

	/* Iterate callwheel from firstb to nowb and then up to lastb. */
	do {
		sc = &cc->cc_callwheel[firstb & callwheelmask];
		tmp = LIST_FIRST(sc);
		while (tmp != NULL) {
			/* Run the callout if its time has been reached. */
			if (tmp->c_time <= now) {
				/*
				 * Consumer told us the callout may be run
				 * directly from hardware interrupt context.
				 */
				if (tmp->c_iflags & CALLOUT_DIRECT) {
#ifdef CALLOUT_PROFILING
					++depth_dir;
#endif
					cc_exec_next(cc) =
					    LIST_NEXT(tmp, c_links.le);
					cc->cc_bucket = firstb & callwheelmask;
					LIST_REMOVE(tmp, c_links.le);
					softclock_call_cc(tmp, cc,
#ifdef CALLOUT_PROFILING
					    &mpcalls_dir, &lockcalls_dir, NULL,
#endif
					    1);
					tmp = cc_exec_next(cc);
					cc_exec_next(cc) = NULL;
				} else {
					tmpn = LIST_NEXT(tmp, c_links.le);
					LIST_REMOVE(tmp, c_links.le);
					TAILQ_INSERT_TAIL(&cc->cc_expireq,
					    tmp, c_links.tqe);
					tmp->c_iflags |= CALLOUT_PROCESSED;
					tmp = tmpn;
				}
				continue;
			}
			/* Skip events from distant future. */
			if (tmp->c_time >= max)
				goto next;
			/*
			 * The event's minimum time is after the present
			 * maximum time, so it cannot be aggregated.
			 */
			if (tmp->c_time > last) {
				lastb = nowb;
				goto next;
			}
			/* Update first and last time, respecting this event. */
			if (tmp->c_time < first)
				first = tmp->c_time;
			tmp_max = tmp->c_time + tmp->c_precision;
			if (tmp_max < last)
				last = tmp_max;
next:
			tmp = LIST_NEXT(tmp, c_links.le);
		}
		/* Proceed with the next bucket. */
		firstb++;
		/*
		 * Stop if we looked past the present time and found
		 * some event we can't execute yet.
		 * Stop if we looked far enough into the future.
		 */
	} while (((int)(firstb - lastb)) <= 0);
	cc->cc_firstevent = last;
#ifndef NO_EVENTTIMERS
	cpu_new_callout(curcpu, last, first);
#endif
#ifdef CALLOUT_PROFILING
	avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
	avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
	avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
#endif
	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
	/*
	 * swi_sched acquires the thread lock, so we don't want to call it
	 * with cc_lock held; incorrect locking order.
	 */
	if (!TAILQ_EMPTY(&cc->cc_expireq))
		swi_sched(cc->cc_cookie, 0);
}

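/*
 * Lock the callout_cpu that a callout is currently bound to.  If a cpu
 * switch is in progress (c_cpu == CPUBLOCK), spin until it completes and
 * re-check c_cpu after taking the lock.
 */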
static struct callout_cpu *
callout_lock(struct callout *c)
{
	struct callout_cpu *cc;
	int cpu;

	for (;;) {
		cpu = c->c_cpu;
#ifdef SMP
		if (cpu == CPUBLOCK) {
			while (c->c_cpu == CPUBLOCK)
				cpu_spinwait();
			continue;
		}
#endif
		cc = CC_CPU(cpu);
		CC_LOCK(cc);
		if (cpu == c->c_cpu)
			break;
		CC_UNLOCK(cc);
	}
	return (cc);
}

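/*
 * Insert a callout into the callwheel of the given callout_cpu and, unless
 * event timers are disabled, notify the eventtimers(4) subsystem if the new
 * callout becomes the earliest event on this CPU.
 */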
static void
callout_cc_add(struct callout *c, struct callout_cpu *cc,
    sbintime_t sbt, sbintime_t precision, void (*func)(void *),
    void *arg, int cpu, int flags)
{
	int bucket;

	CC_LOCK_ASSERT(cc);
	if (sbt < cc->cc_lastscan)
		sbt = cc->cc_lastscan;
	c->c_arg = arg;
	c->c_iflags |= CALLOUT_PENDING;
	c->c_iflags &= ~CALLOUT_PROCESSED;
	c->c_flags |= CALLOUT_ACTIVE;
	if (flags & C_DIRECT_EXEC)
		c->c_iflags |= CALLOUT_DIRECT;
	c->c_func = func;
	c->c_time = sbt;
	c->c_precision = precision;
	bucket = callout_get_bucket(c->c_time);
	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
	    c, (int)(c->c_precision >> 32),
	    (u_int)(c->c_precision & 0xffffffff));
	LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
	if (cc->cc_bucket == bucket)
		cc_exec_next(cc) = c;
#ifndef NO_EVENTTIMERS
	/*
	 * Inform the eventtimers(4) subsystem there's a new callout
	 * that has been inserted, but only if really required.
	 */
	if (SBT_MAX - c->c_time < c->c_precision)
		c->c_precision = SBT_MAX - c->c_time;
	sbt = c->c_time + c->c_precision;
	if (sbt < cc->cc_firstevent) {
		cc->cc_firstevent = sbt;
		cpu_new_callout(cpu, sbt, c->c_time);
	}
#endif
}

static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{

	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
		return;
	c->c_func = NULL;
	SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}

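/*
 * Run a single callout: take its lock (if any), honour a racing
 * cancellation, invoke the handler with cc_lock dropped, and afterwards
 * handle callout_drain() waiters and any migration that was deferred while
 * the handler was running.
 */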
static void
softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#ifdef CALLOUT_PROFILING
    int *mpcalls, int *lockcalls, int *gcalls,
#endif
    int direct)
{
	struct rm_priotracker tracker;
	void (*c_func)(void *);
	void *c_arg;
	struct lock_class *class;
	struct lock_object *c_lock;
	uintptr_t lock_status;
	int c_iflags;
#ifdef SMP
	struct callout_cpu *new_cc;
	void (*new_func)(void *);
	void *new_arg;
	int flags, new_cpu;
	sbintime_t new_prec, new_time;
#endif
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbintime_t sbt1, sbt2;
	struct timespec ts2;
	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
	static timeout_t *lastfunc;
#endif

	KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
	    ("softclock_call_cc: pend %p %x", c, c->c_iflags));
	KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
	    ("softclock_call_cc: act %p %x", c, c->c_flags));
	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
	lock_status = 0;
	if (c->c_flags & CALLOUT_SHAREDLOCK) {
		if (class == &lock_class_rm)
			lock_status = (uintptr_t)&tracker;
		else
			lock_status = 1;
	}
	c_lock = c->c_lock;
	c_func = c->c_func;
	c_arg = c->c_arg;
	c_iflags = c->c_iflags;
	if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
		c->c_iflags = CALLOUT_LOCAL_ALLOC;
	else
		c->c_iflags &= ~CALLOUT_PENDING;

	cc_exec_curr(cc, direct) = c;
	cc_exec_cancel(cc, direct) = false;
	CC_UNLOCK(cc);
	if (c_lock != NULL) {
		class->lc_lock(c_lock, lock_status);
		/*
		 * The callout may have been cancelled
		 * while we switched locks.
		 */
		if (cc_exec_cancel(cc, direct)) {
			class->lc_unlock(c_lock);
			goto skip;
		}
		/* The callout cannot be stopped now. */
		cc_exec_cancel(cc, direct) = true;
		if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
			(*gcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
			    c, c_func, c_arg);
		} else {
#ifdef CALLOUT_PROFILING
			(*lockcalls)++;
#endif
			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
			    c, c_func, c_arg);
		}
	} else {
#ifdef CALLOUT_PROFILING
		(*mpcalls)++;
#endif
		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
		    c, c_func, c_arg);
	}
	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbt1 = sbinuptime();
#endif
	THREAD_NO_SLEEPING();
	SDT_PROBE1(callout_execute, , , callout__start, c);
	c_func(c_arg);
	SDT_PROBE1(callout_execute, , , callout__end, c);
	THREAD_SLEEPING_OK();
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
	sbt2 = sbinuptime();
	sbt2 -= sbt1;
	if (sbt2 > maxdt) {
		if (lastfunc != c_func || sbt2 > maxdt * 2) {
			ts2 = sbttots(sbt2);
			printf(
		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
		}
		maxdt = sbt2;
		lastfunc = c_func;
	}
#endif
	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
	CTR1(KTR_CALLOUT, "callout %p finished", c);
	if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
		class->lc_unlock(c_lock);
skip:
	CC_LOCK(cc);
	KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
	cc_exec_curr(cc, direct) = NULL;
	if (cc_exec_waiting(cc, direct)) {
		/*
		 * There is someone waiting for the
		 * callout to complete.
		 * If the callout was scheduled for
		 * migration just cancel it.
		 */
		if (cc_cce_migrating(cc, direct)) {
			cc_cce_cleanup(cc, direct);

			/*
			 * It should be asserted here that the callout is not
			 * destroyed but that is not easy.
			 */
			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
		}
		cc_exec_waiting(cc, direct) = false;
		CC_UNLOCK(cc);
		wakeup(&cc_exec_waiting(cc, direct));
		CC_LOCK(cc);
	} else if (cc_cce_migrating(cc, direct)) {
		KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
		    ("Migrating legacy callout %p", c));
#ifdef SMP
		/*
		 * If the callout was scheduled for
		 * migration just perform it now.
		 */
		new_cpu = cc_migration_cpu(cc, direct);
		new_time = cc_migration_time(cc, direct);
		new_prec = cc_migration_prec(cc, direct);
		new_func = cc_migration_func(cc, direct);
		new_arg = cc_migration_arg(cc, direct);
		cc_cce_cleanup(cc, direct);

		/*
		 * It should be asserted here that the callout is not
		 * destroyed but that is not easy.
		 *
		 * First, handle deferred callout stops.
		 */
		if (!callout_migrating(c)) {
			CTR3(KTR_CALLOUT,
			     "deferred cancelled %p func %p arg %p",
			     c, new_func, new_arg);
			callout_cc_del(c, cc);
			return;
		}
		c->c_iflags &= ~CALLOUT_DFRMIGRATION;

		new_cc = callout_cpu_switch(c, cc, new_cpu);
		flags = (direct) ? C_DIRECT_EXEC : 0;
		callout_cc_add(c, new_cc, new_time, new_prec, new_func,
		    new_arg, new_cpu, flags);
		CC_UNLOCK(new_cc);
		CC_LOCK(cc);
#else
		panic("migration should not happen");
#endif
	}
	/*
	 * If the current callout is locally allocated (from
	 * timeout(9)) then put it on the freelist.
	 *
	 * Note: we need to check the cached copy of c_iflags because
	 * if it was not local, then it's not safe to deref the
	 * callout pointer.
	 */
	KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
	    c->c_iflags == CALLOUT_LOCAL_ALLOC,
	    ("corrupted callout"));
	if (c_iflags & CALLOUT_LOCAL_ALLOC)
		callout_cc_del(c, cc);
}

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
void
softclock(void *arg)
{
	struct callout_cpu *cc;
	struct callout *c;
#ifdef CALLOUT_PROFILING
	int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
#endif

	cc = (struct callout_cpu *)arg;
	CC_LOCK(cc);
	while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
		TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		softclock_call_cc(c, cc,
#ifdef CALLOUT_PROFILING
		    &mpcalls, &lockcalls, &gcalls,
#endif
		    0);
#ifdef CALLOUT_PROFILING
		++depth;
#endif
	}
#ifdef CALLOUT_PROFILING
	avg_depth += (depth * 1000 - avg_depth) >> 8;
	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
#endif
	CC_UNLOCK(cc);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * callout_handle_init --
 *	Initialize a handle so that using it with untimeout is benign.
 *
 *	See AT&T BCI Driver Reference Manual for specification.  This
 *	implementation differs from that one in that although an
 *	identification value is returned from timeout, the original
 *	arguments to timeout as well as the identifier are used to
 *	identify entries for untimeout.
 */
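/*
 * Illustrative sketch of the legacy interface (hypothetical consumer, not
 * part of this file): schedule my_timeout_func(softc) roughly one second
 * from now and cancel it later if it has not yet fired.
 *
 *	struct callout_handle th;
 *
 *	callout_handle_init(&th);
 *	th = timeout(my_timeout_func, softc, hz);
 *	...
 *	untimeout(my_timeout_func, softc, th);
 *
 * New code should use the callout_*() interface below instead.
 */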
struct callout_handle
timeout(timeout_t *ftn, void *arg, int to_ticks)
{
	struct callout_cpu *cc;
	struct callout *new;
	struct callout_handle handle;

	cc = CC_CPU(timeout_cpu);
	CC_LOCK(cc);
	/* Fill in the next free callout structure. */
	new = SLIST_FIRST(&cc->cc_callfree);
	if (new == NULL)
		/* XXX Attempt to malloc first */
		panic("timeout table full");
	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
	callout_reset(new, to_ticks, ftn, arg);
	handle.callout = new;
	CC_UNLOCK(cc);

	return (handle);
}

void
untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
{
	struct callout_cpu *cc;

	/*
	 * Check for a handle that was initialized
	 * by callout_handle_init, but never used
	 * for a real timeout.
	 */
	if (handle.callout == NULL)
		return;

	cc = callout_lock(handle.callout);
	if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
		callout_stop(handle.callout);
	CC_UNLOCK(cc);
}

void
callout_handle_init(struct callout_handle *handle)
{
	handle->callout = NULL;
}

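/*
 * callout_when() converts the (sbt, precision, flags) arguments of a
 * callout_reset_sbt*() request into the absolute expiration time and
 * precision used by the callwheel, honouring C_ABSOLUTE/C_PRECALC,
 * C_HARDCLOCK alignment and the C_PREL() relative precision flags.
 */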
void
callout_when(sbintime_t sbt, sbintime_t precision, int flags,
    sbintime_t *res, sbintime_t *prec_res)
{
	sbintime_t to_sbt, to_pr;

	if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) {
		*res = sbt;
		*prec_res = precision;
		return;
	}
	if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt)
		sbt = tick_sbt;
	if ((flags & C_HARDCLOCK) != 0 ||
#ifdef NO_EVENTTIMERS
	    sbt >= sbt_timethreshold) {
		to_sbt = getsbinuptime();

		/* Add safety belt for the case of hz > 1000. */
		to_sbt += tc_tick_sbt - tick_sbt;
#else
	    sbt >= sbt_tickthreshold) {
		/*
		 * Obtain the time of the last hardclock() call on
		 * this CPU directly from kern_clocksource.c.
		 * This value is per-CPU, but it is equal for all
		 * active ones.
		 */
#ifdef __LP64__
		to_sbt = DPCPU_GET(hardclocktime);
#else
		spinlock_enter();
		to_sbt = DPCPU_GET(hardclocktime);
		spinlock_exit();
#endif
#endif
		if ((flags & C_HARDCLOCK) == 0)
			to_sbt += tick_sbt;
	} else
		to_sbt = sbinuptime();
	if (SBT_MAX - to_sbt < sbt)
		to_sbt = SBT_MAX;
	else
		to_sbt += sbt;
	*res = to_sbt;
	to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
	    sbt >> C_PRELGET(flags));
	*prec_res = to_pr > precision ? to_pr : precision;
}

/*
 * New interface; clients allocate their own callout structures.
 *
 * callout_reset() - establish or change a timeout
 * callout_stop() - disestablish a timeout
 * callout_init() - initialize a callout structure so that it can
 *	safely be passed to callout_reset() and callout_stop()
 *
 * <sys/callout.h> defines three convenience macros:
 *
 * callout_active() - returns truth if callout has not been stopped,
 *	drained, or deactivated since the last time the callout was
 *	reset.
 * callout_pending() - returns truth if callout is still waiting for timeout
 * callout_deactivate() - marks the callout as having been serviced
 */
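/*
 * Typical usage of this interface, as an illustrative sketch only (the
 * softc, mutex and handler names below are hypothetical):
 *
 *	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);
 *	...
 *	mtx_lock(&sc->sc_mtx);
 *	callout_reset(&sc->sc_callout, hz / 10, my_handler, sc);
 *	mtx_unlock(&sc->sc_mtx);
 *	...
 *	callout_drain(&sc->sc_callout);		(sleepable context, e.g. detach)
 */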
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec,
    void (*ftn)(void *), void *arg, int cpu, int flags)
{
	sbintime_t to_sbt, precision;
	struct callout_cpu *cc;
	int cancelled, direct;
	int ignore_cpu = 0;

	cancelled = 0;
	if (cpu == -1) {
		ignore_cpu = 1;
	} else if ((cpu >= MAXCPU) ||
		   ((CC_CPU(cpu))->cc_inited == 0)) {
		/* Invalid CPU spec */
		panic("Invalid CPU in callout %d", cpu);
	}
	callout_when(sbt, prec, flags, &to_sbt, &precision);

	/*
	 * This flag used to be added by callout_cc_add, but the
	 * first time this is called we could end up with the
	 * wrong direct flag if we don't set it before we add.
	 */
	if (flags & C_DIRECT_EXEC) {
		direct = 1;
	} else {
		direct = 0;
	}
	KASSERT(!direct || c->c_lock == NULL,
	    ("%s: direct callout %p has lock", __func__, c));
	cc = callout_lock(c);
	/*
	 * Don't allow migration of pre-allocated callouts lest they
	 * become unbalanced, and handle the case where the user does
	 * not care which CPU is used.
	 */
	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
	    ignore_cpu) {
		cpu = c->c_cpu;
	}

	if (cc_exec_curr(cc, direct) == c) {
		/*
		 * We're being asked to reschedule a callout which is
		 * currently in progress.  If there is a lock then we
		 * can cancel the callout if it has not really started.
		 */
		if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
			cancelled = cc_exec_cancel(cc, direct) = true;
		if (cc_exec_waiting(cc, direct)) {
			/*
			 * Someone has called callout_drain to kill this
			 * callout.  Don't reschedule.
			 */
			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
			    cancelled ? "cancelled" : "failed to cancel",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			return (cancelled);
		}
#ifdef SMP
		if (callout_migrating(c)) {
			/*
			 * This only occurs when a second callout_reset_sbt_on
			 * is made after a previous one moved it into
			 * deferred migration (below). Note we do *not* change
			 * the prev_cpu even though the previous target may
			 * be different.
			 */
			cc_migration_cpu(cc, direct) = cpu;
			cc_migration_time(cc, direct) = to_sbt;
			cc_migration_prec(cc, direct) = precision;
			cc_migration_func(cc, direct) = ftn;
			cc_migration_arg(cc, direct) = arg;
			cancelled = 1;
			CC_UNLOCK(cc);
			return (cancelled);
		}
#endif
	}
	if (c->c_iflags & CALLOUT_PENDING) {
		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
			if (cc_exec_next(cc) == c)
				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
			LIST_REMOVE(c, c_links.le);
		} else {
			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
		}
		cancelled = 1;
		c->c_iflags &= ~ CALLOUT_PENDING;
		c->c_flags &= ~ CALLOUT_ACTIVE;
	}

#ifdef SMP
	/*
	 * If the callout must migrate try to perform it immediately.
	 * If the callout is currently running, just defer the migration
	 * to a more appropriate moment.
	 */
	if (c->c_cpu != cpu) {
		if (cc_exec_curr(cc, direct) == c) {
			/*
			 * Pending will have been removed since we are
			 * actually executing the callout on another
			 * CPU. That callout should be waiting on the
			 * lock the caller holds. If we set both
			 * active and pending after we return and the
			 * lock on the executing callout proceeds, it
			 * will then see pending is true and return.
			 * At the return from the actual callout execution
			 * the migration will occur in softclock_call_cc
			 * and this new callout will be placed on the
			 * new CPU via a call to callout_cpu_switch() which
			 * will get the lock on the right CPU followed
			 * by a call to callout_cc_add() which will add it there.
			 * (see above in softclock_call_cc()).
			 */
			cc_migration_cpu(cc, direct) = cpu;
			cc_migration_time(cc, direct) = to_sbt;
			cc_migration_prec(cc, direct) = precision;
			cc_migration_func(cc, direct) = ftn;
			cc_migration_arg(cc, direct) = arg;
			c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
			c->c_flags |= CALLOUT_ACTIVE;
			CTR6(KTR_CALLOUT,
		    "migration of %p func %p arg %p in %d.%08x to %u deferred",
			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
			    (u_int)(to_sbt & 0xffffffff), cpu);
			CC_UNLOCK(cc);
			return (cancelled);
		}
		cc = callout_cpu_switch(c, cc, cpu);
	}
#endif

	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
	    (u_int)(to_sbt & 0xffffffff));
	CC_UNLOCK(cc);

	return (cancelled);
}

/*
 * Common idioms that can be optimized in the future.
 */
int
callout_schedule_on(struct callout *c, int to_ticks, int cpu)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
}

int
callout_schedule(struct callout *c, int to_ticks)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
}

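/*
 * Common backend for callout_stop() and callout_drain().  With CS_DRAIN set
 * the caller is willing to sleep until a currently executing handler has
 * finished; without it, an executing callout can only be cancelled if it is
 * still waiting on a lock that the caller holds.  CS_MIGRBLOCK selects the
 * return value when only a deferred migration could be cancelled.
 */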
int
_callout_stop_safe(struct callout *c, int flags)
{
	struct callout_cpu *cc, *old_cc;
	struct lock_class *class;
	int direct, sq_locked, use_lock;
	int not_on_a_list;

	/*
	 * Some old subsystems don't hold Giant while running a callout_stop(),
	 * so just discard this check for the moment.
	 */
	if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) {
		if (c->c_lock == &Giant.lock_object)
			use_lock = mtx_owned(&Giant);
		else {
			use_lock = 1;
			class = LOCK_CLASS(c->c_lock);
			class->lc_assert(c->c_lock, LA_XLOCKED);
		}
	} else
		use_lock = 0;
	if (c->c_iflags & CALLOUT_DIRECT) {
		direct = 1;
	} else {
		direct = 0;
	}
	sq_locked = 0;
	old_cc = NULL;
again:
	cc = callout_lock(c);

	if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
	    (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
	    ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
		/*
		 * Special case where this slipped in while we
		 * were migrating *as* the callout is about to
		 * execute. The caller probably holds the lock
		 * the callout wants.
		 *
		 * Get rid of the migration first. Then set
		 * the flag that tells this code *not* to
		 * try to remove it from any lists (it's not
		 * on one yet). When the callout wheel runs,
		 * it will ignore this callout.
		 */
		c->c_iflags &= ~CALLOUT_PENDING;
		c->c_flags &= ~CALLOUT_ACTIVE;
		not_on_a_list = 1;
	} else {
		not_on_a_list = 0;
	}

	/*
	 * If the callout was migrating while the callout cpu lock was
	 * dropped, just drop the sleepqueue lock and check the states
	 * again.
	 */
	if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
		CC_UNLOCK(cc);
		sleepq_release(&cc_exec_waiting(old_cc, direct));
		sq_locked = 0;
		old_cc = NULL;
		goto again;
#else
		panic("migration should not happen");
#endif
	}

	/*
	 * If the callout isn't pending, it's not on the queue, so
	 * don't attempt to remove it from the queue.  We can try to
	 * stop it by other means however.
	 */
	if (!(c->c_iflags & CALLOUT_PENDING)) {
		c->c_flags &= ~CALLOUT_ACTIVE;

		/*
		 * If it wasn't on the queue and it isn't the current
		 * callout, then we can't stop it, so just bail.
		 */
		if (cc_exec_curr(cc, direct) != c) {
			CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			CC_UNLOCK(cc);
			if (sq_locked)
				sleepq_release(&cc_exec_waiting(cc, direct));
			return (0);
		}

		if ((flags & CS_DRAIN) != 0) {
			/*
			 * The current callout is running (or just
			 * about to run) and blocking is allowed, so
			 * just wait for the current invocation to
			 * finish.
			 */
			while (cc_exec_curr(cc, direct) == c) {
				/*
				 * Use direct calls to sleepqueue interface
				 * instead of cv/msleep in order to avoid
				 * a LOR between cc_lock and sleepqueue
				 * chain spinlocks.  This piece of code
				 * emulates a msleep_spin() call actually.
				 *
				 * If we already have the sleepqueue chain
				 * locked, then we can safely block.  If we
				 * don't already have it locked, however,
				 * we have to drop the cc_lock to lock
				 * it.  This opens several races, so we
				 * restart at the beginning once we have
				 * both locks.  If nothing has changed, then
				 * we will end up back here with sq_locked
				 * set.
				 */
				if (!sq_locked) {
					CC_UNLOCK(cc);
					sleepq_lock(
					    &cc_exec_waiting(cc, direct));
					sq_locked = 1;
					old_cc = cc;
					goto again;
				}

				/*
				 * Migration could be cancelled here, but
				 * as long as it is still not certain when
				 * it will be completed, just let softclock()
				 * take care of it.
				 */
				cc_exec_waiting(cc, direct) = true;
				DROP_GIANT();
				CC_UNLOCK(cc);
				sleepq_add(
				    &cc_exec_waiting(cc, direct),
				    &cc->cc_lock.lock_object, "codrain",
				    SLEEPQ_SLEEP, 0);
				sleepq_wait(
				    &cc_exec_waiting(cc, direct),
					     0);
				sq_locked = 0;
				old_cc = NULL;

				/* Reacquire locks previously released. */
				PICKUP_GIANT();
				CC_LOCK(cc);
			}
		} else if (use_lock &&
			   !cc_exec_cancel(cc, direct)) {

			/*
			 * The current callout is waiting for its
			 * lock which we hold.  Cancel the callout
			 * and return.  After our caller drops the
			 * lock, the callout will be skipped in
			 * softclock().
			 */
			cc_exec_cancel(cc, direct) = true;
			CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
			    c, c->c_func, c->c_arg);
			KASSERT(!cc_cce_migrating(cc, direct),
			    ("callout wrongly scheduled for migration"));
			if (callout_migrating(c)) {
				c->c_iflags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
				cc_migration_cpu(cc, direct) = CPUBLOCK;
				cc_migration_time(cc, direct) = 0;
				cc_migration_prec(cc, direct) = 0;
				cc_migration_func(cc, direct) = NULL;
				cc_migration_arg(cc, direct) = NULL;
#endif
			}
			CC_UNLOCK(cc);
			KASSERT(!sq_locked, ("sleepqueue chain locked"));
			return (1);
		} else if (callout_migrating(c)) {
			/*
			 * The callout is currently being serviced
			 * and the "next" callout is scheduled at
			 * its completion with a migration. We remove
			 * the migration flag so it *won't* get rescheduled,
1316278800Srrs			 * but we can't stop the one that's running, so
1317278800Srrs			 * we return 0.
1318278800Srrs			 */
1319281657Srrs			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
1320278800Srrs#ifdef SMP
1321278800Srrs			/*
1322278800Srrs			 * We can't call cc_cce_cleanup here since
1323278800Srrs			 * it would clear .ce_curr while the callout
1324278800Srrs			 * is still running.  Clearing only the
1325278800Srrs			 * migration state prevents the callout from
1326278800Srrs			 * being rescheduled once execution completes.
1327278800Srrs			 */
1328278800Srrs			cc_migration_cpu(cc, direct) = CPUBLOCK;
1329278800Srrs			cc_migration_time(cc, direct) = 0;
1330278800Srrs			cc_migration_prec(cc, direct) = 0;
1331278800Srrs			cc_migration_func(cc, direct) = NULL;
1332278800Srrs			cc_migration_arg(cc, direct) = NULL;
1333278800Srrs#endif
1334234952Skib			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
1335234952Skib			    c, c->c_func, c->c_arg);
1336234952Skib			CC_UNLOCK(cc);
1337296896Skib			return ((flags & CS_MIGRBLOCK) != 0);
1338155957Sjhb		}
1339163246Sglebius		CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
1340163246Sglebius		    c, c->c_func, c->c_arg);
1341177859Sjeff		CC_UNLOCK(cc);
1342172025Sjhb		KASSERT(!sq_locked, ("sleepqueue chain still locked"));
134381481Sjhb		return (0);
134444510Swollman	}
1345172025Sjhb	if (sq_locked)
1346278800Srrs		sleepq_release(&cc_exec_waiting(cc, direct));
1347172025Sjhb
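	/*
	 * The callout is still pending and is not being serviced, so it
	 * is safe to mark it stopped and unlink it below.
	 */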
1348281657Srrs	c->c_iflags &= ~CALLOUT_PENDING;
1349281657Srrs	c->c_flags &= ~CALLOUT_ACTIVE;
135044510Swollman
1351234981Skib	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
1352234981Skib	    c, c->c_func, c->c_arg);
1353278800Srrs	if (not_on_a_list == 0) {
1354281657Srrs		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
1355278800Srrs			if (cc_exec_next(cc) == c)
1356278800Srrs				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
1357278800Srrs			LIST_REMOVE(c, c_links.le);
1358281657Srrs		} else {
1359278800Srrs			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
1360281657Srrs		}
1361278800Srrs	}
1362234981Skib	callout_cc_del(c, cc);
1363177859Sjeff	CC_UNLOCK(cc);
136481481Sjhb	return (1);
136544510Swollman}
136644510Swollman
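/*
 * Prepare a callout structure for use.  A non-zero mpsafe argument means
 * the handler needs no lock; otherwise Giant is acquired around it.
 */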
136744510Swollmanvoid
136869147Sjlemoncallout_init(struct callout *c, int mpsafe)
137144510Swollman{
137244527Swollman	bzero(c, sizeof *c);
1373141428Siedowse	if (mpsafe) {
1374173760Sattilio		c->c_lock = NULL;
1375281657Srrs		c->c_iflags = CALLOUT_RETURNUNLOCKED;
1376141428Siedowse	} else {
1377173760Sattilio		c->c_lock = &Giant.lock_object;
1378281657Srrs		c->c_iflags = 0;
1379141428Siedowse	}
1380177859Sjeff	c->c_cpu = timeout_cpu;
138144510Swollman}
138244510Swollman
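/*
 * As above, but associate an arbitrary lock with the callout.  Spin locks
 * and sleepable locks are rejected by the assertions below.
 */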
1383141428Siedowsevoid
1384173760Sattilio_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
1388141428Siedowse{
1389141428Siedowse	bzero(c, sizeof *c);
1390173760Sattilio	c->c_lock = lock;
1391173760Sattilio	KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
1392173760Sattilio	    ("callout_init_lock: bad flags %d", flags));
1393173760Sattilio	KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
1394173760Sattilio	    ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
1395176013Sattilio	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
1396176013Sattilio	    (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
1397173760Sattilio	    __func__));
1398281657Srrs	c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
1399177859Sjeff	c->c_cpu = timeout_cpu;
1400141428Siedowse}
1401141428Siedowse
140231950Snate#ifdef APM_FIXUP_CALLTODO
140331950Snate/*
140431950Snate * Adjust the kernel calltodo timeout list.  This routine is used after
140531950Snate * an APM resume to recalculate the calltodo timer list values with the
140631950Snate * number of ticks we have been sleeping.  The next hardclock() will detect
140731950Snate * that there are fired timers and run softclock() to execute them.
140831950Snate *
140931950Snate * Please note, I have not done an exhaustive analysis of what code this
141031950Snate * might break.  I am motivated to have my select()'s and alarm()'s that
141131950Snate * have expired during suspend firing upon resume so that the applications
141231950Snate * which set the timer can do the maintenance the timer was for as close
141331950Snate * as possible to the originally intended time.  Testing this code for a
141431950Snate * week showed that resuming from a suspend resulted in 22 to 25 timers
1415302234Sbdrewery * firing, which seemed independent of whether the suspend was 2 hours or
141631950Snate * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
141731950Snate */
141831950Snatevoid
141931950Snateadjust_timeout_calltodo(struct timeval *time_change)
142131950Snate{
142231950Snate	struct callout *p;
142331950Snate	unsigned long delta_ticks;
142431950Snate
142531950Snate	/*
142631950Snate	 * How many ticks were we asleep?
142736127Sbde	 * (stolen from tvtohz()).
142831950Snate	 */
142931950Snate
143031950Snate	/* Nothing to do if the elapsed time is negative. */
143131950Snate	if (time_change->tv_sec < 0)
143231950Snate		return;
143331950Snate	else if (time_change->tv_sec <= LONG_MAX / 1000000)
143431950Snate		delta_ticks = (time_change->tv_sec * 1000000 +
143531950Snate			       time_change->tv_usec + (tick - 1)) / tick + 1;
143631950Snate	else if (time_change->tv_sec <= LONG_MAX / hz)
143731950Snate		delta_ticks = time_change->tv_sec * hz +
143831950Snate			      (time_change->tv_usec + (tick - 1)) / tick + 1;
143931950Snate	else
144031950Snate		delta_ticks = LONG_MAX;
144131950Snate
144231950Snate	if (delta_ticks > INT_MAX)
144331950Snate		delta_ticks = INT_MAX;
144431950Snate
144531950Snate	/*
144631950Snate	 * Now rip through the timer calltodo list looking for timers
144731950Snate	 * to expire.
144831950Snate	 */
144931950Snate
145031950Snate	/* don't collide with softclock() */
1451177859Sjeff	CC_LOCK(cc);
145231950Snate	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
145331950Snate		p->c_time -= delta_ticks;
145431950Snate
145531950Snate		/* Break if the timer had more time on it than delta_ticks */
145631950Snate		if (p->c_time > 0)
145731950Snate			break;
145831950Snate
145931950Snate		/* take back the ticks the timer didn't use (p->c_time <= 0) */
146031950Snate		delta_ticks = -p->c_time;
146131950Snate	}
1462177859Sjeff	CC_UNLOCK(cc);
146331950Snate
146431950Snate	return;
146531950Snate}
146631950Snate#endif /* APM_FIXUP_CALLTODO */
1467247777Sdavide
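/*
 * fls() for sbintime_t values: return the index of the most significant
 * set bit after adding half of the value, so times are bucketed to the
 * nearest power of two instead of being truncated.  Used by the
 * statistics sysctl handler below.
 */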
1468247777Sdavidestatic int
1469247777Sdavideflssbt(sbintime_t sbt)
1470247777Sdavide{
1471247777Sdavide
1472247777Sdavide	sbt += (uint64_t)sbt >> 1;
1473247777Sdavide	if (sizeof(long) >= sizeof(sbintime_t))
1474247777Sdavide		return (flsl(sbt));
1475247777Sdavide	if (sbt >= SBT_1S)
1476247777Sdavide		return (flsl(((uint64_t)sbt) >> 32) + 32);
1477247777Sdavide	return (flsl(sbt));
1478247777Sdavide}
1479247777Sdavide
1480247777Sdavide/*
1481247777Sdavide * Dump immediate statistic snapshot of the scheduled callouts.
1482247777Sdavide */
1483247777Sdavidestatic int
1484247777Sdavidesysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
1485247777Sdavide{
1486247777Sdavide	struct callout *tmp;
1487247777Sdavide	struct callout_cpu *cc;
1488247777Sdavide	struct callout_list *sc;
1489247777Sdavide	sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
1490247777Sdavide	int ct[64], cpr[64], ccpbk[32];
1491247777Sdavide	int error, val, i, count, tcum, pcum, maxc, c, medc;
1492247777Sdavide#ifdef SMP
1493247777Sdavide	int cpu;
1494247777Sdavide#endif
1495247777Sdavide
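	/*
	 * The snapshot is only collected when a value is written to the
	 * sysctl; a plain read returns immediately.
	 */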
1496247777Sdavide	val = 0;
1497247777Sdavide	error = sysctl_handle_int(oidp, &val, 0, req);
1498247777Sdavide	if (error != 0 || req->newptr == NULL)
1499247777Sdavide		return (error);
1500247777Sdavide	count = maxc = 0;
1501247777Sdavide	st = spr = maxt = maxpr = 0;
1502247777Sdavide	bzero(ccpbk, sizeof(ccpbk));
1503247777Sdavide	bzero(ct, sizeof(ct));
1504247777Sdavide	bzero(cpr, sizeof(cpr));
1505247777Sdavide	now = sbinuptime();
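	/*
	 * Walk every bucket of every callwheel (one wheel per CPU on SMP),
	 * accumulating each callout's time-to-fire and precision into
	 * power-of-two histograms.
	 */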
1506247777Sdavide#ifdef SMP
1507247777Sdavide	CPU_FOREACH(cpu) {
1508247777Sdavide		cc = CC_CPU(cpu);
1509247777Sdavide#else
1510247777Sdavide		cc = CC_CPU(timeout_cpu);
1511247777Sdavide#endif
1512247777Sdavide		CC_LOCK(cc);
1513247777Sdavide		for (i = 0; i < callwheelsize; i++) {
1514247777Sdavide			sc = &cc->cc_callwheel[i];
1515247777Sdavide			c = 0;
1516247777Sdavide			LIST_FOREACH(tmp, sc, c_links.le) {
1517247777Sdavide				c++;
1518247777Sdavide				t = tmp->c_time - now;
1519247777Sdavide				if (t < 0)
1520247777Sdavide					t = 0;
1521247777Sdavide				st += t / SBT_1US;
1522247777Sdavide				spr += tmp->c_precision / SBT_1US;
1523247777Sdavide				if (t > maxt)
1524247777Sdavide					maxt = t;
1525247777Sdavide				if (tmp->c_precision > maxpr)
1526247777Sdavide					maxpr = tmp->c_precision;
1527247777Sdavide				ct[flssbt(t)]++;
1528247777Sdavide				cpr[flssbt(tmp->c_precision)]++;
1529247777Sdavide			}
1530247777Sdavide			if (c > maxc)
1531247777Sdavide				maxc = c;
1532247777Sdavide			ccpbk[fls(c + c / 2)]++;
1533247777Sdavide			count += c;
1534247777Sdavide		}
1535247777Sdavide		CC_UNLOCK(cc);
1536247777Sdavide#ifdef SMP
1537247777Sdavide	}
1538247777Sdavide#endif
1539247777Sdavide
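	/*
	 * Approximate the medians by walking each histogram until half of
	 * the counted callouts have been accumulated.
	 */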
1540247777Sdavide	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
1541247777Sdavide		tcum += ct[i];
1542247777Sdavide	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
1543247777Sdavide	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
1544247777Sdavide		pcum += cpr[i];
1545247777Sdavide	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
1546247777Sdavide	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
1547247777Sdavide		c += ccpbk[i];
1548247777Sdavide	medc = (i >= 2) ? (1 << (i - 2)) : 0;
1549247777Sdavide
1550247777Sdavide	printf("Scheduled callouts statistic snapshot:\n");
1551247777Sdavide	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
1552247777Sdavide	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
1553247777Sdavide	printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
1554247777Sdavide	    medc,
1555247777Sdavide	    count / callwheelsize / mp_ncpus,
1556247777Sdavide	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
1557247777Sdavide	    maxc);
1558247777Sdavide	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
1559247777Sdavide	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
1560247777Sdavide	    (st / count) / 1000000, (st / count) % 1000000,
1561247777Sdavide	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
1562247777Sdavide	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
1563247777Sdavide	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
1564247777Sdavide	    (spr / count) / 1000000, (spr / count) % 1000000,
1565247777Sdavide	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
1566247777Sdavide	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
1567247777Sdavide	    "   prec\t   pcum\n");
1568247777Sdavide	for (i = 0, tcum = pcum = 0; i < 64; i++) {
1569247777Sdavide		if (ct[i] == 0 && cpr[i] == 0)
1570247777Sdavide			continue;
1571247777Sdavide		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
1572247777Sdavide		tcum += ct[i];
1573247777Sdavide		pcum += cpr[i];
1574247777Sdavide		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
1575247777Sdavide		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
1576247777Sdavide		    i - 1 - (32 - CC_HASH_SHIFT),
1577247777Sdavide		    ct[i], tcum, cpr[i], pcum);
1578247777Sdavide	}
1579247777Sdavide	return (error);
1580247777Sdavide}
1581247777SdavideSYSCTL_PROC(_kern, OID_AUTO, callout_stat,
1582247777Sdavide    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1583247777Sdavide    0, 0, sysctl_kern_callout_stat, "I",
1584247777Sdavide    "Dump immediate statistic snapshot of the scheduled callouts");
1585305853Shiren
1586305853Shiren#ifdef DDB
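/*
 * Pretty-print a single callout structure for the ddb(4)
 * "show callout" command.
 */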
1587305853Shirenstatic void
1588305853Shiren_show_callout(struct callout *c)
1589305853Shiren{
1590305853Shiren
1591305853Shiren	db_printf("callout %p\n", c);
1592305853Shiren#define	C_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, c->e)
1593305853Shiren	db_printf("   &c_links = %p\n", &(c->c_links));
1594305853Shiren	C_DB_PRINTF("%" PRId64,	c_time);
1595305853Shiren	C_DB_PRINTF("%" PRId64,	c_precision);
1596305853Shiren	C_DB_PRINTF("%p",	c_arg);
1597305853Shiren	C_DB_PRINTF("%p",	c_func);
1598305853Shiren	C_DB_PRINTF("%p",	c_lock);
1599305853Shiren	C_DB_PRINTF("%#x",	c_flags);
1600305853Shiren	C_DB_PRINTF("%#x",	c_iflags);
1601305853Shiren	C_DB_PRINTF("%d",	c_cpu);
1602305853Shiren#undef	C_DB_PRINTF
1603305853Shiren}
1604305853Shiren
1605305853ShirenDB_SHOW_COMMAND(callout, db_show_callout)
1606305853Shiren{
1607305853Shiren
1608305853Shiren	if (!have_addr) {
1609305853Shiren		db_printf("usage: show callout <struct callout *>\n");
1610305853Shiren		return;
1611305853Shiren	}
1612305853Shiren
1613305853Shiren	_show_callout((struct callout *)addr);
1614305853Shiren}
1615305853Shiren#endif /* DDB */
1616