1209371Smav/*-
2247777Sdavide * Copyright (c) 2010-2013 Alexander Motin <mav@FreeBSD.org>
3209371Smav * All rights reserved.
4209371Smav *
5209371Smav * Redistribution and use in source and binary forms, with or without
6209371Smav * modification, are permitted provided that the following conditions
7209371Smav * are met:
8209371Smav * 1. Redistributions of source code must retain the above copyright
9209371Smav *    notice, this list of conditions and the following disclaimer,
10209371Smav *    without modification, immediately at the beginning of the file.
11209371Smav * 2. Redistributions in binary form must reproduce the above copyright
12209371Smav *    notice, this list of conditions and the following disclaimer in the
13209371Smav *    documentation and/or other materials provided with the distribution.
14209371Smav *
15209371Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16209371Smav * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17209371Smav * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18209371Smav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19209371Smav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20209371Smav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21209371Smav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22209371Smav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23209371Smav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24209371Smav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25209371Smav */
26209371Smav
27209371Smav#include <sys/cdefs.h>
28209371Smav__FBSDID("$FreeBSD$");
29209371Smav
30209371Smav/*
31209371Smav * Common routines to manage event timers hardware.
32209371Smav */
33209371Smav
34212992Smav#include "opt_device_polling.h"
35209371Smav#include "opt_kdtrace.h"
36209371Smav
37209371Smav#include <sys/param.h>
38209371Smav#include <sys/systm.h>
39209371Smav#include <sys/bus.h>
40247777Sdavide#include <sys/limits.h>
41209371Smav#include <sys/lock.h>
42209371Smav#include <sys/kdb.h>
43212541Smav#include <sys/ktr.h>
44209371Smav#include <sys/mutex.h>
45209371Smav#include <sys/proc.h>
46209371Smav#include <sys/kernel.h>
47209371Smav#include <sys/sched.h>
48209371Smav#include <sys/smp.h>
49209371Smav#include <sys/sysctl.h>
50209371Smav#include <sys/timeet.h>
51212603Smav#include <sys/timetc.h>
52209371Smav
53209371Smav#include <machine/atomic.h>
54209371Smav#include <machine/clock.h>
55209371Smav#include <machine/cpu.h>
56209371Smav#include <machine/smp.h>
57209371Smav
58209371Smav#ifdef KDTRACE_HOOKS
59209371Smav#include <sys/dtrace_bsd.h>
60221990Savgcyclic_clock_func_t	cyclic_clock_func = NULL;
61209371Smav#endif
62209371Smav
int			cpu_can_deep_sleep = 0;	/* C3 state is available. */
int			cpu_disable_deep_sleep = 0; /* Timer dies in C3. */

/* Timer (re)configuration helpers. */
static void		setuptimer(void);
static void		loadtimer(sbintime_t now, int first);
static int		doconfigtimer(void);
static void		configtimer(int start);
static int		round_freq(struct eventtimer *et, int freq);

/* Event scheduling and dispatch. */
static sbintime_t	getnextcpuevent(int idle);
static sbintime_t	getnextevent(void);
static int		handleevents(sbintime_t now, int fake);
static struct mtx	et_hw_mtx;

/*
 * Lock/unlock the active timer hardware.  A per-CPU event timer is
 * protected by the per-CPU spin mutex embedded in the given state;
 * a single global timer is protected by the global et_hw_mtx instead.
 */
#define	ET_HW_LOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_lock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_lock_spin(&et_hw_mtx);			\
	}

#define	ET_HW_UNLOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_unlock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_unlock_spin(&et_hw_mtx);			\
	}

static struct eventtimer *timer = NULL;	/* Currently active event timer. */
static sbintime_t	timerperiod;	/* Timer period for periodic mode. */
static sbintime_t	statperiod;	/* statclock() events period. */
static sbintime_t	profperiod;	/* profclock() events period. */
static sbintime_t	nexttick;	/* Next global timer tick time. */
static u_int		busy = 1;	/* Reconfiguration is in progress. */
static int		profiling = 0;	/* Profiling events enabled. */

static char		timername[32];	/* Wanted timer. */
TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));

static int		singlemul = 0;	/* Multiplier for periodic mode. */
TUNABLE_INT("kern.eventtimer.singlemul", &singlemul);
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul,
    0, "Multiplier for periodic mode");

static u_int		idletick = 0;	/* Run periodic events when idle. */
TUNABLE_INT("kern.eventtimer.idletick", &idletick);
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
    0, "Run periodic events when idle");

static int		periodic = 0;	/* Periodic or one-shot mode. */
static int		want_periodic = 0; /* What mode to prefer. */
TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);
118212541Smav
/*
 * Per-CPU event timer state.  All times are absolute sbintime_t values
 * on the uptime timescale.
 */
struct pcpu_state {
	struct mtx	et_hw_mtx;	/* Per-CPU timer mutex. */
	u_int		action;		/* Reconfiguration requests. */
	u_int		handle;		/* Immediate handle requests. */
	sbintime_t	now;		/* Last tick time. */
	sbintime_t	nextevent;	/* Next scheduled event on this CPU. */
	sbintime_t	nexttick;	/* Next timer tick time. */
	sbintime_t	nexthard;	/* Next hardclock() event. */
	sbintime_t	nextstat;	/* Next statclock() event. */
	sbintime_t	nextprof;	/* Next profclock() event. */
	sbintime_t	nextcall;	/* Next callout event. */
	sbintime_t	nextcallopt;	/* Next optional callout event. */
#ifdef KDTRACE_HOOKS
	sbintime_t	nextcyc;	/* Next OpenSolaris cyclics event. */
#endif
	int		ipi;		/* This CPU needs IPI. */
	int		idle;		/* This CPU is in idle mode. */
};

static DPCPU_DEFINE(struct pcpu_state, timerstate);
/* Time of the last hardclock() tick on each CPU, readable by other code. */
DPCPU_DEFINE(sbintime_t, hardclocktime);
140212541Smav
141212541Smav/*
142212541Smav * Timer broadcast IPI handler.
143212541Smav */
144212541Smavint
145212541Smavhardclockintr(void)
146212541Smav{
147247777Sdavide	sbintime_t now;
148212541Smav	struct pcpu_state *state;
149212541Smav	int done;
150212541Smav
151212541Smav	if (doconfigtimer() || busy)
152212541Smav		return (FILTER_HANDLED);
153212541Smav	state = DPCPU_PTR(timerstate);
154212541Smav	now = state->now;
155247777Sdavide	CTR3(KTR_SPARE2, "ipi  at %d:    now  %d.%08x",
156247777Sdavide	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
157247777Sdavide	done = handleevents(now, 0);
158212541Smav	return (done ? FILTER_HANDLED : FILTER_STRAY);
159212541Smav}
160212541Smav
/*
 * Handle all events due at or before 'now' on this CPU, then reprogram
 * the timer for the next one.  'fake' selects how much real work is done:
 * 0 runs everything from a real interrupt frame; with fake == 1 the
 * prof/cyclic handlers are skipped (no trapframe); with fake == 2 only
 * the next-event bookkeeping is advanced and no clock handlers run.
 * Returns nonzero if any clock handler was actually invoked.
 */
static int
handleevents(sbintime_t now, int fake)
{
	sbintime_t t, *hct;
	struct trapframe *frame;
	struct pcpu_state *state;
	int usermode;
	int done, runs;

	CTR3(KTR_SPARE2, "handle at %d:  now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = 0;
	if (fake) {
		/* No interrupt frame is available for a fake pass. */
		frame = NULL;
		usermode = 0;
	} else {
		frame = curthread->td_intr_frame;
		usermode = TRAPF_USERMODE(frame);
	}

	state = DPCPU_PTR(timerstate);

	/* Count and consume all hardclock() periods that have elapsed. */
	runs = 0;
	while (now >= state->nexthard) {
		state->nexthard += tick_sbt;
		runs++;
	}
	if (runs) {
		/* Publish the time of the last hardclock tick just consumed. */
		hct = DPCPU_PTR(hardclocktime);
		*hct = state->nexthard - tick_sbt;
		if (fake < 2) {
			hardclock_cnt(runs, usermode);
			done = 1;
		}
	}
	/* Same accounting for statclock() periods. */
	runs = 0;
	while (now >= state->nextstat) {
		state->nextstat += statperiod;
		runs++;
	}
	if (runs && fake < 2) {
		statclock_cnt(runs, usermode);
		done = 1;
	}
	if (profiling) {
		runs = 0;
		while (now >= state->nextprof) {
			state->nextprof += profperiod;
			runs++;
		}
		/* profclock needs a real trapframe for TRAPF_PC(). */
		if (runs && !fake) {
			profclock_cnt(runs, usermode, TRAPF_PC(frame));
			done = 1;
		}
	} else
		/* Keep nextprof in lockstep while profiling is off. */
		state->nextprof = state->nextstat;
	if (now >= state->nextcallopt) {
		/* callout_process() will reschedule via cpu_new_callout(). */
		state->nextcall = state->nextcallopt = INT64_MAX;
		callout_process(now);
	}

#ifdef KDTRACE_HOOKS
	if (fake == 0 && now >= state->nextcyc && cyclic_clock_func != NULL) {
		state->nextcyc = INT64_MAX;
		(*cyclic_clock_func)(frame);
	}
#endif

	/* Schedule and load the next event, unless reconfiguration began. */
	t = getnextcpuevent(0);
	ET_HW_LOCK(state);
	if (!busy) {
		state->idle = 0;
		state->nextevent = t;
		loadtimer(now, 0);
	}
	ET_HW_UNLOCK(state);
	return (done);
}
242209371Smav
243212541Smav/*
244212541Smav * Schedule binuptime of the next event on current CPU.
245212541Smav */
246247777Sdavidestatic sbintime_t
247247777Sdavidegetnextcpuevent(int idle)
248209371Smav{
249247777Sdavide	sbintime_t event;
250212541Smav	struct pcpu_state *state;
251247777Sdavide	u_int hardfreq;
252209371Smav
253212541Smav	state = DPCPU_PTR(timerstate);
254247777Sdavide	/* Handle hardclock() events, skipping some if CPU is idle. */
255247777Sdavide	event = state->nexthard;
256247777Sdavide	if (idle) {
257247777Sdavide		hardfreq = (u_int)hz / 2;
258247777Sdavide		if (tc_min_ticktock_freq > 2
259247777Sdavide#ifdef SMP
260247777Sdavide		    && curcpu == CPU_FIRST()
261247777Sdavide#endif
262247777Sdavide		    )
263247777Sdavide			hardfreq = hz / tc_min_ticktock_freq;
264247777Sdavide		if (hardfreq > 1)
265247777Sdavide			event += tick_sbt * (hardfreq - 1);
266232919Smav	}
267247777Sdavide	/* Handle callout events. */
268247777Sdavide	if (event > state->nextcall)
269247777Sdavide		event = state->nextcall;
270232919Smav	if (!idle) { /* If CPU is active - handle other types of events. */
271247777Sdavide		if (event > state->nextstat)
272247777Sdavide			event = state->nextstat;
273247777Sdavide		if (profiling && event > state->nextprof)
274247777Sdavide			event = state->nextprof;
275212541Smav	}
276221990Savg#ifdef KDTRACE_HOOKS
277247777Sdavide	if (event > state->nextcyc)
278247777Sdavide		event = state->nextcyc;
279221990Savg#endif
280247777Sdavide	return (event);
281209371Smav}
282209371Smav
283212541Smav/*
284212541Smav * Schedule binuptime of the next event on all CPUs.
285212541Smav */
286247777Sdavidestatic sbintime_t
287247777Sdavidegetnextevent(void)
288209371Smav{
289212541Smav	struct pcpu_state *state;
290247777Sdavide	sbintime_t event;
291212541Smav#ifdef SMP
292212541Smav	int	cpu;
293212541Smav#endif
294247777Sdavide	int	c;
295209371Smav
296212541Smav	state = DPCPU_PTR(timerstate);
297247777Sdavide	event = state->nextevent;
298247777Sdavide	c = -1;
299247777Sdavide#ifdef SMP
300232919Smav	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
301247777Sdavide		CPU_FOREACH(cpu) {
302247777Sdavide			state = DPCPU_ID_PTR(cpu, timerstate);
303247777Sdavide			if (event > state->nextevent) {
304247777Sdavide				event = state->nextevent;
305247777Sdavide				c = cpu;
306212541Smav			}
307212541Smav		}
308247777Sdavide	}
309232919Smav#endif
310247777Sdavide	CTR4(KTR_SPARE2, "next at %d:    next %d.%08x by %d",
311247777Sdavide	    curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c);
312247777Sdavide	return (event);
313209371Smav}
314209371Smav
/*
 * Hardware timer callback function.  Runs on the CPU that took the timer
 * interrupt; for a global (non-per-CPU) timer it also forwards the tick
 * to every other CPU with a pending event via IPI_HARDCLOCK.
 */
static void
timercb(struct eventtimer *et, void *arg)
{
	sbintime_t now;
	sbintime_t *next;
	struct pcpu_state *state;
#ifdef SMP
	int cpu, bcast;
#endif

	/* Do not touch anything if somebody reconfiguring timers. */
	if (busy)
		return;
	/* Update present and next tick times. */
	state = DPCPU_PTR(timerstate);
	if (et->et_flags & ET_FLAGS_PERCPU) {
		next = &state->nexttick;
	} else
		next = &nexttick;
	now = sbinuptime();
	if (periodic)
		*next = now + timerperiod;
	else
		*next = -1;	/* Next tick is not scheduled yet. */
	state->now = now;
	CTR3(KTR_SPARE2, "intr at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));

#ifdef SMP
	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
	bcast = 0;
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			ET_HW_LOCK(state);
			state->now = now;
			if (now >= state->nextevent) {
				/*
				 * Push the CPU's next event out (presumably
				 * so it is not re-selected before its IPI is
				 * handled -- the handler reschedules it).
				 */
				state->nextevent += SBT_1S;
				if (curcpu != cpu) {
					state->ipi = 1;
					bcast = 1;
				}
			}
			ET_HW_UNLOCK(state);
		}
	}
#endif

	/* Handle events for this time on this CPU. */
	handleevents(now, 0);

#ifdef SMP
	/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
	if (bcast) {
		CPU_FOREACH(cpu) {
			if (curcpu == cpu)
				continue;
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (state->ipi) {
				state->ipi = 0;
				ipi_cpu(cpu, IPI_HARDCLOCK);
			}
		}
	}
#endif
}
382209371Smav
/*
 * Load new value into hardware timer.  In periodic mode (re)starts the
 * timer aligned to the global period grid; in one-shot mode programs it
 * for the earliest pending event.  Caller holds ET_HW_LOCK.
 */
static void
loadtimer(sbintime_t now, int start)
{
	struct pcpu_state *state;
	sbintime_t new;
	sbintime_t *next;
	uint64_t tmp;
	int eq;

	/* Pick the next-tick slot matching the timer's scope. */
	if (timer->et_flags & ET_FLAGS_PERCPU) {
		state = DPCPU_PTR(timerstate);
		next = &state->nexttick;
	} else
		next = &nexttick;
	if (periodic) {
		if (start) {
			/*
			 * Try to start all periodic timers aligned
			 * to period to make events synchronous.
			 */
			tmp = now % timerperiod;
			new = timerperiod - tmp;
			if (new < tmp)		/* Left less than passed. */
				new += timerperiod;
			CTR5(KTR_SPARE2, "load p at %d:   now %d.%08x first in %d.%08x",
			    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
			    (int)(new >> 32), (u_int)(new & 0xffffffff));
			*next = new + now;
			et_start(timer, new, timerperiod);
		}
	} else {
		/* One-shot: arm for the earliest event across served CPUs. */
		new = getnextevent();
		eq = (new == *next);
		CTR4(KTR_SPARE2, "load at %d:    next %d.%08x eq %d",
		    curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq);
		/* Skip reprogramming if hardware is already set to 'new'. */
		if (!eq) {
			*next = new;
			et_start(timer, new - now, 0);
		}
	}
}
427209371Smav
428209371Smav/*
429212541Smav * Prepare event timer parameters after configuration changes.
430212541Smav */
431212541Smavstatic void
432212541Smavsetuptimer(void)
433212541Smav{
434212541Smav	int freq;
435212541Smav
436212541Smav	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
437212541Smav		periodic = 0;
438212541Smav	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
439212541Smav		periodic = 1;
440212600Smav	singlemul = MIN(MAX(singlemul, 1), 20);
441212541Smav	freq = hz * singlemul;
442212541Smav	while (freq < (profiling ? profhz : stathz))
443212541Smav		freq += hz;
444212541Smav	freq = round_freq(timer, freq);
445247777Sdavide	timerperiod = SBT_1S / freq;
446212541Smav}
447212541Smav
448212541Smav/*
449209371Smav * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
450209371Smav */
451212541Smavstatic int
452212541Smavdoconfigtimer(void)
453209371Smav{
454247777Sdavide	sbintime_t now;
455212541Smav	struct pcpu_state *state;
456209371Smav
457212541Smav	state = DPCPU_PTR(timerstate);
458212541Smav	switch (atomic_load_acq_int(&state->action)) {
459212541Smav	case 1:
460247777Sdavide		now = sbinuptime();
461212541Smav		ET_HW_LOCK(state);
462247777Sdavide		loadtimer(now, 1);
463212541Smav		ET_HW_UNLOCK(state);
464212541Smav		state->handle = 0;
465212541Smav		atomic_store_rel_int(&state->action, 0);
466209371Smav		return (1);
467212541Smav	case 2:
468212541Smav		ET_HW_LOCK(state);
469212541Smav		et_stop(timer);
470212541Smav		ET_HW_UNLOCK(state);
471212541Smav		state->handle = 0;
472212541Smav		atomic_store_rel_int(&state->action, 0);
473212541Smav		return (1);
474209371Smav	}
475212541Smav	if (atomic_readandclear_int(&state->handle) && !busy) {
476247777Sdavide		now = sbinuptime();
477247777Sdavide		handleevents(now, 0);
478212541Smav		return (1);
479212541Smav	}
480209371Smav	return (0);
481209371Smav}
482209371Smav
/*
 * Reconfigure specified timer.
 * For per-CPU timers use IPI to make other CPUs to reconfigure.
 * start != 0 (re)initializes all per-CPU state and starts the timer,
 * clearing 'busy'; start == 0 sets 'busy' and stops the timer.  Runs in
 * a critical section and spins until all other CPUs have acted.
 */
static void
configtimer(int start)
{
	sbintime_t now, next;
	struct pcpu_state *state;
	int cpu;

	if (start) {
		setuptimer();
		now = sbinuptime();
	} else
		now = 0;
	critical_enter();
	ET_HW_LOCK(DPCPU_PTR(timerstate));
	if (start) {
		/* Initialize time machine parameters. */
		next = now + timerperiod;
		if (periodic)
			nexttick = next;
		else
			nexttick = -1;
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			state->now = now;
			/* CPUs not yet running must never be event targets. */
			if (!smp_started && cpu != CPU_FIRST())
				state->nextevent = INT64_MAX;
			else
				state->nextevent = next;
			if (periodic)
				state->nexttick = next;
			else
				state->nexttick = -1;
			state->nexthard = next;
			state->nextstat = next;
			state->nextprof = next;
			state->nextcall = next;
			state->nextcallopt = next;
			hardclock_sync(cpu);
		}
		busy = 0;
		/* Start global timer or per-CPU timer of this CPU. */
		loadtimer(now, 1);
	} else {
		busy = 1;
		/* Stop global timer or per-CPU timer of this CPU. */
		et_stop(timer);
	}
	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
	/* If timer is global or there is no other CPUs yet - we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
		critical_exit();
		return;
	}
	/* Set reconfigure flags for other CPUs. */
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		atomic_store_rel_int(&state->action,
		    (cpu == curcpu) ? 0 : ( start ? 1 : 2));
	}
	/* Broadcast reconfigure IPI. */
	ipi_all_but_self(IPI_HARDCLOCK);
	/* Wait for reconfiguration completed. */
restart:
	cpu_spinwait();
	CPU_FOREACH(cpu) {
		if (cpu == curcpu)
			continue;
		state = DPCPU_ID_PTR(cpu, timerstate);
		if (atomic_load_acq_int(&state->action))
			goto restart;
	}
#endif
	critical_exit();
}
562209371Smav
/*
 * Calculate nearest frequency supported by hardware timer.
 * Rounds 'freq' to what the timer's base clock and divider can actually
 * produce, then clamps it into the timer's min/max period range.
 */
static int
round_freq(struct eventtimer *et, int freq)
{
	uint64_t div;

	if (et->et_frequency != 0) {
		/* Nearest integer divisor of the base clock, at least 1. */
		div = lmax((et->et_frequency + freq / 2) / freq, 1);
		/* Some hardware only supports power-of-two dividers. */
		if (et->et_flags & ET_FLAGS_POW2DIV)
			div = 1 << (flsl(div + div / 2) - 1);
		freq = (et->et_frequency + div / 2) / div;
	}
	if (et->et_min_period > SBT_1S)
		panic("Event timer \"%s\" doesn't support sub-second periods!",
		    et->et_name);
	else if (et->et_min_period != 0)
		/* Cap at the highest frequency the timer can run. */
		freq = min(freq, SBT2FREQ(et->et_min_period));
	if (et->et_max_period < SBT_1S && et->et_max_period != 0)
		/* Raise to the lowest frequency the timer can run. */
		freq = max(freq, SBT2FREQ(et->et_max_period));
	return (freq);
}
586210290Smav
/*
 * Configure and start event timers (BSP part).  Initializes per-CPU
 * state, selects an event timer (honoring kern.eventtimer.timer and the
 * preferred mode), then derives hz/stathz/profhz and the tick periods
 * before starting the timer.
 */
void
cpu_initclocks_bsp(void)
{
	struct pcpu_state *state;
	int base, div, cpu;

	mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
#ifdef KDTRACE_HOOKS
		state->nextcyc = INT64_MAX;
#endif
		state->nextcall = INT64_MAX;
		state->nextcallopt = INT64_MAX;
	}
	periodic = want_periodic;
	/* Grab requested timer or the best of present. */
	if (timername[0])
		timer = et_find(timername, 0, 0);
	/* Prefer a timer matching the wanted mode, but take any that works. */
	if (timer == NULL && periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL) {
		timer = et_find(NULL,
		    ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
	}
	if (timer == NULL && !periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL)
		panic("No usable event timer found!");
	et_init(timer, timercb, NULL, NULL);

	/* Adapt to timer capabilities. */
	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep++;

	/*
	 * We honor the requested 'hz' value.
	 * We want to run stathz in the neighborhood of 128hz.
	 * We would like profhz to run as often as possible.
	 */
	if (singlemul <= 0 || singlemul > 20) {
		if (hz >= 1500 || (hz % 128) == 0)
			singlemul = 1;
		else if (hz >= 750)
			singlemul = 2;
		else
			singlemul = 4;
	}
	if (periodic) {
		/* Fit hz/stathz/profhz onto the shared periodic tick grid. */
		base = round_freq(timer, hz * singlemul);
		singlemul = max((base + hz / 2) / hz, 1);
		hz = (base + singlemul / 2) / singlemul;
		if (base <= 128)
			stathz = base;
		else {
			div = base / 128;
			if (div >= singlemul && (div % singlemul) == 0)
				div++;
			stathz = base / div;
		}
		profhz = stathz;
		while ((profhz + stathz) <= 128 * 64)
			profhz += stathz;
		profhz = round_freq(timer, profhz);
	} else {
		/* One-shot mode: each clock gets its own rounded rate. */
		hz = round_freq(timer, hz);
		stathz = round_freq(timer, 127);
		profhz = round_freq(timer, stathz * 64);
	}
	tick = 1000000 / hz;
	tick_sbt = SBT_1S / hz;
	tick_bt = sbttobt(tick_sbt);
	statperiod = SBT_1S / stathz;
	profperiod = SBT_1S / profhz;
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}
677209371Smav
/*
 * Start per-CPU event timers on APs.  Synchronizes this CPU's tick state
 * and runs a bookkeeping-only pass of handleevents() (fake == 2) in a
 * faked interrupt context to schedule the first real event.
 */
void
cpu_initclocks_ap(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	now = sbinuptime();
	ET_HW_LOCK(state);
	state->now = now;
	hardclock_sync(curcpu);
	/* Enter the spin section before dropping the lock. */
	spinlock_enter();
	ET_HW_UNLOCK(state);
	td = curthread;
	/* Pretend to be in interrupt context for the handlers' sake. */
	td->td_intr_nesting_level++;
	handleevents(state->now, 2);
	td->td_intr_nesting_level--;
	spinlock_exit();
}
701209371Smav
702212541Smav/*
703212541Smav * Switch to profiling clock rates.
704212541Smav */
705212541Smavvoid
706212541Smavcpu_startprofclock(void)
707209371Smav{
708209371Smav
709212541Smav	ET_LOCK();
710247329Smav	if (profiling == 0) {
711247329Smav		if (periodic) {
712247329Smav			configtimer(0);
713247329Smav			profiling = 1;
714247329Smav			configtimer(1);
715247329Smav		} else
716247329Smav			profiling = 1;
717212541Smav	} else
718247329Smav		profiling++;
719212541Smav	ET_UNLOCK();
720209371Smav}
721209371Smav
722212541Smav/*
723212541Smav * Switch to regular clock rates.
724212541Smav */
725209371Smavvoid
726212541Smavcpu_stopprofclock(void)
727209371Smav{
728209371Smav
729209371Smav	ET_LOCK();
730247329Smav	if (profiling == 1) {
731247329Smav		if (periodic) {
732247329Smav			configtimer(0);
733247329Smav			profiling = 0;
734247329Smav			configtimer(1);
735247329Smav		} else
736212541Smav		profiling = 0;
737212541Smav	} else
738247329Smav		profiling--;
739209371Smav	ET_UNLOCK();
740209371Smav}
741209371Smav
/*
 * Switch to idle mode (all ticks handled).  Returns the sbintime until
 * the next mandatory event, or -1 if ticks cannot be skipped (idletick
 * is set, reconfiguration is in progress, the timer is periodic per-CPU,
 * or DEVICE_POLLING pins the first CPU).
 */
sbintime_t
cpu_idleclock(void)
{
	sbintime_t now, t;
	struct pcpu_state *state;

	if (idletick || busy ||
	    (periodic && (timer->et_flags & ET_FLAGS_PERCPU))
#ifdef DEVICE_POLLING
	    || curcpu == CPU_FIRST()
#endif
	    )
		return (-1);
	state = DPCPU_PTR(timerstate);
	/* In periodic mode the last tick time is current enough. */
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR3(KTR_SPARE2, "idle at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	/* Next event, allowing hardclock() ticks to be coalesced. */
	t = getnextcpuevent(1);
	ET_HW_LOCK(state);
	state->idle = 1;
	state->nextevent = t;
	if (!periodic)
		loadtimer(now, 0);
	ET_HW_UNLOCK(state);
	return (MAX(t - now, 0));
}
774209371Smav
775212541Smav/*
776212541Smav * Switch to active mode (skip empty ticks).
777212541Smav */
778212541Smavvoid
779212541Smavcpu_activeclock(void)
780212541Smav{
781247777Sdavide	sbintime_t now;
782212541Smav	struct pcpu_state *state;
783212541Smav	struct thread *td;
784212541Smav
785212541Smav	state = DPCPU_PTR(timerstate);
786212541Smav	if (state->idle == 0 || busy)
787212541Smav		return;
788212541Smav	if (periodic)
789212541Smav		now = state->now;
790212541Smav	else
791247777Sdavide		now = sbinuptime();
792247777Sdavide	CTR3(KTR_SPARE2, "active at %d:  now  %d.%08x",
793247777Sdavide	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
794212541Smav	spinlock_enter();
795212541Smav	td = curthread;
796212541Smav	td->td_intr_nesting_level++;
797247777Sdavide	handleevents(now, 1);
798212541Smav	td->td_intr_nesting_level--;
799212541Smav	spinlock_exit();
800212541Smav}
801212541Smav
#ifdef KDTRACE_HOOKS
/*
 * Program the next OpenSolaris cyclics event for this CPU, reloading the
 * hardware timer if the new time becomes the earliest pending event.
 */
void
clocksource_cyc_set(const struct bintime *bt)
{
	struct pcpu_state *state;
	sbintime_t now, t;

	/* Do not touch anything if somebody reconfiguring timers. */
	if (busy)
		return;
	t = bttosbt(*bt);
	state = DPCPU_PTR(timerstate);
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();

	CTR5(KTR_SPARE2, "set_cyc at %d:  now  %d.%08x  t  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
	    (int)(t >> 32), (u_int)(t & 0xffffffff));

	ET_HW_LOCK(state);
	if (t != state->nextcyc) {
		state->nextcyc = t;
		/* Reprogram only if this became the earliest event. */
		if (t < state->nextevent) {
			state->nextevent = t;
			if (!periodic)
				loadtimer(now, 0);
		}
	}
	ET_HW_UNLOCK(state);
}
#endif
836221990Savg
/*
 * Register a new callout time for 'cpu': 'bt' is the hard deadline,
 * 'bt_opt' the earlier optional time at which callout_process() may run.
 * Reprograms the timer (locally, or via IPI for a remote per-CPU timer)
 * when the new time becomes the CPU's earliest event.
 */
void
cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
{
	struct pcpu_state *state;

	/* Do not touch anything if somebody reconfiguring timers. */
	if (busy)
		return;
	CTR6(KTR_SPARE2, "new co at %d:    on %d at %d.%08x - %d.%08x",
	    curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
	    (int)(bt >> 32), (u_int)(bt & 0xffffffff));
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);

	/*
	 * If there is callout time already set earlier -- do nothing.
	 * This check may appear redundant because we check already in
	 * callout_process() but this double check guarantees we're safe
	 * with respect to race conditions between interrupts execution
	 * and scheduling.
	 */
	state->nextcallopt = bt_opt;
	if (bt >= state->nextcall)
		goto done;
	state->nextcall = bt;
	/* If there is some other event set earlier -- do nothing. */
	if (bt >= state->nextevent)
		goto done;
	state->nextevent = bt;
	/* If timer is periodic -- there is nothing to reprogram. */
	if (periodic)
		goto done;
	/* If timer is global or of the current CPU -- reprogram it. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
		loadtimer(sbinuptime(), 0);
/*
 * NOTE: the 'done' label deliberately sits inside this if block; the
 * gotos above jump into it to share the unlock-and-return tail.  Legal
 * C, but easy to misread -- do not "fix" by moving the label.
 */
done:
		ET_HW_UNLOCK(state);
		return;
	}
	/* Otherwise make other CPU to reprogram it. */
	state->handle = 1;
	ET_HW_UNLOCK(state);
#ifdef SMP
	ipi_cpu(cpu, IPI_HARDCLOCK);
#endif
}
883212541Smav
/*
 * Report or change the active event timers hardware.
 * Stops the current timer, releases it, and starts the newly selected
 * one, keeping the deep-sleep disable count consistent.
 */
static int
sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
	char buf[32];
	struct eventtimer *et;
	int error;

	ET_LOCK();
	et = timer;
	snprintf(buf, sizeof(buf), "%s", et->et_name);
	ET_UNLOCK();
	/* Drop the lock across the (possibly sleeping) sysctl copy. */
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	ET_LOCK();
	et = timer;
	/* Read-only request, error, or no change: nothing to do. */
	if (error != 0 || req->newptr == NULL ||
	    strcasecmp(buf, et->et_name) == 0) {
		ET_UNLOCK();
		return (error);
	}
	et = et_find(buf, 0, 0);
	if (et == NULL) {
		ET_UNLOCK();
		return (ENOENT);
	}
	configtimer(0);
	/*
	 * NOTE(review): 'timer' is still dereferenced below after
	 * et_free(); this assumes et_free() only releases the timer for
	 * reuse and does not invalidate the structure -- confirm against
	 * kern_et.c before changing this ordering.
	 */
	et_free(timer);
	if (et->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep++;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep--;
	periodic = want_periodic;
	timer = et;
	et_init(timer, timercb, NULL, NULL);
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer");
927209371Smav
928212541Smav/*
929212541Smav * Report or change the active event timer periodicity.
930212541Smav */
931209371Smavstatic int
932212541Smavsysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
933209371Smav{
934212541Smav	int error, val;
935209371Smav
936212541Smav	val = periodic;
937212541Smav	error = sysctl_handle_int(oidp, &val, 0, req);
938212541Smav	if (error != 0 || req->newptr == NULL)
939212541Smav		return (error);
940209371Smav	ET_LOCK();
941212541Smav	configtimer(0);
942212967Smav	periodic = want_periodic = val;
943212541Smav	configtimer(1);
944209371Smav	ET_UNLOCK();
945209371Smav	return (error);
946209371Smav}
947212541SmavSYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
948212541Smav    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
949212600Smav    0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");
950