1209371Smav/*-
2247777Sdavide * Copyright (c) 2010-2013 Alexander Motin <mav@FreeBSD.org>
3209371Smav * All rights reserved.
4209371Smav *
5209371Smav * Redistribution and use in source and binary forms, with or without
6209371Smav * modification, are permitted provided that the following conditions
7209371Smav * are met:
8209371Smav * 1. Redistributions of source code must retain the above copyright
9209371Smav *    notice, this list of conditions and the following disclaimer,
10209371Smav *    without modification, immediately at the beginning of the file.
11209371Smav * 2. Redistributions in binary form must reproduce the above copyright
12209371Smav *    notice, this list of conditions and the following disclaimer in the
13209371Smav *    documentation and/or other materials provided with the distribution.
14209371Smav *
15209371Smav * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16209371Smav * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17209371Smav * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18209371Smav * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19209371Smav * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20209371Smav * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21209371Smav * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22209371Smav * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23209371Smav * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24209371Smav * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25209371Smav */
26209371Smav
27209371Smav#include <sys/cdefs.h>
28209371Smav__FBSDID("$FreeBSD: stable/11/sys/kern/kern_clocksource.c 360500 2020-04-30 17:51:26Z mav $");
29209371Smav
30209371Smav/*
31209371Smav * Common routines to manage event timers hardware.
32209371Smav */
33209371Smav
34212992Smav#include "opt_device_polling.h"
35209371Smav
36209371Smav#include <sys/param.h>
37209371Smav#include <sys/systm.h>
38209371Smav#include <sys/bus.h>
39247777Sdavide#include <sys/limits.h>
40209371Smav#include <sys/lock.h>
41209371Smav#include <sys/kdb.h>
42212541Smav#include <sys/ktr.h>
43209371Smav#include <sys/mutex.h>
44209371Smav#include <sys/proc.h>
45209371Smav#include <sys/kernel.h>
46209371Smav#include <sys/sched.h>
47209371Smav#include <sys/smp.h>
48209371Smav#include <sys/sysctl.h>
49209371Smav#include <sys/timeet.h>
50212603Smav#include <sys/timetc.h>
51209371Smav
52209371Smav#include <machine/atomic.h>
53209371Smav#include <machine/clock.h>
54209371Smav#include <machine/cpu.h>
55209371Smav#include <machine/smp.h>
56209371Smav
57276724Sjhbint			cpu_disable_c2_sleep = 0; /* Timer dies in C2. */
58276724Sjhbint			cpu_disable_c3_sleep = 0; /* Timer dies in C3. */
59209371Smav
60212541Smavstatic void		setuptimer(void);
61247777Sdavidestatic void		loadtimer(sbintime_t now, int first);
62212541Smavstatic int		doconfigtimer(void);
63212541Smavstatic void		configtimer(int start);
64212541Smavstatic int		round_freq(struct eventtimer *et, int freq);
65209371Smav
66247777Sdavidestatic sbintime_t	getnextcpuevent(int idle);
67247777Sdavidestatic sbintime_t	getnextevent(void);
68247777Sdavidestatic int		handleevents(sbintime_t now, int fake);
69209371Smav
70212541Smavstatic struct mtx	et_hw_mtx;
71212541Smav
72212541Smav#define	ET_HW_LOCK(state)						\
73212541Smav	{								\
74212541Smav		if (timer->et_flags & ET_FLAGS_PERCPU)			\
75212541Smav			mtx_lock_spin(&(state)->et_hw_mtx);		\
76212541Smav		else							\
77212541Smav			mtx_lock_spin(&et_hw_mtx);			\
78212541Smav	}
79212541Smav
80212541Smav#define	ET_HW_UNLOCK(state)						\
81212541Smav	{								\
82212541Smav		if (timer->et_flags & ET_FLAGS_PERCPU)			\
83212541Smav			mtx_unlock_spin(&(state)->et_hw_mtx);		\
84212541Smav		else							\
85212541Smav			mtx_unlock_spin(&et_hw_mtx);			\
86212541Smav	}
87212541Smav
88212541Smavstatic struct eventtimer *timer = NULL;
89247777Sdavidestatic sbintime_t	timerperiod;	/* Timer period for periodic mode. */
90247777Sdavidestatic sbintime_t	statperiod;	/* statclock() events period. */
91247777Sdavidestatic sbintime_t	profperiod;	/* profclock() events period. */
92247777Sdavidestatic sbintime_t	nexttick;	/* Next global timer tick time. */
93247777Sdavidestatic u_int		busy = 1;	/* Reconfiguration is in progress. */
94267992Shselaskystatic int		profiling;	/* Profiling events enabled. */
95212541Smav
96212541Smavstatic char		timername[32];	/* Wanted timer. */
97212541SmavTUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));
98212541Smav
99267992Shselaskystatic int		singlemul;	/* Multiplier for periodic mode. */
100267992ShselaskySYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RWTUN, &singlemul,
101212541Smav    0, "Multiplier for periodic mode");
102209371Smav
103267992Shselaskystatic u_int		idletick;	/* Run periodic events when idle. */
104267992ShselaskySYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RWTUN, &idletick,
105212541Smav    0, "Run periodic events when idle");
106209371Smav
107267992Shselaskystatic int		periodic;	/* Periodic or one-shot mode. */
108267992Shselaskystatic int		want_periodic;	/* What mode to prefer. */
109212967SmavTUNABLE_INT("kern.eventtimer.periodic", &want_periodic);
110212541Smav
/*
 * Per-CPU state of the event timer machinery; serialized by the
 * ET_HW_LOCK()/ET_HW_UNLOCK() macros above (per-CPU spin mutex when the
 * active timer has ET_FLAGS_PERCPU, the global et_hw_mtx otherwise).
 */
struct pcpu_state {
	struct mtx	et_hw_mtx;	/* Per-CPU timer mutex. */
	u_int		action;		/* Reconfiguration requests. */
	u_int		handle;		/* Immediate handle requests. */
	sbintime_t	now;		/* Last tick time. */
	sbintime_t	nextevent;	/* Next scheduled event on this CPU. */
	sbintime_t	nexttick;	/* Next timer tick time. */
	sbintime_t	nexthard;	/* Next hardclock() event. */
	sbintime_t	nextstat;	/* Next statclock() event. */
	sbintime_t	nextprof;	/* Next profclock() event. */
	sbintime_t	nextcall;	/* Next callout event. */
	sbintime_t	nextcallopt;	/* Next optional callout event. */
	int		ipi;		/* This CPU needs IPI. */
	int		idle;		/* This CPU is in idle mode. */
};
126212541Smav
127215701Sdimstatic DPCPU_DEFINE(struct pcpu_state, timerstate);
128247777SdavideDPCPU_DEFINE(sbintime_t, hardclocktime);
129212541Smav
130212541Smav/*
131212541Smav * Timer broadcast IPI handler.
132212541Smav */
133212541Smavint
134212541Smavhardclockintr(void)
135212541Smav{
136247777Sdavide	sbintime_t now;
137212541Smav	struct pcpu_state *state;
138212541Smav	int done;
139212541Smav
140212541Smav	if (doconfigtimer() || busy)
141212541Smav		return (FILTER_HANDLED);
142212541Smav	state = DPCPU_PTR(timerstate);
143212541Smav	now = state->now;
144247777Sdavide	CTR3(KTR_SPARE2, "ipi  at %d:    now  %d.%08x",
145247777Sdavide	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
146247777Sdavide	done = handleevents(now, 0);
147212541Smav	return (done ? FILTER_HANDLED : FILTER_STRAY);
148212541Smav}
149212541Smav
/*
 * Handle all events for specified time on this CPU.  'fake' selects how
 * much real work is done: 0 -- normal interrupt context, run all handlers;
 * 1 -- no trap frame available, skip profclock(); 2 -- only advance the
 * per-CPU event timestamps and (re)program the timer, call no handlers.
 * Returns nonzero if any clock handler was actually invoked.
 */
static int
handleevents(sbintime_t now, int fake)
{
	sbintime_t t, *hct;
	struct trapframe *frame;
	struct pcpu_state *state;
	int usermode;
	int done, runs;

	CTR3(KTR_SPARE2, "handle at %d:  now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = 0;
	/* Fake calls have no trap frame to sample PC/usermode from. */
	if (fake) {
		frame = NULL;
		usermode = 0;
	} else {
		frame = curthread->td_intr_frame;
		usermode = TRAPF_USERMODE(frame);
	}

	state = DPCPU_PTR(timerstate);

	/* Count hardclock() ticks that have elapsed and advance nexthard. */
	runs = 0;
	while (now >= state->nexthard) {
		state->nexthard += tick_sbt;
		runs++;
	}
	if (runs) {
		/* Publish the time of the last processed hardclock() tick. */
		hct = DPCPU_PTR(hardclocktime);
		*hct = state->nexthard - tick_sbt;
		if (fake < 2) {
			hardclock_cnt(runs, usermode);
			done = 1;
		}
	}
	/* Same accounting for statclock() events. */
	runs = 0;
	while (now >= state->nextstat) {
		state->nextstat += statperiod;
		runs++;
	}
	if (runs && fake < 2) {
		statclock_cnt(runs, usermode);
		done = 1;
	}
	if (profiling) {
		/* profclock() needs a real trap frame (TRAPF_PC), so only
		 * when not faking at all. */
		runs = 0;
		while (now >= state->nextprof) {
			state->nextprof += profperiod;
			runs++;
		}
		if (runs && !fake) {
			profclock_cnt(runs, usermode, TRAPF_PC(frame));
			done = 1;
		}
	} else
		state->nextprof = state->nextstat;
	/* Process callouts once either callout deadline has passed; reset
	 * both -- callout_process() will report new ones via
	 * cpu_new_callout(). */
	if (now >= state->nextcallopt || now >= state->nextcall) {
		state->nextcall = state->nextcallopt = SBT_MAX;
		callout_process(now);
	}

	/* Schedule next event and reprogram the hardware timer. */
	t = getnextcpuevent(0);
	ET_HW_LOCK(state);
	if (!busy) {
		state->idle = 0;
		state->nextevent = t;
		/* fake == 2 (AP startup) must start a per-CPU timer, not
		 * just reprogram it. */
		loadtimer(now, (fake == 2) &&
		    (timer->et_flags & ET_FLAGS_PERCPU));
	}
	ET_HW_UNLOCK(state);
	return (done);
}
225209371Smav
226212541Smav/*
227212541Smav * Schedule binuptime of the next event on current CPU.
228212541Smav */
229247777Sdavidestatic sbintime_t
230247777Sdavidegetnextcpuevent(int idle)
231209371Smav{
232247777Sdavide	sbintime_t event;
233212541Smav	struct pcpu_state *state;
234247777Sdavide	u_int hardfreq;
235209371Smav
236212541Smav	state = DPCPU_PTR(timerstate);
237247777Sdavide	/* Handle hardclock() events, skipping some if CPU is idle. */
238247777Sdavide	event = state->nexthard;
239247777Sdavide	if (idle) {
240247777Sdavide		hardfreq = (u_int)hz / 2;
241247777Sdavide		if (tc_min_ticktock_freq > 2
242247777Sdavide#ifdef SMP
243247777Sdavide		    && curcpu == CPU_FIRST()
244247777Sdavide#endif
245247777Sdavide		    )
246247777Sdavide			hardfreq = hz / tc_min_ticktock_freq;
247247777Sdavide		if (hardfreq > 1)
248247777Sdavide			event += tick_sbt * (hardfreq - 1);
249232919Smav	}
250247777Sdavide	/* Handle callout events. */
251247777Sdavide	if (event > state->nextcall)
252247777Sdavide		event = state->nextcall;
253232919Smav	if (!idle) { /* If CPU is active - handle other types of events. */
254247777Sdavide		if (event > state->nextstat)
255247777Sdavide			event = state->nextstat;
256247777Sdavide		if (profiling && event > state->nextprof)
257247777Sdavide			event = state->nextprof;
258212541Smav	}
259247777Sdavide	return (event);
260209371Smav}
261209371Smav
/*
 * Return the earliest scheduled event time among the CPUs this timer
 * serves: all CPUs for a global timer, only the current CPU for a
 * per-CPU one.  'c' records the owning CPU for KTR tracing only.
 */
static sbintime_t
getnextevent(void)
{
	struct pcpu_state *state;
	sbintime_t event;
#ifdef SMP
	int	cpu;
#endif
#ifdef KTR
	int	c;

	c = -1;
#endif
	state = DPCPU_PTR(timerstate);
	event = state->nextevent;
#ifdef SMP
	/* For a global timer take the minimum over all CPUs. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (event > state->nextevent) {
				event = state->nextevent;
#ifdef KTR
				c = cpu;
#endif
			}
		}
	}
#endif
	CTR4(KTR_SPARE2, "next at %d:    next %d.%08x by %d",
	    curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c);
	return (event);
}
297209371Smav
/*
 * Hardware timer callback function.  Runs per-CPU clock events for this
 * CPU and, for a global timer, flags and IPIs other CPUs whose events
 * are due.
 */
static void
timercb(struct eventtimer *et, void *arg)
{
	sbintime_t now;
	sbintime_t *next;
	struct pcpu_state *state;
#ifdef SMP
	int cpu, bcast;
#endif

	/* Do not touch anything if somebody reconfiguring timers. */
	if (busy)
		return;
	/* Update present and next tick times. */
	state = DPCPU_PTR(timerstate);
	if (et->et_flags & ET_FLAGS_PERCPU) {
		next = &state->nexttick;
	} else
		next = &nexttick;
	now = sbinuptime();
	if (periodic)
		*next = now + timerperiod;
	else
		*next = -1;	/* Next tick is not scheduled yet. */
	state->now = now;
	CTR3(KTR_SPARE2, "intr at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));

#ifdef SMP
#ifdef EARLY_AP_STARTUP
	MPASS(mp_ncpus == 1 || smp_started);
#endif
	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
	bcast = 0;
#ifdef EARLY_AP_STARTUP
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0) {
#else
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
#endif
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			ET_HW_LOCK(state);
			state->now = now;
			if (now >= state->nextevent) {
				/* Push the deadline out; the target CPU's
				 * handleevents() will recompute it. */
				state->nextevent += SBT_1S;
				if (curcpu != cpu) {
					state->ipi = 1;
					bcast = 1;
				}
			}
			ET_HW_UNLOCK(state);
		}
	}
#endif

	/* Handle events for this time on this CPU. */
	handleevents(now, 0);

#ifdef SMP
	/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
	if (bcast) {
		CPU_FOREACH(cpu) {
			if (curcpu == cpu)
				continue;
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (state->ipi) {
				state->ipi = 0;
				ipi_cpu(cpu, IPI_HARDCLOCK);
			}
		}
	}
#endif
}
372209371Smav
/*
 * Load new value into hardware timer.  In periodic mode the timer is
 * (re)started only when 'start' is nonzero; in one-shot mode it is
 * reprogrammed whenever the next event time differs from what is
 * already loaded.
 */
static void
loadtimer(sbintime_t now, int start)
{
	struct pcpu_state *state;
	sbintime_t new;
	sbintime_t *next;
	uint64_t tmp;
	int eq;

	if (timer->et_flags & ET_FLAGS_PERCPU) {
		state = DPCPU_PTR(timerstate);
		next = &state->nexttick;
	} else
		next = &nexttick;
	if (periodic) {
		if (start) {
			/*
			 * Try to start all periodic timers aligned
			 * to period to make events synchronous.
			 */
			tmp = now % timerperiod;
			new = timerperiod - tmp;
			if (new < tmp)		/* Left less than passed. */
				new += timerperiod;
			CTR5(KTR_SPARE2, "load p at %d:   now %d.%08x first in %d.%08x",
			    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
			    (int)(new >> 32), (u_int)(new & 0xffffffff));
			*next = new + now;
			et_start(timer, new, timerperiod);
		}
	} else {
		/* One-shot: program the time of the earliest event. */
		new = getnextevent();
		eq = (new == *next);
		CTR4(KTR_SPARE2, "load at %d:    next %d.%08x eq %d",
		    curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq);
		if (!eq) {
			*next = new;
			et_start(timer, new - now, 0);
		}
	}
}
417209371Smav
418209371Smav/*
419212541Smav * Prepare event timer parameters after configuration changes.
420212541Smav */
421212541Smavstatic void
422212541Smavsetuptimer(void)
423212541Smav{
424212541Smav	int freq;
425212541Smav
426212541Smav	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
427212541Smav		periodic = 0;
428212541Smav	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
429212541Smav		periodic = 1;
430212600Smav	singlemul = MIN(MAX(singlemul, 1), 20);
431212541Smav	freq = hz * singlemul;
432212541Smav	while (freq < (profiling ? profhz : stathz))
433212541Smav		freq += hz;
434212541Smav	freq = round_freq(timer, freq);
435247777Sdavide	timerperiod = SBT_1S / freq;
436212541Smav}
437212541Smav
/*
 * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
 * action == 1 requests a timer (re)start, action == 2 a stop (both set by
 * configtimer()); a pending 'handle' flag requests immediate event
 * processing instead.  Returns 1 if anything was done.
 */
static int
doconfigtimer(void)
{
	sbintime_t now;
	struct pcpu_state *state;

	state = DPCPU_PTR(timerstate);
	switch (atomic_load_acq_int(&state->action)) {
	case 1:
		/* Start request: program our per-CPU timer. */
		now = sbinuptime();
		ET_HW_LOCK(state);
		loadtimer(now, 1);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	case 2:
		/* Stop request: halt our per-CPU timer. */
		ET_HW_LOCK(state);
		et_stop(timer);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	}
	/* Immediate handling requested by cpu_new_callout(). */
	if (atomic_readandclear_int(&state->handle) && !busy) {
		now = sbinuptime();
		handleevents(now, 0);
		return (1);
	}
	return (0);
}
472209371Smav
/*
 * Reconfigure specified timer.
 * For per-CPU timers use IPI to make other CPUs to reconfigure.
 * 'start' != 0 (re)starts the machinery with current parameters and
 * clears 'busy'; 'start' == 0 sets 'busy' and stops the timer.
 */
static void
configtimer(int start)
{
	sbintime_t now, next;
	struct pcpu_state *state;
	int cpu;

	if (start) {
		setuptimer();
		now = sbinuptime();
	} else
		now = 0;
	critical_enter();
	ET_HW_LOCK(DPCPU_PTR(timerstate));
	if (start) {
		/* Initialize time machine parameters. */
		next = now + timerperiod;
		if (periodic)
			nexttick = next;
		else
			nexttick = -1;
#ifdef EARLY_AP_STARTUP
		MPASS(mp_ncpus == 1 || smp_started);
#endif
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			state->now = now;
#ifndef EARLY_AP_STARTUP
			/* CPUs not yet started have no events scheduled. */
			if (!smp_started && cpu != CPU_FIRST())
				state->nextevent = SBT_MAX;
			else
#endif
				state->nextevent = next;
			if (periodic)
				state->nexttick = next;
			else
				state->nexttick = -1;
			state->nexthard = next;
			state->nextstat = next;
			state->nextprof = next;
			state->nextcall = next;
			state->nextcallopt = next;
			hardclock_sync(cpu);
		}
		busy = 0;
		/* Start global timer or per-CPU timer of this CPU. */
		loadtimer(now, 1);
	} else {
		busy = 1;
		/* Stop global timer or per-CPU timer of this CPU. */
		et_stop(timer);
	}
	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
#ifdef EARLY_AP_STARTUP
	/* If timer is global we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
#else
	/* If timer is global or there is no other CPUs yet - we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
#endif
		critical_exit();
		return;
	}
	/* Set reconfigure flags for other CPUs. */
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		atomic_store_rel_int(&state->action,
		    (cpu == curcpu) ? 0 : ( start ? 1 : 2));
	}
	/* Broadcast reconfigure IPI. */
	ipi_all_but_self(IPI_HARDCLOCK);
	/* Wait for reconfiguration completed. */
restart:
	cpu_spinwait();
	CPU_FOREACH(cpu) {
		if (cpu == curcpu)
			continue;
		state = DPCPU_ID_PTR(cpu, timerstate);
		if (atomic_load_acq_int(&state->action))
			goto restart;
	}
#endif
	critical_exit();
}
562209371Smav
563212541Smav/*
564212541Smav * Calculate nearest frequency supported by hardware timer.
565212541Smav */
566210290Smavstatic int
567210290Smavround_freq(struct eventtimer *et, int freq)
568210290Smav{
569210290Smav	uint64_t div;
570210290Smav
571210290Smav	if (et->et_frequency != 0) {
572210298Smav		div = lmax((et->et_frequency + freq / 2) / freq, 1);
573210290Smav		if (et->et_flags & ET_FLAGS_POW2DIV)
574210290Smav			div = 1 << (flsl(div + div / 2) - 1);
575210290Smav		freq = (et->et_frequency + div / 2) / div;
576210290Smav	}
577247463Smav	if (et->et_min_period > SBT_1S)
578241413Smav		panic("Event timer \"%s\" doesn't support sub-second periods!",
579241413Smav		    et->et_name);
580247463Smav	else if (et->et_min_period != 0)
581247463Smav		freq = min(freq, SBT2FREQ(et->et_min_period));
582247463Smav	if (et->et_max_period < SBT_1S && et->et_max_period != 0)
583247463Smav		freq = max(freq, SBT2FREQ(et->et_max_period));
584210290Smav	return (freq);
585210290Smav}
586210290Smav
/*
 * Configure and start event timers (BSP part): pick the hardware timer,
 * adapt to its capabilities and derive hz/stathz/profhz and the
 * sbintime periods used by the clock machinery.
 */
void
cpu_initclocks_bsp(void)
{
	struct pcpu_state *state;
	int base, div, cpu;

	mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
		state->nextcall = SBT_MAX;
		state->nextcallopt = SBT_MAX;
	}
	periodic = want_periodic;
	/* Grab requested timer or the best of present. */
	if (timername[0])
		timer = et_find(timername, 0, 0);
	/* Prefer a timer matching the wanted mode, fall back otherwise. */
	if (timer == NULL && periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL) {
		timer = et_find(NULL,
		    ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
	}
	if (timer == NULL && !periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL)
		panic("No usable event timer found!");
	et_init(timer, timercb, NULL, NULL);

	/* Adapt to timer capabilities. */
	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_c3_sleep++;

	/*
	 * We honor the requested 'hz' value.
	 * We want to run stathz in the neighborhood of 128hz.
	 * We would like profhz to run as often as possible.
	 */
	if (singlemul <= 0 || singlemul > 20) {
		if (hz >= 1500 || (hz % 128) == 0)
			singlemul = 1;
		else if (hz >= 750)
			singlemul = 2;
		else
			singlemul = 4;
	}
	if (periodic) {
		/* Round the combined rate, then re-derive hz from it. */
		base = round_freq(timer, hz * singlemul);
		singlemul = max((base + hz / 2) / hz, 1);
		hz = (base + singlemul / 2) / singlemul;
		if (base <= 128)
			stathz = base;
		else {
			div = base / 128;
			if (div >= singlemul && (div % singlemul) == 0)
				div++;
			stathz = base / div;
		}
		profhz = stathz;
		while ((profhz + stathz) <= 128 * 64)
			profhz += stathz;
		profhz = round_freq(timer, profhz);
	} else {
		hz = round_freq(timer, hz);
		stathz = round_freq(timer, 127);
		profhz = round_freq(timer, stathz * 64);
	}
	/* Derive tick lengths and event periods from the final rates. */
	tick = 1000000 / hz;
	tick_sbt = SBT_1S / hz;
	tick_bt = sbttobt(tick_sbt);
	statperiod = SBT_1S / stathz;
	profperiod = SBT_1S / profhz;
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}
674209371Smav
/*
 * Start per-CPU event timers on APs.
 */
void
cpu_initclocks_ap(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	now = sbinuptime();
	ET_HW_LOCK(state);
	state->now = now;
	hardclock_sync(curcpu);
	spinlock_enter();
	ET_HW_UNLOCK(state);
	/* Fake interrupt context for the clock code paths. */
	td = curthread;
	td->td_intr_nesting_level++;
	/* fake == 2: advance timestamps and start our per-CPU timer
	 * without invoking any clock handlers. */
	handleevents(state->now, 2);
	td->td_intr_nesting_level--;
	spinlock_exit();
}
698209371Smav
/*
 * Stop the event timer machinery (e.g. for system suspend); the
 * counterpart resumeclock() restarts it.
 */
void
suspendclock(void)
{
	ET_LOCK();
	configtimer(0);
	ET_UNLOCK();
}
706335656Savg
/*
 * Restart the event timer machinery stopped by suspendclock().
 */
void
resumeclock(void)
{
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}
714335656Savg
715212541Smav/*
716212541Smav * Switch to profiling clock rates.
717212541Smav */
718212541Smavvoid
719212541Smavcpu_startprofclock(void)
720209371Smav{
721209371Smav
722212541Smav	ET_LOCK();
723247329Smav	if (profiling == 0) {
724247329Smav		if (periodic) {
725247329Smav			configtimer(0);
726247329Smav			profiling = 1;
727247329Smav			configtimer(1);
728247329Smav		} else
729247329Smav			profiling = 1;
730212541Smav	} else
731247329Smav		profiling++;
732212541Smav	ET_UNLOCK();
733209371Smav}
734209371Smav
735212541Smav/*
736212541Smav * Switch to regular clock rates.
737212541Smav */
738209371Smavvoid
739212541Smavcpu_stopprofclock(void)
740209371Smav{
741209371Smav
742209371Smav	ET_LOCK();
743247329Smav	if (profiling == 1) {
744247329Smav		if (periodic) {
745247329Smav			configtimer(0);
746247329Smav			profiling = 0;
747247329Smav			configtimer(1);
748247329Smav		} else
749212541Smav		profiling = 0;
750212541Smav	} else
751247329Smav		profiling--;
752209371Smav	ET_UNLOCK();
753209371Smav}
754209371Smav
/*
 * Switch to idle mode (all ticks handled).
 * Returns the time until the next scheduled event, or -1 when tick
 * skipping is not possible/allowed right now.
 */
sbintime_t
cpu_idleclock(void)
{
	sbintime_t now, t;
	struct pcpu_state *state;

	/* Refuse when disabled by tunable, timers are being reconfigured,
	 * a per-CPU timer runs in periodic mode, or (with DEVICE_POLLING)
	 * on the first CPU. */
	if (idletick || busy ||
	    (periodic && (timer->et_flags & ET_FLAGS_PERCPU))
#ifdef DEVICE_POLLING
	    || curcpu == CPU_FIRST()
#endif
	    )
		return (-1);
	state = DPCPU_PTR(timerstate);
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR3(KTR_SPARE2, "idle at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	/* Allow hardclock() tick skipping while idle. */
	t = getnextcpuevent(1);
	ET_HW_LOCK(state);
	state->idle = 1;
	state->nextevent = t;
	if (!periodic)
		loadtimer(now, 0);
	ET_HW_UNLOCK(state);
	return (MAX(t - now, 0));
}
787209371Smav
/*
 * Switch to active mode (skip empty ticks): catch up on events that were
 * deferred while this CPU was idle.
 */
void
cpu_activeclock(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	if (state->idle == 0 || busy)
		return;
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR3(KTR_SPARE2, "active at %d:  now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	/* Run accumulated events in faked interrupt context (fake == 1:
	 * no trap frame, profclock() skipped). */
	spinlock_enter();
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(now, 1);
	td->td_intr_nesting_level--;
	spinlock_exit();
}
814212541Smav
815264041Sian/*
816264041Sian * Change the frequency of the given timer.  This changes et->et_frequency and
817264041Sian * if et is the active timer it reconfigures the timer on all CPUs.  This is
818264041Sian * intended to be a private interface for the use of et_change_frequency() only.
819264041Sian */
820264041Sianvoid
821264041Siancpu_et_frequency(struct eventtimer *et, uint64_t newfreq)
822264041Sian{
823264041Sian
824264041Sian	ET_LOCK();
825264041Sian	if (et == timer) {
826264041Sian		configtimer(0);
827264041Sian		et->et_frequency = newfreq;
828264041Sian		configtimer(1);
829264041Sian	} else
830264041Sian		et->et_frequency = newfreq;
831264041Sian	ET_UNLOCK();
832264041Sian}
833264041Sian
/*
 * Report a new earliest callout deadline for the given CPU.  Callouts
 * are processed once 'now' passes either 'bt' or the (possibly earlier)
 * optional deadline 'bt_opt' -- see handleevents().  Reprograms the
 * hardware timer, or IPIs the target CPU to do so, when needed.
 */
void
cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
{
	struct pcpu_state *state;

	/* Do not touch anything if somebody reconfiguring timers. */
	if (busy)
		return;
	CTR6(KTR_SPARE2, "new co at %d:    on %d at %d.%08x - %d.%08x",
	    curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
	    (int)(bt >> 32), (u_int)(bt & 0xffffffff));
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);

	/*
	 * If there is callout time already set earlier -- do nothing.
	 * This check may appear redundant because we check already in
	 * callout_process() but this double check guarantees we're safe
	 * with respect to race conditions between interrupts execution
	 * and scheduling.
	 */
	state->nextcallopt = bt_opt;
	if (bt >= state->nextcall)
		goto done;
	state->nextcall = bt;
	/* If there is some other event set earlier -- do nothing. */
	if (bt >= state->nextevent)
		goto done;
	state->nextevent = bt;
	/* If timer is periodic -- there is nothing to reprogram. */
	if (periodic)
		goto done;
	/* If timer is global or of the current CPU -- reprogram it. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
		loadtimer(sbinuptime(), 0);
done:
		ET_HW_UNLOCK(state);
		return;
	}
	/* Otherwise make other CPU to reprogram it. */
	state->handle = 1;
	ET_HW_UNLOCK(state);
#ifdef SMP
	ipi_cpu(cpu, IPI_HARDCLOCK);
#endif
}
880212541Smav
881212541Smav/*
882212541Smav * Report or change the active event timers hardware.
883212541Smav */
884209371Smavstatic int
885212541Smavsysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
886209371Smav{
887209371Smav	char buf[32];
888209371Smav	struct eventtimer *et;
889209371Smav	int error;
890209371Smav
891209371Smav	ET_LOCK();
892212541Smav	et = timer;
893209371Smav	snprintf(buf, sizeof(buf), "%s", et->et_name);
894209371Smav	ET_UNLOCK();
895209371Smav	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
896209371Smav	ET_LOCK();
897212541Smav	et = timer;
898209371Smav	if (error != 0 || req->newptr == NULL ||
899212541Smav	    strcasecmp(buf, et->et_name) == 0) {
900209371Smav		ET_UNLOCK();
901209371Smav		return (error);
902209371Smav	}
903212541Smav	et = et_find(buf, 0, 0);
904209371Smav	if (et == NULL) {
905209371Smav		ET_UNLOCK();
906209371Smav		return (ENOENT);
907209371Smav	}
908209371Smav	configtimer(0);
909212541Smav	et_free(timer);
910212541Smav	if (et->et_flags & ET_FLAGS_C3STOP)
911276724Sjhb		cpu_disable_c3_sleep++;
912212541Smav	if (timer->et_flags & ET_FLAGS_C3STOP)
913276724Sjhb		cpu_disable_c3_sleep--;
914212967Smav	periodic = want_periodic;
915212541Smav	timer = et;
916212541Smav	et_init(timer, timercb, NULL, NULL);
917212541Smav	configtimer(1);
918209371Smav	ET_UNLOCK();
919209371Smav	return (error);
920209371Smav}
921212541SmavSYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
922209371Smav    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
923212600Smav    0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer");
924209371Smav
925212541Smav/*
926212541Smav * Report or change the active event timer periodicity.
927212541Smav */
928209371Smavstatic int
929212541Smavsysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
930209371Smav{
931212541Smav	int error, val;
932209371Smav
933212541Smav	val = periodic;
934212541Smav	error = sysctl_handle_int(oidp, &val, 0, req);
935212541Smav	if (error != 0 || req->newptr == NULL)
936212541Smav		return (error);
937209371Smav	ET_LOCK();
938212541Smav	configtimer(0);
939212967Smav	periodic = want_periodic = val;
940212541Smav	configtimer(1);
941209371Smav	ET_UNLOCK();
942209371Smav	return (error);
943209371Smav}
944212541SmavSYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
945212541Smav    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
946212600Smav    0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");
947278209Skib
948278209Skib#include "opt_ddb.h"
949278209Skib
950278209Skib#ifdef DDB
951278209Skib#include <ddb/ddb.h>
952278209Skib
953278209SkibDB_SHOW_COMMAND(clocksource, db_show_clocksource)
954278209Skib{
955278209Skib	struct pcpu_state *st;
956278209Skib	int c;
957278209Skib
958278209Skib	CPU_FOREACH(c) {
959278209Skib		st = DPCPU_ID_PTR(c, timerstate);
960278209Skib		db_printf(
961278209Skib		    "CPU %2d: action %d handle %d  ipi %d idle %d\n"
962278209Skib		    "        now %#jx nevent %#jx (%jd)\n"
963278209Skib		    "        ntick %#jx (%jd) nhard %#jx (%jd)\n"
964278209Skib		    "        nstat %#jx (%jd) nprof %#jx (%jd)\n"
965278209Skib		    "        ncall %#jx (%jd) ncallopt %#jx (%jd)\n",
966278209Skib		    c, st->action, st->handle, st->ipi, st->idle,
967278209Skib		    (uintmax_t)st->now,
968278209Skib		    (uintmax_t)st->nextevent,
969278209Skib		    (uintmax_t)(st->nextevent - st->now) / tick_sbt,
970278209Skib		    (uintmax_t)st->nexttick,
971278209Skib		    (uintmax_t)(st->nexttick - st->now) / tick_sbt,
972278209Skib		    (uintmax_t)st->nexthard,
973278209Skib		    (uintmax_t)(st->nexthard - st->now) / tick_sbt,
974278209Skib		    (uintmax_t)st->nextstat,
975278209Skib		    (uintmax_t)(st->nextstat - st->now) / tick_sbt,
976278209Skib		    (uintmax_t)st->nextprof,
977278209Skib		    (uintmax_t)(st->nextprof - st->now) / tick_sbt,
978278209Skib		    (uintmax_t)st->nextcall,
979278209Skib		    (uintmax_t)(st->nextcall - st->now) / tick_sbt,
980278209Skib		    (uintmax_t)st->nextcallopt,
981278209Skib		    (uintmax_t)(st->nextcallopt - st->now) / tick_sbt);
982278209Skib	}
983278209Skib}
984278209Skib
985278209Skib#endif
986