/*-
 * Copyright (c) 2010-2013 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    without modification, immediately at the beginning of the file.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Common routines to manage event timer hardware.
 */

#include "opt_device_polling.h"
#include "opt_kdtrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/timeet.h>
#include <sys/timetc.h>

#include <machine/atomic.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#ifdef KDTRACE_HOOKS
#include <sys/dtrace_bsd.h>
cyclic_clock_func_t	cyclic_clock_func = NULL;
#endif

int			cpu_can_deep_sleep = 0;	/* C3 state is available. */
int			cpu_disable_deep_sleep = 0; /* Timer dies in C3. */

static void		setuptimer(void);
static void		loadtimer(sbintime_t now, int first);
static int		doconfigtimer(void);
static void		configtimer(int start);
static int		round_freq(struct eventtimer *et, int freq);

static sbintime_t	getnextcpuevent(int idle);
static sbintime_t	getnextevent(void);
static int		handleevents(sbintime_t now, int fake);

static struct mtx	et_hw_mtx;

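/*
 * The macros below pick the right lock for the timer state: when the active
 * timer is per-CPU, each CPU's state is protected by its own spin mutex so
 * that CPUs do not contend on a single lock; a global timer shares the one
 * et_hw_mtx.  Both macros rely on the file-scope 'timer' pointer below.
 */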
#define	ET_HW_LOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_lock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_lock_spin(&et_hw_mtx);			\
	}

#define	ET_HW_UNLOCK(state)						\
	{								\
		if (timer->et_flags & ET_FLAGS_PERCPU)			\
			mtx_unlock_spin(&(state)->et_hw_mtx);		\
		else							\
			mtx_unlock_spin(&et_hw_mtx);			\
	}

static struct eventtimer *timer = NULL;
static sbintime_t	timerperiod;	/* Timer period for periodic mode. */
static sbintime_t	statperiod;	/* statclock() events period. */
static sbintime_t	profperiod;	/* profclock() events period. */
static sbintime_t	nexttick;	/* Next global timer tick time. */
static u_int		busy = 1;	/* Reconfiguration is in progress. */
static int		profiling = 0;	/* Profiling events enabled. */

static char		timername[32];	/* Wanted timer. */
TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));

static int		singlemul = 0;	/* Multiplier for periodic mode. */
TUNABLE_INT("kern.eventtimer.singlemul", &singlemul);
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul,
    0, "Multiplier for periodic mode");

static u_int		idletick = 0;	/* Run periodic events when idle. */
TUNABLE_INT("kern.eventtimer.idletick", &idletick);
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
    0, "Run periodic events when idle");

static int		periodic = 0;	/* Periodic or one-shot mode. */
static int		want_periodic = 0; /* What mode to prefer. */
TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);
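
/*
 * Example boot-time configuration via loader tunables (values illustrative;
 * the available timer names depend on the hardware):
 *
 *	kern.eventtimer.timer="HPET"
 *	kern.eventtimer.singlemul=2
 *	kern.eventtimer.idletick=1
 *	kern.eventtimer.periodic=1
 */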

struct pcpu_state {
	struct mtx	et_hw_mtx;	/* Per-CPU timer mutex. */
	u_int		action;		/* Reconfiguration requests. */
	u_int		handle;		/* Immediate event handling requests. */
	sbintime_t	now;		/* Last tick time. */
	sbintime_t	nextevent;	/* Next scheduled event on this CPU. */
	sbintime_t	nexttick;	/* Next timer tick time. */
	sbintime_t	nexthard;	/* Next hardclock() event. */
	sbintime_t	nextstat;	/* Next statclock() event. */
	sbintime_t	nextprof;	/* Next profclock() event. */
	sbintime_t	nextcall;	/* Next callout event. */
	sbintime_t	nextcallopt;	/* Next optional callout event. */
#ifdef KDTRACE_HOOKS
	sbintime_t	nextcyc;	/* Next OpenSolaris cyclics event. */
#endif
	int		ipi;		/* This CPU needs IPI. */
	int		idle;		/* This CPU is in idle mode. */
};

static DPCPU_DEFINE(struct pcpu_state, timerstate);
DPCPU_DEFINE(sbintime_t, hardclocktime);

/*
 * Timer broadcast IPI handler.
 */
int
hardclockintr(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	int done;

	if (doconfigtimer() || busy)
		return (FILTER_HANDLED);
	state = DPCPU_PTR(timerstate);
	now = state->now;
	CTR3(KTR_SPARE2, "ipi  at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = handleevents(now, 0);
	return (done ? FILTER_HANDLED : FILTER_STRAY);
}

/*
 * Handle all events due at the specified time on this CPU.
 */
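/*
 * The 'fake' argument: 0 means a real timer interrupt (a trap frame is
 * available); 1 means catching up on return from idle (no trap frame, so
 * profclock() and cyclic events are skipped); 2 means starting clocks on
 * an AP (only reschedules the timer, no clock handlers are run).
 */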
static int
handleevents(sbintime_t now, int fake)
{
	sbintime_t t, *hct;
	struct trapframe *frame;
	struct pcpu_state *state;
	int usermode;
	int done, runs;

	CTR3(KTR_SPARE2, "handle at %d:  now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	done = 0;
	if (fake) {
		frame = NULL;
		usermode = 0;
	} else {
		frame = curthread->td_intr_frame;
		usermode = TRAPF_USERMODE(frame);
	}

	state = DPCPU_PTR(timerstate);

	runs = 0;
	while (now >= state->nexthard) {
		state->nexthard += tick_sbt;
		runs++;
	}
	if (runs) {
		hct = DPCPU_PTR(hardclocktime);
		*hct = state->nexthard - tick_sbt;
		if (fake < 2) {
			hardclock_cnt(runs, usermode);
			done = 1;
		}
	}
	runs = 0;
	while (now >= state->nextstat) {
		state->nextstat += statperiod;
		runs++;
	}
	if (runs && fake < 2) {
		statclock_cnt(runs, usermode);
		done = 1;
	}
	if (profiling) {
		runs = 0;
		while (now >= state->nextprof) {
			state->nextprof += profperiod;
			runs++;
		}
		if (runs && !fake) {
			profclock_cnt(runs, usermode, TRAPF_PC(frame));
			done = 1;
		}
	} else
		state->nextprof = state->nextstat;
	if (now >= state->nextcallopt) {
		state->nextcall = state->nextcallopt = INT64_MAX;
		callout_process(now);
	}

#ifdef KDTRACE_HOOKS
	if (fake == 0 && now >= state->nextcyc && cyclic_clock_func != NULL) {
		state->nextcyc = INT64_MAX;
		(*cyclic_clock_func)(frame);
	}
#endif

	t = getnextcpuevent(0);
	ET_HW_LOCK(state);
	if (!busy) {
		state->idle = 0;
		state->nextevent = t;
		loadtimer(now, (fake == 2) &&
		    (timer->et_flags & ET_FLAGS_PERCPU));
	}
	ET_HW_UNLOCK(state);
	return (done);
}

/*
 * Compute the binuptime of the next event on the current CPU.
 */
static sbintime_t
getnextcpuevent(int idle)
{
	sbintime_t event;
	struct pcpu_state *state;
	u_int hardfreq;

	state = DPCPU_PTR(timerstate);
	/* Handle hardclock() events, skipping some if CPU is idle. */
	event = state->nexthard;
	if (idle) {
		hardfreq = (u_int)hz / 2;
		if (tc_min_ticktock_freq > 2
#ifdef SMP
		    && curcpu == CPU_FIRST()
#endif
		    )
			hardfreq = hz / tc_min_ticktock_freq;
		if (hardfreq > 1)
			event += tick_sbt * (hardfreq - 1);
	}
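	/*
	 * For example, with hz = 1000 hardfreq is typically 500, so an idle
	 * CPU may postpone hardclock() by up to 499 ticks, i.e. sleep about
	 * half a second between mandatory hardclock() runs.
	 */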
	/* Handle callout events. */
	if (event > state->nextcall)
		event = state->nextcall;
	if (!idle) { /* If CPU is active - handle other types of events. */
		if (event > state->nextstat)
			event = state->nextstat;
		if (profiling && event > state->nextprof)
			event = state->nextprof;
	}
#ifdef KDTRACE_HOOKS
	if (event > state->nextcyc)
		event = state->nextcyc;
#endif
	return (event);
}

/*
 * Compute the binuptime of the next event on any CPU.
 */
static sbintime_t
getnextevent(void)
{
	struct pcpu_state *state;
	sbintime_t event;
#ifdef SMP
	int	cpu;
#endif
	int	c;

	state = DPCPU_PTR(timerstate);
	event = state->nextevent;
	c = -1;
#ifdef SMP
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (event > state->nextevent) {
				event = state->nextevent;
				c = cpu;
			}
		}
	}
#endif
	CTR4(KTR_SPARE2, "next at %d:    next %d.%08x by %d",
	    curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c);
	return (event);
}

/* Hardware timer callback function. */
static void
timercb(struct eventtimer *et, void *arg)
{
	sbintime_t now;
	sbintime_t *next;
	struct pcpu_state *state;
#ifdef SMP
	int cpu, bcast;
#endif

	/* Do not touch anything if somebody is reconfiguring timers. */
	if (busy)
		return;
	/* Update present and next tick times. */
	state = DPCPU_PTR(timerstate);
	if (et->et_flags & ET_FLAGS_PERCPU) {
		next = &state->nexttick;
	} else
		next = &nexttick;
	now = sbinuptime();
	if (periodic)
		*next = now + timerperiod;
	else
		*next = -1;	/* Next tick is not scheduled yet. */
	state->now = now;
	CTR3(KTR_SPARE2, "intr at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));

#ifdef SMP
	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
	bcast = 0;
	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			ET_HW_LOCK(state);
			state->now = now;
			if (now >= state->nextevent) {
				state->nextevent += SBT_1S;
				if (curcpu != cpu) {
					state->ipi = 1;
					bcast = 1;
				}
			}
			ET_HW_UNLOCK(state);
		}
	}
#endif

	/* Handle events for this time on this CPU. */
	handleevents(now, 0);

#ifdef SMP
	/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
	if (bcast) {
		CPU_FOREACH(cpu) {
			if (curcpu == cpu)
				continue;
			state = DPCPU_ID_PTR(cpu, timerstate);
			if (state->ipi) {
				state->ipi = 0;
				ipi_cpu(cpu, IPI_HARDCLOCK);
			}
		}
	}
#endif
}

/*
 * Load new value into hardware timer.
 */
static void
loadtimer(sbintime_t now, int start)
{
	struct pcpu_state *state;
	sbintime_t new;
	sbintime_t *next;
	uint64_t tmp;
	int eq;

	if (timer->et_flags & ET_FLAGS_PERCPU) {
		state = DPCPU_PTR(timerstate);
		next = &state->nexttick;
	} else
		next = &nexttick;
	if (periodic) {
		if (start) {
			/*
			 * Try to start all periodic timers aligned
			 * to period to make events synchronous.
			 */
			tmp = now % timerperiod;
			new = timerperiod - tmp;
			if (new < tmp)		/* Less time left than already passed. */
				new += timerperiod;
			CTR5(KTR_SPARE2, "load p at %d:   now %d.%08x first in %d.%08x",
			    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
			    (int)(new >> 32), (u_int)(new & 0xffffffff));
			*next = new + now;
			et_start(timer, new, timerperiod);
		}
	} else {
		new = getnextevent();
		eq = (new == *next);
		CTR4(KTR_SPARE2, "load at %d:    next %d.%08x eq %d",
		    curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq);
		if (!eq) {
			*next = new;
			et_start(timer, new - now, 0);
		}
	}
}

/*
 * Prepare event timer parameters after configuration changes.
 */
static void
setuptimer(void)
{
	int freq;

	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	singlemul = MIN(MAX(singlemul, 1), 20);
	freq = hz * singlemul;
	while (freq < (profiling ? profhz : stathz))
		freq += hz;
	freq = round_freq(timer, freq);
	timerperiod = SBT_1S / freq;
}

/*
 * Reconfigure the per-CPU timer on the current CPU.  Called from the IPI
 * handler to complete a reconfiguration requested by another CPU.
 */
static int
doconfigtimer(void)
{
	sbintime_t now;
	struct pcpu_state *state;

	state = DPCPU_PTR(timerstate);
	switch (atomic_load_acq_int(&state->action)) {
	case 1:
		now = sbinuptime();
		ET_HW_LOCK(state);
		loadtimer(now, 1);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	case 2:
		ET_HW_LOCK(state);
		et_stop(timer);
		ET_HW_UNLOCK(state);
		state->handle = 0;
		atomic_store_rel_int(&state->action, 0);
		return (1);
	}
	if (atomic_readandclear_int(&state->handle) && !busy) {
		now = sbinuptime();
		handleevents(now, 0);
		return (1);
	}
	return (0);
}

/*
 * Reconfigure the specified timer.
 * For per-CPU timers, use IPIs to make the other CPUs reconfigure theirs too.
 */
static void
configtimer(int start)
{
	sbintime_t now, next;
	struct pcpu_state *state;
	int cpu;

	if (start) {
		setuptimer();
		now = sbinuptime();
	} else
		now = 0;
	critical_enter();
	ET_HW_LOCK(DPCPU_PTR(timerstate));
	if (start) {
		/* Initialize time machine parameters. */
		next = now + timerperiod;
		if (periodic)
			nexttick = next;
		else
			nexttick = -1;
		CPU_FOREACH(cpu) {
			state = DPCPU_ID_PTR(cpu, timerstate);
			state->now = now;
			if (!smp_started && cpu != CPU_FIRST())
				state->nextevent = INT64_MAX;
			else
				state->nextevent = next;
			if (periodic)
				state->nexttick = next;
			else
				state->nexttick = -1;
			state->nexthard = next;
			state->nextstat = next;
			state->nextprof = next;
			state->nextcall = next;
			state->nextcallopt = next;
			hardclock_sync(cpu);
		}
		busy = 0;
		/* Start global timer or per-CPU timer of this CPU. */
		loadtimer(now, 1);
	} else {
		busy = 1;
		/* Stop global timer or per-CPU timer of this CPU. */
		et_stop(timer);
	}
	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
	/* If the timer is global or there are no other CPUs yet, we are done. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
		critical_exit();
		return;
	}
	/* Set reconfigure flags for other CPUs. */
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		atomic_store_rel_int(&state->action,
		    (cpu == curcpu) ? 0 : (start ? 1 : 2));
	}
	/* Broadcast reconfigure IPI. */
	ipi_all_but_self(IPI_HARDCLOCK);
	/* Wait for the reconfiguration to complete. */
restart:
	cpu_spinwait();
	CPU_FOREACH(cpu) {
		if (cpu == curcpu)
			continue;
		state = DPCPU_ID_PTR(cpu, timerstate);
		if (atomic_load_acq_int(&state->action))
			goto restart;
	}
#endif
	critical_exit();
}

/*
 * Calculate the nearest frequency supported by the hardware timer.
 */
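/*
 * Worked example (hypothetical hardware): an 18.432 MHz timer asked for
 * freq = 1000 gets div = 18432 and returns exactly 1000 Hz, while a
 * power-of-2 divisor timer (ET_FLAGS_POW2DIV) would round the divisor to
 * 16384 and return 1125 Hz instead.
 */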
static int
round_freq(struct eventtimer *et, int freq)
{
	uint64_t div;

	if (et->et_frequency != 0) {
		div = lmax((et->et_frequency + freq / 2) / freq, 1);
		if (et->et_flags & ET_FLAGS_POW2DIV)
			div = 1 << (flsl(div + div / 2) - 1);
		freq = (et->et_frequency + div / 2) / div;
	}
	if (et->et_min_period > SBT_1S)
		panic("Event timer \"%s\" doesn't support sub-second periods!",
		    et->et_name);
	else if (et->et_min_period != 0)
		freq = min(freq, SBT2FREQ(et->et_min_period));
	if (et->et_max_period < SBT_1S && et->et_max_period != 0)
		freq = max(freq, SBT2FREQ(et->et_max_period));
	return (freq);
}

/*
 * Configure and start event timers (BSP part).
 */
void
cpu_initclocks_bsp(void)
{
	struct pcpu_state *state;
	int base, div, cpu;

	mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
	CPU_FOREACH(cpu) {
		state = DPCPU_ID_PTR(cpu, timerstate);
		mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
#ifdef KDTRACE_HOOKS
		state->nextcyc = INT64_MAX;
#endif
		state->nextcall = INT64_MAX;
		state->nextcallopt = INT64_MAX;
	}
	periodic = want_periodic;
	/* Grab the requested timer or the best one present. */
	if (timername[0])
		timer = et_find(timername, 0, 0);
	if (timer == NULL && periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL) {
		timer = et_find(NULL,
		    ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
	}
	if (timer == NULL && !periodic) {
		timer = et_find(NULL,
		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
	}
	if (timer == NULL)
		panic("No usable event timer found!");
	et_init(timer, timercb, NULL, NULL);

	/* Adapt to timer capabilities. */
	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
		periodic = 0;
	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
		periodic = 1;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep++;

	/*
	 * We honor the requested 'hz' value.
	 * We want to run stathz in the neighborhood of 128 Hz.
	 * We would like profhz to run as often as possible.
	 */
	if (singlemul <= 0 || singlemul > 20) {
		if (hz >= 1500 || (hz % 128) == 0)
			singlemul = 1;
		else if (hz >= 750)
			singlemul = 2;
		else
			singlemul = 4;
	}
	if (periodic) {
		base = round_freq(timer, hz * singlemul);
		singlemul = max((base + hz / 2) / hz, 1);
		hz = (base + singlemul / 2) / singlemul;
		if (base <= 128)
			stathz = base;
		else {
			div = base / 128;
			if (div >= singlemul && (div % singlemul) == 0)
				div++;
			stathz = base / div;
		}
		profhz = stathz;
		while ((profhz + stathz) <= 128 * 64)
			profhz += stathz;
		profhz = round_freq(timer, profhz);
	} else {
		hz = round_freq(timer, hz);
		stathz = round_freq(timer, 127);
		profhz = round_freq(timer, stathz * 64);
	}
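	/*
	 * For example, assuming the default singlemul tunable and a timer
	 * that supports the requested rates exactly: hz = 1000 yields
	 * singlemul = 2, base = 2000, div = 15, stathz = 133 and
	 * profhz = 8113 in periodic mode; in one-shot mode stathz = 127
	 * and profhz = 8128.
	 */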
	tick = 1000000 / hz;
	tick_sbt = SBT_1S / hz;
	tick_bt = sbttobt(tick_sbt);
	statperiod = SBT_1S / stathz;
	profperiod = SBT_1S / profhz;
	ET_LOCK();
	configtimer(1);
	ET_UNLOCK();
}

/*
 * Start per-CPU event timers on APs.
 */
void
cpu_initclocks_ap(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	now = sbinuptime();
	ET_HW_LOCK(state);
	state->now = now;
	hardclock_sync(curcpu);
	spinlock_enter();
	ET_HW_UNLOCK(state);
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(state->now, 2);
	td->td_intr_nesting_level--;
	spinlock_exit();
}

/*
 * Switch to profiling clock rates.
 */
void
cpu_startprofclock(void)
{

	ET_LOCK();
	if (profiling == 0) {
		if (periodic) {
			configtimer(0);
			profiling = 1;
			configtimer(1);
		} else
			profiling = 1;
	} else
		profiling++;
	ET_UNLOCK();
}

/*
 * Switch to regular clock rates.
 */
void
cpu_stopprofclock(void)
{

	ET_LOCK();
	if (profiling == 1) {
		if (periodic) {
			configtimer(0);
			profiling = 0;
			configtimer(1);
		} else
			profiling = 0;
	} else
		profiling--;
	ET_UNLOCK();
}

/*
 * Switch this CPU to idle mode: all pending ticks are handled and some
 * hardclock() events may be skipped while idle.  Returns the time until
 * the next scheduled event, or -1 if the CPU must keep ticking normally.
 */
sbintime_t
cpu_idleclock(void)
{
	sbintime_t now, t;
	struct pcpu_state *state;

	if (idletick || busy ||
	    (periodic && (timer->et_flags & ET_FLAGS_PERCPU))
#ifdef DEVICE_POLLING
	    || curcpu == CPU_FIRST()
#endif
	    )
		return (-1);
	state = DPCPU_PTR(timerstate);
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR3(KTR_SPARE2, "idle at %d:    now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	t = getnextcpuevent(1);
	ET_HW_LOCK(state);
	state->idle = 1;
	state->nextevent = t;
	if (!periodic)
		loadtimer(now, 0);
	ET_HW_UNLOCK(state);
	return (MAX(t - now, 0));
}

/*
 * Switch this CPU back to active mode, catching up in one batch on any
 * ticks skipped while idle.
 */
void
cpu_activeclock(void)
{
	sbintime_t now;
	struct pcpu_state *state;
	struct thread *td;

	state = DPCPU_PTR(timerstate);
	if (state->idle == 0 || busy)
		return;
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();
	CTR3(KTR_SPARE2, "active at %d:  now  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
	spinlock_enter();
	td = curthread;
	td->td_intr_nesting_level++;
	handleevents(now, 1);
	td->td_intr_nesting_level--;
	spinlock_exit();
}
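
/*
 * A minimal sketch of how a machine-dependent idle loop is expected to pair
 * the two calls above (illustrative only; the real callers are the per-arch
 * cpu_idle() implementations):
 *
 *	sbintime_t sbt;
 *
 *	sbt = cpu_idleclock();
 *	if (sbt >= 0) {
 *		... arm a wakeup no later than 'sbt' in the future and halt ...
 *	}
 *	cpu_activeclock();
 */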

/*
 * Change the frequency of the given timer.  This changes et->et_frequency and
 * if et is the active timer it reconfigures the timer on all CPUs.  This is
 * intended to be a private interface for the use of et_change_frequency() only.
 */
void
cpu_et_frequency(struct eventtimer *et, uint64_t newfreq)
{

	ET_LOCK();
	if (et == timer) {
		configtimer(0);
		et->et_frequency = newfreq;
		configtimer(1);
	} else
		et->et_frequency = newfreq;
	ET_UNLOCK();
}

#ifdef KDTRACE_HOOKS
void
clocksource_cyc_set(const struct bintime *bt)
{
	sbintime_t now, t;
	struct pcpu_state *state;

	/* Do not touch anything if somebody is reconfiguring timers. */
	if (busy)
		return;
	t = bttosbt(*bt);
	state = DPCPU_PTR(timerstate);
	if (periodic)
		now = state->now;
	else
		now = sbinuptime();

	CTR5(KTR_SPARE2, "set_cyc at %d:  now  %d.%08x  t  %d.%08x",
	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
	    (int)(t >> 32), (u_int)(t & 0xffffffff));

	ET_HW_LOCK(state);
	if (t == state->nextcyc)
		goto done;
	state->nextcyc = t;
	if (t >= state->nextevent)
		goto done;
	state->nextevent = t;
	if (!periodic)
		loadtimer(now, 0);
done:
	ET_HW_UNLOCK(state);
}
#endif

void
cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
{
	struct pcpu_state *state;

	/* Do not touch anything if somebody is reconfiguring timers. */
	if (busy)
		return;
	CTR6(KTR_SPARE2, "new co at %d:    on %d at %d.%08x - %d.%08x",
	    curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
	    (int)(bt >> 32), (u_int)(bt & 0xffffffff));
	state = DPCPU_ID_PTR(cpu, timerstate);
	ET_HW_LOCK(state);

	/*
	 * If a callout time is already set earlier, do nothing.  This check
	 * may appear redundant because callout_process() checks it as well,
	 * but the double check guarantees we are safe with respect to races
	 * between interrupt execution and scheduling.
	 */
	state->nextcallopt = bt_opt;
	if (bt >= state->nextcall)
		goto done;
	state->nextcall = bt;
	/* If there is some other event set earlier -- do nothing. */
	if (bt >= state->nextevent)
		goto done;
	state->nextevent = bt;
	/* If timer is periodic -- there is nothing to reprogram. */
	if (periodic)
		goto done;
	/* If timer is global or of the current CPU -- reprogram it. */
	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
		loadtimer(sbinuptime(), 0);
done:
		ET_HW_UNLOCK(state);
		return;
	}
	/* Otherwise, make the other CPU reprogram it. */
	state->handle = 1;
	ET_HW_UNLOCK(state);
#ifdef SMP
	ipi_cpu(cpu, IPI_HARDCLOCK);
#endif
}

/*
 * Report or change the active event timer hardware.
 */
static int
sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
	char buf[32];
	struct eventtimer *et;
	int error;

	ET_LOCK();
	et = timer;
	snprintf(buf, sizeof(buf), "%s", et->et_name);
	ET_UNLOCK();
	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	ET_LOCK();
	et = timer;
	if (error != 0 || req->newptr == NULL ||
	    strcasecmp(buf, et->et_name) == 0) {
		ET_UNLOCK();
		return (error);
	}
	et = et_find(buf, 0, 0);
	if (et == NULL) {
		ET_UNLOCK();
		return (ENOENT);
	}
	configtimer(0);
	et_free(timer);
	if (et->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep++;
	if (timer->et_flags & ET_FLAGS_C3STOP)
		cpu_disable_deep_sleep--;
	periodic = want_periodic;
	timer = et;
	et_init(timer, timercb, NULL, NULL);
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_timer, "A", "Chosen event timer");
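
/*
 * Example run-time usage (timer names are hardware-dependent):
 *
 *	# sysctl kern.eventtimer.timer
 *	kern.eventtimer.timer: LAPIC
 *	# sysctl kern.eventtimer.timer=HPET
 */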

/*
 * Report or change the active event timer periodicity.
 */
static int
sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = periodic;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	ET_LOCK();
	configtimer(0);
	periodic = want_periodic = val;
	configtimer(1);
	ET_UNLOCK();
	return (error);
}
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_eventtimer_periodic, "I",
    "Enable event timer periodic mode");
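
/*
 * Example: switch the active timer into periodic mode at run time:
 *
 *	# sysctl kern.eventtimer.periodic=1
 */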