/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * Copyright (c) 2011, 2015, 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Julien Ridoux at the University
 * of Melbourne under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ntp.h"
#include "opt_ffclock.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timeffc.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/vdso.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
};
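
/*
 * For reference, the positional initializer above corresponds to the
 * leading members of struct timecounter (an editorial sketch; see
 * <sys/timetc.h> for the authoritative layout):
 *
 *	.tc_get_timecount = dummy_get_timecount,
 *	.tc_poll_pps      = 0,		(no PPS polling hook)
 *	.tc_counter_mask  = ~0u,	(full 32 bits implemented)
 *	.tc_frequency     = 1000000,	(1 MHz nominal)
 *	.tc_name          = "dummy",
 *	.tc_quality       = -1000000	(never selected automatically)
 */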

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	uint64_t		th_scale;
	u_int			th_large_delta;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct bintime		th_bintime;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	struct bintime		th_boottime;
	/* Fields not to be copied in tc_windup start with th_generation. */
	u_int			th_generation;
	struct timehands	*th_next;
};

static struct timehands ths[16] = {
    [0] =  {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,
	.th_large_delta = 1000000,
	.th_offset = { .sec = 1 },
	.th_generation = 1,
    },
};

static struct timehands *volatile timehands = &ths[0];
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

int tc_min_ticktock_freq = 1;

volatile time_t time_second = 1;
volatile time_t time_uptime = 1;

static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime");

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, "");

static int timestepwarnings;
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "Log time steps");

static int timehands_count = 2;
SYSCTL_INT(_kern_timecounter, OID_AUTO, timehands_count,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &timehands_count, 0, "Count of timehands in rotation");

struct bintime bt_timethreshold;
struct bintime bt_tickthreshold;
sbintime_t sbt_timethreshold;
sbintime_t sbt_tickthreshold;
struct bintime tc_tick_bt;
sbintime_t tc_tick_sbt;
int tc_precexp;
int tc_timepercentage = TC_DEFAULTPERC;
static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation,
    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
    sysctl_kern_timecounter_adjprecision, "I",
    "Allowed time interval deviation in percents");

volatile int rtc_generation = 1;

static int tc_chosen;	/* Non-zero if a specific tc was chosen via sysctl. */
static char tc_from_tunable[16];

static void tc_windup(struct bintime *new_boottimebin);
static void cpu_tick_calibrate(int);

void dtrace_getnanotime(struct timespec *tsp);

static int
sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
{
	struct timeval boottime;

	getboottime(&boottime);

#ifndef __mips__
#ifdef SCTL_MASK32
	int tv[2];

	if (req->flags & SCTL_MASK32) {
		tv[0] = boottime.tv_sec;
		tv[1] = boottime.tv_usec;
		return (SYSCTL_OUT(req, tv, sizeof(tv)));
	}
#endif
#endif
	return (SYSCTL_OUT(req, &boottime, sizeof(boottime)));
}

static int
sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS)
{
	u_int ncount;
	struct timecounter *tc = arg1;

	ncount = tc->tc_get_timecount(tc);
	return (sysctl_handle_int(oidp, &ncount, 0, req));
}

static int
sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS)
{
	uint64_t freq;
	struct timecounter *tc = arg1;

	freq = tc->tc_frequency;
	return (sysctl_handle_64(oidp, &freq, 0, req));
}

/*
 * Return the difference between the timehands' counter value now and what
 * was when we copied it to the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/time.h> for a description of these 12 functions.
 */

static __inline void
bintime_off(struct bintime *bt, u_int off)
{
	struct timehands *th;
	struct bintime *btp;
	uint64_t scale, x;
	u_int delta, gen, large_delta;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		btp = (struct bintime *)((vm_offset_t)th + off);
		*bt = *btp;
		scale = th->th_scale;
		delta = tc_delta(th);
		large_delta = th->th_large_delta;
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	if (__predict_false(delta >= large_delta)) {
		/* Avoid overflow for scale * delta. */
		x = (scale >> 32) * delta;
		bt->sec += x >> 32;
		bintime_addx(bt, x << 32);
		bintime_addx(bt, (scale & 0xffffffff) * delta);
	} else {
		bintime_addx(bt, scale * delta);
	}
}
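
/*
 * Editorial note on the overflow split above: th_scale is 2^64 divided by
 * the counter frequency, i.e. seconds per counter tick as a 64-bit binary
 * fraction, so scale * delta can need up to 96 bits.  Writing
 *
 *	scale * delta = (((scale >> 32) * delta) << 32)
 *	    + (scale & 0xffffffff) * delta
 *
 * keeps each partial product within 64 bits: the top 32 bits of
 * (scale >> 32) * delta are whole seconds (added to bt->sec), and the
 * remaining terms are accumulated into the fraction with bintime_addx().
 */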
#define	GETTHBINTIME(dst, member)					\
do {									\
	_Static_assert(_Generic(((struct timehands *)NULL)->member,	\
	    struct bintime: 1, default: 0) == 1,			\
	    "struct timehands member is not of struct bintime type");	\
	bintime_off(dst, __offsetof(struct timehands, member));		\
} while (0)

static __inline void
getthmember(void *out, size_t out_size, u_int off)
{
	struct timehands *th;
	u_int gen;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		memcpy(out, (char *)th + off, out_size);
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);
}
#define	GETTHMEMBER(dst, member)					\
do {									\
	_Static_assert(_Generic(*dst,					\
	    __typeof(((struct timehands *)NULL)->member): 1,		\
	    default: 0) == 1,						\
	    "*dst and struct timehands member have different types");	\
	getthmember(dst, sizeof(*dst), __offsetof(struct timehands,	\
	    member));							\
} while (0)
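
/*
 * Expansion sketch (editorial): GETTHMEMBER(tsp, th_nanotime) becomes,
 * after the compile-time type check, a call such as
 *
 *	getthmember(tsp, sizeof(*tsp),
 *	    __offsetof(struct timehands, th_nanotime));
 *
 * i.e. a generation-checked copy of one cached member out of the
 * currently active timehands.
 */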

#ifdef FFCLOCK
void
fbclock_binuptime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_offset);
}

void
fbclock_nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	fbclock_binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
fbclock_microuptime(struct timeval *tvp)
{
	struct bintime bt;

	fbclock_binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
fbclock_bintime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_bintime);
}

void
fbclock_nanotime(struct timespec *tsp)
{
	struct bintime bt;

	fbclock_bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
fbclock_microtime(struct timeval *tvp)
{
	struct bintime bt;

	fbclock_bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
fbclock_getbinuptime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_offset);
}

void
fbclock_getnanouptime(struct timespec *tsp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timespec(&bt, tsp);
}

void
fbclock_getmicrouptime(struct timeval *tvp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timeval(&bt, tvp);
}

void
fbclock_getbintime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_bintime);
}

void
fbclock_getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

void
fbclock_getmicrotime(struct timeval *tvp)
{

	GETTHMEMBER(tvp, th_microtime);
}
#else /* !FFCLOCK */

void
binuptime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_offset);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	GETTHBINTIME(bt, th_bintime);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinuptime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_offset);
}

void
getnanouptime(struct timespec *tsp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timespec(&bt, tsp);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct bintime bt;

	GETTHMEMBER(&bt, th_offset);
	bintime2timeval(&bt, tvp);
}

void
getbintime(struct bintime *bt)
{

	GETTHMEMBER(bt, th_bintime);
}

void
getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

void
getmicrotime(struct timeval *tvp)
{

	GETTHMEMBER(tvp, th_microtime);
}
#endif /* FFCLOCK */

void
getboottime(struct timeval *boottime)
{
	struct bintime boottimebin;

	getboottimebin(&boottimebin);
	bintime2timeval(&boottimebin, boottime);
}

void
getboottimebin(struct bintime *boottimebin)
{

	GETTHMEMBER(boottimebin, th_boottime);
}

#ifdef FFCLOCK
/*
 * Support for feed-forward synchronization algorithms. This is heavily inspired
 * by the timehands mechanism but kept independent from it. *_windup() functions
 * have some connection to avoid accessing the timecounter hardware more than
 * necessary.
 */

/* Feed-forward clock estimates kept updated by the synchronization daemon. */
struct ffclock_estimate ffclock_estimate;
struct bintime ffclock_boottime;	/* Feed-forward boot time estimate. */
uint32_t ffclock_status;		/* Feed-forward clock status. */
int8_t ffclock_updated;			/* New estimates are available. */
struct mtx ffclock_mtx;			/* Mutex on ffclock_estimate. */

struct fftimehands {
	struct ffclock_estimate	cest;
	struct bintime		tick_time;
	struct bintime		tick_time_lerp;
	ffcounter		tick_ffcount;
	uint64_t		period_lerp;
	volatile uint8_t	gen;
	struct fftimehands	*next;
};

#define	NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x))

static struct fftimehands ffth[10];
static struct fftimehands *volatile fftimehands = ffth;

static void
ffclock_init(void)
{
	struct fftimehands *cur;
	struct fftimehands *last;

	memset(ffth, 0, sizeof(ffth));

	last = ffth + NUM_ELEMENTS(ffth) - 1;
	for (cur = ffth; cur < last; cur++)
		cur->next = cur + 1;
	last->next = ffth;

	ffclock_updated = 0;
	ffclock_status = FFCLOCK_STA_UNSYNC;
	mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF);
}

/*
 * Reset the feed-forward clock estimates. Called from inittodr() to get things
 * kick-started and uses the timecounter nominal frequency as a first period
 * estimate. Note: this function may be called several times just after boot.
 * Note: this is the only function that sets the value of boot time for the
 * monotonic (i.e. uptime) version of the feed-forward clock.
 */
void
ffclock_reset_clock(struct timespec *ts)
{
	struct timecounter *tc;
	struct ffclock_estimate cest;

	tc = timehands->th_counter;
	memset(&cest, 0, sizeof(struct ffclock_estimate));

	timespec2bintime(ts, &ffclock_boottime);
	timespec2bintime(ts, &(cest.update_time));
	ffclock_read_counter(&cest.update_ffcount);
	cest.leapsec_next = 0;
	cest.period = ((1ULL << 63) / tc->tc_frequency) << 1;
	cest.errb_abs = 0;
	cest.errb_rate = 0;
	cest.status = FFCLOCK_STA_UNSYNC;
	cest.leapsec_total = 0;
	cest.leapsec = 0;

	mtx_lock(&ffclock_mtx);
	bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
	ffclock_updated = INT8_MAX;
	mtx_unlock(&ffclock_mtx);

	printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name,
	    (unsigned long long)tc->tc_frequency, (long)ts->tv_sec,
	    (unsigned long)ts->tv_nsec);
}
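
/*
 * Editorial note on the period computation above: the ffclock period is
 * the counter period as a 64-bit binary fraction of a second, i.e.
 * 2^64 / tc_frequency.  Since 2^64 itself does not fit in a uint64_t, it
 * is computed as ((1ULL << 63) / freq) << 1, sacrificing the lowest bit.
 * E.g. for a 1 MHz counter the period is about 2^64 / 10^6 ~= 1.8e13,
 * which is 1 microsecond expressed in 2^-64 second units.
 */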

/*
 * Sub-routine to convert a time interval measured in RAW counter units to time
 * in seconds stored in bintime format.
 * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be
 * larger than the max value of u_int (on 32-bit architectures). Loop to
 * consume the extra cycles.
 */
static void
ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt)
{
	struct bintime bt2;
	ffcounter delta, delta_max;

	delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1;
	bintime_clear(bt);
	do {
		if (ffdelta > delta_max)
			delta = delta_max;
		else
			delta = ffdelta;
		bt2.sec = 0;
		bt2.frac = period;
		bintime_mul(&bt2, (unsigned int)delta);
		bintime_add(bt, &bt2);
		ffdelta -= delta;
	} while (ffdelta > 0);
}
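
/*
 * Worked example (editorial sketch): with a 1 MHz counter, period is
 * roughly 2^64 / 10^6, so converting ffdelta = 1000 counts yields
 * bt ~= 1000 * (2^64 / 10^6) * 2^-64 s = 1 ms.  On ILP32, an ffdelta
 * larger than UINT_MAX is consumed in delta_max-sized chunks by the
 * loop above.
 */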

/*
 * Update the fftimehands.
 * Push the tick ffcount and time(s) forward based on current clock estimate.
 * The conversion from ffcounter to bintime relies on the difference clock
 * principle, whose accuracy relies on computing small time intervals. If a new
 * clock estimate has been passed by the synchronisation daemon, make it
 * current, and compute the linear interpolation for monotonic time if needed.
 */
static void
ffclock_windup(unsigned int delta)
{
	struct ffclock_estimate *cest;
	struct fftimehands *ffth;
	struct bintime bt, gap_lerp;
	ffcounter ffdelta;
	uint64_t frac;
	unsigned int polling;
	uint8_t forward_jump, ogen;

	/*
	 * Pick the next timehand, copy current ffclock estimates and move tick
	 * times and counter forward.
	 */
	forward_jump = 0;
	ffth = fftimehands->next;
	ogen = ffth->gen;
	ffth->gen = 0;
	cest = &ffth->cest;
	bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate));
	ffdelta = (ffcounter)delta;
	ffth->period_lerp = fftimehands->period_lerp;

	ffth->tick_time = fftimehands->tick_time;
	ffclock_convert_delta(ffdelta, cest->period, &bt);
	bintime_add(&ffth->tick_time, &bt);

	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
	ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt);
	bintime_add(&ffth->tick_time_lerp, &bt);

	ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta;

	/*
	 * Assess the status of the clock; if the last update is too old,
	 * it is likely the synchronisation daemon is dead and the clock
	 * is free running.
	 */
	if (ffclock_updated == 0) {
		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
		ffclock_convert_delta(ffdelta, cest->period, &bt);
		if (bt.sec > 2 * FFCLOCK_SKM_SCALE)
			ffclock_status |= FFCLOCK_STA_UNSYNC;
	}

	/*
	 * If available, grab updated clock estimates and make them current.
	 * Recompute time at this tick using the updated estimates. The clock
	 * estimates passed in by the feed-forward synchronisation daemon may
	 * result in time conversion that is not monotonically increasing
	 * (just after the update). time_lerp is a particular linear
	 * interpolation over the synchronisation algorithm's polling period
	 * that ensures monotonicity for the clock ids requesting it.
	 */
	if (ffclock_updated > 0) {
		bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate));
		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
		ffth->tick_time = cest->update_time;
		ffclock_convert_delta(ffdelta, cest->period, &bt);
		bintime_add(&ffth->tick_time, &bt);

		/* ffclock_reset sets ffclock_updated to INT8_MAX */
		if (ffclock_updated == INT8_MAX)
			ffth->tick_time_lerp = ffth->tick_time;

		if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >))
			forward_jump = 1;
		else
			forward_jump = 0;

		bintime_clear(&gap_lerp);
		if (forward_jump) {
			gap_lerp = ffth->tick_time;
			bintime_sub(&gap_lerp, &ffth->tick_time_lerp);
		} else {
			gap_lerp = ffth->tick_time_lerp;
			bintime_sub(&gap_lerp, &ffth->tick_time);
		}

		/*
		 * The reset from the RTC clock may be far from accurate, and
		 * reducing the gap between real time and interpolated time
		 * could take a very long time if the interpolated clock insists
		 * on strict monotonicity. The clock is reset under very strict
		 * conditions (kernel time is known to be wrong and the
		 * synchronization daemon has been restarted recently).
		 * ffclock_boottime absorbs the jump to ensure boot time is
		 * correct and uptime functions stay consistent.
		 */
		if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) &&
		    ((cest->status & FFCLOCK_STA_UNSYNC) == 0) &&
		    ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) {
			if (forward_jump)
				bintime_add(&ffclock_boottime, &gap_lerp);
			else
				bintime_sub(&ffclock_boottime, &gap_lerp);
			ffth->tick_time_lerp = ffth->tick_time;
			bintime_clear(&gap_lerp);
		}

		ffclock_status = cest->status;
		ffth->period_lerp = cest->period;

		/*
		 * Compute corrected period used for the linear interpolation of
		 * time. The rate of linear interpolation is capped to 5000PPM
		 * (5ms/s).
		 */
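		/*
		 * (Editorial note on the constants below: 18446744073 is
		 * about 2^64 / 10^9, so 5000000 * 18446744073 represents
		 * 5 ms as a 64-bit binary fraction of a second; multiplied
		 * by the polling interval in seconds, it caps the gap that
		 * may be absorbed per update at 5 ms/s, i.e. 5000 PPM.)
		 */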
		if (bintime_isset(&gap_lerp)) {
			ffdelta = cest->update_ffcount;
			ffdelta -= fftimehands->cest.update_ffcount;
			ffclock_convert_delta(ffdelta, cest->period, &bt);
			polling = bt.sec;
			bt.sec = 0;
			bt.frac = 5000000 * (uint64_t)18446744073LL;
			bintime_mul(&bt, polling);
			if (bintime_cmp(&gap_lerp, &bt, >))
				gap_lerp = bt;

			/* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */
			frac = 0;
			if (gap_lerp.sec > 0) {
				frac -= 1;
				frac /= ffdelta / gap_lerp.sec;
			}
			frac += gap_lerp.frac / ffdelta;

			if (forward_jump)
				ffth->period_lerp += frac;
			else
				ffth->period_lerp -= frac;
		}

		ffclock_updated = 0;
	}
	if (++ogen == 0)
		ogen = 1;
	ffth->gen = ogen;
	fftimehands = ffth;
}

/*
 * Adjust the fftimehands when the timecounter is changed. Stating the obvious,
 * the old and new hardware counter cannot be read simultaneously. tc_windup()
 * does read the two counters 'back to back', but a few cycles are effectively
 * lost, and not accumulated in tick_ffcount. This is a fairly radical
 * operation for a feed-forward synchronization daemon, and it is its job not
 * to push irrelevant data to the kernel. Because there is no locking here,
 * simply ignore the pending or next update to give the daemon a chance to
 * realize the counter has changed.
 */
static void
ffclock_change_tc(struct timehands *th)
{
	struct fftimehands *ffth;
	struct ffclock_estimate *cest;
	struct timecounter *tc;
	uint8_t ogen;

	tc = th->th_counter;
	ffth = fftimehands->next;
	ogen = ffth->gen;
	ffth->gen = 0;

	cest = &ffth->cest;
	bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate));
	cest->period = ((1ULL << 63) / tc->tc_frequency) << 1;
	cest->errb_abs = 0;
	cest->errb_rate = 0;
	cest->status |= FFCLOCK_STA_UNSYNC;

	ffth->tick_ffcount = fftimehands->tick_ffcount;
	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
	ffth->tick_time = fftimehands->tick_time;
	ffth->period_lerp = cest->period;

	/* Do not lock but ignore next update from synchronization daemon. */
	ffclock_updated--;

	if (++ogen == 0)
		ogen = 1;
	ffth->gen = ogen;
	fftimehands = ffth;
}

/*
 * Retrieve feed-forward counter and time of last kernel tick.
 */
void
ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags)
{
	struct fftimehands *ffth;
	uint8_t gen;

	/*
	 * No locking but check generation has not changed. Also need to make
	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
	 */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP)
			*bt = ffth->tick_time_lerp;
		else
			*bt = ffth->tick_time;
		*ffcount = ffth->tick_ffcount;
	} while (gen == 0 || gen != ffth->gen);
}

/*
 * Absolute clock conversion. Low level function to convert ffcounter to
 * bintime. The ffcounter is converted using the current ffclock period estimate
 * or the "interpolated period" to ensure monotonicity.
 * NOTE: this conversion may have been deferred, and the clock updated since the
 * hardware counter has been read.
 */
void
ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags)
{
	struct fftimehands *ffth;
	struct bintime bt2;
	ffcounter ffdelta;
	uint8_t gen;

	/*
	 * No locking but check generation has not changed. Also need to make
	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
	 */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		if (ffcount > ffth->tick_ffcount)
			ffdelta = ffcount - ffth->tick_ffcount;
		else
			ffdelta = ffth->tick_ffcount - ffcount;

		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) {
			*bt = ffth->tick_time_lerp;
			ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2);
		} else {
			*bt = ffth->tick_time;
			ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2);
		}

		if (ffcount > ffth->tick_ffcount)
			bintime_add(bt, &bt2);
		else
			bintime_sub(bt, &bt2);
	} while (gen == 0 || gen != ffth->gen);
}

/*
 * Difference clock conversion.
 * Low level function to convert a time interval measured in RAW counter units
 * into bintime. The difference clock allows measuring small intervals much more
 * reliably than the absolute clock.
 */
void
ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt)
{
	struct fftimehands *ffth;
	uint8_t gen;

	/* No locking but check generation has not changed. */
	do {
		ffth = fftimehands;
		gen = ffth->gen;
		ffclock_convert_delta(ffdelta, ffth->cest.period, bt);
	} while (gen == 0 || gen != ffth->gen);
}

/*
 * Access to current ffcounter value.
 */
void
ffclock_read_counter(ffcounter *ffcount)
{
	struct timehands *th;
	struct fftimehands *ffth;
	unsigned int gen, delta;

	/*
	 * ffclock_windup() is called from tc_windup(), so it is safe to rely
	 * on th->th_generation only, for a correct delta and ffcounter.
	 */
	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		ffth = fftimehands;
		delta = tc_delta(th);
		*ffcount = ffth->tick_ffcount;
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	*ffcount += delta;
}

void
binuptime(struct bintime *bt)
{

	binuptime_fromclock(bt, sysclock_active);
}

void
nanouptime(struct timespec *tsp)
{

	nanouptime_fromclock(tsp, sysclock_active);
}

void
microuptime(struct timeval *tvp)
{

	microuptime_fromclock(tvp, sysclock_active);
}

void
bintime(struct bintime *bt)
{

	bintime_fromclock(bt, sysclock_active);
}

void
nanotime(struct timespec *tsp)
{

	nanotime_fromclock(tsp, sysclock_active);
}

void
microtime(struct timeval *tvp)
{

	microtime_fromclock(tvp, sysclock_active);
}

void
getbinuptime(struct bintime *bt)
{

	getbinuptime_fromclock(bt, sysclock_active);
}

void
getnanouptime(struct timespec *tsp)
{

	getnanouptime_fromclock(tsp, sysclock_active);
}

void
getmicrouptime(struct timeval *tvp)
{

	getmicrouptime_fromclock(tvp, sysclock_active);
}

void
getbintime(struct bintime *bt)
{

	getbintime_fromclock(bt, sysclock_active);
}

void
getnanotime(struct timespec *tsp)
{

	getnanotime_fromclock(tsp, sysclock_active);
}

void
getmicrotime(struct timeval *tvp)
{

	getmicrotime_fromclock(tvp, sysclock_active);
}

#endif /* FFCLOCK */

/*
 * This is a clone of getnanotime and used for walltimestamps.
 * The dtrace_ prefix prevents fbt from creating probes for
 * it so walltimestamp can be safely used in all fbt probes.
 */
void
dtrace_getnanotime(struct timespec *tsp)
{

	GETTHMEMBER(tsp, th_nanotime);
}

/*
 * System clock currently providing time to the system. Modifiable via sysctl
 * when the FFCLOCK option is defined.
 */
int sysclock_active = SYSCLOCK_FBCK;

/* Internal NTP status and error estimates. */
extern int time_status;
extern long time_esterror;

/*
 * Take a snapshot of sysclock data which can be used to compare system clocks
 * and generate timestamps after the fact.
 */
void
sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast)
{
	struct fbclock_info *fbi;
	struct timehands *th;
	struct bintime bt;
	unsigned int delta, gen;
#ifdef FFCLOCK
	ffcounter ffcount;
	struct fftimehands *ffth;
	struct ffclock_info *ffi;
	struct ffclock_estimate cest;

	ffi = &clock_snap->ff_info;
#endif

	fbi = &clock_snap->fb_info;
	delta = 0;

	do {
		th = timehands;
		gen = atomic_load_acq_int(&th->th_generation);
		fbi->th_scale = th->th_scale;
		fbi->tick_time = th->th_offset;
#ifdef FFCLOCK
		ffth = fftimehands;
		ffi->tick_time = ffth->tick_time_lerp;
		ffi->tick_time_lerp = ffth->tick_time_lerp;
		ffi->period = ffth->cest.period;
		ffi->period_lerp = ffth->period_lerp;
		clock_snap->ffcount = ffth->tick_ffcount;
		cest = ffth->cest;
#endif
		if (!fast)
			delta = tc_delta(th);
		atomic_thread_fence_acq();
	} while (gen == 0 || gen != th->th_generation);

	clock_snap->delta = delta;
	clock_snap->sysclock_active = sysclock_active;

	/* Record feedback clock status and error. */
	clock_snap->fb_info.status = time_status;
	/* XXX: Very crude estimate of feedback clock error. */
	bt.sec = time_esterror / 1000000;
	bt.frac = ((time_esterror - bt.sec) * 1000000) *
	    (uint64_t)18446744073709ULL;
	clock_snap->fb_info.error = bt;

#ifdef FFCLOCK
	if (!fast)
		clock_snap->ffcount += delta;

	/* Record feed-forward clock leap second adjustment. */
	ffi->leapsec_adjustment = cest.leapsec_total;
	if (clock_snap->ffcount > cest.leapsec_next)
		ffi->leapsec_adjustment -= cest.leapsec;

	/* Record feed-forward clock status and error. */
	clock_snap->ff_info.status = cest.status;
	ffcount = clock_snap->ffcount - cest.update_ffcount;
	ffclock_convert_delta(ffcount, cest.period, &bt);
	/* 18446744073709 = int(2^64 / 1e12), err_bound_rate in [ps/s]. */
	bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL);
	/* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */
	bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL);
	clock_snap->ff_info.error = bt;
#endif
}

/*
 * Convert a sysclock snapshot into a struct bintime based on the specified
 * clock source and flags.
 */
int
sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt,
    int whichclock, uint32_t flags)
{
	struct bintime boottimebin;
#ifdef FFCLOCK
	struct bintime bt2;
	uint64_t period;
#endif

	switch (whichclock) {
	case SYSCLOCK_FBCK:
		*bt = cs->fb_info.tick_time;

		/* If snapshot was created with !fast, delta will be >0. */
		if (cs->delta > 0)
			bintime_addx(bt, cs->fb_info.th_scale * cs->delta);

		if ((flags & FBCLOCK_UPTIME) == 0) {
			getboottimebin(&boottimebin);
			bintime_add(bt, &boottimebin);
		}
		break;
#ifdef FFCLOCK
	case SYSCLOCK_FFWD:
		if (flags & FFCLOCK_LERP) {
			*bt = cs->ff_info.tick_time_lerp;
			period = cs->ff_info.period_lerp;
		} else {
			*bt = cs->ff_info.tick_time;
			period = cs->ff_info.period;
		}

		/* If snapshot was created with !fast, delta will be >0. */
		if (cs->delta > 0) {
			ffclock_convert_delta(cs->delta, period, &bt2);
			bintime_add(bt, &bt2);
		}

		/* Leap second adjustment. */
		if (flags & FFCLOCK_LEAPSEC)
			bt->sec -= cs->ff_info.leapsec_adjustment;

		/* Boot time adjustment, for uptime/monotonic clocks. */
		if (flags & FFCLOCK_UPTIME)
			bintime_sub(bt, &ffclock_boottime);
		break;
#endif
	default:
		return (EINVAL);
		break;
	}

	return (0);
}
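
/*
 * Usage sketch (editorial, not part of the original file): to timestamp an
 * event once and convert it against both system clocks after the fact:
 *
 *	struct sysclock_snap snap;
 *	struct bintime fb, ff;
 *
 *	sysclock_getsnapshot(&snap, 0);
 *	(void)sysclock_snap2bintime(&snap, &fb, SYSCLOCK_FBCK, 0);
 *	(void)sysclock_snap2bintime(&snap, &ff, SYSCLOCK_FFWD, FFCLOCK_LERP);
 *
 * The snapshot is taken once; both conversions then reuse the same counter
 * delta, so the two timestamps refer to the same instant.
 */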

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;
	struct sysctl_oid *tc_root;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		if (bootverbose) {
			printf("Timecounter \"%s\" frequency %ju Hz",
			    tc->tc_name, (uintmax_t)tc->tc_frequency);
			printf(" -- Insufficient hz, needs at least %u\n", u);
		}
	} else if (tc->tc_quality >= 0 || bootverbose) {
		printf("Timecounter \"%s\" frequency %ju Hz quality %d\n",
		    tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Set up sysctl tree for this counter.
	 */
	tc_root = SYSCTL_ADD_NODE_WITH_LABEL(NULL,
	    SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name,
	    CTLFLAG_RW, 0, "timecounter description", "timecounter");
	SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0,
	    "mask for implemented bits");
	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc),
	    sysctl_kern_timecounter_get, "IU", "current timecounter value");
	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc),
	    sysctl_kern_timecounter_freq, "QU", "timecounter frequency");
	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
	    "quality", CTLFLAG_RD, &(tc->tc_quality), 0,
	    "goodness of time counter");
	/*
	 * Do not automatically switch if the current tc was specifically
	 * chosen.  Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc_chosen)
		return;
	if (tc->tc_quality < 0)
		return;
	if (tc_from_tunable[0] != '\0' &&
	    strcmp(tc->tc_name, tc_from_tunable) == 0) {
		tc_chosen = 1;
		tc_from_tunable[0] = '\0';
	} else {
		if (tc->tc_quality < timecounter->tc_quality)
			return;
		if (tc->tc_quality == timecounter->tc_quality &&
		    tc->tc_frequency < timecounter->tc_frequency)
			return;
	}
	(void)tc->tc_get_timecount(tc);
	timecounter = tc;
}
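
/*
 * Registration sketch (editorial): a clock driver typically declares a
 * timecounter and hands it to tc_init() once the hardware is readable,
 * e.g. (names hypothetical):
 *
 *	static u_int
 *	acme_get_timecount(struct timecounter *tc)
 *	{
 *		return (ACME_READ_COUNT());	hypothetical register read
 *	}
 *
 *	static struct timecounter acme_tc = {
 *		.tc_get_timecount = acme_get_timecount,
 *		.tc_counter_mask = 0xffffffff,
 *		.tc_frequency = 24000000,
 *		.tc_name = "acme",
 *		.tc_quality = 800,
 *	};
 *
 *	tc_init(&acme_tc);
 *
 * tc_init() then decides, based on quality and frequency, whether to make
 * the new counter the active one.
 */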

/* Report the frequency of the current timecounter. */
uint64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

static bool
sleeping_on_old_rtc(struct thread *td)
{

	/*
	 * td_rtcgen is modified by curthread when it is running,
	 * and by other threads in this function.  By finding the thread
	 * on a sleepqueue and holding the lock on the sleepqueue
	 * chain, we guarantee that the thread is not running and that
	 * modifying td_rtcgen is safe.  Setting td_rtcgen to zero informs
	 * the thread that it was woken due to a real-time clock adjustment.
	 * (The declaration of td_rtcgen refers to this comment.)
	 */
	if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) {
		td->td_rtcgen = 0;
		return (true);
	}
	return (false);
}

static struct mtx tc_setclock_mtx;
MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);

/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec tbef, taft;
	struct bintime bt, bt2;

	timespec2bintime(ts, &bt);
	nanotime(&tbef);
	mtx_lock_spin(&tc_setclock_mtx);
	cpu_tick_calibrate(1);
	binuptime(&bt2);
	bintime_sub(&bt, &bt2);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(&bt);
	mtx_unlock_spin(&tc_setclock_mtx);

	/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
	atomic_add_rel_int(&rtc_generation, 2);
	sleepq_chains_remove_matching(sleeping_on_old_rtc);
	if (timestepwarnings) {
		nanotime(&taft);
		log(LOG_INFO,
		    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
		    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
		    (intmax_t)taft.tv_sec, taft.tv_nsec,
		    (intmax_t)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(struct bintime *new_boottimebin)
{
	struct bintime bt;
	struct timehands *th, *tho;
	uint64_t scale;
	u_int delta, ncount, ogen;
	int i;
	time_t t;

	/*
	 * Make the next timehands a copy of the current one, but do
	 * not overwrite the generation or next pointer.  While we
	 * update the contents, the generation must be zero.  We need
	 * to ensure that the zero generation is visible before the
	 * data updates become visible, which requires release fence.
	 * For similar reasons, re-reading of the generation after the
	 * data is read should use acquire fence.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	atomic_thread_fence_rel();
	memcpy(th, tho, offsetof(struct timehands, th_generation));
	if (new_boottimebin != NULL)
		th->th_boottime = *new_boottimebin;

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
#ifdef FFCLOCK
	ffclock_windup(delta);
#endif
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	while (delta > th->th_counter->tc_frequency) {
		/* Eat complete unadjusted seconds. */
		delta -= th->th_counter->tc_frequency;
		th->th_offset.sec++;
	}
	if ((delta > th->th_counter->tc_frequency / 2) &&
	    (th->th_scale * delta < ((uint64_t)1 << 63))) {
		/* The product th_scale * delta just barely overflows. */
		th->th_offset.sec++;
	}
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &th->th_boottime);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		if (bt.sec != t)
			th->th_boottime.sec += bt.sec - t;
	}
	/* Update the UTC timestamps used by the get*() functions. */
	th->th_bintime = bt;
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
#ifndef __arm__
		if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep++;
		if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep--;
#endif
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		tc_min_ticktock_freq = max(1, timecounter->tc_frequency /
		    (((uint64_t)timecounter->tc_counter_mask + 1) / 3));
#ifdef FFCLOCK
		ffclock_change_tc(th);
#endif
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, that
	 * leaves no suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
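	 *
	 * (Editorial note: the ratio appears in the code below as
	 * (th_adjustment / 1024) * 2199 rather than * 2199 / 512, because
	 * scale is computed against 2^63 here and doubled into th_scale at
	 * the end, which restores the factor of two.)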
	 *
	 */
	scale = (uint64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;
	th->th_large_delta = MIN(((uint64_t)1 << 63) / scale, UINT_MAX);

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	atomic_store_rel_int(&th->th_generation, ogen);

	/* Go live with the new struct timehands. */
#ifdef FFCLOCK
	switch (sysclock_active) {
	case SYSCLOCK_FBCK:
#endif
		time_second = th->th_microtime.tv_sec;
		time_uptime = th->th_offset.sec;
#ifdef FFCLOCK
		break;
	case SYSCLOCK_FFWD:
		time_second = fftimehands->tick_time_lerp.sec;
		time_uptime = fftimehands->tick_time_lerp.sec -
		    ffclock_boottime.sec;
		break;
	}
#endif

	timehands = th;
	timekeep_push_vdso();
}

/* Report or change the active timecounter hardware. */
static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* Record that the tc in use now was specifically chosen. */
	tc_chosen = 1;
	if (strcmp(newname, tc->tc_name) == 0)
		return (0);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;

		/*
		 * The vdso timehands update is deferred until the next
		 * 'tc_windup()'.
		 *
		 * This is prudent given that 'timekeep_push_vdso()' does not
		 * use any locking and that it can be called in hard interrupt
		 * context via 'tc_windup()'.
		 */
		return (0);
	}
	return (EINVAL);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware,
    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 0, 0,
    sysctl_kern_timecounter_hardware, "A",
    "Timecounter hardware selected");

/* Report the available timecounter hardware. */
static int
sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	struct timecounter *tc;
	int error;

	sbuf_new_for_sysctl(&sb, NULL, 0, req);
	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc != timecounters)
			sbuf_putc(&sb, ' ');
		sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality);
	}
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD,
    0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected");

/*
 * RFC 2783 PPS-API implementation.
 */

/*
 * Return true if the driver is aware of the abi version extensions in the
 * pps_state structure, and it supports at least the given abi version number.
 */
static inline int
abi_aware(struct pps_state *pps, int vers)
{

	return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers);
}

static int
pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps)
{
	int err, timo;
	pps_seq_t aseq, cseq;
	struct timeval tv;

	if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
		return (EINVAL);

	/*
	 * If no timeout is requested, immediately return whatever values were
	 * most recently captured.  If timeout seconds is -1, that's a request
	 * to block without a timeout.  WITNESS won't let us sleep forever
	 * without a lock (we really don't need a lock), so just repeatedly
	 * sleep a long time.
	 */
	if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) {
		if (fapi->timeout.tv_sec == -1)
			timo = 0x7fffffff;
		else {
			tv.tv_sec = fapi->timeout.tv_sec;
			tv.tv_usec = fapi->timeout.tv_nsec / 1000;
			timo = tvtohz(&tv);
		}
		aseq = atomic_load_int(&pps->ppsinfo.assert_sequence);
		cseq = atomic_load_int(&pps->ppsinfo.clear_sequence);
		while (aseq == atomic_load_int(&pps->ppsinfo.assert_sequence) &&
		    cseq == atomic_load_int(&pps->ppsinfo.clear_sequence)) {
			if (abi_aware(pps, 1) && pps->driver_mtx != NULL) {
				if (pps->flags & PPSFLAG_MTX_SPIN) {
					err = msleep_spin(pps, pps->driver_mtx,
					    "ppsfch", timo);
				} else {
					err = msleep(pps, pps->driver_mtx, PCATCH,
					    "ppsfch", timo);
				}
			} else {
				err = tsleep(pps, PCATCH, "ppsfch", timo);
			}
			if (err == EWOULDBLOCK) {
				if (fapi->timeout.tv_sec == -1) {
					continue;
				} else {
					return (ETIMEDOUT);
				}
			} else if (err != 0) {
				return (err);
			}
		}
	}

	pps->ppsinfo.current_mode = pps->ppsparam.mode;
	fapi->pps_info_buf = pps->ppsinfo;

	return (0);
}

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef FFCLOCK
	struct pps_fetch_ffc_args *fapi_ffc;
#endif
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
#ifdef FFCLOCK
		/* Ensure only a single clock is selected for ffc timestamp. */
		if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK)
			return (EINVAL);
#endif
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		return (pps_fetch(fapi, pps));
#ifdef FFCLOCK
	case PPS_IOC_FETCH_FFCOUNTER:
		fapi_ffc = (struct pps_fetch_ffc_args *)data;
		if (fapi_ffc->tsformat && fapi_ffc->tsformat !=
		    PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode;
		fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc;
		/* Overwrite timestamps if feedback clock selected. */
		switch (pps->ppsparam.mode & PPS_TSCLK_MASK) {
		case PPS_TSCLK_FBCK:
			fapi_ffc->pps_info_buf_ffc.assert_timestamp =
			    pps->ppsinfo.assert_timestamp;
			fapi_ffc->pps_info_buf_ffc.clear_timestamp =
			    pps->ppsinfo.clear_timestamp;
			break;
		case PPS_TSCLK_FFWD:
			break;
		default:
			break;
		}
		return (0);
#endif /* FFCLOCK */
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) |
		    (pps->kcmode & KCMODE_ABIFLAG);
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOIOCTL);
	}
}

void
pps_init(struct pps_state *pps)
{
	pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
#ifdef FFCLOCK
	pps->ppscap |= PPS_TSCLK_MASK;
#endif
	pps->kcmode &= ~KCMODE_ABIFLAG;
}

void
pps_init_abi(struct pps_state *pps)
{

	pps_init(pps);
	if (pps->driver_abi > 0) {
		pps->kcmode |= KCMODE_ABIFLAG;
		pps->kernel_abi = PPS_ABI_VERSION;
	}
}

void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
	th = timehands;
	pps->capgen = atomic_load_acq_int(&th->th_generation);
	pps->capth = th;
#ifdef FFCLOCK
	pps->capffth = fftimehands;
#endif
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	atomic_thread_fence_acq();
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}
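
/*
 * Driver usage sketch (editorial): a uart/gpio driver wires the PPS API up
 * roughly as follows; pps_capture() runs as early as possible in the
 * interrupt path, pps_event() may be deferred (sc->sc_pps is a
 * hypothetical softc member):
 *
 *	pps_init_abi(&sc->sc_pps);	at attach time
 *	...
 *	pps_capture(&sc->sc_pps);	in the interrupt handler
 *	pps_event(&sc->sc_pps, PPS_CAPTUREASSERT);
 *
 * Userland then collects timestamps with the PPS_IOC_FETCH ioctl, which
 * lands in pps_fetch() above.
 */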

void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff;
	pps_seq_t *pseq;
#ifdef FFCLOCK
	struct timespec *tsp_ffc;
	pps_seq_t *pseq_ffc;
	ffcounter *ffcount;
#endif
#ifdef PPS_SYNC
	int fhard;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
	/* Nothing to do if not currently set to capture this event type. */
	if ((event & pps->ppsparam.mode) == 0)
		return;
	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen !=
	    atomic_load_acq_int(&pps->capth->th_generation))
		return;

	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
#endif
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.assert_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.assert_sequence;
#endif
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
#endif
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.clear_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.clear_sequence;
#endif
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	tcount &= pps->capth->th_counter->tc_counter_mask;
	bt = pps->capth->th_bintime;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	atomic_thread_fence_acq();
	if (pps->capgen != pps->capth->th_generation)
		return;

	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}

#ifdef FFCLOCK
	*ffcount = pps->capffth->tick_ffcount + tcount;
	bt = pps->capffth->tick_time;
	ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt);
	bintime_add(&bt, &pps->capffth->tick_time);
	bintime2timespec(&bt, &ts);
	(*pseq_ffc)++;
	*tsp_ffc = ts;
#endif

#ifdef PPS_SYNC
	if (fhard) {
		uint64_t scale;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
		scale = (uint64_t)1 << 63;
		scale /= pps->capth->th_counter->tc_frequency;
		scale *= 2;
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif

	/* Wakeup anyone sleeping in pps_fetch().  */
	wakeup(pps);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
    "Approximate number of hardclock ticks in a millisecond");

void
tc_ticktock(int cnt)
{
	static int count;

	if (mtx_trylock_spin(&tc_setclock_mtx)) {
		count += cnt;
		if (count >= tc_tick) {
			count = 0;
			tc_windup(NULL);
		}
		mtx_unlock_spin(&tc_setclock_mtx);
	}
}

static void __inline
tc_adjprecision(void)
{
	int t;

	if (tc_timepercentage > 0) {
		t = (99 + tc_timepercentage) / tc_timepercentage;
		tc_precexp = fls(t + (t >> 1)) - 1;
		FREQ2BT(hz / tc_tick, &bt_timethreshold);
		FREQ2BT(hz, &bt_tickthreshold);
		bintime_shift(&bt_timethreshold, tc_precexp);
		bintime_shift(&bt_tickthreshold, tc_precexp);
	} else {
		tc_precexp = 31;
		bt_timethreshold.sec = INT_MAX;
		bt_timethreshold.frac = ~(uint64_t)0;
		bt_tickthreshold = bt_timethreshold;
	}
	sbt_timethreshold = bttosbt(bt_timethreshold);
	sbt_tickthreshold = bttosbt(bt_tickthreshold);
}
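
/*
 * Worked example (editorial): with tc_timepercentage = 5 (5% allowed
 * deviation), t = (99 + 5) / 5 = 20 and tc_precexp = fls(20 + 10) - 1 = 4,
 * so the thresholds above are left-shifted by 4 bits (a factor of 16).
 */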
1895
1896static int
1897sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
1898{
1899	int error, val;
1900
1901	val = tc_timepercentage;
1902	error = sysctl_handle_int(oidp, &val, 0, req);
1903	if (error != 0 || req->newptr == NULL)
1904		return (error);
1905	tc_timepercentage = val;
1906	if (cold)
1907		goto done;
1908	tc_adjprecision();
1909done:
1910	return (0);
1911}
1912
1913/* Set up the requested number of timehands. */
1914static void
1915inittimehands(void *dummy)
1916{
1917	struct timehands *thp;
1918	int i;
1919
1920	TUNABLE_INT_FETCH("kern.timecounter.timehands_count",
1921	    &timehands_count);
1922	if (timehands_count < 1)
1923		timehands_count = 1;
1924	if (timehands_count > nitems(ths))
1925		timehands_count = nitems(ths);
1926	for (i = 1, thp = &ths[0]; i < timehands_count;  thp = &ths[i++])
1927		thp->th_next = &ths[i];
1928	thp->th_next = &ths[0];
1929
1930	TUNABLE_STR_FETCH("kern.timecounter.hardware", tc_from_tunable,
1931	    sizeof(tc_from_tunable));
1932}
1933SYSINIT(timehands, SI_SUB_TUNABLES, SI_ORDER_ANY, inittimehands, NULL);

static void
inittimecounter(void *dummy)
{
	u_int p;
	int tick_rate;

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	tc_adjprecision();
	FREQ2BT(hz, &tick_bt);
	tick_sbt = bttosbt(tick_bt);
	tick_rate = hz / tc_tick;
	FREQ2BT(tick_rate, &tc_tick_bt);
	tc_tick_sbt = bttosbt(tc_tick_bt);
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);

#ifdef FFCLOCK
	ffclock_init();
#endif

	/* Warm up the new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	mtx_lock_spin(&tc_setclock_mtx);
	tc_windup(NULL);
	mtx_unlock_spin(&tc_setclock_mtx);
}

SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
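
/*
 * Worked example (illustrative): with hz = 1000 the code above yields
 * tc_tick = (1000 + 500) / 1000 = 1, so tc_windup() runs on every
 * hardclock tick, p = 1000, and the boot message reads
 * "Timecounters tick every 1.000 msec"; with hz = 100, tc_tick = 1,
 * p = 10000, and the message reads "Timecounters tick every 10.000 msec".
 */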

/* Cpu tick handling -------------------------------------------------*/

static int cpu_tick_variable;
static uint64_t	cpu_tick_frequency;

DPCPU_DEFINE_STATIC(uint64_t, tc_cpu_ticks_base);
DPCPU_DEFINE_STATIC(unsigned, tc_cpu_ticks_last);

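/*
 * Turn the current timecounter into a free-running 64-bit cpu tick
 * counter by accumulating hardware wraparounds per-CPU.  Worked example
 * (illustrative): with a 32-bit counter (tc_counter_mask == 0xffffffff),
 * if the previous read was 0xfffffff0 and the current read is 0x10, the
 * wrap is detected (u < *last) and 2^32 is added to the per-CPU base,
 * so the sum *base + u keeps increasing monotonically on that CPU.
 */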
static uint64_t
tc_cpu_ticks(void)
{
	struct timecounter *tc;
	uint64_t res, *base;
	unsigned u, *last;

	critical_enter();
	base = DPCPU_PTR(tc_cpu_ticks_base);
	last = DPCPU_PTR(tc_cpu_ticks_last);
	tc = timehands->th_counter;
	u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
	if (u < *last)
		*base += (uint64_t)tc->tc_counter_mask + 1;
	*last = u;
	res = u + *base;
	critical_exit();
	return (res);
}

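/*
 * Called from hardclock().  The check below fires at most once per
 * second: (time_uptime & 0xf) == 0 only when the uptime is a multiple
 * of 16, so calibration runs at 16, 32, 48, ... seconds of uptime.
 */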
void
cpu_tick_calibration(void)
{
	static time_t last_calib;

	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
		cpu_tick_calibrate(0);
		last_calib = time_uptime;
	}
}

/*
 * This function gets called every 16 seconds on only one designated
 * CPU in the system from hardclock() via cpu_tick_calibration().
 *
 * Whenever the real time clock is stepped we get called with reset=1
 * to make sure we handle suspend/resume and similar events correctly.
 */

static void
cpu_tick_calibrate(int reset)
{
	static uint64_t c_last;
	uint64_t c_this, c_delta;
	static struct bintime t_last;
	struct bintime t_this, t_delta;
	uint32_t divi;

	if (reset) {
		/* The clock was stepped, abort & reset. */
		t_last.sec = 0;
		return;
	}

	/* We don't calibrate fixed rate cputicks. */
	if (!cpu_tick_variable)
		return;

	getbinuptime(&t_this);
	c_this = cpu_ticks();
	if (t_last.sec != 0) {
		c_delta = c_this - c_last;
		t_delta = t_this;
		bintime_sub(&t_delta, &t_last);
		/*
		 * Headroom:
		 * 	2^(64-20) / 16[s] =
		 * 	2^(44) / 16[s] =
		 * 	17.592.186.044.416 / 16 =
		 * 	1.099.511.627.776 [Hz]
		 */
		divi = t_delta.sec << 20;
		divi |= t_delta.frac >> (64 - 20);
		c_delta <<= 20;
		c_delta /= divi;
		if (c_delta > cpu_tick_frequency) {
			if (0 && bootverbose)
				printf("cpu_tick increased to %ju Hz\n",
				    c_delta);
			cpu_tick_frequency = c_delta;
		}
	}
	c_last = c_this;
	t_last = t_this;
}
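
/*
 * Worked example (illustrative): at a true rate of 1 GHz over the usual
 * 16 second calibration interval, c_delta = 16 * 10^9 and
 * divi = 16 << 20, so (c_delta << 20) / divi = 16 * 10^9 / 16 = 10^9 Hz.
 * The 20-bit fixed point format preserves the sub-second part of
 * t_delta while, per the headroom computation above, supporting rates
 * up to about 1.1 THz before (c_delta << 20) could overflow.
 */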

void
set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
{

	if (func == NULL) {
		cpu_ticks = tc_cpu_ticks;
	} else {
		cpu_tick_frequency = freq;
		cpu_tick_variable = var;
		cpu_ticks = func;
	}
}
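
/*
 * Usage sketch (hypothetical driver): an MD clock driver with an
 * invariant 64-bit counter could register it as the cpu tick source:
 *
 *	set_cputicker(acme_cnt_read, acme_cnt_freq, 0);
 *
 * where "acme_cnt_read" and "acme_cnt_freq" are made-up names.  Passing
 * var != 0 instead marks the rate as variable, so cpu_tick_calibrate()
 * keeps re-estimating cpu_tick_frequency; passing func == NULL falls
 * back to the timecounter-based tc_cpu_ticks().
 */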

uint64_t
cpu_tickrate(void)
{

	if (cpu_ticks == tc_cpu_ticks)
		return (tc_getfrequency());
	return (cpu_tick_frequency);
}

/*
 * We need to be slightly careful converting cputicks to microseconds.
 * There is plenty of margin in 64 bits of microseconds (half a million
 * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
 * before divide conversion (to retain precision) we find that the
 * margin shrinks to 1.5 hours (one millionth of 146y).
 * With a three-prong approach we never lose significant bits, no
 * matter what the cputick rate and the length of the time interval are.
 */

uint64_t
cputick2usec(uint64_t tick)
{

	if (tick > 18446744073709551LL)		/* floor(2^64 / 1000) */
		return (tick / (cpu_tickrate() / 1000000LL));
	else if (tick > 18446744073709LL)	/* floor(2^64 / 1000000) */
		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
	else
		return ((tick * 1000000LL) / cpu_tickrate());
}
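
/*
 * Worked example (illustrative): at cpu_tickrate() == 4 GHz, a count of
 * 10^12 ticks (250 s) takes the last branch, since 10^12 * 1000000
 * still fits in 64 bits; 10^15 ticks takes the middle branch, trading
 * three digits of divisor precision for headroom; and anything above
 * ~1.8 * 10^16 ticks (several weeks at 4 GHz) takes the first branch,
 * where the divisor cpu_tickrate() / 1000000 is exact for rates that
 * are whole multiples of 1 MHz.
 */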

cpu_tick_f	*cpu_ticks = tc_cpu_ticks;

static int vdso_th_enable = 1;
static int
sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
{
	int old_vdso_th_enable, error;

	old_vdso_th_enable = vdso_th_enable;
	error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
	if (error != 0)
		return (error);
	vdso_th_enable = old_vdso_th_enable;
	return (0);
}
SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");
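
/*
 * Usage sketch:
 *
 *	sysctl kern.timecounter.fast_gettime=0
 *
 * makes tc_fill_vdso_timehands() below report the timehands as
 * disabled, so userland gettimeofday(2)/clock_gettime(2) fall back to
 * the syscall path instead of the shared-page fast path.
 */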

uint32_t
tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	vdso_th->th_scale = th->th_scale;
	vdso_th->th_offset_count = th->th_offset_count;
	vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th->th_offset = th->th_offset;
	vdso_th->th_boottime = th->th_boottime;
	if (th->th_counter->tc_fill_vdso_timehands != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands(vdso_th,
		    th->th_counter);
	} else
		enabled = 0;
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}
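
/*
 * Sketch of the consumer side (illustrative, not the exact libc code):
 * given the snapshot above, userland reconstructs the uptime as
 *
 *	delta = (counter_read() - th_offset_count) & th_counter_mask;
 *	bintime = th_offset + th_scale * delta;
 *
 * and adds th_boottime to obtain the wall-clock time, retrying if the
 * shared page generation changes mid-computation.
 */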

#ifdef COMPAT_FREEBSD32
uint32_t
tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	*(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale;
	vdso_th32->th_offset_count = th->th_offset_count;
	vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th32->th_offset.sec = th->th_offset.sec;
	*(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
	vdso_th32->th_boottime.sec = th->th_boottime.sec;
	*(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac;
	if (th->th_counter->tc_fill_vdso_timehands32 != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands32(vdso_th32,
		    th->th_counter);
	} else
		enabled = 0;
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}
#endif

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND(timecounter, db_show_timecounter)
{
	struct timehands *th;
	struct timecounter *tc;
	u_int val1, val2;

	th = timehands;
	tc = th->th_counter;
	val1 = tc->tc_get_timecount(tc);
	__compiler_membar();
	val2 = tc->tc_get_timecount(tc);

	db_printf("timecounter %p %s\n", tc, tc->tc_name);
	db_printf("  mask %#x freq %ju qual %d flags %#x priv %p\n",
	    tc->tc_counter_mask, (uintmax_t)tc->tc_frequency, tc->tc_quality,
	    tc->tc_flags, tc->tc_priv);
	db_printf("  val %#x %#x\n", val1, val2);
	db_printf("timehands adj %#jx scale %#jx ldelta %d off_cnt %d gen %d\n",
	    (uintmax_t)th->th_adjustment, (uintmax_t)th->th_scale,
	    th->th_large_delta, th->th_offset_count, th->th_generation);
	db_printf("  offset %jd %jd boottime %jd %jd\n",
	    (intmax_t)th->th_offset.sec, (uintmax_t)th->th_offset.frac,
	    (intmax_t)th->th_boottime.sec, (uintmax_t)th->th_boottime.frac);
}
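
/*
 * Usage: at the ddb(4) prompt, "show timecounter" dumps the current
 * timecounter and timehands state using the format strings above; the
 * two counter reads separated by a compiler barrier show whether the
 * hardware counter is actually advancing.
 */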
#endif