kern_tc.c revision 237474
1/*-
2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
8 *
9 * Copyright (c) 2011 The FreeBSD Foundation
10 * All rights reserved.
11 *
12 * Portions of this software were developed by Julien Ridoux at the University
13 * of Melbourne under sponsorship from the FreeBSD Foundation.
14 */
15
16#include <sys/cdefs.h>
17__FBSDID("$FreeBSD: head/sys/kern/kern_tc.c 237474 2012-06-23 09:33:06Z kib $");
18
19#include "opt_compat.h"
20#include "opt_ntp.h"
21#include "opt_ffclock.h"
22
23#include <sys/param.h>
24#include <sys/kernel.h>
25#ifdef FFCLOCK
26#include <sys/lock.h>
27#include <sys/mutex.h>
28#endif
29#include <sys/sysctl.h>
30#include <sys/syslog.h>
31#include <sys/systm.h>
32#include <sys/timeffc.h>
33#include <sys/timepps.h>
34#include <sys/timetc.h>
35#include <sys/timex.h>
36#include <sys/vdso.h>
37
38/*
39 * A large step happens on boot.  This constant detects such steps.
40 * It is relatively small so that ntp_update_second gets called enough
41 * in the typical 'missed a couple of seconds' case, but doesn't loop
42 * forever when the time step is large.
43 */
44#define LARGE_STEP	200
45
46/*
47 * Implement a dummy timecounter which we can use until we get a real one
48 * in the air.  This allows the console and other early stuff to use
49 * time services.
50 */
51
52static u_int
53dummy_get_timecount(struct timecounter *tc)
54{
55	static u_int now;
56
57	return (++now);
58}
59
60static struct timecounter dummy_timecounter = {
61	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
62};
63
/*
 * One snapshot of the timekeeping state.  Readers pick up the current
 * snapshot through the global "timehands" pointer and validate what they
 * read against th_generation.  The snapshots form a ring through th_next.
 */
struct timehands {
	/* These fields must be initialized by the driver. */
	/* Counter whose readings this snapshot is based on (see tc_delta()). */
	struct timecounter	*th_counter;
	/* NOTE(review): not referenced in the visible part of this file. */
	int64_t			th_adjustment;
	/* Multiplied by a counter delta to get a bintime fraction. */
	uint64_t		th_scale;
	/* Counter reading that th_offset corresponds to. */
	u_int	 		th_offset_count;
	/* Uptime at th_offset_count; boottimebin is added to get UTC. */
	struct bintime		th_offset;
	/* Precomputed values handed out by getmicrotime()/getnanotime(). */
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	/* Fields not to be copied in tc_windup start with th_generation. */
	/* Readers retry while this is 0 or changes during their read. */
	volatile u_int		th_generation;
	/* Next snapshot in the ring. */
	struct timehands	*th_next;
};
77
78static struct timehands th0;
79static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
80static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
81static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
82static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
83static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
84static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
85static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
86static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
87static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
88static struct timehands th0 = {
89	&dummy_timecounter,
90	0,
91	(uint64_t)-1 / 1000000,
92	0,
93	{1, 0},
94	{0, 0},
95	{0, 0},
96	1,
97	&th1
98};
99
/* Snapshot currently handed out to readers; starts at th0. */
static struct timehands *volatile timehands = &th0;
/* Timecounter selected for use; tc_init() may replace it. */
struct timecounter *timecounter = &dummy_timecounter;
/* Head of the list of registered timecounters, linked through tc_next. */
static struct timecounter *timecounters = &dummy_timecounter;

int tc_min_ticktock_freq = 1;

time_t time_second = 1;
time_t time_uptime = 1;

/* Boot time; readers add boottimebin to uptime to obtain UTC. */
struct bintime boottimebin;
/* The same boot time as a timeval, exported by the sysctl below. */
struct timeval boottime;
static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD,
    NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime");

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
/* Parent node for the per-counter subtrees created in tc_init(). */
static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, "");

/* When non-zero, tc_setclock() logs every step of the clock. */
static int timestepwarnings;
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "Log time steps");

static void tc_windup(void);
static void cpu_tick_calibrate(int);
124
125static int
126sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
127{
128#ifndef __mips__
129#ifdef SCTL_MASK32
130	int tv[2];
131
132	if (req->flags & SCTL_MASK32) {
133		tv[0] = boottime.tv_sec;
134		tv[1] = boottime.tv_usec;
135		return SYSCTL_OUT(req, tv, sizeof(tv));
136	} else
137#endif
138#endif
139		return SYSCTL_OUT(req, &boottime, sizeof(boottime));
140}
141
142static int
143sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS)
144{
145	u_int ncount;
146	struct timecounter *tc = arg1;
147
148	ncount = tc->tc_get_timecount(tc);
149	return sysctl_handle_int(oidp, &ncount, 0, req);
150}
151
152static int
153sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS)
154{
155	uint64_t freq;
156	struct timecounter *tc = arg1;
157
158	freq = tc->tc_frequency;
159	return sysctl_handle_64(oidp, &freq, 0, req);
160}
161
162/*
163 * Return the difference between the timehands' counter value now and what
164 * was when we copied it to the timehands' offset_count.
165 */
166static __inline u_int
167tc_delta(struct timehands *th)
168{
169	struct timecounter *tc;
170
171	tc = th->th_counter;
172	return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
173	    tc->tc_counter_mask);
174}
175
176/*
177 * Functions for reading the time.  We have to loop until we are sure that
178 * the timehands that we operated on was not updated under our feet.  See
179 * the comment in <sys/time.h> for a description of these 12 functions.
180 */
181
182#ifdef FFCLOCK
183void
184fbclock_binuptime(struct bintime *bt)
185{
186	struct timehands *th;
187	unsigned int gen;
188
189	do {
190		th = timehands;
191		gen = th->th_generation;
192		*bt = th->th_offset;
193		bintime_addx(bt, th->th_scale * tc_delta(th));
194	} while (gen == 0 || gen != th->th_generation);
195}
196
197void
198fbclock_nanouptime(struct timespec *tsp)
199{
200	struct bintime bt;
201
202	fbclock_binuptime(&bt);
203	bintime2timespec(&bt, tsp);
204}
205
206void
207fbclock_microuptime(struct timeval *tvp)
208{
209	struct bintime bt;
210
211	fbclock_binuptime(&bt);
212	bintime2timeval(&bt, tvp);
213}
214
215void
216fbclock_bintime(struct bintime *bt)
217{
218
219	fbclock_binuptime(bt);
220	bintime_add(bt, &boottimebin);
221}
222
223void
224fbclock_nanotime(struct timespec *tsp)
225{
226	struct bintime bt;
227
228	fbclock_bintime(&bt);
229	bintime2timespec(&bt, tsp);
230}
231
232void
233fbclock_microtime(struct timeval *tvp)
234{
235	struct bintime bt;
236
237	fbclock_bintime(&bt);
238	bintime2timeval(&bt, tvp);
239}
240
241void
242fbclock_getbinuptime(struct bintime *bt)
243{
244	struct timehands *th;
245	unsigned int gen;
246
247	do {
248		th = timehands;
249		gen = th->th_generation;
250		*bt = th->th_offset;
251	} while (gen == 0 || gen != th->th_generation);
252}
253
254void
255fbclock_getnanouptime(struct timespec *tsp)
256{
257	struct timehands *th;
258	unsigned int gen;
259
260	do {
261		th = timehands;
262		gen = th->th_generation;
263		bintime2timespec(&th->th_offset, tsp);
264	} while (gen == 0 || gen != th->th_generation);
265}
266
267void
268fbclock_getmicrouptime(struct timeval *tvp)
269{
270	struct timehands *th;
271	unsigned int gen;
272
273	do {
274		th = timehands;
275		gen = th->th_generation;
276		bintime2timeval(&th->th_offset, tvp);
277	} while (gen == 0 || gen != th->th_generation);
278}
279
280void
281fbclock_getbintime(struct bintime *bt)
282{
283	struct timehands *th;
284	unsigned int gen;
285
286	do {
287		th = timehands;
288		gen = th->th_generation;
289		*bt = th->th_offset;
290	} while (gen == 0 || gen != th->th_generation);
291	bintime_add(bt, &boottimebin);
292}
293
294void
295fbclock_getnanotime(struct timespec *tsp)
296{
297	struct timehands *th;
298	unsigned int gen;
299
300	do {
301		th = timehands;
302		gen = th->th_generation;
303		*tsp = th->th_nanotime;
304	} while (gen == 0 || gen != th->th_generation);
305}
306
307void
308fbclock_getmicrotime(struct timeval *tvp)
309{
310	struct timehands *th;
311	unsigned int gen;
312
313	do {
314		th = timehands;
315		gen = th->th_generation;
316		*tvp = th->th_microtime;
317	} while (gen == 0 || gen != th->th_generation);
318}
319#else /* !FFCLOCK */
320void
321binuptime(struct bintime *bt)
322{
323	struct timehands *th;
324	u_int gen;
325
326	do {
327		th = timehands;
328		gen = th->th_generation;
329		*bt = th->th_offset;
330		bintime_addx(bt, th->th_scale * tc_delta(th));
331	} while (gen == 0 || gen != th->th_generation);
332}
333
334void
335nanouptime(struct timespec *tsp)
336{
337	struct bintime bt;
338
339	binuptime(&bt);
340	bintime2timespec(&bt, tsp);
341}
342
343void
344microuptime(struct timeval *tvp)
345{
346	struct bintime bt;
347
348	binuptime(&bt);
349	bintime2timeval(&bt, tvp);
350}
351
352void
353bintime(struct bintime *bt)
354{
355
356	binuptime(bt);
357	bintime_add(bt, &boottimebin);
358}
359
360void
361nanotime(struct timespec *tsp)
362{
363	struct bintime bt;
364
365	bintime(&bt);
366	bintime2timespec(&bt, tsp);
367}
368
369void
370microtime(struct timeval *tvp)
371{
372	struct bintime bt;
373
374	bintime(&bt);
375	bintime2timeval(&bt, tvp);
376}
377
378void
379getbinuptime(struct bintime *bt)
380{
381	struct timehands *th;
382	u_int gen;
383
384	do {
385		th = timehands;
386		gen = th->th_generation;
387		*bt = th->th_offset;
388	} while (gen == 0 || gen != th->th_generation);
389}
390
391void
392getnanouptime(struct timespec *tsp)
393{
394	struct timehands *th;
395	u_int gen;
396
397	do {
398		th = timehands;
399		gen = th->th_generation;
400		bintime2timespec(&th->th_offset, tsp);
401	} while (gen == 0 || gen != th->th_generation);
402}
403
404void
405getmicrouptime(struct timeval *tvp)
406{
407	struct timehands *th;
408	u_int gen;
409
410	do {
411		th = timehands;
412		gen = th->th_generation;
413		bintime2timeval(&th->th_offset, tvp);
414	} while (gen == 0 || gen != th->th_generation);
415}
416
417void
418getbintime(struct bintime *bt)
419{
420	struct timehands *th;
421	u_int gen;
422
423	do {
424		th = timehands;
425		gen = th->th_generation;
426		*bt = th->th_offset;
427	} while (gen == 0 || gen != th->th_generation);
428	bintime_add(bt, &boottimebin);
429}
430
431void
432getnanotime(struct timespec *tsp)
433{
434	struct timehands *th;
435	u_int gen;
436
437	do {
438		th = timehands;
439		gen = th->th_generation;
440		*tsp = th->th_nanotime;
441	} while (gen == 0 || gen != th->th_generation);
442}
443
444void
445getmicrotime(struct timeval *tvp)
446{
447	struct timehands *th;
448	u_int gen;
449
450	do {
451		th = timehands;
452		gen = th->th_generation;
453		*tvp = th->th_microtime;
454	} while (gen == 0 || gen != th->th_generation);
455}
456#endif /* FFCLOCK */
457
458#ifdef FFCLOCK
459/*
460 * Support for feed-forward synchronization algorithms. This is heavily inspired
461 * by the timehands mechanism but kept independent from it. *_windup() functions
462 * have some connection to avoid accessing the timecounter hardware more than
463 * necessary.
464 */
465
/* Feed-forward clock estimates kept updated by the synchronization daemon. */
struct ffclock_estimate ffclock_estimate;
struct bintime ffclock_boottime;	/* Feed-forward boot time estimate. */
uint32_t ffclock_status;		/* Feed-forward clock status. */
/*
 * Update counter consumed by ffclock_windup(): 0 means nothing pending,
 * a positive value means ffclock_estimate should be made current, and
 * INT8_MAX marks an estimate installed by ffclock_reset_clock().
 * ffclock_change_tc() decrements it.
 */
int8_t ffclock_updated;			/* New estimates are available. */
struct mtx ffclock_mtx;			/* Mutex on ffclock_estimate. */

/*
 * One snapshot of the feed-forward clock state, the counterpart of struct
 * timehands.  Readers validate what they read against gen.
 */
struct fftimehands {
	/* Clock estimate in force for this snapshot. */
	struct ffclock_estimate	cest;
	/* Time at tick_ffcount according to cest. */
	struct bintime		tick_time;
	/* Linearly interpolated time at tick_ffcount. */
	struct bintime		tick_time_lerp;
	/* Feed-forward counter value at the tick this snapshot describes. */
	ffcounter		tick_ffcount;
	/* Counter period used when advancing tick_time_lerp. */
	uint64_t		period_lerp;
	/* Generation; set to 0 while the snapshot is being rewritten. */
	volatile uint8_t	gen;
	/* Next snapshot in the ring. */
	struct fftimehands	*next;
};
482
/* Number of elements in a true array (not a pointer). */
#define	NUM_ELEMENTS(x) (sizeof(x) / sizeof((x)[0]))
484
/* Storage for the fftimehands ring; the links are set up by ffclock_init(). */
static struct fftimehands ffth[10];
/* Snapshot currently handed out to readers. */
static struct fftimehands *volatile fftimehands = ffth;
487
488static void
489ffclock_init(void)
490{
491	struct fftimehands *cur;
492	struct fftimehands *last;
493
494	memset(ffth, 0, sizeof(ffth));
495
496	last = ffth + NUM_ELEMENTS(ffth) - 1;
497	for (cur = ffth; cur < last; cur++)
498		cur->next = cur + 1;
499	last->next = ffth;
500
501	ffclock_updated = 0;
502	ffclock_status = FFCLOCK_STA_UNSYNC;
503	mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF);
504}
505
506/*
507 * Reset the feed-forward clock estimates. Called from inittodr() to get things
508 * kick started and uses the timecounter nominal frequency as a first period
509 * estimate. Note: this function may be called several time just after boot.
510 * Note: this is the only function that sets the value of boot time for the
511 * monotonic (i.e. uptime) version of the feed-forward clock.
512 */
513void
514ffclock_reset_clock(struct timespec *ts)
515{
516	struct timecounter *tc;
517	struct ffclock_estimate cest;
518
519	tc = timehands->th_counter;
520	memset(&cest, 0, sizeof(struct ffclock_estimate));
521
522	timespec2bintime(ts, &ffclock_boottime);
523	timespec2bintime(ts, &(cest.update_time));
524	ffclock_read_counter(&cest.update_ffcount);
525	cest.leapsec_next = 0;
526	cest.period = ((1ULL << 63) / tc->tc_frequency) << 1;
527	cest.errb_abs = 0;
528	cest.errb_rate = 0;
529	cest.status = FFCLOCK_STA_UNSYNC;
530	cest.leapsec_total = 0;
531	cest.leapsec = 0;
532
533	mtx_lock(&ffclock_mtx);
534	bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
535	ffclock_updated = INT8_MAX;
536	mtx_unlock(&ffclock_mtx);
537
538	printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name,
539	    (unsigned long long)tc->tc_frequency, (long)ts->tv_sec,
540	    (unsigned long)ts->tv_nsec);
541}
542
543/*
544 * Sub-routine to convert a time interval measured in RAW counter units to time
545 * in seconds stored in bintime format.
546 * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be
547 * larger than the max value of u_int (on 32 bit architecture). Loop to consume
548 * extra cycles.
549 */
550static void
551ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt)
552{
553	struct bintime bt2;
554	ffcounter delta, delta_max;
555
556	delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1;
557	bintime_clear(bt);
558	do {
559		if (ffdelta > delta_max)
560			delta = delta_max;
561		else
562			delta = ffdelta;
563		bt2.sec = 0;
564		bt2.frac = period;
565		bintime_mul(&bt2, (unsigned int)delta);
566		bintime_add(bt, &bt2);
567		ffdelta -= delta;
568	} while (ffdelta > 0);
569}
570
571/*
572 * Update the fftimehands.
573 * Push the tick ffcount and time(s) forward based on current clock estimate.
574 * The conversion from ffcounter to bintime relies on the difference clock
575 * principle, whose accuracy relies on computing small time intervals. If a new
576 * clock estimate has been passed by the synchronisation daemon, make it
577 * current, and compute the linear interpolation for monotonic time if needed.
578 */
579static void
580ffclock_windup(unsigned int delta)
581{
582	struct ffclock_estimate *cest;
583	struct fftimehands *ffth;
584	struct bintime bt, gap_lerp;
585	ffcounter ffdelta;
586	uint64_t frac;
587	unsigned int polling;
588	uint8_t forward_jump, ogen;
589
590	/*
591	 * Pick the next timehand, copy current ffclock estimates and move tick
592	 * times and counter forward.
593	 */
594	forward_jump = 0;
595	ffth = fftimehands->next;
596	ogen = ffth->gen;
597	ffth->gen = 0;
598	cest = &ffth->cest;
599	bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate));
600	ffdelta = (ffcounter)delta;
601	ffth->period_lerp = fftimehands->period_lerp;
602
603	ffth->tick_time = fftimehands->tick_time;
604	ffclock_convert_delta(ffdelta, cest->period, &bt);
605	bintime_add(&ffth->tick_time, &bt);
606
607	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
608	ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt);
609	bintime_add(&ffth->tick_time_lerp, &bt);
610
611	ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta;
612
613	/*
614	 * Assess the status of the clock, if the last update is too old, it is
615	 * likely the synchronisation daemon is dead and the clock is free
616	 * running.
617	 */
618	if (ffclock_updated == 0) {
619		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
620		ffclock_convert_delta(ffdelta, cest->period, &bt);
621		if (bt.sec > 2 * FFCLOCK_SKM_SCALE)
622			ffclock_status |= FFCLOCK_STA_UNSYNC;
623	}
624
625	/*
626	 * If available, grab updated clock estimates and make them current.
627	 * Recompute time at this tick using the updated estimates. The clock
628	 * estimates passed the feed-forward synchronisation daemon may result
629	 * in time conversion that is not monotonically increasing (just after
630	 * the update). time_lerp is a particular linear interpolation over the
631	 * synchronisation algo polling period that ensures monotonicity for the
632	 * clock ids requesting it.
633	 */
634	if (ffclock_updated > 0) {
635		bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate));
636		ffdelta = ffth->tick_ffcount - cest->update_ffcount;
637		ffth->tick_time = cest->update_time;
638		ffclock_convert_delta(ffdelta, cest->period, &bt);
639		bintime_add(&ffth->tick_time, &bt);
640
641		/* ffclock_reset sets ffclock_updated to INT8_MAX */
642		if (ffclock_updated == INT8_MAX)
643			ffth->tick_time_lerp = ffth->tick_time;
644
645		if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >))
646			forward_jump = 1;
647		else
648			forward_jump = 0;
649
650		bintime_clear(&gap_lerp);
651		if (forward_jump) {
652			gap_lerp = ffth->tick_time;
653			bintime_sub(&gap_lerp, &ffth->tick_time_lerp);
654		} else {
655			gap_lerp = ffth->tick_time_lerp;
656			bintime_sub(&gap_lerp, &ffth->tick_time);
657		}
658
659		/*
660		 * The reset from the RTC clock may be far from accurate, and
661		 * reducing the gap between real time and interpolated time
662		 * could take a very long time if the interpolated clock insists
663		 * on strict monotonicity. The clock is reset under very strict
664		 * conditions (kernel time is known to be wrong and
665		 * synchronization daemon has been restarted recently.
666		 * ffclock_boottime absorbs the jump to ensure boot time is
667		 * correct and uptime functions stay consistent.
668		 */
669		if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) &&
670		    ((cest->status & FFCLOCK_STA_UNSYNC) == 0) &&
671		    ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) {
672			if (forward_jump)
673				bintime_add(&ffclock_boottime, &gap_lerp);
674			else
675				bintime_sub(&ffclock_boottime, &gap_lerp);
676			ffth->tick_time_lerp = ffth->tick_time;
677			bintime_clear(&gap_lerp);
678		}
679
680		ffclock_status = cest->status;
681		ffth->period_lerp = cest->period;
682
683		/*
684		 * Compute corrected period used for the linear interpolation of
685		 * time. The rate of linear interpolation is capped to 5000PPM
686		 * (5ms/s).
687		 */
688		if (bintime_isset(&gap_lerp)) {
689			ffdelta = cest->update_ffcount;
690			ffdelta -= fftimehands->cest.update_ffcount;
691			ffclock_convert_delta(ffdelta, cest->period, &bt);
692			polling = bt.sec;
693			bt.sec = 0;
694			bt.frac = 5000000 * (uint64_t)18446744073LL;
695			bintime_mul(&bt, polling);
696			if (bintime_cmp(&gap_lerp, &bt, >))
697				gap_lerp = bt;
698
699			/* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */
700			frac = 0;
701			if (gap_lerp.sec > 0) {
702				frac -= 1;
703				frac /= ffdelta / gap_lerp.sec;
704			}
705			frac += gap_lerp.frac / ffdelta;
706
707			if (forward_jump)
708				ffth->period_lerp += frac;
709			else
710				ffth->period_lerp -= frac;
711		}
712
713		ffclock_updated = 0;
714	}
715	if (++ogen == 0)
716		ogen = 1;
717	ffth->gen = ogen;
718	fftimehands = ffth;
719}
720
721/*
722 * Adjust the fftimehands when the timecounter is changed. Stating the obvious,
723 * the old and new hardware counter cannot be read simultaneously. tc_windup()
724 * does read the two counters 'back to back', but a few cycles are effectively
725 * lost, and not accumulated in tick_ffcount. This is a fairly radical
726 * operation for a feed-forward synchronization daemon, and it is its job to not
727 * pushing irrelevant data to the kernel. Because there is no locking here,
728 * simply force to ignore pending or next update to give daemon a chance to
729 * realize the counter has changed.
730 */
731static void
732ffclock_change_tc(struct timehands *th)
733{
734	struct fftimehands *ffth;
735	struct ffclock_estimate *cest;
736	struct timecounter *tc;
737	uint8_t ogen;
738
739	tc = th->th_counter;
740	ffth = fftimehands->next;
741	ogen = ffth->gen;
742	ffth->gen = 0;
743
744	cest = &ffth->cest;
745	bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate));
746	cest->period = ((1ULL << 63) / tc->tc_frequency ) << 1;
747	cest->errb_abs = 0;
748	cest->errb_rate = 0;
749	cest->status |= FFCLOCK_STA_UNSYNC;
750
751	ffth->tick_ffcount = fftimehands->tick_ffcount;
752	ffth->tick_time_lerp = fftimehands->tick_time_lerp;
753	ffth->tick_time = fftimehands->tick_time;
754	ffth->period_lerp = cest->period;
755
756	/* Do not lock but ignore next update from synchronization daemon. */
757	ffclock_updated--;
758
759	if (++ogen == 0)
760		ogen = 1;
761	ffth->gen = ogen;
762	fftimehands = ffth;
763}
764
765/*
766 * Retrieve feed-forward counter and time of last kernel tick.
767 */
768void
769ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags)
770{
771	struct fftimehands *ffth;
772	uint8_t gen;
773
774	/*
775	 * No locking but check generation has not changed. Also need to make
776	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
777	 */
778	do {
779		ffth = fftimehands;
780		gen = ffth->gen;
781		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP)
782			*bt = ffth->tick_time_lerp;
783		else
784			*bt = ffth->tick_time;
785		*ffcount = ffth->tick_ffcount;
786	} while (gen == 0 || gen != ffth->gen);
787}
788
789/*
790 * Absolute clock conversion. Low level function to convert ffcounter to
791 * bintime. The ffcounter is converted using the current ffclock period estimate
792 * or the "interpolated period" to ensure monotonicity.
793 * NOTE: this conversion may have been deferred, and the clock updated since the
794 * hardware counter has been read.
795 */
796void
797ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags)
798{
799	struct fftimehands *ffth;
800	struct bintime bt2;
801	ffcounter ffdelta;
802	uint8_t gen;
803
804	/*
805	 * No locking but check generation has not changed. Also need to make
806	 * sure ffdelta is positive, i.e. ffcount > tick_ffcount.
807	 */
808	do {
809		ffth = fftimehands;
810		gen = ffth->gen;
811		if (ffcount > ffth->tick_ffcount)
812			ffdelta = ffcount - ffth->tick_ffcount;
813		else
814			ffdelta = ffth->tick_ffcount - ffcount;
815
816		if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) {
817			*bt = ffth->tick_time_lerp;
818			ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2);
819		} else {
820			*bt = ffth->tick_time;
821			ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2);
822		}
823
824		if (ffcount > ffth->tick_ffcount)
825			bintime_add(bt, &bt2);
826		else
827			bintime_sub(bt, &bt2);
828	} while (gen == 0 || gen != ffth->gen);
829}
830
831/*
832 * Difference clock conversion.
833 * Low level function to Convert a time interval measured in RAW counter units
834 * into bintime. The difference clock allows measuring small intervals much more
835 * reliably than the absolute clock.
836 */
837void
838ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt)
839{
840	struct fftimehands *ffth;
841	uint8_t gen;
842
843	/* No locking but check generation has not changed. */
844	do {
845		ffth = fftimehands;
846		gen = ffth->gen;
847		ffclock_convert_delta(ffdelta, ffth->cest.period, bt);
848	} while (gen == 0 || gen != ffth->gen);
849}
850
851/*
852 * Access to current ffcounter value.
853 */
854void
855ffclock_read_counter(ffcounter *ffcount)
856{
857	struct timehands *th;
858	struct fftimehands *ffth;
859	unsigned int gen, delta;
860
861	/*
862	 * ffclock_windup() called from tc_windup(), safe to rely on
863	 * th->th_generation only, for correct delta and ffcounter.
864	 */
865	do {
866		th = timehands;
867		gen = th->th_generation;
868		ffth = fftimehands;
869		delta = tc_delta(th);
870		*ffcount = ffth->tick_ffcount;
871	} while (gen == 0 || gen != th->th_generation);
872
873	*ffcount += delta;
874}
875
876void
877binuptime(struct bintime *bt)
878{
879
880	binuptime_fromclock(bt, sysclock_active);
881}
882
883void
884nanouptime(struct timespec *tsp)
885{
886
887	nanouptime_fromclock(tsp, sysclock_active);
888}
889
890void
891microuptime(struct timeval *tvp)
892{
893
894	microuptime_fromclock(tvp, sysclock_active);
895}
896
897void
898bintime(struct bintime *bt)
899{
900
901	bintime_fromclock(bt, sysclock_active);
902}
903
904void
905nanotime(struct timespec *tsp)
906{
907
908	nanotime_fromclock(tsp, sysclock_active);
909}
910
911void
912microtime(struct timeval *tvp)
913{
914
915	microtime_fromclock(tvp, sysclock_active);
916}
917
918void
919getbinuptime(struct bintime *bt)
920{
921
922	getbinuptime_fromclock(bt, sysclock_active);
923}
924
925void
926getnanouptime(struct timespec *tsp)
927{
928
929	getnanouptime_fromclock(tsp, sysclock_active);
930}
931
932void
933getmicrouptime(struct timeval *tvp)
934{
935
936	getmicrouptime_fromclock(tvp, sysclock_active);
937}
938
939void
940getbintime(struct bintime *bt)
941{
942
943	getbintime_fromclock(bt, sysclock_active);
944}
945
946void
947getnanotime(struct timespec *tsp)
948{
949
950	getnanotime_fromclock(tsp, sysclock_active);
951}
952
953void
954getmicrotime(struct timeval *tvp)
955{
956
957	getmicrouptime_fromclock(tvp, sysclock_active);
958}
959
960#endif /* FFCLOCK */
961
/*
 * System clock currently providing time to the system. Modifiable via sysctl
 * when the FFCLOCK option is defined.  Defaults to the feedback clock.
 */
int sysclock_active = SYSCLOCK_FBCK;

/*
 * Internal NTP status and error estimates.  Defined outside this file;
 * sysclock_getsnapshot() reads them to fill in the feedback clock info.
 */
extern int time_status;
extern long time_esterror;
971
972/*
973 * Take a snapshot of sysclock data which can be used to compare system clocks
974 * and generate timestamps after the fact.
975 */
976void
977sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast)
978{
979	struct fbclock_info *fbi;
980	struct timehands *th;
981	struct bintime bt;
982	unsigned int delta, gen;
983#ifdef FFCLOCK
984	ffcounter ffcount;
985	struct fftimehands *ffth;
986	struct ffclock_info *ffi;
987	struct ffclock_estimate cest;
988
989	ffi = &clock_snap->ff_info;
990#endif
991
992	fbi = &clock_snap->fb_info;
993	delta = 0;
994
995	do {
996		th = timehands;
997		gen = th->th_generation;
998		fbi->th_scale = th->th_scale;
999		fbi->tick_time = th->th_offset;
1000#ifdef FFCLOCK
1001		ffth = fftimehands;
1002		ffi->tick_time = ffth->tick_time_lerp;
1003		ffi->tick_time_lerp = ffth->tick_time_lerp;
1004		ffi->period = ffth->cest.period;
1005		ffi->period_lerp = ffth->period_lerp;
1006		clock_snap->ffcount = ffth->tick_ffcount;
1007		cest = ffth->cest;
1008#endif
1009		if (!fast)
1010			delta = tc_delta(th);
1011	} while (gen == 0 || gen != th->th_generation);
1012
1013	clock_snap->delta = delta;
1014	clock_snap->sysclock_active = sysclock_active;
1015
1016	/* Record feedback clock status and error. */
1017	clock_snap->fb_info.status = time_status;
1018	/* XXX: Very crude estimate of feedback clock error. */
1019	bt.sec = time_esterror / 1000000;
1020	bt.frac = ((time_esterror - bt.sec) * 1000000) *
1021	    (uint64_t)18446744073709ULL;
1022	clock_snap->fb_info.error = bt;
1023
1024#ifdef FFCLOCK
1025	if (!fast)
1026		clock_snap->ffcount += delta;
1027
1028	/* Record feed-forward clock leap second adjustment. */
1029	ffi->leapsec_adjustment = cest.leapsec_total;
1030	if (clock_snap->ffcount > cest.leapsec_next)
1031		ffi->leapsec_adjustment -= cest.leapsec;
1032
1033	/* Record feed-forward clock status and error. */
1034	clock_snap->ff_info.status = cest.status;
1035	ffcount = clock_snap->ffcount - cest.update_ffcount;
1036	ffclock_convert_delta(ffcount, cest.period, &bt);
1037	/* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */
1038	bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL);
1039	/* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */
1040	bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL);
1041	clock_snap->ff_info.error = bt;
1042#endif
1043}
1044
1045/*
1046 * Convert a sysclock snapshot into a struct bintime based on the specified
1047 * clock source and flags.
1048 */
1049int
1050sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt,
1051    int whichclock, uint32_t flags)
1052{
1053#ifdef FFCLOCK
1054	struct bintime bt2;
1055	uint64_t period;
1056#endif
1057
1058	switch (whichclock) {
1059	case SYSCLOCK_FBCK:
1060		*bt = cs->fb_info.tick_time;
1061
1062		/* If snapshot was created with !fast, delta will be >0. */
1063		if (cs->delta > 0)
1064			bintime_addx(bt, cs->fb_info.th_scale * cs->delta);
1065
1066		if ((flags & FBCLOCK_UPTIME) == 0)
1067			bintime_add(bt, &boottimebin);
1068		break;
1069#ifdef FFCLOCK
1070	case SYSCLOCK_FFWD:
1071		if (flags & FFCLOCK_LERP) {
1072			*bt = cs->ff_info.tick_time_lerp;
1073			period = cs->ff_info.period_lerp;
1074		} else {
1075			*bt = cs->ff_info.tick_time;
1076			period = cs->ff_info.period;
1077		}
1078
1079		/* If snapshot was created with !fast, delta will be >0. */
1080		if (cs->delta > 0) {
1081			ffclock_convert_delta(cs->delta, period, &bt2);
1082			bintime_add(bt, &bt2);
1083		}
1084
1085		/* Leap second adjustment. */
1086		if (flags & FFCLOCK_LEAPSEC)
1087			bt->sec -= cs->ff_info.leapsec_adjustment;
1088
1089		/* Boot time adjustment, for uptime/monotonic clocks. */
1090		if (flags & FFCLOCK_UPTIME)
1091			bintime_sub(bt, &ffclock_boottime);
1092		break;
1093#endif
1094	default:
1095		return (EINVAL);
1096		break;
1097	}
1098
1099	return (0);
1100}
1101
1102/*
1103 * Initialize a new timecounter and possibly use it.
1104 */
1105void
1106tc_init(struct timecounter *tc)
1107{
1108	u_int u;
1109	struct sysctl_oid *tc_root;
1110
1111	u = tc->tc_frequency / tc->tc_counter_mask;
1112	/* XXX: We need some margin here, 10% is a guess */
1113	u *= 11;
1114	u /= 10;
1115	if (u > hz && tc->tc_quality >= 0) {
1116		tc->tc_quality = -2000;
1117		if (bootverbose) {
1118			printf("Timecounter \"%s\" frequency %ju Hz",
1119			    tc->tc_name, (uintmax_t)tc->tc_frequency);
1120			printf(" -- Insufficient hz, needs at least %u\n", u);
1121		}
1122	} else if (tc->tc_quality >= 0 || bootverbose) {
1123		printf("Timecounter \"%s\" frequency %ju Hz quality %d\n",
1124		    tc->tc_name, (uintmax_t)tc->tc_frequency,
1125		    tc->tc_quality);
1126	}
1127
1128	tc->tc_next = timecounters;
1129	timecounters = tc;
1130	/*
1131	 * Set up sysctl tree for this counter.
1132	 */
1133	tc_root = SYSCTL_ADD_NODE(NULL,
1134	    SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name,
1135	    CTLFLAG_RW, 0, "timecounter description");
1136	SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
1137	    "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0,
1138	    "mask for implemented bits");
1139	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
1140	    "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc),
1141	    sysctl_kern_timecounter_get, "IU", "current timecounter value");
1142	SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
1143	    "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc),
1144	     sysctl_kern_timecounter_freq, "QU", "timecounter frequency");
1145	SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO,
1146	    "quality", CTLFLAG_RD, &(tc->tc_quality), 0,
1147	    "goodness of time counter");
1148	/*
1149	 * Never automatically use a timecounter with negative quality.
1150	 * Even though we run on the dummy counter, switching here may be
1151	 * worse since this timecounter may not be monotonous.
1152	 */
1153	if (tc->tc_quality < 0)
1154		return;
1155	if (tc->tc_quality < timecounter->tc_quality)
1156		return;
1157	if (tc->tc_quality == timecounter->tc_quality &&
1158	    tc->tc_frequency < timecounter->tc_frequency)
1159		return;
1160	(void)tc->tc_get_timecount(tc);
1161	(void)tc->tc_get_timecount(tc);
1162	timecounter = tc;
1163}
1164
1165/* Report the frequency of the current timecounter. */
1166uint64_t
1167tc_getfrequency(void)
1168{
1169
1170	return (timehands->th_counter->tc_frequency);
1171}
1172
1173/*
1174 * Step our concept of UTC.  This is done by modifying our estimate of
1175 * when we booted.
1176 * XXX: not locked.
1177 */
1178void
1179tc_setclock(struct timespec *ts)
1180{
1181	struct timespec tbef, taft;
1182	struct bintime bt, bt2;
1183
1184	cpu_tick_calibrate(1);
1185	nanotime(&tbef);
1186	timespec2bintime(ts, &bt);
1187	binuptime(&bt2);
1188	bintime_sub(&bt, &bt2);
1189	bintime_add(&bt2, &boottimebin);
1190	boottimebin = bt;
1191	bintime2timeval(&bt, &boottime);
1192
1193	/* XXX fiddle all the little crinkly bits around the fiords... */
1194	tc_windup();
1195	nanotime(&taft);
1196	if (timestepwarnings) {
1197		log(LOG_INFO,
1198		    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
1199		    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
1200		    (intmax_t)taft.tv_sec, taft.tv_nsec,
1201		    (intmax_t)ts->tv_sec, ts->tv_nsec);
1202	}
1203	cpu_tick_calibrate(1);
1204}
1205
1206/*
1207 * Initialize the next struct timehands in the ring and make
1208 * it the active timehands.  Along the way we might switch to a different
1209 * timecounter and/or do seconds processing in NTP.  Slightly magic.
1210 */
1211static void
1212tc_windup(void)
1213{
1214	struct bintime bt;
1215	struct timehands *th, *tho;
1216	uint64_t scale;
1217	u_int delta, ncount, ogen;
1218	int i;
1219	time_t t;
1220
1221	/*
1222	 * Make the next timehands a copy of the current one, but do not
1223	 * overwrite the generation or next pointer.  While we update
1224	 * the contents, the generation must be zero.
1225	 */
1226	tho = timehands;
1227	th = tho->th_next;
1228	ogen = th->th_generation;
1229	th->th_generation = 0;
1230	bcopy(tho, th, offsetof(struct timehands, th_generation));
1231
1232	/*
1233	 * Capture a timecounter delta on the current timecounter and if
1234	 * changing timecounters, a counter value from the new timecounter.
1235	 * Update the offset fields accordingly.
1236	 */
1237	delta = tc_delta(th);
1238	if (th->th_counter != timecounter)
1239		ncount = timecounter->tc_get_timecount(timecounter);
1240	else
1241		ncount = 0;
1242#ifdef FFCLOCK
1243	ffclock_windup(delta);
1244#endif
1245	th->th_offset_count += delta;
1246	th->th_offset_count &= th->th_counter->tc_counter_mask;
1247	while (delta > th->th_counter->tc_frequency) {
1248		/* Eat complete unadjusted seconds. */
1249		delta -= th->th_counter->tc_frequency;
1250		th->th_offset.sec++;
1251	}
1252	if ((delta > th->th_counter->tc_frequency / 2) &&
1253	    (th->th_scale * delta < ((uint64_t)1 << 63))) {
1254		/* The product th_scale * delta just barely overflows. */
1255		th->th_offset.sec++;
1256	}
1257	bintime_addx(&th->th_offset, th->th_scale * delta);
1258
1259	/*
1260	 * Hardware latching timecounters may not generate interrupts on
1261	 * PPS events, so instead we poll them.  There is a finite risk that
1262	 * the hardware might capture a count which is later than the one we
1263	 * got above, and therefore possibly in the next NTP second which might
1264	 * have a different rate than the current NTP second.  It doesn't
1265	 * matter in practice.
1266	 */
1267	if (tho->th_counter->tc_poll_pps)
1268		tho->th_counter->tc_poll_pps(tho->th_counter);
1269
1270	/*
1271	 * Deal with NTP second processing.  The for loop normally
1272	 * iterates at most once, but in extreme situations it might
1273	 * keep NTP sane if timeouts are not run for several seconds.
1274	 * At boot, the time step can be large when the TOD hardware
1275	 * has been read, so on really large steps, we call
1276	 * ntp_update_second only twice.  We need to call it twice in
1277	 * case we missed a leap second.
1278	 */
1279	bt = th->th_offset;
1280	bintime_add(&bt, &boottimebin);
1281	i = bt.sec - tho->th_microtime.tv_sec;
1282	if (i > LARGE_STEP)
1283		i = 2;
1284	for (; i > 0; i--) {
1285		t = bt.sec;
1286		ntp_update_second(&th->th_adjustment, &bt.sec);
1287		if (bt.sec != t)
1288			boottimebin.sec += bt.sec - t;
1289	}
1290	/* Update the UTC timestamps used by the get*() functions. */
1291	/* XXX shouldn't do this here.  Should force non-`get' versions. */
1292	bintime2timeval(&bt, &th->th_microtime);
1293	bintime2timespec(&bt, &th->th_nanotime);
1294
1295	/* Now is a good time to change timecounters. */
1296	if (th->th_counter != timecounter) {
1297#ifndef __arm__
1298		if ((timecounter->tc_flags & TC_FLAGS_C3STOP) != 0)
1299			cpu_disable_deep_sleep++;
1300		if ((th->th_counter->tc_flags & TC_FLAGS_C3STOP) != 0)
1301			cpu_disable_deep_sleep--;
1302#endif
1303		th->th_counter = timecounter;
1304		th->th_offset_count = ncount;
1305		tc_min_ticktock_freq = max(1, timecounter->tc_frequency /
1306		    (((uint64_t)timecounter->tc_counter_mask + 1) / 3));
1307#ifdef FFCLOCK
1308		ffclock_change_tc(th);
1309#endif
1310	}
1311
1312	/*-
1313	 * Recalculate the scaling factor.  We want the number of 1/2^64
1314	 * fractions of a second per period of the hardware counter, taking
1315	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
1316	 * processing provides us with.
1317	 *
1318	 * The th_adjustment is nanoseconds per second with 32 bit binary
1319	 * fraction and we want 64 bit binary fraction of second:
1320	 *
1321	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
1322	 *
1323	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
1324	 * we can only multiply by about 850 without overflowing, that
1325	 * leaves no suitably precise fractions for multiply before divide.
1326	 *
1327	 * Divide before multiply with a fraction of 2199/512 results in a
1328	 * systematic undercompensation of 10PPM of th_adjustment.  On a
1329	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
1330 	 *
1331	 * We happily sacrifice the lowest of the 64 bits of our result
1332	 * to the goddess of code clarity.
1333	 *
1334	 */
1335	scale = (uint64_t)1 << 63;
1336	scale += (th->th_adjustment / 1024) * 2199;
1337	scale /= th->th_counter->tc_frequency;
1338	th->th_scale = scale * 2;
1339
1340	/*
1341	 * Now that the struct timehands is again consistent, set the new
1342	 * generation number, making sure to not make it zero.
1343	 */
1344	if (++ogen == 0)
1345		ogen = 1;
1346	th->th_generation = ogen;
1347
1348	/* Go live with the new struct timehands. */
1349#ifdef FFCLOCK
1350	switch (sysclock_active) {
1351	case SYSCLOCK_FBCK:
1352#endif
1353		time_second = th->th_microtime.tv_sec;
1354		time_uptime = th->th_offset.sec;
1355#ifdef FFCLOCK
1356		break;
1357	case SYSCLOCK_FFWD:
1358		time_second = fftimehands->tick_time_lerp.sec;
1359		time_uptime = fftimehands->tick_time_lerp.sec - ffclock_boottime.sec;
1360		break;
1361	}
1362#endif
1363
1364	timehands = th;
1365	timekeep_push_vdso();
1366}
1367
1368/* Report or change the active timecounter hardware. */
1369static int
1370sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
1371{
1372	char newname[32];
1373	struct timecounter *newtc, *tc;
1374	int error;
1375
1376	tc = timecounter;
1377	strlcpy(newname, tc->tc_name, sizeof(newname));
1378
1379	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
1380	if (error != 0 || req->newptr == NULL ||
1381	    strcmp(newname, tc->tc_name) == 0)
1382		return (error);
1383	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
1384		if (strcmp(newname, newtc->tc_name) != 0)
1385			continue;
1386
1387		/* Warm up new timecounter. */
1388		(void)newtc->tc_get_timecount(newtc);
1389		(void)newtc->tc_get_timecount(newtc);
1390
1391		timecounter = newtc;
1392		timekeep_push_vdso();
1393		return (0);
1394	}
1395	return (EINVAL);
1396}
1397
1398SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
1399    0, 0, sysctl_kern_timecounter_hardware, "A",
1400    "Timecounter hardware selected");
1401
1402
1403/* Report or change the active timecounter hardware. */
1404static int
1405sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
1406{
1407	char buf[32], *spc;
1408	struct timecounter *tc;
1409	int error;
1410
1411	spc = "";
1412	error = 0;
1413	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
1414		sprintf(buf, "%s%s(%d)",
1415		    spc, tc->tc_name, tc->tc_quality);
1416		error = SYSCTL_OUT(req, buf, strlen(buf));
1417		spc = " ";
1418	}
1419	return (error);
1420}
1421
1422SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD,
1423    0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected");
1424
1425/*
1426 * RFC 2783 PPS-API implementation.
1427 */
1428
1429int
1430pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
1431{
1432	pps_params_t *app;
1433	struct pps_fetch_args *fapi;
1434#ifdef FFCLOCK
1435	struct pps_fetch_ffc_args *fapi_ffc;
1436#endif
1437#ifdef PPS_SYNC
1438	struct pps_kcbind_args *kapi;
1439#endif
1440
1441	KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
1442	switch (cmd) {
1443	case PPS_IOC_CREATE:
1444		return (0);
1445	case PPS_IOC_DESTROY:
1446		return (0);
1447	case PPS_IOC_SETPARAMS:
1448		app = (pps_params_t *)data;
1449		if (app->mode & ~pps->ppscap)
1450			return (EINVAL);
1451#ifdef FFCLOCK
1452		/* Ensure only a single clock is selected for ffc timestamp. */
1453		if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK)
1454			return (EINVAL);
1455#endif
1456		pps->ppsparam = *app;
1457		return (0);
1458	case PPS_IOC_GETPARAMS:
1459		app = (pps_params_t *)data;
1460		*app = pps->ppsparam;
1461		app->api_version = PPS_API_VERS_1;
1462		return (0);
1463	case PPS_IOC_GETCAP:
1464		*(int*)data = pps->ppscap;
1465		return (0);
1466	case PPS_IOC_FETCH:
1467		fapi = (struct pps_fetch_args *)data;
1468		if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
1469			return (EINVAL);
1470		if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
1471			return (EOPNOTSUPP);
1472		pps->ppsinfo.current_mode = pps->ppsparam.mode;
1473		fapi->pps_info_buf = pps->ppsinfo;
1474		return (0);
1475#ifdef FFCLOCK
1476	case PPS_IOC_FETCH_FFCOUNTER:
1477		fapi_ffc = (struct pps_fetch_ffc_args *)data;
1478		if (fapi_ffc->tsformat && fapi_ffc->tsformat !=
1479		    PPS_TSFMT_TSPEC)
1480			return (EINVAL);
1481		if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec)
1482			return (EOPNOTSUPP);
1483		pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode;
1484		fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc;
1485		/* Overwrite timestamps if feedback clock selected. */
1486		switch (pps->ppsparam.mode & PPS_TSCLK_MASK) {
1487		case PPS_TSCLK_FBCK:
1488			fapi_ffc->pps_info_buf_ffc.assert_timestamp =
1489			    pps->ppsinfo.assert_timestamp;
1490			fapi_ffc->pps_info_buf_ffc.clear_timestamp =
1491			    pps->ppsinfo.clear_timestamp;
1492			break;
1493		case PPS_TSCLK_FFWD:
1494			break;
1495		default:
1496			break;
1497		}
1498		return (0);
1499#endif /* FFCLOCK */
1500	case PPS_IOC_KCBIND:
1501#ifdef PPS_SYNC
1502		kapi = (struct pps_kcbind_args *)data;
1503		/* XXX Only root should be able to do this */
1504		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
1505			return (EINVAL);
1506		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
1507			return (EINVAL);
1508		if (kapi->edge & ~pps->ppscap)
1509			return (EINVAL);
1510		pps->kcmode = kapi->edge;
1511		return (0);
1512#else
1513		return (EOPNOTSUPP);
1514#endif
1515	default:
1516		return (ENOIOCTL);
1517	}
1518}
1519
1520void
1521pps_init(struct pps_state *pps)
1522{
1523	pps->ppscap |= PPS_TSFMT_TSPEC;
1524	if (pps->ppscap & PPS_CAPTUREASSERT)
1525		pps->ppscap |= PPS_OFFSETASSERT;
1526	if (pps->ppscap & PPS_CAPTURECLEAR)
1527		pps->ppscap |= PPS_OFFSETCLEAR;
1528#ifdef FFCLOCK
1529	pps->ppscap |= PPS_TSCLK_MASK;
1530#endif
1531}
1532
1533void
1534pps_capture(struct pps_state *pps)
1535{
1536	struct timehands *th;
1537
1538	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
1539	th = timehands;
1540	pps->capgen = th->th_generation;
1541	pps->capth = th;
1542#ifdef FFCLOCK
1543	pps->capffth = fftimehands;
1544#endif
1545	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
1546	if (pps->capgen != th->th_generation)
1547		pps->capgen = 0;
1548}
1549
1550void
1551pps_event(struct pps_state *pps, int event)
1552{
1553	struct bintime bt;
1554	struct timespec ts, *tsp, *osp;
1555	u_int tcount, *pcount;
1556	int foff, fhard;
1557	pps_seq_t *pseq;
1558#ifdef FFCLOCK
1559	struct timespec *tsp_ffc;
1560	pps_seq_t *pseq_ffc;
1561	ffcounter *ffcount;
1562#endif
1563
1564	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
1565	/* If the timecounter was wound up underneath us, bail out. */
1566	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
1567		return;
1568
1569	/* Things would be easier with arrays. */
1570	if (event == PPS_CAPTUREASSERT) {
1571		tsp = &pps->ppsinfo.assert_timestamp;
1572		osp = &pps->ppsparam.assert_offset;
1573		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
1574		fhard = pps->kcmode & PPS_CAPTUREASSERT;
1575		pcount = &pps->ppscount[0];
1576		pseq = &pps->ppsinfo.assert_sequence;
1577#ifdef FFCLOCK
1578		ffcount = &pps->ppsinfo_ffc.assert_ffcount;
1579		tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp;
1580		pseq_ffc = &pps->ppsinfo_ffc.assert_sequence;
1581#endif
1582	} else {
1583		tsp = &pps->ppsinfo.clear_timestamp;
1584		osp = &pps->ppsparam.clear_offset;
1585		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
1586		fhard = pps->kcmode & PPS_CAPTURECLEAR;
1587		pcount = &pps->ppscount[1];
1588		pseq = &pps->ppsinfo.clear_sequence;
1589#ifdef FFCLOCK
1590		ffcount = &pps->ppsinfo_ffc.clear_ffcount;
1591		tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp;
1592		pseq_ffc = &pps->ppsinfo_ffc.clear_sequence;
1593#endif
1594	}
1595
1596	/*
1597	 * If the timecounter changed, we cannot compare the count values, so
1598	 * we have to drop the rest of the PPS-stuff until the next event.
1599	 */
1600	if (pps->ppstc != pps->capth->th_counter) {
1601		pps->ppstc = pps->capth->th_counter;
1602		*pcount = pps->capcount;
1603		pps->ppscount[2] = pps->capcount;
1604		return;
1605	}
1606
1607	/* Convert the count to a timespec. */
1608	tcount = pps->capcount - pps->capth->th_offset_count;
1609	tcount &= pps->capth->th_counter->tc_counter_mask;
1610	bt = pps->capth->th_offset;
1611	bintime_addx(&bt, pps->capth->th_scale * tcount);
1612	bintime_add(&bt, &boottimebin);
1613	bintime2timespec(&bt, &ts);
1614
1615	/* If the timecounter was wound up underneath us, bail out. */
1616	if (pps->capgen != pps->capth->th_generation)
1617		return;
1618
1619	*pcount = pps->capcount;
1620	(*pseq)++;
1621	*tsp = ts;
1622
1623	if (foff) {
1624		timespecadd(tsp, osp);
1625		if (tsp->tv_nsec < 0) {
1626			tsp->tv_nsec += 1000000000;
1627			tsp->tv_sec -= 1;
1628		}
1629	}
1630
1631#ifdef FFCLOCK
1632	*ffcount = pps->capffth->tick_ffcount + tcount;
1633	bt = pps->capffth->tick_time;
1634	ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt);
1635	bintime_add(&bt, &pps->capffth->tick_time);
1636	bintime2timespec(&bt, &ts);
1637	(*pseq_ffc)++;
1638	*tsp_ffc = ts;
1639#endif
1640
1641#ifdef PPS_SYNC
1642	if (fhard) {
1643		uint64_t scale;
1644
1645		/*
1646		 * Feed the NTP PLL/FLL.
1647		 * The FLL wants to know how many (hardware) nanoseconds
1648		 * elapsed since the previous event.
1649		 */
1650		tcount = pps->capcount - pps->ppscount[2];
1651		pps->ppscount[2] = pps->capcount;
1652		tcount &= pps->capth->th_counter->tc_counter_mask;
1653		scale = (uint64_t)1 << 63;
1654		scale /= pps->capth->th_counter->tc_frequency;
1655		scale *= 2;
1656		bt.sec = 0;
1657		bt.frac = 0;
1658		bintime_addx(&bt, scale * tcount);
1659		bintime2timespec(&bt, &ts);
1660		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
1661	}
1662#endif
1663}
1664
1665/*
1666 * Timecounters need to be updated every so often to prevent the hardware
1667 * counter from overflowing.  Updating also recalculates the cached values
1668 * used by the get*() family of functions, so their precision depends on
1669 * the update frequency.
1670 */
1671
1672static int tc_tick;
1673SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
1674    "Approximate number of hardclock ticks in a millisecond");
1675
1676void
1677tc_ticktock(int cnt)
1678{
1679	static int count;
1680
1681	count += cnt;
1682	if (count < tc_tick)
1683		return;
1684	count = 0;
1685	tc_windup();
1686}
1687
1688static void
1689inittimecounter(void *dummy)
1690{
1691	u_int p;
1692
1693	/*
1694	 * Set the initial timeout to
1695	 * max(1, <approx. number of hardclock ticks in a millisecond>).
1696	 * People should probably not use the sysctl to set the timeout
1697	 * to smaller than its inital value, since that value is the
1698	 * smallest reasonable one.  If they want better timestamps they
1699	 * should use the non-"get"* functions.
1700	 */
1701	if (hz > 1000)
1702		tc_tick = (hz + 500) / 1000;
1703	else
1704		tc_tick = 1;
1705	p = (tc_tick * 1000000) / hz;
1706	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
1707
1708#ifdef FFCLOCK
1709	ffclock_init();
1710#endif
1711	/* warm up new timecounter (again) and get rolling. */
1712	(void)timecounter->tc_get_timecount(timecounter);
1713	(void)timecounter->tc_get_timecount(timecounter);
1714	tc_windup();
1715}
1716
1717SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
1718
1719/* Cpu tick handling -------------------------------------------------*/
1720
1721static int cpu_tick_variable;
1722static uint64_t	cpu_tick_frequency;
1723
1724static uint64_t
1725tc_cpu_ticks(void)
1726{
1727	static uint64_t base;
1728	static unsigned last;
1729	unsigned u;
1730	struct timecounter *tc;
1731
1732	tc = timehands->th_counter;
1733	u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
1734	if (u < last)
1735		base += (uint64_t)tc->tc_counter_mask + 1;
1736	last = u;
1737	return (u + base);
1738}
1739
1740void
1741cpu_tick_calibration(void)
1742{
1743	static time_t last_calib;
1744
1745	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
1746		cpu_tick_calibrate(0);
1747		last_calib = time_uptime;
1748	}
1749}
1750
1751/*
1752 * This function gets called every 16 seconds on only one designated
1753 * CPU in the system from hardclock() via cpu_tick_calibration()().
1754 *
1755 * Whenever the real time clock is stepped we get called with reset=1
1756 * to make sure we handle suspend/resume and similar events correctly.
1757 */
1758
1759static void
1760cpu_tick_calibrate(int reset)
1761{
1762	static uint64_t c_last;
1763	uint64_t c_this, c_delta;
1764	static struct bintime  t_last;
1765	struct bintime t_this, t_delta;
1766	uint32_t divi;
1767
1768	if (reset) {
1769		/* The clock was stepped, abort & reset */
1770		t_last.sec = 0;
1771		return;
1772	}
1773
1774	/* we don't calibrate fixed rate cputicks */
1775	if (!cpu_tick_variable)
1776		return;
1777
1778	getbinuptime(&t_this);
1779	c_this = cpu_ticks();
1780	if (t_last.sec != 0) {
1781		c_delta = c_this - c_last;
1782		t_delta = t_this;
1783		bintime_sub(&t_delta, &t_last);
1784		/*
1785		 * Headroom:
1786		 * 	2^(64-20) / 16[s] =
1787		 * 	2^(44) / 16[s] =
1788		 * 	17.592.186.044.416 / 16 =
1789		 * 	1.099.511.627.776 [Hz]
1790		 */
1791		divi = t_delta.sec << 20;
1792		divi |= t_delta.frac >> (64 - 20);
1793		c_delta <<= 20;
1794		c_delta /= divi;
1795		if (c_delta > cpu_tick_frequency) {
1796			if (0 && bootverbose)
1797				printf("cpu_tick increased to %ju Hz\n",
1798				    c_delta);
1799			cpu_tick_frequency = c_delta;
1800		}
1801	}
1802	c_last = c_this;
1803	t_last = t_this;
1804}
1805
1806void
1807set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
1808{
1809
1810	if (func == NULL) {
1811		cpu_ticks = tc_cpu_ticks;
1812	} else {
1813		cpu_tick_frequency = freq;
1814		cpu_tick_variable = var;
1815		cpu_ticks = func;
1816	}
1817}
1818
1819uint64_t
1820cpu_tickrate(void)
1821{
1822
1823	if (cpu_ticks == tc_cpu_ticks)
1824		return (tc_getfrequency());
1825	return (cpu_tick_frequency);
1826}
1827
1828/*
1829 * We need to be slightly careful converting cputicks to microseconds.
1830 * There is plenty of margin in 64 bits of microseconds (half a million
1831 * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
1832 * before divide conversion (to retain precision) we find that the
1833 * margin shrinks to 1.5 hours (one millionth of 146y).
1834 * With a three prong approach we never lose significant bits, no
1835 * matter what the cputick rate and length of timeinterval is.
1836 */
1837
1838uint64_t
1839cputick2usec(uint64_t tick)
1840{
1841
1842	if (tick > 18446744073709551LL)		/* floor(2^64 / 1000) */
1843		return (tick / (cpu_tickrate() / 1000000LL));
1844	else if (tick > 18446744073709LL)	/* floor(2^64 / 1000000) */
1845		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
1846	else
1847		return ((tick * 1000000LL) / cpu_tickrate());
1848}
1849
1850cpu_tick_f	*cpu_ticks = tc_cpu_ticks;
1851
1852static int vdso_th_enable = 1;
1853static int
1854sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
1855{
1856	int old_vdso_th_enable, error;
1857
1858	old_vdso_th_enable = vdso_th_enable;
1859	error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
1860	if (error != 0)
1861		return (error);
1862	vdso_th_enable = old_vdso_th_enable;
1863	timekeep_push_vdso();
1864	return (0);
1865}
1866SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
1867    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
1868    NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");
1869
1870uint32_t
1871tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
1872{
1873	struct timehands *th;
1874	uint32_t enabled;
1875
1876	th = timehands;
1877	vdso_th->th_algo = VDSO_TH_ALGO_1;
1878	vdso_th->th_scale = th->th_scale;
1879	vdso_th->th_offset_count = th->th_offset_count;
1880	vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
1881	vdso_th->th_offset = th->th_offset;
1882	vdso_th->th_boottime = boottimebin;
1883	enabled = cpu_fill_vdso_timehands(vdso_th);
1884	if (!vdso_th_enable)
1885		enabled = 0;
1886	return (enabled);
1887}
1888
#ifdef COMPAT_FREEBSD32
/*
 * 32-bit compat variant of tc_fill_vdso_timehands(): the 64-bit scale
 * and fraction values are stored through uint64_t pointers into the
 * array-typed fields of struct vdso_timehands32.
 * Returns the machine-dependent "enabled" value, forced to 0 while
 * vdso_th_enable is off.
 */
uint32_t
tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{
	struct timehands *cur;
	uint32_t enabled;

	cur = timehands;
	vdso_th32->th_algo = VDSO_TH_ALGO_1;
	*(uint64_t *)&vdso_th32->th_scale[0] = cur->th_scale;
	vdso_th32->th_offset_count = cur->th_offset_count;
	vdso_th32->th_counter_mask = cur->th_counter->tc_counter_mask;
	vdso_th32->th_offset.sec = cur->th_offset.sec;
	*(uint64_t *)&vdso_th32->th_offset.frac[0] = cur->th_offset.frac;
	vdso_th32->th_boottime.sec = boottimebin.sec;
	*(uint64_t *)&vdso_th32->th_boottime.frac[0] = boottimebin.frac;
	enabled = cpu_fill_vdso_timehands32(vdso_th32);
	return (vdso_th_enable ? enabled : 0);
}
#endif
1911