/* $NetBSD: kern_tc.c,v 1.77 2024/05/11 06:34:45 andvar Exp $ */

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ---------------------------------------------------------------------------
 */

/*
 * https://papers.freebsd.org/2002/phk-timecounters.files/timecounter.pdf
 */

#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.77 2024/05/11 06:34:45 andvar Exp $");

#ifdef _KERNEL_OPT
#include "opt_ntp.h"
#endif

#include <sys/param.h>

#include <sys/atomic.h>
#include <sys/evcnt.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/xcall.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return ++now;
}

static struct timecounter dummy_timecounter = {
	.tc_get_timecount	= dummy_get_timecount,
	.tc_counter_mask	= ~0u,
	.tc_frequency		= 1000000,
	.tc_name		= "dummy",
	.tc_quality		= -1000000,
	.tc_priv		= NULL,
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;     /* active timecounter */
	int64_t			th_adjustment;   /* frequency adjustment */
						 /* (NTP/adjtime) */
	uint64_t		th_scale;        /* scale factor (counter */
						 /* tick->time) */
	uint64_t 		th_offset_count; /* offset at last time */
						 /* update (tc_windup()) */
	struct bintime		th_offset;       /* bin (up)time at windup */
	struct timeval		th_microtime;    /* cached microtime */
	struct timespec		th_nanotime;     /* cached nanotime */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;   /* current generation */
	struct timehands	*th_next;        /* next timehand */
};

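/*
 * The timehands below form a ring: tc_windup() prepares the next element
 * while readers may still be consulting older ones, so a reader that is
 * interrupted or preempted in mid-read has several windup periods before
 * its snapshot is recycled and the generation check forces a retry.
 */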
static struct timehands th0;
static struct timehands th9 = { .th_next = &th0, };
static struct timehands th8 = { .th_next = &th9, };
static struct timehands th7 = { .th_next = &th8, };
static struct timehands th6 = { .th_next = &th7, };
static struct timehands th5 = { .th_next = &th6, };
static struct timehands th4 = { .th_next = &th5, };
static struct timehands th3 = { .th_next = &th4, };
static struct timehands th2 = { .th_next = &th3, };
static struct timehands th1 = { .th_next = &th2, };
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1,
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

/* used by savecore(8) */
time_t time_second_legacy asm("time_second");

#ifdef __HAVE_ATOMIC64_LOADSTORE
volatile time_t time__second __cacheline_aligned = 1;
volatile time_t time__uptime __cacheline_aligned = 1;
#else
static volatile struct {
	uint32_t lo, hi;
} time__uptime32 __cacheline_aligned = {
	.lo = 1,
}, time__second32 __cacheline_aligned = {
	.lo = 1,
};
#endif

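/*
 * Estimated time of boot in UTC: wall-clock time = uptime + timebase.bin.
 * tc_setclock() and NTP second processing adjust it under the generation
 * counter below so that lockless readers in getbinboottime() can detect
 * and retry a torn update.
 */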
static struct {
	struct bintime bin;
	volatile unsigned gen;	/* even when stable, odd when changing */
} timebase __cacheline_aligned;

static int timestepwarnings;

kmutex_t timecounter_lock;
static u_int timecounter_mods;
static volatile int timecounter_removals = 1;
static u_int timecounter_bad;

#ifdef __HAVE_ATOMIC64_LOADSTORE

static inline void
setrealuptime(time_t second, time_t uptime)
{

	time_second_legacy = second;

	atomic_store_relaxed(&time__second, second);
	atomic_store_relaxed(&time__uptime, uptime);
}

#else

static inline void
setrealuptime(time_t second, time_t uptime)
{
	uint32_t seclo = second & 0xffffffff, sechi = second >> 32;
	uint32_t uplo = uptime & 0xffffffff, uphi = uptime >> 32;

	KDASSERT(mutex_owned(&timecounter_lock));

	time_second_legacy = second;

	/*
	 * Fast path -- no wraparound, just updating the low bits, so
	 * no need for seqlocked access.
	 */
	if (__predict_true(sechi == time__second32.hi) &&
	    __predict_true(uphi == time__uptime32.hi)) {
		atomic_store_relaxed(&time__second32.lo, seclo);
		atomic_store_relaxed(&time__uptime32.lo, uplo);
		return;
	}

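	/*
	 * Slow path: a high word is changing.  Temporarily set both high
	 * words to 0xffffffff -- a value they will not legitimately hold
	 * for a very long time -- so that getrealtime()/getuptime() spin
	 * instead of pairing a new low word with a stale high word, then
	 * publish the new low and high words in order with producer
	 * barriers in between.
	 */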
	atomic_store_relaxed(&time__second32.hi, 0xffffffff);
	atomic_store_relaxed(&time__uptime32.hi, 0xffffffff);
	membar_producer();
	atomic_store_relaxed(&time__second32.lo, seclo);
	atomic_store_relaxed(&time__uptime32.lo, uplo);
	membar_producer();
	atomic_store_relaxed(&time__second32.hi, sechi);
	atomic_store_relaxed(&time__uptime32.hi, uphi);
}

time_t
getrealtime(void)
{
	uint32_t lo, hi;

	do {
		for (;;) {
			hi = atomic_load_relaxed(&time__second32.hi);
			if (__predict_true(hi != 0xffffffff))
				break;
			SPINLOCK_BACKOFF_HOOK;
		}
		membar_consumer();
		lo = atomic_load_relaxed(&time__second32.lo);
		membar_consumer();
	} while (hi != atomic_load_relaxed(&time__second32.hi));

	return ((time_t)hi << 32) | lo;
}

time_t
getuptime(void)
{
	uint32_t lo, hi;

	do {
		for (;;) {
			hi = atomic_load_relaxed(&time__uptime32.hi);
			if (__predict_true(hi != 0xffffffff))
				break;
			SPINLOCK_BACKOFF_HOOK;
		}
		membar_consumer();
		lo = atomic_load_relaxed(&time__uptime32.lo);
		membar_consumer();
	} while (hi != atomic_load_relaxed(&time__uptime32.hi));

	return ((time_t)hi << 32) | lo;
}

time_t
getboottime(void)
{

	return getrealtime() - getuptime();
}

uint32_t
getuptime32(void)
{

	return atomic_load_relaxed(&time__uptime32.lo);
}

#endif	/* !defined(__HAVE_ATOMIC64_LOADSTORE) */

/*
 * sysctl helper routine for kern.timecounter.hardware
 */
static int
sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	char newname[MAX_TCNAMELEN];
	struct timecounter *newtc, *tc;

	tc = timecounter;

	strlcpy(newname, tc->tc_name, sizeof(newname));

	node = *rnode;
	node.sysctl_data = newname;
	node.sysctl_size = sizeof(newname);

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (error ||
	    newp == NULL ||
	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
		return error;

	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
	    NULL, NULL)) != 0)
		return error;

	if (!cold)
		mutex_spin_enter(&timecounter_lock);
	error = EINVAL;
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;
		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);
		timecounter = newtc;
		error = 0;
		break;
	}
	if (!cold)
		mutex_spin_exit(&timecounter_lock);
	return error;
}

static int
sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
{
	char buf[MAX_TCNAMELEN+48];
	char *where;
	const char *spc;
	struct timecounter *tc;
	size_t needed, left, slen;
	int error, mods;

	if (newp != NULL)
		return EPERM;
	if (namelen != 0)
		return EINVAL;

	mutex_spin_enter(&timecounter_lock);
 retry:
	spc = "";
	error = 0;
	needed = 0;
	left = *oldlenp;
	where = oldp;
	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
		if (where == NULL) {
			needed += sizeof(buf);  /* be conservative */
		} else {
			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
					" Hz)", spc, tc->tc_name, tc->tc_quality,
					tc->tc_frequency);
			if (left < slen + 1)
				break;
			mods = timecounter_mods;
			mutex_spin_exit(&timecounter_lock);
			error = copyout(buf, where, slen + 1);
			mutex_spin_enter(&timecounter_lock);
			if (mods != timecounter_mods) {
				goto retry;
			}
			spc = " ";
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	mutex_spin_exit(&timecounter_lock);

	*oldlenp = needed;
	return error;
}

SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
{
	const struct sysctlnode *node;

	sysctl_createv(clog, 0, NULL, &node,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "timecounter",
		       SYSCTL_DESCR("time counter information"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node != NULL) {
		sysctl_createv(clog, 0, NULL, NULL,
			       CTLFLAG_PERMANENT,
			       CTLTYPE_STRING, "choice",
			       SYSCTL_DESCR("available counters"),
			       sysctl_kern_timecounter_choice, 0, NULL, 0,
			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			       CTLTYPE_STRING, "hardware",
			       SYSCTL_DESCR("currently active time counter"),
			       sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			       CTLTYPE_INT, "timestepwarnings",
			       SYSCTL_DESCR("log time steps"),
			       NULL, 0, &timestepwarnings, 0,
			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}

#ifdef TC_COUNTERS
#define	TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */

static void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and what
 * it was when we copied it to the timehands' offset_count.
 */
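/*
 * For example, with a 24-bit counter (tc_counter_mask == 0xffffff) the
 * unsigned subtraction below still yields the correct, small delta even
 * after the hardware counter has wrapped, provided tc_windup() runs more
 * often than the counter rolls over.
 */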
static inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return (tc->tc_get_timecount(tc) -
		 th->th_offset_count) & tc->tc_counter_mask;
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/timevar.h> for a description of these 12 functions.
 */

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	lwp_t *l;
	u_int lgen, gen;

	TC_COUNT(nbinuptime);

	/*
	 * Provide exclusion against tc_detach().
	 *
	 * We record the number of timecounter removals before accessing
	 * timecounter state.  Note that the LWP can be using multiple
	 * "generations" at once, due to interrupts (interrupted while in
	 * this function).  Hardware interrupts will borrow the interrupted
	 * LWP's l_tcgen value for this purpose, and can themselves be
	 * interrupted by higher priority interrupts.  In this case we need
	 * to ensure that the oldest generation in use is recorded.
	 *
	 * splsched() is too expensive to use, so we take care to structure
	 * this code in such a way that it is not required.  Likewise, we
	 * do not disable preemption.
	 *
	 * Memory barriers are also too expensive to use for such a
	 * performance critical function.  The good news is that we do not
	 * need memory barriers for this type of exclusion, as the thread
	 * updating timecounter_removals will issue a broadcast cross call
	 * before inspecting our l_tcgen value (this elides memory ordering
	 * issues).
	 *
	 * XXX If the author of the above comment knows how to make it
	 * safe to avoid memory barriers around the access to
	 * th->th_generation, I'm all ears.
	 */
	l = curlwp;
	lgen = l->l_tcgen;
	if (__predict_true(lgen == 0)) {
		l->l_tcgen = timecounter_removals;
	}
	__insn_barrier();

	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);

	__insn_barrier();
	l->l_tcgen = lgen;
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanouptime);
	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrouptime);
	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{
	struct bintime boottime;

	TC_COUNT(nbintime);
	binuptime(bt);
	getbinboottime(&boottime);
	bintime_add(bt, &boottime);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanotime);
	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrotime);
	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbinuptime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanouptime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		bintime2timespec(&th->th_offset, tsp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrouptime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		bintime2timeval(&th->th_offset, tvp);
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	struct bintime boottime;
	u_int gen;

	TC_COUNT(ngetbintime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*bt = th->th_offset;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
	getbinboottime(&boottime);
	bintime_add(bt, &boottime);
}

static inline void
dogetnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanotime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*tsp = th->th_nanotime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanotime(struct timespec *tsp)
{

	dogetnanotime(tsp);
}

void dtrace_getnanotime(struct timespec *tsp);

void
dtrace_getnanotime(struct timespec *tsp)
{

	dogetnanotime(tsp);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrotime);
	do {
		th = atomic_load_consume(&timehands);
		gen = th->th_generation;
		membar_consumer();
		*tvp = th->th_microtime;
		membar_consumer();
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanoboottime(struct timespec *tsp)
{
	struct bintime bt;

	getbinboottime(&bt);
	bintime2timespec(&bt, tsp);
}

void
getmicroboottime(struct timeval *tvp)
{
	struct bintime bt;

	getbinboottime(&bt);
	bintime2timeval(&bt, tvp);
}

void
getbinboottime(struct bintime *basep)
{
	struct bintime base;
	unsigned gen;

	do {
		/* Spin until the timebase isn't changing.  */
		while ((gen = atomic_load_relaxed(&timebase.gen)) & 1)
			SPINLOCK_BACKOFF_HOOK;

		/* Read out a snapshot of the timebase.  */
		membar_consumer();
		base = timebase.bin;
		membar_consumer();

		/* Restart if it changed while we were reading.  */
	} while (gen != atomic_load_relaxed(&timebase.gen));

	*basep = base;
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	KASSERTMSG(tc->tc_next == NULL, "timecounter %s already initialised",
	    tc->tc_name);

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
			    tc->tc_name, (uintmax_t)tc->tc_frequency);
		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
	} else if (tc->tc_quality >= 0 || bootverbose) {
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	mutex_spin_enter(&timecounter_lock);
	tc->tc_next = timecounters;
	timecounters = tc;
	timecounter_mods++;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
	    (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency > timecounter->tc_frequency))) {
		(void)tc->tc_get_timecount(tc);
		(void)tc->tc_get_timecount(tc);
		timecounter = tc;
		tc_windup();
	}
	mutex_spin_exit(&timecounter_lock);
}

/*
 * Pick a new timecounter due to the existing counter going bad.
 */
static void
tc_pick(void)
{
	struct timecounter *best, *tc;

	KASSERT(mutex_owned(&timecounter_lock));

	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc->tc_quality > best->tc_quality)
			best = tc;
		else if (tc->tc_quality < best->tc_quality)
			continue;
		else if (tc->tc_frequency > best->tc_frequency)
			best = tc;
	}
	(void)best->tc_get_timecount(best);
	(void)best->tc_get_timecount(best);
	timecounter = best;
}

/*
 * A timecounter has gone bad, arrange to pick a new one at the next
 * clock tick.
 */
void
tc_gonebad(struct timecounter *tc)
{

	tc->tc_quality = -100;
	membar_producer();
	atomic_inc_uint(&timecounter_bad);
}

/*
 * Stop using a timecounter and remove it from the timecounters list.
 */
int
tc_detach(struct timecounter *target)
{
	struct timecounter *tc;
	struct timecounter **tcp = NULL;
	int removals;
	lwp_t *l;

	/* First, find the timecounter. */
	mutex_spin_enter(&timecounter_lock);
	for (tcp = &timecounters, tc = timecounters;
	     tc != NULL;
	     tcp = &tc->tc_next, tc = tc->tc_next) {
		if (tc == target)
			break;
	}
	if (tc == NULL) {
		mutex_spin_exit(&timecounter_lock);
		return ESRCH;
	}

	/* And now, remove it. */
	*tcp = tc->tc_next;
	if (timecounter == target) {
		tc_pick();
		tc_windup();
	}
	timecounter_mods++;
	removals = timecounter_removals++;
	mutex_spin_exit(&timecounter_lock);

	/*
	 * We now have to determine if any threads in the system are still
	 * making use of this timecounter.
	 *
	 * We issue a broadcast cross call to elide memory ordering issues,
	 * then scan all LWPs in the system looking at each one's timecounter
	 * generation number.  We need to see a value of zero (not actively
	 * using a timecounter) or a value greater than our removal value.
	 *
	 * We may race with threads that read `timecounter_removals' and
	 * then get preempted before updating `l_tcgen'.  This is not
	 * a problem, since it means that these threads have not yet started
	 * accessing timecounter state.  All we do need is one clean
	 * snapshot of the system where every thread appears not to be using
	 * old timecounter state.
	 */
	for (;;) {
		xc_barrier(0);

		mutex_enter(&proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_tcgen == 0 || l->l_tcgen > removals) {
				/*
				 * Not using timecounter or old timecounter
				 * state at time of our xcall or later.
				 */
				continue;
			}
			break;
		}
		mutex_exit(&proc_lock);

		/*
		 * If the timecounter is still in use, wait at least 10ms
		 * before retrying.
		 */
		if (l == NULL) {
			break;
		}
		(void)kpause("tcdetach", false, mstohz(10), NULL);
	}

	tc->tc_next = NULL;
	return 0;
}

/* Report the frequency of the current timecounter. */
uint64_t
tc_getfrequency(void)
{

	return atomic_load_consume(&timehands)->th_counter->tc_frequency;
}

/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(const struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	mutex_spin_enter(&timecounter_lock);
	TC_COUNT(nsetclock);
	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &timebase.bin);
	timebase.gen |= 1;	/* change in progress */
	membar_producer();
	timebase.bin = bt;
	membar_producer();
	timebase.gen++;		/* commit change */
	tc_windup();
	mutex_spin_exit(&timecounter_lock);

	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO,
		    "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)ts2.tv_sec, ts2.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	uint64_t scale;
	u_int delta, ncount, ogen;
	int i, s_update;
	time_t t;

	KASSERT(mutex_owned(&timecounter_lock));

	s_update = 0;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.  Ensure global
	 * visibility of the generation before proceeding.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	membar_producer();
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 * If NTP is not compiled in, ntp_update_second still calculates
	 * the adjustment resulting from adjtime() calls.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &timebase.bin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		s_update = 1;
		if (bt.sec != t) {
			timebase.gen |= 1;	/* change in progress */
			membar_producer();
			timebase.bin.sec += bt.sec - t;
			membar_producer();
			timebase.gen++;		/* commit change */
		}
	}

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);
	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		s_update = 1;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
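	/*
	 * For example, an unadjusted 1 GHz counter gives
	 * th_scale ~= 2^64 / 10^9 ~= 18446744073, i.e. each counter tick
	 * adds roughly 18.4e9 units of 1/2^64 s (one nanosecond) to
	 * th_offset.
	 */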
	if (s_update) {
		scale = (uint64_t)1 << 63;
		scale += (th->th_adjustment / 1024) * 2199;
		scale /= th->th_counter->tc_frequency;
		th->th_scale = scale * 2;
	}
	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.  Ensure
	 * changes are globally visible before changing.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/*
	 * Go live with the new struct timehands.  Ensure changes are
	 * globally visible before changing.
	 */
	setrealuptime(th->th_microtime.tv_sec, th->th_offset.sec);
	atomic_store_release(&timehands, th);

	/*
	 * Force users of the old timehand to move on.  This is
	 * necessary for MP systems; we need to ensure that the
	 * consumers will move away from the old timehand before
	 * we begin updating it again when we eventually wrap
	 * around.
	 */
	if (++tho->th_generation == 0)
		tho->th_generation = 1;
}

/*
 * RFC 2783 PPS-API implementation.
 */

int
pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
{
	pps_params_t *app;
	pps_info_t *pipi;
#ifdef PPS_SYNC
	int *epi;
#endif

	KASSERT(mutex_owned(&timecounter_lock));

	KASSERT(pps != NULL);

	switch (cmd) {
	case PPS_IOC_CREATE:
		return 0;
	case PPS_IOC_DESTROY:
		return 0;
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return EINVAL;
		pps->ppsparam = *app;
		return 0;
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return 0;
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return 0;
	case PPS_IOC_FETCH:
		pipi = (pps_info_t *)data;
		pps->ppsinfo.current_mode = pps->ppsparam.mode;
		*pipi = pps->ppsinfo;
		return 0;
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		epi = (int *)data;
		/* XXX Only root should be able to do this */
		if (*epi & ~pps->ppscap)
			return EINVAL;
		pps->kcmode = *epi;
		return 0;
#else
		return EOPNOTSUPP;
#endif
	default:
		return EPASSTHROUGH;
	}
}

void
pps_init(struct pps_state *pps)
{

	KASSERT(mutex_owned(&timecounter_lock));

	pps->ppscap |= PPS_TSFMT_TSPEC;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
}

/*
 * capture a timestamp in the pps structure
 */
void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(mutex_owned(&timecounter_lock));
	KASSERT(pps != NULL);

	th = timehands;
	pps->capgen = th->th_generation;
	pps->capth = th;
	pps->capcount = (uint64_t)tc_delta(th) + th->th_offset_count;
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}

#ifdef PPS_DEBUG
int ppsdebug = 0;
#endif

/*
 * process a pps_capture()ed event
 */
void
pps_event(struct pps_state *pps, int event)
{
	pps_ref_event(pps, event, NULL, PPS_REFEVNT_PPS|PPS_REFEVNT_CAPTURE);
}

/*
 * extended pps api / kernel pll/fll entry point
 *
 * feed reference time stamps to PPS engine
 *
 * will simulate a PPS event and feed
 * the NTP PLL/FLL if requested.
 *
 * the ref time stamps should arrive roughly once
 * a second; they do not need to be exactly in phase
 * with the UTC second, but should be close to it.
 * this relaxation of requirements allows callout
 * driven timestamping mechanisms to feed to pps
 * capture/kernel pll logic.
 *
 * calling pattern is:
 *  pps_capture() (for PPS_REFEVNT_{CAPTURE|CAPCUR})
 *  read timestamp from reference source
 *  pps_ref_event()
 *
 * supported refmodes:
 *  PPS_REFEVNT_CAPTURE
 *    use system timestamp of pps_capture()
 *  PPS_REFEVNT_CURRENT
 *    use system timestamp of this call
 *  PPS_REFEVNT_CAPCUR
 *    use average of read capture and current system time stamp
 *  PPS_REFEVNT_PPS
 *    assume timestamp on second mark - ref_ts is ignored
 *
 */

void
pps_ref_event(struct pps_state *pps,
	      int event,
	      struct bintime *ref_ts,
	      int refmode
	)
{
	struct bintime bt;	/* current time */
	struct bintime btd;	/* time difference */
	struct bintime bt_ref;	/* reference time */
	struct timespec ts, *tsp, *osp;
	struct timehands *th;
	uint64_t tcount, acount, dcount, *pcount;
	int foff, gen;
#ifdef PPS_SYNC
	int fhard;
#endif
	pps_seq_t *pseq;

	KASSERT(mutex_owned(&timecounter_lock));

	KASSERT(pps != NULL);

	/* pick up current time stamp if needed */
	if (refmode & (PPS_REFEVNT_CURRENT|PPS_REFEVNT_CAPCUR)) {
		/* pick up current time stamp */
		th = timehands;
		gen = th->th_generation;
		tcount = (uint64_t)tc_delta(th) + th->th_offset_count;
		if (gen != th->th_generation)
			gen = 0;

		/* If the timecounter was wound up underneath us, bail out. */
		if (pps->capgen == 0 ||
		    pps->capgen != pps->capth->th_generation ||
		    gen == 0 ||
		    gen != pps->capgen) {
#ifdef PPS_DEBUG
			if (ppsdebug & 0x1) {
				log(LOG_DEBUG,
				    "pps_ref_event(pps=%p, event=%d, ...): DROP (wind-up)\n",
				    pps, event);
			}
#endif
			return;
		}
	} else {
		tcount = 0;	/* keep GCC happy */
	}

#ifdef PPS_DEBUG
	if (ppsdebug & 0x1) {
		struct timespec tmsp;

		if (ref_ts == NULL) {
			tmsp.tv_sec = 0;
			tmsp.tv_nsec = 0;
		} else {
			bintime2timespec(ref_ts, &tmsp);
		}

		log(LOG_DEBUG,
		    "pps_ref_event(pps=%p, event=%d, ref_ts=%"PRIi64
		    ".%09"PRIi32", refmode=0x%1x)\n",
		    pps, event, tmsp.tv_sec, (int32_t)tmsp.tv_nsec, refmode);
	}
#endif

	/* setup correct event references */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
#endif
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
#endif
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/* determine system time stamp according to refmode */
	dcount = 0;		/* keep GCC happy */
	switch (refmode & PPS_REFEVNT_RMASK) {
	case PPS_REFEVNT_CAPTURE:
		acount = pps->capcount;	/* use capture timestamp */
		break;

	case PPS_REFEVNT_CURRENT:
		acount = tcount; /* use current timestamp */
		break;

	case PPS_REFEVNT_CAPCUR:
		/*
		 * calculate counter value between pps_capture() and
		 * pps_ref_event()
		 */
		dcount = tcount - pps->capcount;
		acount = (dcount / 2) + pps->capcount;
		break;

	default:		/* ignore call error silently */
		return;
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		pps->capcount = acount;
		*pcount = acount;
		pps->ppscount[2] = acount;
#ifdef PPS_DEBUG
		if (ppsdebug & 0x1) {
			log(LOG_DEBUG,
			    "pps_ref_event(pps=%p, event=%d, ...): DROP (time-counter change)\n",
			    pps, event);
		}
#endif
		return;
	}

	pps->capcount = acount;

	/* Convert the count to a bintime. */
	bt = pps->capth->th_offset;
	bintime_addx(&bt, pps->capth->th_scale * (acount - pps->capth->th_offset_count));
	bintime_add(&bt, &timebase.bin);

	if ((refmode & PPS_REFEVNT_PPS) == 0) {
		/* determine difference to reference time stamp */
		bt_ref = *ref_ts;

		btd = bt;
		bintime_sub(&btd, &bt_ref);

		/*
		 * simulate a PPS timestamp by dropping the fraction
		 * and applying the offset
		 */
		if (bt.frac >= (uint64_t)1<<63)	/* skip to nearest second */
			bt.sec++;
		bt.frac = 0;
		bintime_add(&bt, &btd);
	} else {
		/*
		 * create ref_ts from current time -
		 * we are supposed to be called on
		 * the second mark
		 */
		bt_ref = bt;
		if (bt_ref.frac >= (uint64_t)1<<63)	/* skip to nearest second */
			bt_ref.sec++;
		bt_ref.frac = 0;
	}

	/* convert bintime to timestamp */
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen != pps->capth->th_generation)
		return;

	/* store time stamp */
	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	/* add offset correction */
	if (foff) {
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}

#ifdef PPS_DEBUG
	if (ppsdebug & 0x2) {
		struct timespec ts2;
		struct timespec ts3;

		bintime2timespec(&bt_ref, &ts2);

		bt.sec = 0;
		bt.frac = 0;

		if (refmode & PPS_REFEVNT_CAPCUR) {
			bintime_addx(&bt, pps->capth->th_scale * dcount);
		}
		bintime2timespec(&bt, &ts3);

		log(LOG_DEBUG, "ref_ts=%"PRIi64".%09"PRIi32
		    ", ts=%"PRIi64".%09"PRIi32", read latency=%"PRIi64" ns\n",
		    ts2.tv_sec, (int32_t)ts2.tv_nsec,
		    tsp->tv_sec, (int32_t)tsp->tv_nsec,
		    timespec2ns(&ts3));
	}
#endif

#ifdef PPS_SYNC
	if (fhard) {
		uint64_t scale;
		uint64_t div;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event (mod 1 second) thus
		 * we are actually looking at the frequency difference scaled
		 * in nsec.
		 * As the counter time stamps are not truly at 1Hz
		 * we need to scale the count by the elapsed
		 * reference time.
		 * valid sampling interval: [0.5..2[ sec
		 */

		/* calculate elapsed raw count */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;

		/* calculate elapsed ref time */
		btd = bt_ref;
		bintime_sub(&btd, &pps->ref_time);
		pps->ref_time = bt_ref;

		/* check that we stay below 2 sec */
		if (btd.sec < 0 || btd.sec > 1)
			return;

		/* we want at least 0.5 sec between samples */
		if (btd.sec == 0 && btd.frac < (uint64_t)1<<63)
			return;

		/*
		 * calculate cycles per period by multiplying
		 * the frequency with the elapsed period
		 * we pick a fraction of 30 bits
		 * ~1ns resolution for elapsed time
		 */
		div   = (uint64_t)btd.sec << 30;
		div  |= (btd.frac >> 34) & (((uint64_t)1 << 30) - 1);
		div  *= pps->capth->th_counter->tc_frequency;
		div >>= 30;

		if (div == 0)	/* safeguard */
			return;

		scale = (uint64_t)1 << 63;
		scale /= div;
		scale *= 2;

		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);

#ifdef PPS_DEBUG
		if (ppsdebug & 0x4) {
			struct timespec ts2;
			int64_t df;

			bintime2timespec(&bt_ref, &ts2);
			df = timespec2ns(&ts);
			if (df > 500000000)
				df -= 1000000000;
			log(LOG_DEBUG, "hardpps: ref_ts=%"PRIi64
			    ".%09"PRIi32", ts=%"PRIi64".%09"PRIi32
			    ", freqdiff=%"PRIi64" ns/s\n",
			    ts2.tv_sec, (int32_t)ts2.tv_nsec,
			    tsp->tv_sec, (int32_t)tsp->tv_nsec,
			    df);
		}
#endif

		hardpps(tsp, timespec2ns(&ts));
	}
#endif
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	mutex_spin_enter(&timecounter_lock);
	if (__predict_false(timecounter_bad != 0)) {
		/* An existing timecounter has gone bad, pick a new one. */
		(void)atomic_swap_uint(&timecounter_bad, 0);
		if (timecounter->tc_quality < 0) {
			tc_pick();
		}
	}
	tc_windup();
	mutex_spin_exit(&timecounter_lock);
}

void
inittimecounter(void)
{
	u_int p;

	mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH);

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	p = (tc_tick * 1000000) / hz;
	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
	    p / 1000, p % 1000);

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}