/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $Id: kern_clock.c,v 1.83 1998/10/26 06:13:18 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

/*
 * Number of timecounters used to implement stable storage
 */
#ifndef NTIMECOUNTER
#define NTIMECOUNTER	2
#endif

static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter",
	"Timecounter stable storage");

static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward __P((void));
static void tco_setscales __P((struct timecounter *tc));
static __inline unsigned tco_delta __P((struct timecounter *tc));

/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

time_t time_second;
/*
 * Implement a dummy timecounter which we can use until a real one is
 * registered.  This allows the console and other early boot code to use
 * time services.
 */

static unsigned
dummy_get_timecount(struct timecounter *tc)
{
	static unsigned now;
	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount,	/* tc_get_timecount */
	0,			/* tc_poll_pps */
	~0u,			/* tc_counter_mask */
	1000000,		/* tc_frequency */
	"dummy"			/* tc_name */
};

struct timecounter *timecounter = &dummy_timecounter;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int	stathz;
int	profhz;
static int profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
	void *dummy;
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}
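
/*
 * Example (illustrative values, not guaranteed by this file): with
 * stathz = 128 and profhz = 1024, psratio = 1024 / 128 = 8, so while
 * profiling only every 8th statclock tick is charged to statistics.
 */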

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct proc *p;

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

#if defined(SMP) && defined(BETTER_CLOCK)
	forward_hardclock(pscnt);
#endif

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	tco_forward();
	ticks++;

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	} else if (softticks + 1 == ticks)
		++softticks;
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(tv)
	struct timeval *tv;
{
	register unsigned long ticks;
	register long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0) {
#ifdef DIAGNOSTIC
		if (usec > 0) {
			sec++;
			usec -= 1000000;
		}
		printf("tvtohz: negative time difference %ld sec %ld usec\n",
		       sec, usec);
#endif
		ticks = 1;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
			/ tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return ((int)ticks);
}
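
/*
 * Worked example (assuming hz = 100 and tick = 10000, i.e. 10ms ticks):
 * for tv = { 1, 500000 } the first branch computes
 * (1 * 1000000 + 500000 + 9999) / 10000 + 1 = 151 ticks, i.e. 1.5 s
 * rounded up to whole ticks, plus one for the current tick to expire.
 */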

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
	int i;
#endif
	register struct proc *p;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (curproc != NULL && CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state.
	 */

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
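	/*
	 * Example (illustrative, assuming stathz = 128): a process running
	 * continuously gains 128 p_estcpu per second, so the resetpriority()
	 * call below fires on every 4th tick, i.e. 32 times per second.
	 */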
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			      PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo", "");

static __inline unsigned
tco_delta(struct timecounter *tc)
{

	return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
	    tc->tc_counter_mask);
}
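
/*
 * Example (hypothetical 16-bit counter, tc_counter_mask = 0xffff): with
 * tc_offset_count = 0xfff0 and a current reading of 0x0010, the masked
 * subtraction yields (0x0010 - 0xfff0) & 0xffff = 0x0020, so a counter
 * wrap between the two readings is handled correctly.
 */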

/*
 * We have four functions for looking at the clock, two for microseconds
 * and two for nanoseconds.  For each there is a fast but less precise
 * version "get{nano|micro}time" which will return a time which may be up
 * to 1/HZ previous to the call, whereas the raw version "{nano|micro}time"
 * will return a timestamp which is as precise as possible.
 */
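
#if 0
/*
 * Usage sketch (not compiled; "example_timestamps" is hypothetical and
 * not part of this file): callers that can tolerate a timestamp up to
 * 1/hz old should prefer the cheap cached interfaces.
 */
static void
example_timestamps(void)
{
	struct timeval fast, precise;

	getmicrotime(&fast);	/* cached copy, no hardware counter read */
	microtime(&precise);	/* reads the counter; as precise as possible */
}
#endif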

void
getmicrotime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tvp = tc->tc_microtime;
}

void
getnanotime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tsp = tc->tc_nanotime;
}

void
microtime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	tv->tv_usec += boottime.tv_usec;
	tv->tv_sec += boottime.tv_sec;
	while (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

void
nanotime(struct timespec *ts)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

void
timecounter_timespec(unsigned count, struct timespec *ts)
{
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count -= tc->tc_offset_count;
	count &= tc->tc_counter_mask;
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	tvp->tv_sec = tc->tc_offset_sec;
	tvp->tv_usec = tc->tc_offset_micro;
}

void
getnanouptime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	tsp->tv_sec = tc->tc_offset_sec;
	tsp->tv_nsec = tc->tc_offset_nano >> 32;
}

void
microuptime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	if (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

void
nanouptime(struct timespec *tv)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	if (delta >= 1000000000) {
		delta -= 1000000000;
		tv->tv_sec++;
	}
	tv->tv_nsec = delta;
}

static void
tco_setscales(struct timecounter *tc)
{
	u_int64_t scale;

	scale = 1000000000LL << 32;
	if (tc->tc_adjustment > 0)
		scale += (tc->tc_adjustment * 1000LL) << 10;
	else
		scale -= (-tc->tc_adjustment * 1000LL) << 10;
	scale /= tc->tc_frequency;
	tc->tc_scale_micro = scale / 1000;
	tc->tc_scale_nano_f = scale & 0xffffffff;
	tc->tc_scale_nano_i = scale >> 32;
}
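
/*
 * Worked example (assuming an i8254-style timecounter with
 * tc_frequency = 1193182 Hz and tc_adjustment = 0):
 * scale = (1000000000 << 32) / 1193182, so tc_scale_micro is roughly
 * 2^32 * 1000000 / 1193182, and one second's worth of counts (1193182)
 * gives (delta * tc_scale_micro) >> 32 of about 1000000 microseconds.
 */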

void
init_timecounter(struct timecounter *tc)
{
	struct timespec ts1;
	struct timecounter *t1, *t2, *t3;
	int i;

	tc->tc_adjustment = 0;
	tco_setscales(tc);
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	tc->tc_tweak = tc;
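	/*
	 * Allocate NTIMECOUNTER copies of this timecounter, linked into a
	 * ring through tc_other; sync_other_counter() rotates through the
	 * ring so that a reader never sees a copy while it is updated.
	 */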
	MALLOC(t1, struct timecounter *, sizeof *t1, M_TIMECOUNTER, M_WAITOK);
	*t1 = *tc;
	t3 = t2 = t1;		/* keep t3 valid even if the loop never runs */
	for (i = 1; i < NTIMECOUNTER; i++) {
		MALLOC(t3, struct timecounter *, sizeof *t3,
		    M_TIMECOUNTER, M_WAITOK);
		*t3 = *tc;
		t3->tc_other = t2;
		t2 = t3;
	}
	t1->tc_other = t3;
	tc = t1;

	printf("Timecounter \"%s\"  frequency %lu Hz\n",
	    tc->tc_name, (u_long)tc->tc_frequency);

	/* XXX: For now always start using the counter. */
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	nanouptime(&ts1);
	tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
	tc->tc_offset_micro = ts1.tv_nsec / 1000;
	tc->tc_offset_sec = ts1.tv_sec;
	timecounter = tc;
}

void
set_timecounter(struct timespec *ts)
{
	struct timespec ts2;

	nanouptime(&ts2);
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	/* fiddle all the little crinkly bits around the fiords... */
	tco_forward();
}


#if 0 /* Currently unused */
void
switch_timecounter(struct timecounter *newtc)
{
	int s;
	struct timecounter *tc;
	struct timespec ts;

	s = splclock();
	tc = timecounter;
	if (newtc == tc || newtc == tc->tc_other) {
		splx(s);
		return;
	}
	nanouptime(&ts);
	newtc->tc_offset_sec = ts.tv_sec;
	newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
	newtc->tc_offset_micro = ts.tv_nsec / 1000;
	newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
	timecounter = newtc;
	splx(s);
}
#endif

static struct timecounter *
sync_other_counter(void)
{
	struct timecounter *tc, *tcn, *tco;
	unsigned delta;

	tco = timecounter;
	tc = tco->tc_other;
	tcn = tc->tc_other;
	*tc = *tco;
	tc->tc_other = tcn;
	delta = tco_delta(tc);
	tc->tc_offset_count += delta;
	tc->tc_offset_count &= tc->tc_counter_mask;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
	return (tc);
}
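
/*
 * Example of the rotation (assuming the default NTIMECOUNTER = 2): with
 * copies A and B linked through tc_other and timecounter pointing at A,
 * sync_other_counter() copies A into B, advances B's offsets by the
 * elapsed delta and returns B; tco_forward() then publishes B, so a
 * reader still holding A sees a consistent, merely slightly stale, view.
 */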

static void
tco_forward(void)
{
	struct timecounter *tc, *tco;

	tco = timecounter;
	tc = sync_other_counter();
	/*
	 * We may be inducing a tiny error here: the tc_poll_pps() may
	 * process a latched count which happens after the tco_delta()
	 * in sync_other_counter(), which would extend the previous
	 * counter's parameters into the domain of this new one.
	 * Since the time window for this is very small, the error is
	 * going to be only a few weenieseconds (as Dave Mills would
	 * say), so let's just not talk more about it, OK?
	 */
	if (tco->tc_poll_pps)
		tco->tc_poll_pps(tco);
	if (timedelta != 0) {
		tc->tc_offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
		timedelta -= tickdelta;
	}

	while (tc->tc_offset_nano >= 1000000000ULL << 32) {
		tc->tc_offset_nano -= 1000000000ULL << 32;
		tc->tc_offset_sec++;
		tc->tc_frequency = tc->tc_tweak->tc_frequency;
		tc->tc_adjustment = tc->tc_tweak->tc_adjustment;
		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
		tco_setscales(tc);
	}

	tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

	/* Figure out the wall-clock time */
	tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
	tc->tc_nanotime.tv_nsec =
	    (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
	tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
	if (tc->tc_nanotime.tv_nsec >= 1000000000) {
		tc->tc_nanotime.tv_nsec -= 1000000000;
		tc->tc_microtime.tv_usec -= 1000000;
		tc->tc_nanotime.tv_sec++;
	}
	time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

	timecounter = tc;
}

static int
sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_frequency,
	    sizeof(timecounter->tc_tweak->tc_frequency), req));
}

static int
sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_adjustment,
	    sizeof(timecounter->tc_tweak->tc_adjustment), req));
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", "");
833