kern_tc.c revision 39245
static volatile int print_tci = 1;

/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $Id: kern_clock.c,v 1.78 1998/08/05 18:06:40 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward __P((void));
static void tco_setscales __P((struct timecounter *tc));
static __inline unsigned tco_delta __P((struct timecounter *tc));

/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

struct timecounter *timecounter;

time_t time_second;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */
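
/*
 * A worked example with illustrative figures (not taken from any
 * particular machine): with stathz = 128 and profhz = 1024, psratio is
 * 1024 / 128 = 8, so while a process is being profiled the statistics
 * code below only acts on every 8th tick of the faster profile clock.
 */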

int	stathz;
int	profhz;
static int profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
	void *dummy;
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct proc *p;

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

#if defined(SMP) && defined(BETTER_CLOCK)
	forward_hardclock(pscnt);
#endif

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	tco_forward();
	ticks++;

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	} else if (softticks + 1 == ticks)
		++softticks;
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(tv)
	struct timeval *tv;
{
	register unsigned long ticks;
	register long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
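	/*
	 * Worked example with made-up numbers: hz = 100 (tick = 10000 us)
	 * and *tv = { 2 s, 30000 us } takes the first branch, yielding
	 * (2000000 + 30000 + 9999) / 10000 + 1 = 203 + 1 = 204 ticks.
	 */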
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0) {
#ifdef DIAGNOSTIC
		if (usec > 0) {
			sec++;
			usec -= 1000000;
		}
		printf("tvtohz: negative time difference %ld sec %ld usec\n",
		       sec, usec);
#endif
		ticks = 1;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
			/ tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return (ticks);
}
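
/*
 * Sketch of a typical caller (hypothetical helper, not part of this
 * file): a relative timeval is converted to ticks before being handed
 * to the callout machinery.
 */
#if 0
static void
example_schedule(void (*fn)(void *), void *arg, struct timeval *tv)
{

	timeout(fn, arg, tvtohz(tv));
}
#endif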

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state.
	 */

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
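	/*
	 * Concretely (illustrative behaviour, not measured numbers): a
	 * process that stays runnable gains one p_estcpu per statclock
	 * tick and has its priority recomputed by resetpriority() on
	 * every 4th increment, while a process that sleeps stops
	 * accumulating and its estimate decays in kern_synch.c, so it
	 * wins the cpu back from the hogs when it wakes up.
	 */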
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			      PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

static __inline unsigned
tco_delta(struct timecounter *tc)
{

	return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
	    tc->tc_counter_mask);
}
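
/*
 * Example of the masked subtraction above, using a hypothetical 16-bit
 * counter (tc_counter_mask = 0xffff): with tc_offset_count = 0xfff0 and
 * a current reading of 0x0005, (0x0005 - 0xfff0) & 0xffff = 0x15 = 21
 * counts, so the delta comes out right across the hardware wrap.
 */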

/*
 * We have eight functions for looking at the clock: four return the
 * wall-clock time and four return the time elapsed since boot, each in
 * a microsecond and a nanosecond flavour.  For each there is a fast but
 * less precise "get{nano|micro}[up]time" version, which returns a time
 * that may be up to 1/HZ previous to the call, whereas the raw
 * "{nano|micro}[up]time" version returns a timestamp which is as
 * precise as possible.
 */
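
/*
 * Minimal usage sketch (hypothetical function, not part of the kernel
 * proper): the "get" variant is cheap because it only copies a value
 * cached by tco_forward() at the last hardclock(), while the raw
 * variant pays for a read of the timecounter hardware to get full
 * precision.
 */
#if 0
static void
example_clock_reads(void)
{
	struct timeval coarse, precise;

	getmicrotime(&coarse);		/* may lag real time by up to 1/hz */
	microtime(&precise);		/* reads the hardware counter now */
}
#endif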

void
getmicrotime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tvp = tc->tc_microtime;
}

void
getnanotime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tsp = tc->tc_nanotime;
}

void
microtime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	tv->tv_usec += boottime.tv_usec;
	tv->tv_sec += boottime.tv_sec;
	while (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

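/*
 * A note on the arithmetic in nanotime() and its relatives: tc_offset_nano
 * is a 32.32 fixed-point count of nanoseconds, and tc_scale_nano_i /
 * tc_scale_nano_f hold the integer and fractional (32.32) parts of the
 * nanoseconds-per-count scale.  The multiply is done in two halves, adding
 * the fractional product to tc_offset_nano and shifting down before the
 * whole-nanosecond product is added, which keeps the 64-bit intermediates
 * in range for the counter rates seen in practice.
 */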
void
nanotime(struct timespec *ts)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

void
timecounter_timespec(unsigned count, struct timespec *ts)
{
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count -= tc->tc_offset_count;
	count &= tc->tc_counter_mask;
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	tvp->tv_sec = tc->tc_offset_sec;
	tvp->tv_usec = tc->tc_offset_micro;
}

void
getnanouptime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	tsp->tv_sec = tc->tc_offset_sec;
	tsp->tv_nsec = tc->tc_offset_nano >> 32;
}

void
microuptime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	if (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

void
nanouptime(struct timespec *tv)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	if (delta >= 1000000000) {
		delta -= 1000000000;
		tv->tv_sec++;
	}
	tv->tv_nsec = delta;
}

static void
tco_setscales(struct timecounter *tc)
{
	u_int64_t scale;

	scale = 1000000000LL << 32;
	if (tc->tc_adjustment > 0)
		scale += (tc->tc_adjustment * 1000LL) << 10;
	else
		scale -= (-tc->tc_adjustment * 1000LL) << 10;
	scale /= tc->tc_frequency;
	tc->tc_scale_micro = scale / 1000;
	tc->tc_scale_nano_f = scale & 0xffffffff;
	tc->tc_scale_nano_i = scale >> 32;
}
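
/*
 * Worked example (using the i8254 frequency of 1193182 Hz purely for
 * illustration, with tc_adjustment = 0): one count is about 838.096 ns,
 * so scale is roughly 838.096 * 2^32, giving tc_scale_nano_i = 838,
 * tc_scale_nano_f of about 0.096 * 2^32 and tc_scale_micro of about
 * 0.838 * 2^32.  A nonzero tc_adjustment skews the scale slightly, which
 * is how the NTP code trims the clock rate.
 */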

void
init_timecounter(struct timecounter *tc)
{
	struct timespec ts0, ts1;
	int i;

	tc->tc_adjustment = 0;
	tco_setscales(tc);
	tc->tc_offset_count = tc->tc_get_timecount(tc);
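	/*
	 * The caller provides an array of three identical structures:
	 * tc[0] remains the "tweak" copy which the sysctl handlers and
	 * the NTP code adjust, while tc[1] and tc[2] take turns as the
	 * generation published through the timecounter pointer by
	 * tco_forward()/sync_other_counter().
	 */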
	tc[0].tc_tweak = &tc[0];
	tc[2] = tc[1] = tc[0];
	tc[1].tc_other = &tc[2];
	tc[2].tc_other = &tc[1];
	if (!timecounter || !strcmp(timecounter->tc_name, "dummy"))
		timecounter = &tc[2];
	tc = &tc[1];

	/*
	 * Figure out the cost of calling this timecounter.
	 */
	nanotime(&ts0);
	for (i = 0; i < 256; i ++)
		tc->tc_get_timecount(tc);
	nanotime(&ts1);
	ts1.tv_sec -= ts0.tv_sec;
	tc->tc_cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
	tc->tc_cost >>= 8;
	if (print_tci && strcmp(tc->tc_name, "dummy"))
		printf("Timecounter \"%s\"  frequency %lu Hz  cost %u ns\n",
		    tc->tc_name, (u_long)tc->tc_frequency, tc->tc_cost);

	/* XXX: For now always start using the counter. */
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	nanouptime(&ts1);
	tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
	tc->tc_offset_micro = ts1.tv_nsec / 1000;
	tc->tc_offset_sec = ts1.tv_sec;
	timecounter = tc;
}

void
set_timecounter(struct timespec *ts)
{
	struct timespec ts2;

	nanouptime(&ts2);
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	/* fiddle all the little crinkly bits around the fiords... */
	tco_forward();
}


#if 0 /* Currently unused */
void
switch_timecounter(struct timecounter *newtc)
{
	int s;
	struct timecounter *tc;
	struct timespec ts;

	s = splclock();
	tc = timecounter;
	if (newtc == tc || newtc == tc->tc_other) {
		splx(s);
		return;
	}
	nanouptime(&ts);
	newtc->tc_offset_sec = ts.tv_sec;
	newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
	newtc->tc_offset_micro = ts.tv_nsec / 1000;
	newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
	timecounter = newtc;
	splx(s);
}
#endif

static struct timecounter *
sync_other_counter(void)
{
	struct timecounter *tc, *tcn, *tco;
	unsigned delta;

	tco = timecounter;
	tc = tco->tc_other;
	tcn = tc->tc_other;
	*tc = *tco;
	tc->tc_other = tcn;
	delta = tco_delta(tc);
	tc->tc_offset_count += delta;
	tc->tc_offset_count &= tc->tc_counter_mask;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
	return (tc);
}

static void
tco_forward(void)
{
	struct timecounter *tc, *tco;

	tco = timecounter;
	tc = sync_other_counter();
	/*
	 * We may be inducing a tiny error here: tc_poll_pps() may process
	 * a latched count which happens after the tco_delta() in
	 * sync_other_counter(), which would extend the previous counter's
	 * parameters into the domain of this new one.  Since the time
	 * window for this is very small, the error is going to be only a
	 * few weenieseconds (as Dave Mills would say), so let's just not
	 * talk more about it, OK?
	 */
	if (tco->tc_poll_pps)
		tco->tc_poll_pps(tco);
	if (timedelta != 0) {
		tc->tc_offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
		timedelta -= tickdelta;
	}

	while (tc->tc_offset_nano >= 1000000000ULL << 32) {
		tc->tc_offset_nano -= 1000000000ULL << 32;
		tc->tc_offset_sec++;
		tc->tc_frequency = tc->tc_tweak->tc_frequency;
		tc->tc_adjustment = tc->tc_tweak->tc_adjustment;
		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
		tco_setscales(tc);
	}

	tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

	/* Figure out the wall-clock time */
	tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
	tc->tc_nanotime.tv_nsec =
	    (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
	tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
	if (tc->tc_nanotime.tv_nsec >= 1000000000) {
		tc->tc_nanotime.tv_nsec -= 1000000000;
		tc->tc_microtime.tv_usec -= 1000000;
		tc->tc_nanotime.tv_sec++;
	}
	time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

	timecounter = tc;
}

static int
sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_frequency,
	    sizeof(timecounter->tc_tweak->tc_frequency), req));
}

static int
sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_adjustment,
	    sizeof(timecounter->tc_tweak->tc_adjustment), req));
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", "");

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * timeservices.
 */

static unsigned
dummy_get_timecount(struct timecounter *tc)
{
	static unsigned now;
	return (++now);
}

static struct timecounter dummy_timecounter[3] = {
	{
		dummy_get_timecount,
		0,
		~0u,
		1000000,
		"dummy"
	}
};

static void
initdummytimecounter(void *dummy)
{
	init_timecounter(dummy_timecounter);
}

SYSINIT(dummytc, SI_SUB_CONSOLE, SI_ORDER_FIRST, initdummytimecounter, NULL)