kern_tc.c revision 38129
static volatile int print_tci = 1;

/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 * $Id: kern_clock.c,v 1.77 1998/07/11 07:45:39 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward __P((void));
static void tco_setscales __P((struct timecounter *tc));
static __inline unsigned tco_delta __P((struct timecounter *tc));

/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif
long dk_seek[DK_NDRIVE];
static long dk_time[DK_NDRIVE];	/* time busy (in statclock ticks) */
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

int dk_busy;
int dk_ndrive = 0;
char dk_names[DK_NDRIVE][DK_NAMELEN];

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

struct timecounter *timecounter;

time_t time_second;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

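/*
 * To make the divider concrete (illustrative numbers only; the real values
 * are platform dependent): with hz = 100, stathz = 128 and profhz = 1024,
 * initclocks() below computes psratio = profhz / stathz = 8.  While any
 * process is being profiled the statistics clock is cranked up to profhz;
 * every tick is then used for profiling, but only every 8th tick (pscnt
 * reaching 0) is charged to the cpu-state and process statistics.
 */
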
int	stathz;
int	profhz;
static int profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
	void *dummy;
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct proc *p;

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

#if defined(SMP) && defined(BETTER_CLOCK)
	forward_hardclock(pscnt);
#endif

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	tco_forward();
	ticks++;

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	} else if (softticks + 1 == ticks)
		++softticks;
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(tv)
	struct timeval *tv;
{
	register unsigned long ticks;
	register long sec, usec;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints have 32 bits, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
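	/*
	 * Worked example (illustrative only, assuming hz = 100 so that
	 * tick = 10000 usec): for tv = 0.025000 s the first branch gives
	 * (25000 + 9999) / 10000 + 1 = 3 + 1 = 4 ticks, i.e. the timeout
	 * is rounded up and padded by one tick to cover the partial tick
	 * already in progress.
	 */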
	sec = tv->tv_sec;
	usec = tv->tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}
	if (sec < 0) {
#ifdef DIAGNOSTIC
		if (usec > 0) {
			sec++;
			usec -= 1000000;
		}
		printf("tvtohz: negative time difference %ld sec %ld usec\n",
		       sec, usec);
#endif
		ticks = 1;
	} else if (sec <= LONG_MAX / 1000000)
		ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
			/ tick + 1;
	else if (sec <= LONG_MAX / hz)
		ticks = sec * hz
			+ ((unsigned long)usec + (tick - 1)) / tick + 1;
	else
		ticks = LONG_MAX;
	if (ticks > INT_MAX)
		ticks = INT_MAX;
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
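	/*
	 * Roughly: once a second, schedcpu() in kern_synch.c scales
	 * p_estcpu by about 2*loadav / (2*loadav + 1), which is where the
	 * "90% forgotten in 5*loadav seconds" figure above comes from.
	 */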
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			      PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

static __inline unsigned
tco_delta(struct timecounter *tc)
{

	return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * We have four functions for looking at the clock, two for microseconds
 * and two for nanoseconds.  For each there is a fast but less precise
 * version "get{nano|micro}time" which will return a time which is up
 * to 1/HZ previous to the call, whereas the raw version "{nano|micro}time"
 * will return a timestamp which is as precise as possible.
 */
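
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * code that only needs to timestamp an event can settle for the cheap
 * snapshot, while code timing a short interval should pay for the
 * precise read:
 *
 *	struct timeval tv;
 *	struct timespec t0, t1;
 *
 *	getmicrotime(&tv);	cheap, may lag real time by up to 1/HZ
 *	nanouptime(&t0);
 *	do_something();		hypothetical work being timed
 *	nanouptime(&t1);	precise, monotonic since boot
 */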

void
getmicrotime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tvp = tc->tc_microtime;
}

void
getnanotime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tsp = tc->tc_nanotime;
}

void
microtime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	tv->tv_usec += boottime.tv_usec;
	tv->tv_sec += boottime.tv_sec;
	while (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

void
nanotime(struct timespec *ts)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

void
timecounter_timespec(unsigned count, struct timespec *ts)
{
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count -= tc->tc_offset_count;
	count &= tc->tc_counter_mask;
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	tvp->tv_sec = tc->tc_offset_sec;
	tvp->tv_usec = tc->tc_offset_micro;
}

void
getnanouptime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	tsp->tv_sec = tc->tc_offset_sec;
	tsp->tv_nsec = tc->tc_offset_nano >> 32;
}

void
microuptime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	if (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

void
nanouptime(struct timespec *tv)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	if (delta >= 1000000000) {
		delta -= 1000000000;
		tv->tv_sec++;
	}
	tv->tv_nsec = delta;
}

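/*
 * A worked example of the fixed-point scales computed below, assuming a
 * counter at the i8254's nominal 1193182 Hz (illustrative only, with
 * tc_adjustment = 0): scale = (10^9 << 32) / 1193182, so the pair
 * tc_scale_nano_i.tc_scale_nano_f is the 32.32 fixed-point number of
 * nanoseconds per count (about 838.1), and tc_scale_micro = scale / 1000
 * is microseconds per count scaled by 2^32 (about 0.8381 * 2^32), which
 * microtime() multiplies by the count delta and shifts right by 32.
 */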
static void
tco_setscales(struct timecounter *tc)
{
	u_int64_t scale;

	scale = 1000000000LL << 32;
	if (tc->tc_adjustment > 0)
		scale += (tc->tc_adjustment * 1000LL) << 10;
	else
		scale -= (-tc->tc_adjustment * 1000LL) << 10;
	scale /= tc->tc_frequency;
	tc->tc_scale_micro = scale / 1000;
	tc->tc_scale_nano_f = scale & 0xffffffff;
	tc->tc_scale_nano_i = scale >> 32;
}

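/*
 * Register a new timecounter.  Note that the code below copies tc[0]
 * into tc[1] and tc[2], so callers must hand in an array of (at least)
 * three timecounter structures; the two extra copies are alternated
 * between by tco_forward() so that readers always see a consistent
 * snapshot (compare dummy_timecounter[3] at the bottom of this file).
 */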
void
init_timecounter(struct timecounter *tc)
{
	struct timespec ts0, ts1;
	int i;

	tc->tc_adjustment = 0;
	tco_setscales(tc);
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	tc[0].tc_tweak = &tc[0];
	tc[2] = tc[1] = tc[0];
	tc[1].tc_other = &tc[2];
	tc[2].tc_other = &tc[1];
	if (!timecounter || !strcmp(timecounter->tc_name, "dummy"))
		timecounter = &tc[2];
	tc = &tc[1];

	/*
	 * Figure out the cost of calling this timecounter.
	 */
	nanotime(&ts0);
	for (i = 0; i < 256; i ++)
		tc->tc_get_timecount(tc);
	nanotime(&ts1);
	ts1.tv_sec -= ts0.tv_sec;
	tc->tc_cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
	tc->tc_cost >>= 8;
	if (print_tci && strcmp(tc->tc_name, "dummy"))
		printf("Timecounter \"%s\"  frequency %lu Hz  cost %u ns\n",
		    tc->tc_name, (u_long)tc->tc_frequency, tc->tc_cost);

	/* XXX: For now always start using the counter. */
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	nanouptime(&ts1);
	tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
	tc->tc_offset_micro = ts1.tv_nsec / 1000;
	tc->tc_offset_sec = ts1.tv_sec;
	timecounter = tc;
}

void
set_timecounter(struct timespec *ts)
{
	struct timespec ts2;

	nanouptime(&ts2);
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	/* fiddle all the little crinkly bits around the fiords... */
	tco_forward();
}


#if 0 /* Currently unused */
void
switch_timecounter(struct timecounter *newtc)
{
	int s;
	struct timecounter *tc;
	struct timespec ts;

	s = splclock();
	tc = timecounter;
	if (newtc == tc || newtc == tc->tc_other) {
		splx(s);
		return;
	}
	nanouptime(&ts);
	newtc->tc_offset_sec = ts.tv_sec;
	newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
	newtc->tc_offset_micro = ts.tv_nsec / 1000;
	newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
	timecounter = newtc;
	splx(s);
}
#endif

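/*
 * Copy the state of the currently published timecounter into its "other"
 * half and advance the copy's offsets to the present, so that tco_forward()
 * can finish updating the copy and then publish it with a single pointer
 * assignment; readers that already grabbed the old pointer keep seeing an
 * untouched, consistent structure.
 */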
static struct timecounter *
sync_other_counter(void)
{
	struct timecounter *tc, *tcn, *tco;
	unsigned delta;

	tco = timecounter;
	tc = tco->tc_other;
	tcn = tc->tc_other;
	*tc = *tco;
	tc->tc_other = tcn;
	delta = tco_delta(tc);
	tc->tc_offset_count += delta;
	tc->tc_offset_count &= tc->tc_counter_mask;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
	return (tc);
}

static void
tco_forward(void)
{
	struct timecounter *tc, *tco;

	tco = timecounter;
	tc = sync_other_counter();
	/*
	 * We may be inducing a tiny error here: the tc_poll_pps() may
	 * process a latched count which happens after the tco_delta()
	 * in sync_other_counter(), which would extend the previous
	 * counter's parameters into the domain of this new one.
	 * Since the time window is very small for this, the error is
	 * going to be only a few weenieseconds (as Dave Mills would
	 * say), so let's just not talk more about it, OK?
	 */
	if (tco->tc_poll_pps)
		tco->tc_poll_pps(tco);
	if (timedelta != 0) {
		tc->tc_offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
		timedelta -= tickdelta;
	}

	while (tc->tc_offset_nano >= 1000000000ULL << 32) {
		tc->tc_offset_nano -= 1000000000ULL << 32;
		tc->tc_offset_sec++;
		tc->tc_frequency = tc->tc_tweak->tc_frequency;
		tc->tc_adjustment = tc->tc_tweak->tc_adjustment;
		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
		tco_setscales(tc);
	}

	tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

	/* Figure out the wall-clock time */
	tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
	tc->tc_nanotime.tv_nsec =
	    (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
	tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
	if (tc->tc_nanotime.tv_nsec >= 1000000000) {
		tc->tc_nanotime.tv_nsec -= 1000000000;
		tc->tc_microtime.tv_usec -= 1000000;
		tc->tc_nanotime.tv_sec++;
	}
	time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

	timecounter = tc;
}

static int
sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_frequency,
	    sizeof(timecounter->tc_tweak->tc_frequency), req));
}

static int
sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_adjustment,
	    sizeof(timecounter->tc_tweak->tc_adjustment), req));
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", "");

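/*
 * The two handlers above appear to userland as kern.timecounter.frequency
 * and kern.timecounter.adjustment (the names follow from the SYSCTL_NODE
 * and SYSCTL_PROC declarations); for example, "sysctl -w
 * kern.timecounter.frequency=..." can be used to trim a counter whose
 * nominal frequency is slightly off.
 */
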
/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * timeservices.
 */

static unsigned
dummy_get_timecount(struct timecounter *tc)
{
	static unsigned now;
	return (++now);
}

static struct timecounter dummy_timecounter[3] = {
	{
		dummy_get_timecount,
		0,
		~0u,
		1000000,
		"dummy"
	}
};

static void
initdummytimecounter(void *dummy)
{
	init_timecounter(dummy_timecounter);
}

SYSINIT(dummytc, SI_SUB_CONSOLE, SI_ORDER_FIRST, initdummytimecounter, NULL)
854