kern_tc.c revision 38129
static volatile int print_tci = 1;

/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_clock.c        8.5 (Berkeley) 1/21/94
 * $Id: kern_clock.c,v 1.77 1998/07/11 07:45:39 bde Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

static void tco_forward __P((void));
static void tco_setscales __P((struct timecounter *tc));
static __inline unsigned tco_delta __P((struct timecounter *tc));
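/*
 * The __P() macro (from <sys/cdefs.h>) lets these declarations serve both
 * compiler families: on an ANSI compiler "__P((struct timecounter *tc))"
 * expands to the full prototype, on a traditional K&R compiler to an
 * empty parameter list "()".
 */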
/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif
long dk_seek[DK_NDRIVE];
static long dk_time[DK_NDRIVE]; /* time busy (in statclock ticks) */
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

int dk_busy;
int dk_ndrive = 0;
char dk_names[DK_NDRIVE][DK_NAMELEN];

long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

struct timecounter *timecounter;

time_t time_second;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
 */

int stathz;
int profhz;
static int profprocs;
int ticks;
static int psdiv, pscnt;        /* prof => stat divider */
int psratio;                    /* ratio: prof / stat */

/*
 * Initialize clock frequencies and start both clocks running.
 */
/* ARGSUSED*/
static void
initclocks(dummy)
        void *dummy;
{
        register int i;

        /*
         * Set divisors to 1 (normal case) and let the machine-specific
         * code do its bit.
         */
        psdiv = pscnt = 1;
        cpu_initclocks();

        /*
         * Compute profhz/stathz, and fix profhz if needed.
         */
        i = stathz ? stathz : hz;
        if (profhz == 0)
                profhz = i;
        psratio = profhz / i;
}
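/*
 * Worked example for initclocks() (illustrative values): with stathz = 128
 * and profhz = 1024, psratio = 1024 / 128 = 8, so while profiling is
 * active the statistics clock interrupts 8 times faster and only every
 * 8th tick is charged to statistics.
 */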
/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
        register struct clockframe *frame;
{
        register struct proc *p;

        p = curproc;
        if (p) {
                register struct pstats *pstats;

                /*
                 * Run current process's virtual and profile time, as needed.
                 */
                pstats = p->p_stats;
                if (CLKF_USERMODE(frame) &&
                    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
                        psignal(p, SIGVTALRM);
                if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
                    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
                        psignal(p, SIGPROF);
        }

#if defined(SMP) && defined(BETTER_CLOCK)
        forward_hardclock(pscnt);
#endif

        /*
         * If no separate statistics clock is available, run it from here.
         */
        if (stathz == 0)
                statclock(frame);

        tco_forward();
        ticks++;

        /*
         * Process callouts at a very low cpu priority, so we don't keep the
         * relatively high clock interrupt priority any longer than necessary.
         */
        if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) {
                if (CLKF_BASEPRI(frame)) {
                        /*
                         * Save the overhead of a software interrupt;
                         * it will happen as soon as we return, so do it now.
                         */
                        (void)splsoftclock();
                        softclock();
                } else
                        setsoftclock();
        } else if (softticks + 1 == ticks)
                ++softticks;
}

/*
 * Compute number of ticks in the specified amount of time.
 */
int
tvtohz(tv)
        struct timeval *tv;
{
        register unsigned long ticks;
        register long sec, usec;

        /*
         * If the number of usecs in the whole seconds part of the time
         * difference fits in a long, then the total number of usecs will
         * fit in an unsigned long.  Compute the total and convert it to
         * ticks, rounding up and adding 1 to allow for the current tick
         * to expire.  Rounding also depends on unsigned long arithmetic
         * to avoid overflow.
         *
         * Otherwise, if the number of ticks in the whole seconds part of
         * the time difference fits in a long, then convert the parts to
         * ticks separately and add, using similar rounding methods and
         * overflow avoidance.  This method would work in the previous
         * case but it is slightly slower and assumes that hz is integral.
         *
         * Otherwise, round the time difference down to the maximum
         * representable value.
         *
         * If ints have 32 bits, then the maximum value for any timeout in
         * 10ms ticks is 248 days.
         */
        sec = tv->tv_sec;
        usec = tv->tv_usec;
        if (usec < 0) {
                sec--;
                usec += 1000000;
        }
        if (sec < 0) {
#ifdef DIAGNOSTIC
                if (usec > 0) {
                        sec++;
                        usec -= 1000000;
                }
                printf("tvtohz: negative time difference %ld sec %ld usec\n",
                       sec, usec);
#endif
                ticks = 1;
        } else if (sec <= LONG_MAX / 1000000)
                ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
                        / tick + 1;
        else if (sec <= LONG_MAX / hz)
                ticks = sec * hz
                        + ((unsigned long)usec + (tick - 1)) / tick + 1;
        else
                ticks = LONG_MAX;
        if (ticks > INT_MAX)
                ticks = INT_MAX;
        return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
        register struct proc *p;
{
        int s;

        if ((p->p_flag & P_PROFIL) == 0) {
                p->p_flag |= P_PROFIL;
                if (++profprocs == 1 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = psratio;
                        setstatclockrate(profhz);
                        splx(s);
                }
        }
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
        register struct proc *p;
{
        int s;

        if (p->p_flag & P_PROFIL) {
                p->p_flag &= ~P_PROFIL;
                if (--profprocs == 0 && stathz != 0) {
                        s = splstatclock();
                        psdiv = pscnt = 1;
                        setstatclockrate(stathz);
                        splx(s);
                }
        }
}
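/*
 * Continuing the example above (illustrative values): the first
 * startprofclock() call reprograms the statistics clock from stathz
 * (128 Hz) to profhz (1024 Hz) and sets psdiv = pscnt = psratio = 8;
 * the last stopprofclock() call restores 128 Hz and a divider of 1.
 */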
/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
        register struct clockframe *frame;
{
#ifdef GPROF
        register struct gmonparam *g;
#endif
        register struct proc *p;
        register int i;
        struct pstats *pstats;
        long rss;
        struct rusage *ru;
        struct vmspace *vm;

        if (CLKF_USERMODE(frame)) {
                p = curproc;
                if (p->p_flag & P_PROFIL)
                        addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
                if (stathz != 0)
                        forward_statclock(pscnt);
#endif
                if (--pscnt > 0)
                        return;
                /*
                 * Came from user mode; CPU was in user state.
                 * If this process is being profiled record the tick.
                 */
                p->p_uticks++;
                if (p->p_nice > NZERO)
                        cp_time[CP_NICE]++;
                else
                        cp_time[CP_USER]++;
        } else {
#ifdef GPROF
                /*
                 * Kernel statistics are just like addupc_intr, only easier.
                 */
                g = &_gmonparam;
                if (g->state == GMON_PROF_ON) {
                        i = CLKF_PC(frame) - g->lowpc;
                        if (i < g->textsize) {
                                i /= HISTFRACTION * sizeof(*g->kcount);
                                g->kcount[i]++;
                        }
                }
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
                if (stathz != 0)
                        forward_statclock(pscnt);
#endif
                if (--pscnt > 0)
                        return;
                /*
                 * Came from kernel mode, so we were:
                 * - handling an interrupt,
                 * - doing syscall or trap work on behalf of the current
                 *   user process, or
                 * - spinning in the idle loop.
                 * Whichever it is, charge the time as appropriate.
                 * Note that we charge interrupts to the current process,
                 * regardless of whether they are ``for'' that process,
                 * so that we know how much of its real time was spent
                 * in ``non-process'' (i.e., interrupt) work.
                 */
                p = curproc;
                if (CLKF_INTR(frame)) {
                        if (p != NULL)
                                p->p_iticks++;
                        cp_time[CP_INTR]++;
                } else if (p != NULL) {
                        p->p_sticks++;
                        cp_time[CP_SYS]++;
                } else
                        cp_time[CP_IDLE]++;
        }
        pscnt = psdiv;

        /*
         * We maintain statistics shown by user-level statistics
         * programs:  the amount of time in each cpu state, and
         * the amount of time each of DK_NDRIVE ``drives'' is busy.
         *
         * XXX should either run linked list of drives, or (better)
         * grab timestamps in the start & done code.
         */
        for (i = 0; i < DK_NDRIVE; i++)
                if (dk_busy & (1 << i))
                        dk_time[i]++;

        /*
         * We adjust the priority of the current process.  The priority of
         * a process gets worse as it accumulates CPU time.  The cpu usage
         * estimator (p_estcpu) is increased here.  The formula for computing
         * priorities (in kern_synch.c) will compute a different value each
         * time p_estcpu increases by 4.  The cpu usage estimator ramps up
         * quite quickly when the process is running (linearly), and decays
         * away exponentially, at a rate which is proportionally slower when
         * the system is busy.  The basic principle is that the system will
         * 90% forget that the process used a lot of CPU time in 5 * loadav
         * seconds.  This causes the system to favor processes which haven't
         * run much recently, and to round-robin among other processes.
         */
        if (p != NULL) {
                p->p_cpticks++;
                if (++p->p_estcpu == 0)
                        p->p_estcpu--;
                if ((p->p_estcpu & 3) == 0) {
                        resetpriority(p);
                        if (p->p_priority >= PUSER)
                                p->p_priority = p->p_usrpri;
                }

                /* Update resource usage integrals and maximums. */
                if ((pstats = p->p_stats) != NULL &&
                    (ru = &pstats->p_ru) != NULL &&
                    (vm = p->p_vmspace) != NULL) {
                        ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
                        ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
                        ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
                        rss = vm->vm_pmap.pm_stats.resident_count *
                              PAGE_SIZE / 1024;
                        if (ru->ru_maxrss < rss)
                                ru->ru_maxrss = rss;
                }
        }
}
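/*
 * Example for the resource-usage update above (illustrative, assuming
 * 4096-byte pages): a process with resident_count = 300 pages yields
 * rss = 300 * 4096 / 1024 = 1200 KB, and ru_maxrss keeps the largest
 * such value seen at statclock time.
 */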
/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
        struct clockinfo clkinfo;
        /*
         * Construct clockinfo structure.
         */
        clkinfo.hz = hz;
        clkinfo.tick = tick;
        clkinfo.tickadj = tickadj;
        clkinfo.profhz = profhz;
        clkinfo.stathz = stathz ? stathz : hz;
        return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
    0, 0, sysctl_kern_clockrate, "S,clockinfo", "");

static __inline unsigned
tco_delta(struct timecounter *tc)
{

        return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
            tc->tc_counter_mask);
}

/*
 * We have four functions for looking at the clock, two for microseconds
 * and two for nanoseconds.  For each there is a fast but less precise
 * version "get{nano|micro}time" which will return a time which is up
 * to 1/HZ previous to the call, whereas the raw version "{nano|micro}time"
 * will return a timestamp which is as precise as possible.
 */

void
getmicrotime(struct timeval *tvp)
{
        struct timecounter *tc;

        tc = timecounter;
        *tvp = tc->tc_microtime;
}

void
getnanotime(struct timespec *tsp)
{
        struct timecounter *tc;

        tc = timecounter;
        *tsp = tc->tc_nanotime;
}

void
microtime(struct timeval *tv)
{
        struct timecounter *tc;

        tc = (struct timecounter *)timecounter;
        tv->tv_sec = tc->tc_offset_sec;
        tv->tv_usec = tc->tc_offset_micro;
        tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
        tv->tv_usec += boottime.tv_usec;
        tv->tv_sec += boottime.tv_sec;
        while (tv->tv_usec >= 1000000) {
                tv->tv_usec -= 1000000;
                tv->tv_sec++;
        }
}

void
nanotime(struct timespec *ts)
{
        unsigned count;
        u_int64_t delta;
        struct timecounter *tc;

        tc = (struct timecounter *)timecounter;
        ts->tv_sec = tc->tc_offset_sec;
        count = tco_delta(tc);
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)count * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)count * tc->tc_scale_nano_i);
        delta += boottime.tv_usec * 1000;
        ts->tv_sec += boottime.tv_sec;
        while (delta >= 1000000000) {
                delta -= 1000000000;
                ts->tv_sec++;
        }
        ts->tv_nsec = delta;
}

void
timecounter_timespec(unsigned count, struct timespec *ts)
{
        u_int64_t delta;
        struct timecounter *tc;

        tc = (struct timecounter *)timecounter;
        ts->tv_sec = tc->tc_offset_sec;
        count -= tc->tc_offset_count;
        count &= tc->tc_counter_mask;
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)count * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)count * tc->tc_scale_nano_i);
        delta += boottime.tv_usec * 1000;
        ts->tv_sec += boottime.tv_sec;
        while (delta >= 1000000000) {
                delta -= 1000000000;
                ts->tv_sec++;
        }
        ts->tv_nsec = delta;
}
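/*
 * Usage sketch (illustrative): callers that can live with up-to-1/HZ
 * staleness should take the cheap snapshot,
 *
 *      struct timeval tv;
 *
 *      getmicrotime(&tv);
 *
 * while callers that need full precision pay for a hardware counter
 * read with microtime(&tv) or nanotime(&ts).
 */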
void
getmicrouptime(struct timeval *tvp)
{
        struct timecounter *tc;

        tc = timecounter;
        tvp->tv_sec = tc->tc_offset_sec;
        tvp->tv_usec = tc->tc_offset_micro;
}

void
getnanouptime(struct timespec *tsp)
{
        struct timecounter *tc;

        tc = timecounter;
        tsp->tv_sec = tc->tc_offset_sec;
        tsp->tv_nsec = tc->tc_offset_nano >> 32;
}

void
microuptime(struct timeval *tv)
{
        struct timecounter *tc;

        tc = (struct timecounter *)timecounter;
        tv->tv_sec = tc->tc_offset_sec;
        tv->tv_usec = tc->tc_offset_micro;
        tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
        if (tv->tv_usec >= 1000000) {
                tv->tv_usec -= 1000000;
                tv->tv_sec++;
        }
}

void
nanouptime(struct timespec *tv)
{
        unsigned count;
        u_int64_t delta;
        struct timecounter *tc;

        tc = (struct timecounter *)timecounter;
        tv->tv_sec = tc->tc_offset_sec;
        count = tco_delta(tc);
        delta = tc->tc_offset_nano;
        delta += ((u_int64_t)count * tc->tc_scale_nano_f);
        delta >>= 32;
        delta += ((u_int64_t)count * tc->tc_scale_nano_i);
        if (delta >= 1000000000) {
                delta -= 1000000000;
                tv->tv_sec++;
        }
        tv->tv_nsec = delta;
}

static void
tco_setscales(struct timecounter *tc)
{
        u_int64_t scale;

        scale = 1000000000LL << 32;
        if (tc->tc_adjustment > 0)
                scale += (tc->tc_adjustment * 1000LL) << 10;
        else
                scale -= (-tc->tc_adjustment * 1000LL) << 10;
        scale /= tc->tc_frequency;
        tc->tc_scale_micro = scale / 1000;
        tc->tc_scale_nano_f = scale & 0xffffffff;
        tc->tc_scale_nano_i = scale >> 32;
}

void
init_timecounter(struct timecounter *tc)
{
        struct timespec ts0, ts1;
        int i;

        tc->tc_adjustment = 0;
        tco_setscales(tc);
        tc->tc_offset_count = tc->tc_get_timecount(tc);
        tc[0].tc_tweak = &tc[0];
        tc[2] = tc[1] = tc[0];
        tc[1].tc_other = &tc[2];
        tc[2].tc_other = &tc[1];
        if (!timecounter || !strcmp(timecounter->tc_name, "dummy"))
                timecounter = &tc[2];
        tc = &tc[1];

        /*
         * Figure out the cost of calling this timecounter.
         */
        nanotime(&ts0);
        for (i = 0; i < 256; i++)
                tc->tc_get_timecount(tc);
        nanotime(&ts1);
        ts1.tv_sec -= ts0.tv_sec;
        tc->tc_cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
        tc->tc_cost >>= 8;
        if (print_tci && strcmp(tc->tc_name, "dummy"))
                printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n",
                    tc->tc_name, (u_long)tc->tc_frequency, tc->tc_cost);

        /* XXX: For now always start using the counter. */
        tc->tc_offset_count = tc->tc_get_timecount(tc);
        nanouptime(&ts1);
        tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
        tc->tc_offset_micro = ts1.tv_nsec / 1000;
        tc->tc_offset_sec = ts1.tv_sec;
        timecounter = tc;
}

void
set_timecounter(struct timespec *ts)
{
        struct timespec ts2;

        nanouptime(&ts2);
        boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
        boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
        if (boottime.tv_usec < 0) {
                boottime.tv_usec += 1000000;
                boottime.tv_sec--;
        }
        /* fiddle all the little crinkly bits around the fiords... */
        tco_forward();
}
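/*
 * Fixed-point example for tco_setscales() above (illustrative, assuming
 * the 1193182 Hz i8254 with tc_adjustment == 0):
 *
 *      scale = (1000000000 << 32) / 1193182
 *
 * is about 838.096 ns per count in 32.32 format, so tc_scale_nano_i
 * holds the integer part (838), tc_scale_nano_f the 32-bit fraction,
 * and tc_scale_micro = scale / 1000 converts counts to microseconds.
 */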
#if 0 /* Currently unused */
void
switch_timecounter(struct timecounter *newtc)
{
        int s;
        struct timecounter *tc;
        struct timespec ts;

        s = splclock();
        tc = timecounter;
        if (newtc == tc || newtc == tc->tc_other) {
                splx(s);
                return;
        }
        nanouptime(&ts);
        newtc->tc_offset_sec = ts.tv_sec;
        newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
        newtc->tc_offset_micro = ts.tv_nsec / 1000;
        newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
        timecounter = newtc;
        splx(s);
}
#endif

static struct timecounter *
sync_other_counter(void)
{
        struct timecounter *tc, *tcn, *tco;
        unsigned delta;

        tco = timecounter;
        tc = tco->tc_other;
        tcn = tc->tc_other;
        *tc = *tco;
        tc->tc_other = tcn;
        delta = tco_delta(tc);
        tc->tc_offset_count += delta;
        tc->tc_offset_count &= tc->tc_counter_mask;
        tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
        tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
        return (tc);
}

static void
tco_forward(void)
{
        struct timecounter *tc, *tco;

        tco = timecounter;
        tc = sync_other_counter();
        /*
         * We may be inducing a tiny error here: tc_poll_pps() may
         * process a latched count which happens after the tco_delta()
         * in sync_other_counter(), which would extend the previous
         * counter's parameters into the domain of this new one.
         * Since the time window is very small for this, the error is
         * going to be only a few weenieseconds (as Dave Mills would
         * say), so let's just not talk more about it, OK?
         */
        if (tco->tc_poll_pps)
                tco->tc_poll_pps(tco);
        if (timedelta != 0) {
                tc->tc_offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
                timedelta -= tickdelta;
        }

        while (tc->tc_offset_nano >= 1000000000ULL << 32) {
                tc->tc_offset_nano -= 1000000000ULL << 32;
                tc->tc_offset_sec++;
                tc->tc_frequency = tc->tc_tweak->tc_frequency;
                tc->tc_adjustment = tc->tc_tweak->tc_adjustment;
                ntp_update_second(tc);  /* XXX only needed if xntpd runs */
                tco_setscales(tc);
        }

        tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

        /* Figure out the wall-clock time */
        tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
        tc->tc_nanotime.tv_nsec =
            (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
        tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
        if (tc->tc_nanotime.tv_nsec >= 1000000000) {
                tc->tc_nanotime.tv_nsec -= 1000000000;
                tc->tc_microtime.tv_usec -= 1000000;
                tc->tc_nanotime.tv_sec++;
        }
        time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

        timecounter = tc;
}

static int
sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
{

        return (sysctl_handle_opaque(oidp,
            &timecounter->tc_tweak->tc_frequency,
            sizeof(timecounter->tc_tweak->tc_frequency), req));
}

static int
sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
{

        return (sysctl_handle_opaque(oidp,
            &timecounter->tc_tweak->tc_adjustment,
            sizeof(timecounter->tc_tweak->tc_adjustment), req));
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", "");
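/*
 * Note on the three-element timecounter arrays set up in
 * init_timecounter(): tc[0] is the "tweak" copy that the sysctls and
 * NTP adjust, while tc[1] and tc[2] alternate as the live counter.
 * tco_forward() has sync_other_counter() rebuild the inactive element
 * and then publishes it with a single store to "timecounter", so a
 * reader such as microtime() never sees a half-updated structure.
 */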
/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static unsigned
dummy_get_timecount(struct timecounter *tc)
{
        static unsigned now;

        return (++now);
}

static struct timecounter dummy_timecounter[3] = {
        {
                dummy_get_timecount,
                0,
                ~0u,
                1000000,
                "dummy"
        }
};

static void
initdummytimecounter(void *dummy)
{
        init_timecounter(dummy_timecounter);
}

SYSINIT(dummytc, SI_SUB_CONSOLE, SI_ORDER_FIRST, initdummytimecounter, NULL)
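/*
 * Registration sketch (illustrative; "foo" and its 3579545 Hz frequency
 * are made-up values, not part of this file): a hardware driver supplies
 * a 3-element array like dummy_timecounter[] above and hands it to
 * init_timecounter() once the counter frequency is known:
 *
 *      static unsigned foo_get_timecount(struct timecounter *tc);
 *
 *      static struct timecounter foo_timecounter[3] = {
 *              { foo_get_timecount, 0, ~0u, 3579545, "foo" }
 *      };
 *
 *      init_timecounter(foo_timecounter);
 */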