/* kern_tc.c, revision 37555 */
/* Set to 0 to suppress the per-timecounter banner printed by init_timecounter(). */
static volatile int print_tci = 1;

/*-
 * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 *	$Id: kern_clock.c,v 1.76 1998/07/04 19:29:15 phk Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/timex.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/limits.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

#if defined(SMP) && defined(BETTER_CLOCK)
#include <machine/smp.h>
#endif

/* Run initclocks() at clock-subsystem SYSINIT time. */
static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

/* Forward declarations for the timecounter machinery defined below. */
static void tco_forward __P((void));
static void tco_setscales __P((struct timecounter *tc));
static __inline unsigned tco_delta __P((struct timecounter *tc));

/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
#else
static long cp_time[CPUSTATES];
#endif
long dk_seek[DK_NDRIVE];
static long dk_time[DK_NDRIVE];	/* time busy (in statclock ticks) */
long dk_wds[DK_NDRIVE];
long dk_wpms[DK_NDRIVE];
long dk_xfer[DK_NDRIVE];

int dk_busy;			/* bitmask of busy drives; sampled in statclock() */
int dk_ndrive = 0;
char dk_names[DK_NDRIVE][DK_NAMELEN];

/* TTY character-count statistics. */
long tk_cancc;
long tk_nin;
long tk_nout;
long tk_rawcc;

/* The currently selected timecounter; updated once per tick by tco_forward(). */
struct timecounter *timecounter;

/* Wall-clock seconds; refreshed each tick in tco_forward(). */
time_t time_second;

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.
 *
 * The main timer, running hz times per second, is used to trigger interval
 * timers, timeouts and rescheduling as needed.
 *
 * The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 *
 * Time-of-day is maintained using a "timecounter", which may or may
 * not be related to the hardware generating the above mentioned
 * interrupts.
133 */ 134 135int stathz; 136int profhz; 137static int profprocs; 138int ticks; 139static int psdiv, pscnt; /* prof => stat divider */ 140int psratio; /* ratio: prof / stat */ 141 142/* 143 * Initialize clock frequencies and start both clocks running. 144 */ 145/* ARGSUSED*/ 146static void 147initclocks(dummy) 148 void *dummy; 149{ 150 register int i; 151 152 /* 153 * Set divisors to 1 (normal case) and let the machine-specific 154 * code do its bit. 155 */ 156 psdiv = pscnt = 1; 157 cpu_initclocks(); 158 159 /* 160 * Compute profhz/stathz, and fix profhz if needed. 161 */ 162 i = stathz ? stathz : hz; 163 if (profhz == 0) 164 profhz = i; 165 psratio = profhz / i; 166} 167 168/* 169 * The real-time timer, interrupting hz times per second. 170 */ 171void 172hardclock(frame) 173 register struct clockframe *frame; 174{ 175 register struct proc *p; 176 177 p = curproc; 178 if (p) { 179 register struct pstats *pstats; 180 181 /* 182 * Run current process's virtual and profile time, as needed. 183 */ 184 pstats = p->p_stats; 185 if (CLKF_USERMODE(frame) && 186 timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && 187 itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) 188 psignal(p, SIGVTALRM); 189 if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && 190 itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) 191 psignal(p, SIGPROF); 192 } 193 194#if defined(SMP) && defined(BETTER_CLOCK) 195 forward_hardclock(pscnt); 196#endif 197 198 /* 199 * If no separate statistics clock is available, run it from here. 200 */ 201 if (stathz == 0) 202 statclock(frame); 203 204 tco_forward(); 205 ticks++; 206 207 /* 208 * Process callouts at a very low cpu priority, so we don't keep the 209 * relatively high clock interrupt priority any longer than necessary. 
210 */ 211 if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { 212 if (CLKF_BASEPRI(frame)) { 213 /* 214 * Save the overhead of a software interrupt; 215 * it will happen as soon as we return, so do it now. 216 */ 217 (void)splsoftclock(); 218 softclock(); 219 } else 220 setsoftclock(); 221 } else if (softticks + 1 == ticks) 222 ++softticks; 223} 224 225/* 226 * Compute number of ticks in the specified amount of time. 227 */ 228int 229tvtohz(tv) 230 struct timeval *tv; 231{ 232 register unsigned long ticks; 233 register long sec, usec; 234 235 /* 236 * If the number of usecs in the whole seconds part of the time 237 * difference fits in a long, then the total number of usecs will 238 * fit in an unsigned long. Compute the total and convert it to 239 * ticks, rounding up and adding 1 to allow for the current tick 240 * to expire. Rounding also depends on unsigned long arithmetic 241 * to avoid overflow. 242 * 243 * Otherwise, if the number of ticks in the whole seconds part of 244 * the time difference fits in a long, then convert the parts to 245 * ticks separately and add, using similar rounding methods and 246 * overflow avoidance. This method would work in the previous 247 * case but it is slightly slower and assumes that hz is integral. 248 * 249 * Otherwise, round the time difference down to the maximum 250 * representable value. 251 * 252 * If ints have 32 bits, then the maximum value for any timeout in 253 * 10ms ticks is 248 days. 
254 */ 255 sec = tv->tv_sec; 256 usec = tv->tv_usec; 257 if (usec < 0) { 258 sec--; 259 usec += 1000000; 260 } 261 if (sec < 0) { 262#ifdef DIAGNOSTIC 263 if (usec > 0) { 264 sec++; 265 usec -= 1000000; 266 } 267 printf("tvotohz: negative time difference %ld sec %ld usec\n", 268 sec, usec); 269#endif 270 ticks = 1; 271 } else if (sec <= LONG_MAX / 1000000) 272 ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) 273 / tick + 1; 274 else if (sec <= LONG_MAX / hz) 275 ticks = sec * hz 276 + ((unsigned long)usec + (tick - 1)) / tick + 1; 277 else 278 ticks = LONG_MAX; 279 if (ticks > INT_MAX) 280 ticks = INT_MAX; 281 return (ticks); 282} 283 284 285/* 286 * Compute number of hz until specified time. Used to 287 * compute third argument to timeout() from an absolute time. 288 */ 289int 290hzto(tv) 291 struct timeval *tv; 292{ 293 struct timeval t2; 294 295 getmicrotime(&t2); 296 t2.tv_sec = tv->tv_sec - t2.tv_sec; 297 t2.tv_usec = tv->tv_usec - t2.tv_usec; 298 return (tvtohz(&t2)); 299} 300 301/* 302 * Start profiling on a process. 303 * 304 * Kernel profiling passes proc0 which never exits and hence 305 * keeps the profile clock running constantly. 306 */ 307void 308startprofclock(p) 309 register struct proc *p; 310{ 311 int s; 312 313 if ((p->p_flag & P_PROFIL) == 0) { 314 p->p_flag |= P_PROFIL; 315 if (++profprocs == 1 && stathz != 0) { 316 s = splstatclock(); 317 psdiv = pscnt = psratio; 318 setstatclockrate(profhz); 319 splx(s); 320 } 321 } 322} 323 324/* 325 * Stop profiling on a process. 326 */ 327void 328stopprofclock(p) 329 register struct proc *p; 330{ 331 int s; 332 333 if (p->p_flag & P_PROFIL) { 334 p->p_flag &= ~P_PROFIL; 335 if (--profprocs == 0 && stathz != 0) { 336 s = splstatclock(); 337 psdiv = pscnt = 1; 338 setstatclockrate(stathz); 339 splx(s); 340 } 341 } 342} 343 344/* 345 * Statistics clock. Grab profile sample, and if divider reaches 0, 346 * do process and kernel statistics. 
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;
	struct pstats *pstats;
	long rss;
	struct rusage *ru;
	struct vmspace *vm;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		/* Only every psdiv-th tick falls through to statistics. */
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#if defined(SMP) && defined(BETTER_CLOCK)
		if (stathz != 0)
			forward_statclock(pscnt);
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	/* Reload the divider for the next statistics interval. */
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX should either run linked list of drives, or (better)
	 * grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principal is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
	if (p != NULL) {
		p->p_cpticks++;
		/* Saturate p_estcpu instead of wrapping to 0. */
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}

		/* Update resource usage integrals and maximums. */
		if ((pstats = p->p_stats) != NULL &&
		    (ru = &pstats->p_ru) != NULL &&
		    (vm = p->p_vmspace) != NULL) {
			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
			rss = vm->vm_pmap.pm_stats.resident_count *
			    PAGE_SIZE / 1024;
			if (ru->ru_maxrss < rss)
				ru->ru_maxrss = rss;
		}
	}
}

/*
 * Return information about system clocks.
 */
static int
sysctl_kern_clockrate SYSCTL_HANDLER_ARGS
{
	struct clockinfo clkinfo;
	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
}

SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

/*
 * Hardware counts elapsed since the timecounter's offset was last set,
 * masked to the counter's width.
 */
static __inline unsigned
tco_delta(struct timecounter *tc)
{

	return ((tc->tc_get_timecount(tc) - tc->tc_offset_count) &
	    tc->tc_counter_mask);
}

/*
 * We have four functions for looking at the clock, two for microseconds
 * and two for nanoseconds.  For each there is fast but less precise
 * version "get{nano|micro}time" which will return a time which is up
 * to 1/HZ previous to the call, whereas the raw version "{nano|micro}time"
 * will return a timestamp which is as precise as possible.
 */

/* Fast wall-clock time: the value precomputed by tco_forward() last tick. */
void
getmicrotime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tvp = tc->tc_microtime;
}

/* Fast wall-clock time in timespec form; also precomputed in tco_forward(). */
void
getnanotime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	*tsp = tc->tc_nanotime;
}

/* Precise wall-clock time: offset + scaled hardware delta + boottime. */
void
microtime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	/* 32.32 fixed-point: high 32 bits of delta * scale is microseconds. */
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	tv->tv_usec += boottime.tv_usec;
	tv->tv_sec += boottime.tv_sec;
	while (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

/* Precise wall-clock time with nanosecond resolution. */
void
nanotime(struct timespec *ts)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	/* Combine the fractional and integer parts of the 32.32 ns scale. */
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

/*
 * Convert a raw counter reading (e.g. a latched PPS count) to a timespec
 * against the current timecounter's offsets.
 */
void
timecounter_timespec(unsigned count, struct timespec *ts)
{
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	ts->tv_sec = tc->tc_offset_sec;
	count -= tc->tc_offset_count;
	count &= tc->tc_counter_mask;
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	delta += boottime.tv_usec * 1000;
	ts->tv_sec += boottime.tv_sec;
	while (delta >= 1000000000) {
		delta -= 1000000000;
		ts->tv_sec++;
	}
	ts->tv_nsec = delta;
}

/* Fast uptime (no boottime added): last tick's offsets only. */
void
getmicrouptime(struct timeval *tvp)
{
	struct timecounter *tc;

	tc = timecounter;
	tvp->tv_sec = tc->tc_offset_sec;
	tvp->tv_usec = tc->tc_offset_micro;
}

/* Fast uptime in timespec form. */
void
getnanouptime(struct timespec *tsp)
{
	struct timecounter *tc;

	tc = timecounter;
	tsp->tv_sec = tc->tc_offset_sec;
	tsp->tv_nsec = tc->tc_offset_nano >> 32;
}

/* Precise uptime in microseconds. */
void
microuptime(struct timeval *tv)
{
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	tv->tv_usec = tc->tc_offset_micro;
	tv->tv_usec += ((u_int64_t)tco_delta(tc) * tc->tc_scale_micro) >> 32;
	/*
	 * Single `if' (not `while') here, unlike microtime(); presumably
	 * at most one carry can occur without boottime added — confirm.
	 */
	if (tv->tv_usec >= 1000000) {
		tv->tv_usec -= 1000000;
		tv->tv_sec++;
	}
}

/* Precise uptime in nanoseconds. */
void
nanouptime(struct timespec *tv)
{
	unsigned count;
	u_int64_t delta;
	struct timecounter *tc;

	tc = (struct timecounter *)timecounter;
	tv->tv_sec = tc->tc_offset_sec;
	count = tco_delta(tc);
	delta = tc->tc_offset_nano;
	delta += ((u_int64_t)count * tc->tc_scale_nano_f);
	delta >>= 32;
	delta += ((u_int64_t)count * tc->tc_scale_nano_i);
	if (delta >= 1000000000) {
		delta -= 1000000000;
		tv->tv_sec++;
	}
	tv->tv_nsec = delta;
}

/*
 * Recompute the scaling factors (ns and us per hardware count, 32.32
 * fixed point) from the counter frequency and the NTP adjustment.
 */
static void
tco_setscales(struct timecounter *tc)
{
	u_int64_t scale;

	scale = 1000000000LL << 32;
	if (tc->tc_adjustment > 0)
		scale += (tc->tc_adjustment * 1000LL) << 10;
	else
		scale -= (-tc->tc_adjustment * 1000LL) << 10;
	scale /= tc->tc_frequency;
	tc->tc_scale_micro = scale / 1000;
	tc->tc_scale_nano_f = scale & 0xffffffff;
	tc->tc_scale_nano_i = scale >> 32;
}

/*
 * Register a new timecounter.  The driver passes an array of three
 * elements: tc[0] stays the "tweak" master (targeted by the sysctls),
 * while tc[1] and tc[2] become the ping-pong pair that tco_forward()
 * alternates between via tc_other.
 */
void
init_timecounter(struct timecounter *tc)
{
	struct timespec ts0, ts1;
	int i;

	tc->tc_adjustment = 0;
	tco_setscales(tc);
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	tc[0].tc_tweak = &tc[0];
	tc[2] = tc[1] = tc[0];
	tc[1].tc_other = &tc[2];
	tc[2].tc_other = &tc[1];
	/* Take over immediately if only the dummy (or nothing) is installed. */
	if (!timecounter || !strcmp(timecounter->tc_name, "dummy"))
		timecounter = &tc[2];
	tc = &tc[1];

	/*
	 * Figure out the cost of calling this timecounter.
	 */
	nanotime(&ts0);
	for (i = 0; i < 256; i ++)
		tc->tc_get_timecount(tc);
	nanotime(&ts1);
	ts1.tv_sec -= ts0.tv_sec;
	tc->tc_cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
	tc->tc_cost >>= 8;	/* average over the 256 calls */
	if (print_tci && strcmp(tc->tc_name, "dummy"))
		printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n",
		    tc->tc_name, (u_long)tc->tc_frequency, tc->tc_cost);

	/* XXX: For now always start using the counter. */
	tc->tc_offset_count = tc->tc_get_timecount(tc);
	nanouptime(&ts1);
	tc->tc_offset_nano = (u_int64_t)ts1.tv_nsec << 32;
	tc->tc_offset_micro = ts1.tv_nsec / 1000;
	tc->tc_offset_sec = ts1.tv_sec;
	timecounter = tc;
}

/*
 * Set the wall-clock time: boottime is derived so that
 * boottime + uptime == *ts.
 */
void
set_timecounter(struct timespec *ts)
{
	struct timespec ts2;

	nanouptime(&ts2);
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	/* fiddle all the little crinkly bits around the fiords... */
	tco_forward();
}


#if 0 /* Currently unused */
void
switch_timecounter(struct timecounter *newtc)
{
	int s;
	struct timecounter *tc;
	struct timespec ts;

	s = splclock();
	tc = timecounter;
	if (newtc == tc || newtc == tc->tc_other) {
		splx(s);
		return;
	}
	nanouptime(&ts);
	newtc->tc_offset_sec = ts.tv_sec;
	newtc->tc_offset_nano = (u_int64_t)ts.tv_nsec << 32;
	newtc->tc_offset_micro = ts.tv_nsec / 1000;
	newtc->tc_offset_count = newtc->tc_get_timecount(newtc);
	timecounter = newtc;
	splx(s);
}
#endif

/*
 * Copy the live timecounter state into its ping-pong partner and roll
 * the partner's offsets forward to "now".  Readers keep using the old
 * element until the global pointer is flipped in tco_forward().
 */
static struct timecounter *
sync_other_counter(void)
{
	struct timecounter *tc, *tcn, *tco;
	unsigned delta;

	tco = timecounter;
	tc = tco->tc_other;
	tcn = tc->tc_other;
	*tc = *tco;
	tc->tc_other = tcn;	/* restore the link clobbered by the copy */
	delta = tco_delta(tc);
	tc->tc_offset_count += delta;
	tc->tc_offset_count &= tc->tc_counter_mask;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_f;
	tc->tc_offset_nano += (u_int64_t)delta * tc->tc_scale_nano_i << 32;
	return (tc);
}

/*
 * Per-tick timecounter housekeeping (called from hardclock() and
 * set_timecounter()): advance offsets, apply adjtime()/NTP corrections,
 * precompute the cached get*time() values, and publish the new element.
 */
static void
tco_forward(void)
{
	struct timecounter *tc, *tco;

	tco = timecounter;
	tc = sync_other_counter();
	/*
	 * We may be inducing a tiny error here, the tc_poll_pps() may
	 * process a latched count which happens after the tco_delta()
	 * in sync_other_counter(), which would extend the previous
	 * counters parameters into the domain of this new one.
	 * Since the timewindow is very small for this, the error is
	 * going to be only a few weenieseconds (as Dave Mills would
	 * say), so lets just not talk more about it, OK ?
	 */
	if (tco->tc_poll_pps)
		tco->tc_poll_pps(tco);
	/* Apply any pending adjtime() skew, one tickdelta per tick. */
	if (timedelta != 0) {
		tc->tc_offset_nano += (u_int64_t)(tickdelta * 1000) << 32;
		timedelta -= tickdelta;
	}

	/* Carry whole seconds out of the 32.32 nanosecond offset. */
	while (tc->tc_offset_nano >= 1000000000ULL << 32) {
		tc->tc_offset_nano -= 1000000000ULL << 32;
		tc->tc_offset_sec++;
		/* Pick up sysctl tweaks once per second boundary. */
		tc->tc_frequency = tc->tc_tweak->tc_frequency;
		tc->tc_adjustment = tc->tc_tweak->tc_adjustment;
		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
		tco_setscales(tc);
	}

	tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32;

	/* Figure out the wall-clock time */
	tc->tc_nanotime.tv_sec = tc->tc_offset_sec + boottime.tv_sec;
	tc->tc_nanotime.tv_nsec =
	    (tc->tc_offset_nano >> 32) + boottime.tv_usec * 1000;
	tc->tc_microtime.tv_usec = tc->tc_offset_micro + boottime.tv_usec;
	if (tc->tc_nanotime.tv_nsec >= 1000000000) {
		tc->tc_nanotime.tv_nsec -= 1000000000;
		tc->tc_microtime.tv_usec -= 1000000;
		tc->tc_nanotime.tv_sec++;
	}
	time_second = tc->tc_microtime.tv_sec = tc->tc_nanotime.tv_sec;

	/* Publish: readers now see the updated element. */
	timecounter = tc;
}

/* sysctl kern.timecounter.frequency — read/write the tweak master's frequency. */
static int
sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_frequency,
	    sizeof(timecounter->tc_tweak->tc_frequency), req));
}

/* sysctl kern.timecounter.adjustment — read/write the tweak master's adjustment. */
static int
sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
{

	return (sysctl_handle_opaque(oidp,
	    &timecounter->tc_tweak->tc_adjustment,
	    sizeof(timecounter->tc_tweak->tc_adjustment), req));
}

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(u_int), sysctl_kern_timecounter_frequency, "I", "");

SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT | CTLFLAG_RW,
    0, sizeof(int), sysctl_kern_timecounter_adjustment, "I", "");

/*
 * Implement a dummy timecounter which we
 * can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * timeservices.
 */

/* A fake hardware counter that simply advances by one on every read. */
static unsigned
dummy_get_timecount(struct timecounter *tc)
{
	static unsigned now;
	return (++now);
}

/*
 * Three elements, as init_timecounter() requires: element 0 is the
 * tweak master, elements 1 and 2 are filled in at registration.
 */
static struct timecounter dummy_timecounter[3] = {
	{
		dummy_get_timecount,
		0,
		~0u,
		1000000,
		"dummy"
	}
};

/* Register the dummy timecounter very early (console SYSINIT time). */
static void
initdummytimecounter(void *dummy)
{
	init_timecounter(dummy_timecounter);
}

SYSINIT(dummytc, SI_SUB_CONSOLE, SI_ORDER_FIRST, initdummytimecounter, NULL)