/* kern_tc.c revision 106304 */
1/*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 * $FreeBSD: head/sys/kern/kern_tc.c 106304 2002-11-01 18:52:20Z phk $ 10 */ 11 12#include "opt_ntp.h" 13 14#include <sys/param.h> 15#include <sys/kernel.h> 16#include <sys/sysctl.h> 17#include <sys/systm.h> 18#include <sys/timepps.h> 19#include <sys/timetc.h> 20#include <sys/timex.h> 21 22/* 23 * Implement a dummy timecounter which we can use until we get a real one 24 * in the air. This allows the console and other early stuff to use 25 * time services. 26 */ 27 28static u_int 29dummy_get_timecount(struct timecounter *tc) 30{ 31 static u_int now; 32 33 return (++now); 34} 35 36static struct timecounter dummy_timecounter = { 37 dummy_get_timecount, 0, ~0u, 1000000, "dummy", 38}; 39 40struct timehands { 41 /* These fields must be initialized by the driver. */ 42 struct timecounter *th_counter; 43 int64_t th_adjustment; 44 u_int64_t th_scale; 45 u_int th_offset_count; 46 struct bintime th_offset; 47 struct timeval th_microtime; 48 struct timespec th_nanotime; 49 /* Fields not to be copied in tc_windup start with th_generation. 
*/ 50 volatile u_int th_generation; 51 struct timehands *th_next; 52}; 53 54extern struct timehands th0; 55static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; 56static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; 57static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; 58static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; 59static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; 60static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; 61static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; 62static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; 63static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; 64static struct timehands th0 = { 65 &dummy_timecounter, 66 0, 67 (uint64_t)-1 / 1000000, 68 0, 69 {1, 0}, 70 {0, 0}, 71 {0, 0}, 72 1, 73 &th1 74}; 75 76static struct timehands *volatile timehands = &th0; 77struct timecounter *timecounter = &dummy_timecounter; 78static struct timecounter *timecounters = &dummy_timecounter; 79 80time_t time_second = 1; 81time_t time_uptime = 0; 82 83static struct bintime boottimebin; 84struct timeval boottime; 85SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, 86 &boottime, timeval, "System boottime"); 87 88SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); 89 90#define TC_STATS(foo) \ 91 static u_int foo; \ 92 SYSCTL_UINT(_kern_timecounter, OID_AUTO, foo, CTLFLAG_RD, &foo, 0, "");\ 93 struct __hack 94 95TC_STATS(nbinuptime); TC_STATS(nnanouptime); TC_STATS(nmicrouptime); 96TC_STATS(nbintime); TC_STATS(nnanotime); TC_STATS(nmicrotime); 97TC_STATS(ngetbinuptime); TC_STATS(ngetnanouptime); TC_STATS(ngetmicrouptime); 98TC_STATS(ngetbintime); TC_STATS(ngetnanotime); TC_STATS(ngetmicrotime); 99 100#undef TC_STATS 101 102static void tc_windup(void); 103 104/* 105 * Return the 
difference between the timehands' counter value now and what 106 * was when we copied it to the timehands' offset_count. 107 */ 108static __inline u_int 109tc_delta(struct timehands *th) 110{ 111 struct timecounter *tc; 112 113 tc = th->th_counter; 114 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 115 tc->tc_counter_mask); 116} 117 118/* 119 * Functions for reading the time. We have to loop until we are sure that 120 * the timehands that we operated on was not updated under our feet. See 121 * the comment in <sys/time.h> for a description of these 12 functions. 122 */ 123 124void 125binuptime(struct bintime *bt) 126{ 127 struct timehands *th; 128 u_int gen; 129 130 nbinuptime++; 131 do { 132 th = timehands; 133 gen = th->th_generation; 134 *bt = th->th_offset; 135 bintime_addx(bt, th->th_scale * tc_delta(th)); 136 } while (gen == 0 || gen != th->th_generation); 137} 138 139void 140nanouptime(struct timespec *tsp) 141{ 142 struct bintime bt; 143 144 nnanouptime++; 145 binuptime(&bt); 146 bintime2timespec(&bt, tsp); 147} 148 149void 150microuptime(struct timeval *tvp) 151{ 152 struct bintime bt; 153 154 nmicrouptime++; 155 binuptime(&bt); 156 bintime2timeval(&bt, tvp); 157} 158 159void 160bintime(struct bintime *bt) 161{ 162 163 nbintime++; 164 binuptime(bt); 165 bintime_add(bt, &boottimebin); 166} 167 168void 169nanotime(struct timespec *tsp) 170{ 171 struct bintime bt; 172 173 nnanotime++; 174 bintime(&bt); 175 bintime2timespec(&bt, tsp); 176} 177 178void 179microtime(struct timeval *tvp) 180{ 181 struct bintime bt; 182 183 nmicrotime++; 184 bintime(&bt); 185 bintime2timeval(&bt, tvp); 186} 187 188void 189getbinuptime(struct bintime *bt) 190{ 191 struct timehands *th; 192 u_int gen; 193 194 ngetbinuptime++; 195 do { 196 th = timehands; 197 gen = th->th_generation; 198 *bt = th->th_offset; 199 } while (gen == 0 || gen != th->th_generation); 200} 201 202void 203getnanouptime(struct timespec *tsp) 204{ 205 struct timehands *th; 206 u_int gen; 207 208 
ngetnanouptime++; 209 do { 210 th = timehands; 211 gen = th->th_generation; 212 bintime2timespec(&th->th_offset, tsp); 213 } while (gen == 0 || gen != th->th_generation); 214} 215 216void 217getmicrouptime(struct timeval *tvp) 218{ 219 struct timehands *th; 220 u_int gen; 221 222 ngetmicrouptime++; 223 do { 224 th = timehands; 225 gen = th->th_generation; 226 bintime2timeval(&th->th_offset, tvp); 227 } while (gen == 0 || gen != th->th_generation); 228} 229 230void 231getbintime(struct bintime *bt) 232{ 233 struct timehands *th; 234 u_int gen; 235 236 ngetbintime++; 237 do { 238 th = timehands; 239 gen = th->th_generation; 240 *bt = th->th_offset; 241 } while (gen == 0 || gen != th->th_generation); 242 bintime_add(bt, &boottimebin); 243} 244 245void 246getnanotime(struct timespec *tsp) 247{ 248 struct timehands *th; 249 u_int gen; 250 251 ngetnanotime++; 252 do { 253 th = timehands; 254 gen = th->th_generation; 255 *tsp = th->th_nanotime; 256 } while (gen == 0 || gen != th->th_generation); 257} 258 259void 260getmicrotime(struct timeval *tvp) 261{ 262 struct timehands *th; 263 u_int gen; 264 265 ngetmicrotime++; 266 do { 267 th = timehands; 268 gen = th->th_generation; 269 *tvp = th->th_microtime; 270 } while (gen == 0 || gen != th->th_generation); 271} 272 273/* 274 * Initialize a new timecounter. 275 * We should really try to rank the timecounters and intelligently determine 276 * if the new timecounter is better than the current one. This is subject 277 * to further study. For now always use the new timecounter. 
278 */ 279void 280tc_init(struct timecounter *tc) 281{ 282 unsigned u; 283 284 printf("Timecounter \"%s\" frequency %lu Hz", 285 tc->tc_name, (u_long)tc->tc_frequency); 286 287 u = tc->tc_frequency / tc->tc_counter_mask; 288 if (u > hz) { 289 printf(" -- Insufficient hz, needs at least %u\n", u); 290 return; 291 } 292 tc->tc_next = timecounters; 293 timecounters = tc; 294 printf("\n"); 295 (void)tc->tc_get_timecount(tc); 296 (void)tc->tc_get_timecount(tc); 297 timecounter = tc; 298} 299 300/* Report the frequency of the current timecounter. */ 301u_int32_t 302tc_getfrequency(void) 303{ 304 305 return (timehands->th_counter->tc_frequency); 306} 307 308/* 309 * Step our concept of GMT. This is done by modifying our estimate of 310 * when we booted. XXX: needs futher work. 311 */ 312void 313tc_setclock(struct timespec *ts) 314{ 315 struct timespec ts2; 316 317 nanouptime(&ts2); 318 boottime.tv_sec = ts->tv_sec - ts2.tv_sec; 319 /* XXX boottime should probably be a timespec. */ 320 boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000; 321 if (boottime.tv_usec < 0) { 322 boottime.tv_usec += 1000000; 323 boottime.tv_sec--; 324 } 325 timeval2bintime(&boottime, &boottimebin); 326 327 /* XXX fiddle all the little crinkly bits around the fiords... */ 328 tc_windup(); 329} 330 331/* 332 * Initialize the next struct timehands in the ring and make 333 * it the active timehands. Along the way we might switch to a different 334 * timecounter and/or do seconds processing in NTP. Slightly magic. 335 */ 336static void 337tc_windup(void) 338{ 339 struct bintime bt; 340 struct timehands *th, *tho; 341 u_int64_t scale; 342 u_int delta, ncount, ogen; 343 int i; 344 345 /* 346 * Make the next timehands a copy of the current one, but do not 347 * overwrite the generation or next pointer. While we update 348 * the contents, the generation must be zero. 
349 */ 350 tho = timehands; 351 th = tho->th_next; 352 ogen = th->th_generation; 353 th->th_generation = 0; 354 bcopy(tho, th, offsetof(struct timehands, th_generation)); 355 356 /* 357 * Capture a timecounter delta on the current timecounter and if 358 * changing timecounters, a counter value from the new timecounter. 359 * Update the offset fields accordingly. 360 */ 361 delta = tc_delta(th); 362 if (th->th_counter != timecounter) 363 ncount = timecounter->tc_get_timecount(timecounter); 364 else 365 ncount = 0; 366 th->th_offset_count += delta; 367 th->th_offset_count &= th->th_counter->tc_counter_mask; 368 bintime_addx(&th->th_offset, th->th_scale * delta); 369 370 /* 371 * Hardware latching timecounters may not generate interrupts on 372 * PPS events, so instead we poll them. There is a finite risk that 373 * the hardware might capture a count which is later than the one we 374 * got above, and therefore possibly in the next NTP second which might 375 * have a different rate than the current NTP second. It doesn't 376 * matter in practice. 377 */ 378 if (tho->th_counter->tc_poll_pps) 379 tho->th_counter->tc_poll_pps(tho->th_counter); 380 381 /* 382 * Deal with NTP second processing. The for loop normally only 383 * iterates once, but in extreme situations it might keep NTP sane 384 * if timeouts are not run for several seconds. 385 */ 386 for (i = th->th_offset.sec - tho->th_offset.sec; i > 0; i--) 387 ntp_update_second(&th->th_adjustment, &th->th_offset.sec); 388 389 /* Now is a good time to change timecounters. */ 390 if (th->th_counter != timecounter) { 391 th->th_counter = timecounter; 392 th->th_offset_count = ncount; 393 } 394 395 /*- 396 * Recalculate the scaling factor. We want the number of 1/2^64 397 * fractions of a second per period of the hardware counter, taking 398 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 399 * processing provides us with. 
400 * 401 * The th_adjustment is nanoseconds per second with 32 bit binary 402 * fraction and want 64 bit binary fraction of second: 403 * 404 * x = a * 2^32 / 10^9 = a * 4.294967296 405 * 406 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 407 * we can only multiply by about 850 without overflowing, but that 408 * leaves suitably precise fractions for multiply before divide. 409 * 410 * Divide before multiply with a fraction of 2199/512 results in a 411 * systematic undercompensation of 10PPM of th_adjustment. On a 412 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 413 * 414 * We happily sacrifice the lowest of the 64 bits of our result 415 * to the goddess of code clarity. 416 * 417 */ 418 scale = (u_int64_t)1 << 63; 419 scale += (th->th_adjustment / 1024) * 2199; 420 scale /= th->th_counter->tc_frequency; 421 th->th_scale = scale * 2; 422 423 /* Update the GMT timestamps used for the get*() functions. */ 424 bt = th->th_offset; 425 bintime_add(&bt, &boottimebin); 426 bintime2timeval(&bt, &th->th_microtime); 427 bintime2timespec(&bt, &th->th_nanotime); 428 429 /* 430 * Now that the struct timehands is again consistent, set the new 431 * generation number, making sure to not make it zero. 432 */ 433 if (++ogen == 0) 434 ogen = 1; 435 th->th_generation = ogen; 436 437 /* Go live with the new struct timehands. */ 438 time_second = th->th_microtime.tv_sec; 439 time_uptime = th->th_offset.sec; 440 timehands = th; 441} 442 443/* Report or change the active timecounter hardware. 
*/ 444static int 445sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) 446{ 447 char newname[32]; 448 struct timecounter *newtc, *tc; 449 int error; 450 451 tc = timecounter; 452 strlcpy(newname, tc->tc_name, sizeof(newname)); 453 454 error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); 455 if (error != 0 || req->newptr == NULL || 456 strcmp(newname, tc->tc_name) == 0) 457 return (error); 458 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 459 if (strcmp(newname, newtc->tc_name) != 0) 460 continue; 461 462 /* Warm up new timecounter. */ 463 (void)newtc->tc_get_timecount(newtc); 464 (void)newtc->tc_get_timecount(newtc); 465 466 timecounter = newtc; 467 return (0); 468 } 469 return (EINVAL); 470} 471 472SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 473 0, 0, sysctl_kern_timecounter_hardware, "A", ""); 474 475/* 476 * RFC 2783 PPS-API implementation. 477 */ 478 479int 480pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) 481{ 482 pps_params_t *app; 483 struct pps_fetch_args *fapi; 484#ifdef PPS_SYNC 485 struct pps_kcbind_args *kapi; 486#endif 487 488 switch (cmd) { 489 case PPS_IOC_CREATE: 490 return (0); 491 case PPS_IOC_DESTROY: 492 return (0); 493 case PPS_IOC_SETPARAMS: 494 app = (pps_params_t *)data; 495 if (app->mode & ~pps->ppscap) 496 return (EINVAL); 497 pps->ppsparam = *app; 498 return (0); 499 case PPS_IOC_GETPARAMS: 500 app = (pps_params_t *)data; 501 *app = pps->ppsparam; 502 app->api_version = PPS_API_VERS_1; 503 return (0); 504 case PPS_IOC_GETCAP: 505 *(int*)data = pps->ppscap; 506 return (0); 507 case PPS_IOC_FETCH: 508 fapi = (struct pps_fetch_args *)data; 509 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) 510 return (EINVAL); 511 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) 512 return (EOPNOTSUPP); 513 pps->ppsinfo.current_mode = pps->ppsparam.mode; 514 fapi->pps_info_buf = pps->ppsinfo; 515 return (0); 516 case PPS_IOC_KCBIND: 517#ifdef PPS_SYNC 518 
kapi = (struct pps_kcbind_args *)data; 519 /* XXX Only root should be able to do this */ 520 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) 521 return (EINVAL); 522 if (kapi->kernel_consumer != PPS_KC_HARDPPS) 523 return (EINVAL); 524 if (kapi->edge & ~pps->ppscap) 525 return (EINVAL); 526 pps->kcmode = kapi->edge; 527 return (0); 528#else 529 return (EOPNOTSUPP); 530#endif 531 default: 532 return (ENOTTY); 533 } 534} 535 536void 537pps_init(struct pps_state *pps) 538{ 539 pps->ppscap |= PPS_TSFMT_TSPEC; 540 if (pps->ppscap & PPS_CAPTUREASSERT) 541 pps->ppscap |= PPS_OFFSETASSERT; 542 if (pps->ppscap & PPS_CAPTURECLEAR) 543 pps->ppscap |= PPS_OFFSETCLEAR; 544} 545 546void 547pps_capture(struct pps_state *pps) 548{ 549 struct timehands *th; 550 551 th = timehands; 552 pps->capgen = th->th_generation; 553 pps->capth = th; 554 pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); 555 if (pps->capgen != th->th_generation) 556 pps->capgen = 0; 557} 558 559void 560pps_event(struct pps_state *pps, int event) 561{ 562 struct bintime bt; 563 struct timespec ts, *tsp, *osp; 564 u_int tcount, *pcount; 565 int foff, fhard; 566 pps_seq_t *pseq; 567 568 /* If the timecounter was wound up underneath us, bail out. */ 569 if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation) 570 return; 571 572 /* Things would be easier with arrays. 
*/ 573 if (event == PPS_CAPTUREASSERT) { 574 tsp = &pps->ppsinfo.assert_timestamp; 575 osp = &pps->ppsparam.assert_offset; 576 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 577 fhard = pps->kcmode & PPS_CAPTUREASSERT; 578 pcount = &pps->ppscount[0]; 579 pseq = &pps->ppsinfo.assert_sequence; 580 } else { 581 tsp = &pps->ppsinfo.clear_timestamp; 582 osp = &pps->ppsparam.clear_offset; 583 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 584 fhard = pps->kcmode & PPS_CAPTURECLEAR; 585 pcount = &pps->ppscount[1]; 586 pseq = &pps->ppsinfo.clear_sequence; 587 } 588 589 /* 590 * If the timecounter changed, we cannot compare the count values, so 591 * we have to drop the rest of the PPS-stuff until the next event. 592 */ 593 if (pps->ppstc != pps->capth->th_counter) { 594 pps->ppstc = pps->capth->th_counter; 595 *pcount = pps->capcount; 596 pps->ppscount[2] = pps->capcount; 597 return; 598 } 599 600 /* Return if nothing really happened. */ 601 if (*pcount == pps->capcount) 602 return; 603 604 /* Convert the count to a timespec. */ 605 tcount = pps->capcount - pps->capth->th_offset_count; 606 tcount &= pps->capth->th_counter->tc_counter_mask; 607 bt = pps->capth->th_offset; 608 bintime_addx(&bt, pps->capth->th_scale * tcount); 609 bintime_add(&bt, &boottimebin); 610 bintime2timespec(&bt, &ts); 611 612 /* If the timecounter was wound up underneath us, bail out. */ 613 if (pps->capgen != pps->capth->th_generation) 614 return; 615 616 *pcount = pps->capcount; 617 (*pseq)++; 618 *tsp = ts; 619 620 if (foff) { 621 timespecadd(tsp, osp); 622 if (tsp->tv_nsec < 0) { 623 tsp->tv_nsec += 1000000000; 624 tsp->tv_sec -= 1; 625 } 626 } 627#ifdef PPS_SYNC 628 if (fhard) { 629 /* 630 * Feed the NTP PLL/FLL. 631 * The FLL wants to know how many nanoseconds elapsed since 632 * the previous event. 
633 * I have never been able to convince myself that this code 634 * is actually correct: Using th_scale is bound to contain 635 * a phase correction component from userland, when running 636 * as FLL, so the number hardpps() gets is not meaningful IMO. 637 */ 638 tcount = pps->capcount - pps->ppscount[2]; 639 pps->ppscount[2] = pps->capcount; 640 tcount &= pps->capth->th_counter->tc_counter_mask; 641 bt.sec = 0; 642 bt.frac = 0; 643 bintime_addx(&bt, pps->capth->th_scale * tcount); 644 bintime2timespec(&bt, &ts); 645 hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); 646 } 647#endif 648} 649 650/* 651 * Timecounters need to be updated every so often to prevent the hardware 652 * counter from overflowing. Updating also recalculates the cached values 653 * used by the get*() family of functions, so their precision depends on 654 * the update frequency. 655 */ 656 657static int tc_tick; 658SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tick, 0, ""); 659 660void 661tc_ticktock(void) 662{ 663 static int count; 664 665 if (++count < tc_tick) 666 return; 667 count = 0; 668 tc_windup(); 669} 670 671static void 672inittimecounter(void *dummy) 673{ 674 u_int p; 675 676 /* 677 * Set the initial timeout to 678 * max(1, <approx. number of hardclock ticks in a millisecond>). 679 * People should probably not use the sysctl to set the timeout 680 * to smaller than its inital value, since that value is the 681 * smallest reasonable one. If they want better timestamps they 682 * should use the non-"get"* functions. 683 */ 684 if (hz > 1000) 685 tc_tick = (hz + 500) / 1000; 686 else 687 tc_tick = 1; 688 p = (tc_tick * 1000000) / hz; 689 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 690 691 /* warm up new timecounter (again) and get rolling. */ 692 (void)timecounter->tc_get_timecount(timecounter); 693 (void)timecounter->tc_get_timecount(timecounter); 694} 695 696SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_FIRST, inittimecounter, NULL) 697