/* kern_tc.c revision 118842 */
1/*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 */ 9 10#include <sys/cdefs.h> 11__FBSDID("$FreeBSD: head/sys/kern/kern_tc.c 118842 2003-08-12 20:34:31Z mux $"); 12 13#include "opt_ntp.h" 14 15#include <sys/param.h> 16#include <sys/kernel.h> 17#include <sys/sysctl.h> 18#include <sys/systm.h> 19#include <sys/timepps.h> 20#include <sys/timetc.h> 21#include <sys/timex.h> 22 23/* 24 * a large step happens on boot. This constant detects such 25 * a steps. It is relatively small so that ntp_update_second gets called 26 * enough in the typical 'missed a couple of seconds' case, but doesn't 27 * loop forever when the time step is large. 28 */ 29#define LARGE_STEP 200 30 31/* 32 * Implement a dummy timecounter which we can use until we get a real one 33 * in the air. This allows the console and other early stuff to use 34 * time services. 35 */ 36 37static u_int 38dummy_get_timecount(struct timecounter *tc) 39{ 40 static u_int now; 41 42 return (++now); 43} 44 45static struct timecounter dummy_timecounter = { 46 dummy_get_timecount, 0, ~0u, 1000000, "dummy", 47}; 48 49struct timehands { 50 /* These fields must be initialized by the driver. */ 51 struct timecounter *th_counter; 52 int64_t th_adjustment; 53 u_int64_t th_scale; 54 u_int th_offset_count; 55 struct bintime th_offset; 56 struct timeval th_microtime; 57 struct timespec th_nanotime; 58 /* Fields not to be copied in tc_windup start with th_generation. 
*/ 59 volatile u_int th_generation; 60 struct timehands *th_next; 61}; 62 63extern struct timehands th0; 64static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0}; 65static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9}; 66static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8}; 67static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7}; 68static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6}; 69static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5}; 70static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4}; 71static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3}; 72static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2}; 73static struct timehands th0 = { 74 &dummy_timecounter, 75 0, 76 (uint64_t)-1 / 1000000, 77 0, 78 {1, 0}, 79 {0, 0}, 80 {0, 0}, 81 1, 82 &th1 83}; 84 85static struct timehands *volatile timehands = &th0; 86struct timecounter *timecounter = &dummy_timecounter; 87static struct timecounter *timecounters = &dummy_timecounter; 88 89time_t time_second = 1; 90time_t time_uptime = 0; 91 92static struct bintime boottimebin; 93struct timeval boottime; 94SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, 95 &boottime, timeval, "System boottime"); 96 97SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); 98 99#define TC_STATS(foo) \ 100 static u_int foo; \ 101 SYSCTL_UINT(_kern_timecounter, OID_AUTO, foo, CTLFLAG_RD, &foo, 0, "");\ 102 struct __hack 103 104TC_STATS(nbinuptime); TC_STATS(nnanouptime); TC_STATS(nmicrouptime); 105TC_STATS(nbintime); TC_STATS(nnanotime); TC_STATS(nmicrotime); 106TC_STATS(ngetbinuptime); TC_STATS(ngetnanouptime); TC_STATS(ngetmicrouptime); 107TC_STATS(ngetbintime); TC_STATS(ngetnanotime); TC_STATS(ngetmicrotime); 108TC_STATS(nsetclock); 109 110#undef TC_STATS 111 112static void tc_windup(void); 
113 114/* 115 * Return the difference between the timehands' counter value now and what 116 * was when we copied it to the timehands' offset_count. 117 */ 118static __inline u_int 119tc_delta(struct timehands *th) 120{ 121 struct timecounter *tc; 122 123 tc = th->th_counter; 124 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 125 tc->tc_counter_mask); 126} 127 128/* 129 * Functions for reading the time. We have to loop until we are sure that 130 * the timehands that we operated on was not updated under our feet. See 131 * the comment in <sys/time.h> for a description of these 12 functions. 132 */ 133 134void 135binuptime(struct bintime *bt) 136{ 137 struct timehands *th; 138 u_int gen; 139 140 nbinuptime++; 141 do { 142 th = timehands; 143 gen = th->th_generation; 144 *bt = th->th_offset; 145 bintime_addx(bt, th->th_scale * tc_delta(th)); 146 } while (gen == 0 || gen != th->th_generation); 147} 148 149void 150nanouptime(struct timespec *tsp) 151{ 152 struct bintime bt; 153 154 nnanouptime++; 155 binuptime(&bt); 156 bintime2timespec(&bt, tsp); 157} 158 159void 160microuptime(struct timeval *tvp) 161{ 162 struct bintime bt; 163 164 nmicrouptime++; 165 binuptime(&bt); 166 bintime2timeval(&bt, tvp); 167} 168 169void 170bintime(struct bintime *bt) 171{ 172 173 nbintime++; 174 binuptime(bt); 175 bintime_add(bt, &boottimebin); 176} 177 178void 179nanotime(struct timespec *tsp) 180{ 181 struct bintime bt; 182 183 nnanotime++; 184 bintime(&bt); 185 bintime2timespec(&bt, tsp); 186} 187 188void 189microtime(struct timeval *tvp) 190{ 191 struct bintime bt; 192 193 nmicrotime++; 194 bintime(&bt); 195 bintime2timeval(&bt, tvp); 196} 197 198void 199getbinuptime(struct bintime *bt) 200{ 201 struct timehands *th; 202 u_int gen; 203 204 ngetbinuptime++; 205 do { 206 th = timehands; 207 gen = th->th_generation; 208 *bt = th->th_offset; 209 } while (gen == 0 || gen != th->th_generation); 210} 211 212void 213getnanouptime(struct timespec *tsp) 214{ 215 struct timehands *th; 
216 u_int gen; 217 218 ngetnanouptime++; 219 do { 220 th = timehands; 221 gen = th->th_generation; 222 bintime2timespec(&th->th_offset, tsp); 223 } while (gen == 0 || gen != th->th_generation); 224} 225 226void 227getmicrouptime(struct timeval *tvp) 228{ 229 struct timehands *th; 230 u_int gen; 231 232 ngetmicrouptime++; 233 do { 234 th = timehands; 235 gen = th->th_generation; 236 bintime2timeval(&th->th_offset, tvp); 237 } while (gen == 0 || gen != th->th_generation); 238} 239 240void 241getbintime(struct bintime *bt) 242{ 243 struct timehands *th; 244 u_int gen; 245 246 ngetbintime++; 247 do { 248 th = timehands; 249 gen = th->th_generation; 250 *bt = th->th_offset; 251 } while (gen == 0 || gen != th->th_generation); 252 bintime_add(bt, &boottimebin); 253} 254 255void 256getnanotime(struct timespec *tsp) 257{ 258 struct timehands *th; 259 u_int gen; 260 261 ngetnanotime++; 262 do { 263 th = timehands; 264 gen = th->th_generation; 265 *tsp = th->th_nanotime; 266 } while (gen == 0 || gen != th->th_generation); 267} 268 269void 270getmicrotime(struct timeval *tvp) 271{ 272 struct timehands *th; 273 u_int gen; 274 275 ngetmicrotime++; 276 do { 277 th = timehands; 278 gen = th->th_generation; 279 *tvp = th->th_microtime; 280 } while (gen == 0 || gen != th->th_generation); 281} 282 283/* 284 * Initialize a new timecounter. 285 * We should really try to rank the timecounters and intelligently determine 286 * if the new timecounter is better than the current one. This is subject 287 * to further study. For now always use the new timecounter. 
288 */ 289void 290tc_init(struct timecounter *tc) 291{ 292 unsigned u; 293 294 printf("Timecounter \"%s\" frequency %ju Hz", 295 tc->tc_name, (intmax_t)tc->tc_frequency); 296 297 u = tc->tc_frequency / tc->tc_counter_mask; 298 if (u > hz) { 299 printf(" -- Insufficient hz, needs at least %u\n", u); 300 return; 301 } 302 tc->tc_next = timecounters; 303 timecounters = tc; 304 printf("\n"); 305 (void)tc->tc_get_timecount(tc); 306 (void)tc->tc_get_timecount(tc); 307 timecounter = tc; 308} 309 310/* Report the frequency of the current timecounter. */ 311u_int64_t 312tc_getfrequency(void) 313{ 314 315 return (timehands->th_counter->tc_frequency); 316} 317 318/* 319 * Step our concept of UTC. This is done by modifying our estimate of 320 * when we booted. XXX: needs futher work. 321 */ 322void 323tc_setclock(struct timespec *ts) 324{ 325 struct timespec ts2; 326 327 nsetclock++; 328 nanouptime(&ts2); 329 boottime.tv_sec = ts->tv_sec - ts2.tv_sec; 330 /* XXX boottime should probably be a timespec. */ 331 boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000; 332 if (boottime.tv_usec < 0) { 333 boottime.tv_usec += 1000000; 334 boottime.tv_sec--; 335 } 336 timeval2bintime(&boottime, &boottimebin); 337 338 /* XXX fiddle all the little crinkly bits around the fiords... */ 339 tc_windup(); 340} 341 342/* 343 * Initialize the next struct timehands in the ring and make 344 * it the active timehands. Along the way we might switch to a different 345 * timecounter and/or do seconds processing in NTP. Slightly magic. 346 */ 347static void 348tc_windup(void) 349{ 350 struct bintime bt; 351 struct timehands *th, *tho; 352 u_int64_t scale; 353 u_int delta, ncount, ogen; 354 int i; 355 time_t t; 356 357 /* 358 * Make the next timehands a copy of the current one, but do not 359 * overwrite the generation or next pointer. While we update 360 * the contents, the generation must be zero. 
361 */ 362 tho = timehands; 363 th = tho->th_next; 364 ogen = th->th_generation; 365 th->th_generation = 0; 366 bcopy(tho, th, offsetof(struct timehands, th_generation)); 367 368 /* 369 * Capture a timecounter delta on the current timecounter and if 370 * changing timecounters, a counter value from the new timecounter. 371 * Update the offset fields accordingly. 372 */ 373 delta = tc_delta(th); 374 if (th->th_counter != timecounter) 375 ncount = timecounter->tc_get_timecount(timecounter); 376 else 377 ncount = 0; 378 th->th_offset_count += delta; 379 th->th_offset_count &= th->th_counter->tc_counter_mask; 380 bintime_addx(&th->th_offset, th->th_scale * delta); 381 382 /* 383 * Hardware latching timecounters may not generate interrupts on 384 * PPS events, so instead we poll them. There is a finite risk that 385 * the hardware might capture a count which is later than the one we 386 * got above, and therefore possibly in the next NTP second which might 387 * have a different rate than the current NTP second. It doesn't 388 * matter in practice. 389 */ 390 if (tho->th_counter->tc_poll_pps) 391 tho->th_counter->tc_poll_pps(tho->th_counter); 392 393 /* 394 * Compute the UTC time, before any leapsecond adjustments, are 395 * made. 396 */ 397 bt = th->th_offset; 398 bintime_add(&bt, &boottimebin); 399 400 /* 401 * Deal with NTP second processing. The for loop normally only 402 * iterates once, but in extreme situations it might keep NTP sane 403 * if timeouts are not run for several seconds. At boot, the 404 * time step can be large when the TOD hardware has been read, so 405 * on really large steps, we call ntp_update_second only once. 406 */ 407 for (i = bt.sec - tho->th_microtime.tv_sec; i > 0; i--) { 408 t = bt.sec; 409 ntp_update_second(&th->th_adjustment, &bt.sec); 410 if (bt.sec != t) 411 boottimebin.sec += bt.sec - t; 412 if (i > LARGE_STEP) 413 break; 414 } 415 416 /* Now is a good time to change timecounters. 
*/ 417 if (th->th_counter != timecounter) { 418 th->th_counter = timecounter; 419 th->th_offset_count = ncount; 420 } 421 422 /*- 423 * Recalculate the scaling factor. We want the number of 1/2^64 424 * fractions of a second per period of the hardware counter, taking 425 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 426 * processing provides us with. 427 * 428 * The th_adjustment is nanoseconds per second with 32 bit binary 429 * fraction and we want 64 bit binary fraction of second: 430 * 431 * x = a * 2^32 / 10^9 = a * 4.294967296 432 * 433 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 434 * we can only multiply by about 850 without overflowing, but that 435 * leaves suitably precise fractions for multiply before divide. 436 * 437 * Divide before multiply with a fraction of 2199/512 results in a 438 * systematic undercompensation of 10PPM of th_adjustment. On a 439 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 440 * 441 * We happily sacrifice the lowest of the 64 bits of our result 442 * to the goddess of code clarity. 443 * 444 */ 445 scale = (u_int64_t)1 << 63; 446 scale += (th->th_adjustment / 1024) * 2199; 447 scale /= th->th_counter->tc_frequency; 448 th->th_scale = scale * 2; 449 450 bintime2timeval(&bt, &th->th_microtime); 451 bintime2timespec(&bt, &th->th_nanotime); 452 453 /* 454 * Now that the struct timehands is again consistent, set the new 455 * generation number, making sure to not make it zero. 456 */ 457 if (++ogen == 0) 458 ogen = 1; 459 th->th_generation = ogen; 460 461 /* Go live with the new struct timehands. */ 462 time_second = th->th_microtime.tv_sec; 463 time_uptime = th->th_offset.sec; 464 timehands = th; 465} 466 467/* Report or change the active timecounter hardware. 
*/ 468static int 469sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) 470{ 471 char newname[32]; 472 struct timecounter *newtc, *tc; 473 int error; 474 475 tc = timecounter; 476 strlcpy(newname, tc->tc_name, sizeof(newname)); 477 478 error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); 479 if (error != 0 || req->newptr == NULL || 480 strcmp(newname, tc->tc_name) == 0) 481 return (error); 482 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 483 if (strcmp(newname, newtc->tc_name) != 0) 484 continue; 485 486 /* Warm up new timecounter. */ 487 (void)newtc->tc_get_timecount(newtc); 488 (void)newtc->tc_get_timecount(newtc); 489 490 timecounter = newtc; 491 return (0); 492 } 493 return (EINVAL); 494} 495 496SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 497 0, 0, sysctl_kern_timecounter_hardware, "A", ""); 498 499/* 500 * RFC 2783 PPS-API implementation. 501 */ 502 503int 504pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) 505{ 506 pps_params_t *app; 507 struct pps_fetch_args *fapi; 508#ifdef PPS_SYNC 509 struct pps_kcbind_args *kapi; 510#endif 511 512 switch (cmd) { 513 case PPS_IOC_CREATE: 514 return (0); 515 case PPS_IOC_DESTROY: 516 return (0); 517 case PPS_IOC_SETPARAMS: 518 app = (pps_params_t *)data; 519 if (app->mode & ~pps->ppscap) 520 return (EINVAL); 521 pps->ppsparam = *app; 522 return (0); 523 case PPS_IOC_GETPARAMS: 524 app = (pps_params_t *)data; 525 *app = pps->ppsparam; 526 app->api_version = PPS_API_VERS_1; 527 return (0); 528 case PPS_IOC_GETCAP: 529 *(int*)data = pps->ppscap; 530 return (0); 531 case PPS_IOC_FETCH: 532 fapi = (struct pps_fetch_args *)data; 533 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) 534 return (EINVAL); 535 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) 536 return (EOPNOTSUPP); 537 pps->ppsinfo.current_mode = pps->ppsparam.mode; 538 fapi->pps_info_buf = pps->ppsinfo; 539 return (0); 540 case PPS_IOC_KCBIND: 541#ifdef PPS_SYNC 542 
kapi = (struct pps_kcbind_args *)data; 543 /* XXX Only root should be able to do this */ 544 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) 545 return (EINVAL); 546 if (kapi->kernel_consumer != PPS_KC_HARDPPS) 547 return (EINVAL); 548 if (kapi->edge & ~pps->ppscap) 549 return (EINVAL); 550 pps->kcmode = kapi->edge; 551 return (0); 552#else 553 return (EOPNOTSUPP); 554#endif 555 default: 556 return (ENOTTY); 557 } 558} 559 560void 561pps_init(struct pps_state *pps) 562{ 563 pps->ppscap |= PPS_TSFMT_TSPEC; 564 if (pps->ppscap & PPS_CAPTUREASSERT) 565 pps->ppscap |= PPS_OFFSETASSERT; 566 if (pps->ppscap & PPS_CAPTURECLEAR) 567 pps->ppscap |= PPS_OFFSETCLEAR; 568} 569 570void 571pps_capture(struct pps_state *pps) 572{ 573 struct timehands *th; 574 575 th = timehands; 576 pps->capgen = th->th_generation; 577 pps->capth = th; 578 pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); 579 if (pps->capgen != th->th_generation) 580 pps->capgen = 0; 581} 582 583void 584pps_event(struct pps_state *pps, int event) 585{ 586 struct bintime bt; 587 struct timespec ts, *tsp, *osp; 588 u_int tcount, *pcount; 589 int foff, fhard; 590 pps_seq_t *pseq; 591 592 /* If the timecounter was wound up underneath us, bail out. */ 593 if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation) 594 return; 595 596 /* Things would be easier with arrays. 
*/ 597 if (event == PPS_CAPTUREASSERT) { 598 tsp = &pps->ppsinfo.assert_timestamp; 599 osp = &pps->ppsparam.assert_offset; 600 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 601 fhard = pps->kcmode & PPS_CAPTUREASSERT; 602 pcount = &pps->ppscount[0]; 603 pseq = &pps->ppsinfo.assert_sequence; 604 } else { 605 tsp = &pps->ppsinfo.clear_timestamp; 606 osp = &pps->ppsparam.clear_offset; 607 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 608 fhard = pps->kcmode & PPS_CAPTURECLEAR; 609 pcount = &pps->ppscount[1]; 610 pseq = &pps->ppsinfo.clear_sequence; 611 } 612 613 /* 614 * If the timecounter changed, we cannot compare the count values, so 615 * we have to drop the rest of the PPS-stuff until the next event. 616 */ 617 if (pps->ppstc != pps->capth->th_counter) { 618 pps->ppstc = pps->capth->th_counter; 619 *pcount = pps->capcount; 620 pps->ppscount[2] = pps->capcount; 621 return; 622 } 623 624 /* Return if nothing really happened. */ 625 if (*pcount == pps->capcount) 626 return; 627 628 /* Convert the count to a timespec. */ 629 tcount = pps->capcount - pps->capth->th_offset_count; 630 tcount &= pps->capth->th_counter->tc_counter_mask; 631 bt = pps->capth->th_offset; 632 bintime_addx(&bt, pps->capth->th_scale * tcount); 633 bintime_add(&bt, &boottimebin); 634 bintime2timespec(&bt, &ts); 635 636 /* If the timecounter was wound up underneath us, bail out. */ 637 if (pps->capgen != pps->capth->th_generation) 638 return; 639 640 *pcount = pps->capcount; 641 (*pseq)++; 642 *tsp = ts; 643 644 if (foff) { 645 timespecadd(tsp, osp); 646 if (tsp->tv_nsec < 0) { 647 tsp->tv_nsec += 1000000000; 648 tsp->tv_sec -= 1; 649 } 650 } 651#ifdef PPS_SYNC 652 if (fhard) { 653 u_int64_t scale; 654 655 /* 656 * Feed the NTP PLL/FLL. 657 * The FLL wants to know how many (hardware) nanoseconds 658 * elapsed since the previous event. 
659 */ 660 tcount = pps->capcount - pps->ppscount[2]; 661 pps->ppscount[2] = pps->capcount; 662 tcount &= pps->capth->th_counter->tc_counter_mask; 663 scale = (u_int64_t)1 << 63; 664 scale /= pps->capth->th_counter->tc_frequency; 665 scale *= 2; 666 bt.sec = 0; 667 bt.frac = 0; 668 bintime_addx(&bt, scale * tcount); 669 bintime2timespec(&bt, &ts); 670 hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); 671 } 672#endif 673} 674 675/* 676 * Timecounters need to be updated every so often to prevent the hardware 677 * counter from overflowing. Updating also recalculates the cached values 678 * used by the get*() family of functions, so their precision depends on 679 * the update frequency. 680 */ 681 682static int tc_tick; 683SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0, ""); 684 685void 686tc_ticktock(void) 687{ 688 static int count; 689 690 if (++count < tc_tick) 691 return; 692 count = 0; 693 tc_windup(); 694} 695 696static void 697inittimecounter(void *dummy) 698{ 699 u_int p; 700 701 /* 702 * Set the initial timeout to 703 * max(1, <approx. number of hardclock ticks in a millisecond>). 704 * People should probably not use the sysctl to set the timeout 705 * to smaller than its inital value, since that value is the 706 * smallest reasonable one. If they want better timestamps they 707 * should use the non-"get"* functions. 708 */ 709 if (hz > 1000) 710 tc_tick = (hz + 500) / 1000; 711 else 712 tc_tick = 1; 713 p = (tc_tick * 1000000) / hz; 714 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 715 716 /* warm up new timecounter (again) and get rolling. */ 717 (void)timecounter->tc_get_timecount(timecounter); 718 (void)timecounter->tc_get_timecount(timecounter); 719} 720 721SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL) 722