kern_tc.c revision 95821
/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD: head/sys/kern/kern_tc.c 95821 2002-04-30 20:42:06Z phk $
 */

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/timetc.h>
#include <sys/timepps.h>
#include <sys/timex.h>

#include <machine/psl.h>

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	/* Monotonically increasing fake count; only needed during early boot. */
	return (++now);
}

static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy",
};

/*
 * A timehands snapshot of the timecounter state.  Readers (the time
 * functions below) loop on th_generation to detect a concurrent update
 * by tc_windup(); writers set th_generation to 0 while the structure is
 * inconsistent.
 */
struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;	/* hardware counter in use */
	int64_t		th_adjustment;		/* NTP/adjtime(2) rate adjustment */
	u_int64_t	th_scale;		/* (2^64 / frequency), see tc_windup() */
	u_int		th_offset_count;	/* counter value at th_offset */
	struct bintime	th_offset;		/* uptime at last windup */
	struct timeval	th_microtime;		/* cached GMT as a timeval */
	struct timespec	th_nanotime;		/* cached GMT as a timespec */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int	th_generation;		/* 0 while being updated */
	struct timehands	*th_next;	/* next element in the ring */
};

/* A ten-element ring of timehands; tc_windup() advances around it. */
extern struct timehands th0;
static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
static struct timehands th0 = {
	&dummy_timecounter,
	0,
	(uint64_t)-1 / 1000000,		/* scale matching the 1 MHz dummy */
	0,
	{1, 0},
	{0, 0},
	{0, 0},
	1,				/* nonzero generation: readable at once */
	&th1
};

/* The currently live timehands element. */
static struct timehands *volatile timehands = &th0;
/* The timecounter to switch to at the next tc_windup(). */
struct timecounter *timecounter = &dummy_timecounter;
/* Singly-linked list of all registered timecounters. */
static struct timecounter *timecounters = &dummy_timecounter;

time_t time_second;

static struct bintime boottimebin;
struct timeval boottime;
SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
    &boottime, timeval, "System boottime");

SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");

/*
 * Declare a per-function call counter exported read-only as
 * kern.timecounter.<foo>.  The trailing "struct __hack" eats the
 * semicolon at the macro's use site.
 */
#define TC_STATS(foo) \
	static u_int foo; \
	SYSCTL_INT(_kern_timecounter, OID_AUTO, foo, CTLFLAG_RD, &foo, 0, "") \
	struct __hack

TC_STATS(nbinuptime); TC_STATS(nnanouptime); TC_STATS(nmicrouptime);
TC_STATS(nbintime); TC_STATS(nnanotime); TC_STATS(nmicrotime);
TC_STATS(ngetbinuptime); TC_STATS(ngetnanouptime); TC_STATS(ngetmicrouptime);
TC_STATS(ngetbintime); TC_STATS(ngetnanotime); TC_STATS(ngetmicrotime);

#undef TC_STATS

static void tc_windup(void);

/*
 * Return the difference between the timehands'
counter value now and what 107 * was when we copied it to the timehands' offset_count. 108 */ 109static __inline u_int 110tc_delta(struct timehands *th) 111{ 112 struct timecounter *tc; 113 114 tc = th->th_counter; 115 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 116 tc->tc_counter_mask); 117} 118 119/* 120 * Functions for reading the time. We have to loop until we are sure that 121 * the timehands that we operated on was not updated under our feet. See 122 * the comment in <sys/time.h> for a description of these 12 functions. 123 */ 124 125void 126binuptime(struct bintime *bt) 127{ 128 struct timehands *th; 129 u_int gen; 130 131 nbinuptime++; 132 do { 133 th = timehands; 134 gen = th->th_generation; 135 *bt = th->th_offset; 136 bintime_addx(bt, th->th_scale * tc_delta(th)); 137 } while (gen == 0 || gen != th->th_generation); 138} 139 140void 141nanouptime(struct timespec *tsp) 142{ 143 struct bintime bt; 144 145 nnanouptime++; 146 binuptime(&bt); 147 bintime2timespec(&bt, tsp); 148} 149 150void 151microuptime(struct timeval *tvp) 152{ 153 struct bintime bt; 154 155 nmicrouptime++; 156 binuptime(&bt); 157 bintime2timeval(&bt, tvp); 158} 159 160#define SYNC_TIME 161 162void 163bintime(struct bintime *bt) 164{ 165 166 nbintime++; 167 binuptime(bt); 168 bintime_add(bt, &boottimebin); 169} 170 171void 172nanotime(struct timespec *tsp) 173{ 174 struct bintime bt; 175 176 nnanotime++; 177 bintime(&bt); 178 bintime2timespec(&bt, tsp); 179} 180 181void 182microtime(struct timeval *tvp) 183{ 184 struct bintime bt; 185 186 nmicrotime++; 187 bintime(&bt); 188 bintime2timeval(&bt, tvp); 189} 190 191void 192getbinuptime(struct bintime *bt) 193{ 194 struct timehands *th; 195 u_int gen; 196 197 ngetbinuptime++; 198 do { 199 th = timehands; 200 gen = th->th_generation; 201 *bt = th->th_offset; 202 } while (gen == 0 || gen != th->th_generation); 203} 204 205void 206getnanouptime(struct timespec *tsp) 207{ 208 struct timehands *th; 209 u_int gen; 210 211 
	ngetnanouptime++;
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
		/* Retry if tc_windup() touched this element meanwhile. */
	} while (gen == 0 || gen != th->th_generation);
}

/* Cheap uptime as a timeval (cached, no hardware read). */
void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	ngetmicrouptime++;
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}

/* Cheap wall-clock time as a bintime (cached uptime plus boot time). */
void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	ngetbintime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
	bintime_add(bt, &boottimebin);
}

/* Cheap wall-clock time as a timespec (value cached by tc_windup()). */
void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	ngetnanotime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}

/* Cheap wall-clock time as a timeval (value cached by tc_windup()). */
void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	ngetmicrotime++;
	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter.
 * We should really try to rank the timecounters and intelligently determine
 * if the new timecounter is better than the current one.  This is subject
 * to further study.  For now always use the new timecounter.
 */
void
tc_init(struct timecounter *tc)
{

	tc->tc_next = timecounters;
	timecounters = tc;
	printf("Timecounter \"%s\" frequency %lu Hz\n",
	    tc->tc_name, (u_long)tc->tc_frequency);
	/* Two warm-up reads; some hardware needs a read to settle. */
	(void)tc->tc_get_timecount(tc);
	(void)tc->tc_get_timecount(tc);
	/* tc_windup() will actually switch to this counter. */
	timecounter = tc;
}

/* Report the frequency of the current timecounter.
 */
u_int32_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

/*
 * Step our concept of GMT.  This is done by modifying our estimate of
 * when we booted.  XXX: needs futher work.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec ts2;

	nanouptime(&ts2);
	/* boottime = requested wall clock minus current uptime. */
	boottime.tv_sec = ts->tv_sec - ts2.tv_sec;
	/* XXX boottime should probably be a timespec. */
	boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000;
	if (boottime.tv_usec < 0) {
		boottime.tv_usec += 1000000;
		boottime.tv_sec--;
	}
	timeval2bintime(&boottime, &boottimebin);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup();
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally only
	 * iterates once, but in extreme situations it might keep NTP sane
	 * if timeouts are not run for several seconds.
	 */
	for (i = th->th_offset.sec - tho->th_offset.sec; i > 0; i--)
		ntp_update_second(&th->th_adjustment, &th->th_offset.sec);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	scale = (u_int64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;

	/* Update the GMT timestamps used for the get*() functions. */
	bt = th->th_offset;
	bintime_add(&bt, &boottimebin);
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	th->th_generation = ogen;

	/* Go live with the new struct timehands. */
	time_second = th->th_microtime.tv_sec;
	timehands = th;
}

/* Report or change the active timecounter hardware. */
static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strncpy(newname, tc->tc_name, sizeof(newname));
	/* strncpy() does not guarantee termination; do it by hand. */
	newname[sizeof(newname) - 1] = '\0';
	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
	/* Done if this was a read, an error, or a no-op write. */
	if (error != 0 || req->newptr == NULL ||
	    strcmp(newname, tc->tc_name) == 0)
		return (error);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter.
		 */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);

		/* tc_windup() performs the actual switch. */
		timecounter = newtc;
		return (0);
	}
	/* No timecounter with the requested name is registered. */
	return (EINVAL);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW,
    0, 0, sysctl_kern_timecounter_hardware, "A", "");

/*
 * RFC 2783 PPS-API implementation.
 */

int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		/* Reject modes the driver did not advertise in ppscap. */
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		/* Blocking fetch (non-zero timeout) is not supported. */
		if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo.current_mode = pps->ppsparam.mode;
		fapi->pps_info_buf = pps->ppsinfo;
		return (0);
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = kapi->edge;
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOTTY);
	}
}

/* Derive the offset capabilities from the capture capabilities. */
void
pps_init(struct pps_state *pps)
{
	pps->ppscap |= PPS_TSFMT_TSPEC;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |=
		    PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
}

/*
 * Snapshot the timehands and counter value at a PPS signal edge.
 * Cheap enough to call from interrupt context; pps_event() does the
 * expensive part later.
 */
void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	th = timehands;
	pps->capgen = th->th_generation;
	pps->capth = th;
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	/* A generation change means the snapshot is inconsistent. */
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}

/* Process a previously captured PPS assert or clear event. */
void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff, fhard;
	pps_seq_t *pseq;

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
		return;

	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Return if nothing really happened. */
	if (*pcount == pps->capcount)
		return;

	/* Convert the count to a timespec.
*/ 599 tcount = pps->capcount - pps->capth->th_offset_count; 600 tcount &= pps->capth->th_counter->tc_counter_mask; 601 bt = pps->capth->th_offset; 602 bintime_addx(&bt, pps->capth->th_scale * tcount); 603 bintime2timespec(&bt, &ts); 604 605 /* If the timecounter was wound up underneath us, bail out. */ 606 if (pps->capgen != pps->capth->th_generation) 607 return; 608 609 *pcount = pps->capcount; 610 (*pseq)++; 611 *tsp = ts; 612 613 if (foff) { 614 timespecadd(tsp, osp); 615 if (tsp->tv_nsec < 0) { 616 tsp->tv_nsec += 1000000000; 617 tsp->tv_sec -= 1; 618 } 619 } 620#ifdef PPS_SYNC 621 if (fhard) { 622 /* 623 * Feed the NTP PLL/FLL. 624 * The FLL wants to know how many nanoseconds elapsed since 625 * the previous event. 626 * I have never been able to convince myself that this code 627 * is actually correct: Using th_scale is bound to contain 628 * a phase correction component from userland, when running 629 * as FLL, so the number hardpps() gets is not meaningful IMO. 630 */ 631 tcount = pps->capcount - pps->ppscount[2]; 632 pps->ppscount[2] = pps->capcount; 633 tcount &= pps->capth->th_counter->tc_counter_mask; 634 bt.sec = 0; 635 bt.frac = 0; 636 bintime_addx(&bt, pps->capth->th_scale * tcount); 637 bintime2timespec(&bt, &ts); 638 hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); 639 } 640#endif 641} 642 643/* 644 * Timecounters need to be updated every so often to prevent the hardware 645 * counter from overflowing. Updating also recalculates the cached values 646 * used by the get*() family of functions, so their precision depends on 647 * the update frequency. 648 */ 649 650static int tc_tick; 651SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tick, 0, ""); 652 653static void 654tc_ticktock(void *dummy) 655{ 656 657 tc_windup(); 658 timeout(tc_ticktock, NULL, tc_tick); 659} 660 661static void 662inittimecounter(void *dummy) 663{ 664 u_int p; 665 666 /* 667 * Set the initial timeout to 668 * max(1, <approx. 
 * number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;	/* round to nearest tick */
	else
		tc_tick = 1;
	/* p is the resulting interval in microseconds, for the printf. */
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);

	/* warm up new timecounter (again) and get rolling */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
	tc_ticktock(NULL);
}

SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_FIRST, inittimecounter, NULL)