kern_tc.c revision 95661
1/* 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 * $FreeBSD: head/sys/kern/kern_tc.c 95661 2002-04-28 18:24:21Z phk $ 10 */ 11 12#include "opt_ntp.h" 13 14#include <sys/param.h> 15#include <sys/timetc.h> 16#include <sys/kernel.h> 17#include <sys/sysctl.h> 18#include <sys/systm.h> 19#include <sys/timex.h> 20#include <sys/timepps.h> 21 22/* 23 * Implement a dummy timecounter which we can use until we get a real one 24 * in the air. This allows the console and other early stuff to use 25 * timeservices. 26 */ 27 28static u_int 29dummy_get_timecount(struct timecounter *tc) 30{ 31 static u_int now; 32 33 return (++now); 34} 35 36static struct timecounter dummy_timecounter = { 37 dummy_get_timecount, 38 0, 39 ~0u, 40 1000000, 41 "dummy" 42}; 43 44struct timehands { 45 /* These fields must be initialized by the driver. */ 46 struct timecounter *th_counter; 47 int64_t th_adjustment; 48 u_int64_t th_scale; 49 u_int th_offset_count; 50 struct bintime th_offset; 51 struct timeval th_microtime; 52 struct timespec th_nanotime; 53 /* Fields not to be copied in tc_windup start with th_generation */ 54 volatile u_int th_generation; 55 struct timehands *th_next; 56}; 57 58 59extern struct timehands th0; 60static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th0}; 61static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th9}; 62static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th8}; 63static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th7}; 64static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th6}; 65static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th5}; 66static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th4}; 67static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th3}; 68static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 1, &th2}; 69static struct timehands th0 = { 70 &dummy_timecounter, 71 0, 72 18446744073709ULL, /* 2^64/1000000 */ 73 0, 74 {1, 0}, 75 {0, 0}, 76 {0, 0}, 77 1, 78 &th1 79}; 80 81static struct timehands *volatile timehands = &th0; 82struct timecounter *timecounter = &dummy_timecounter; 83static struct timecounter *timecounters = &dummy_timecounter; 84 85time_t time_second; 86 87struct bintime boottimebin; 88struct timeval boottime; 89SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, 90 &boottime, timeval, "System boottime"); 91 92SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); 93 94#define TC_STATS(foo) \ 95 static u_int foo; \ 96 SYSCTL_INT(_kern_timecounter, OID_AUTO, foo, CTLFLAG_RD, & foo, 0, "") 97 98TC_STATS(nbinuptime); TC_STATS(nnanouptime); TC_STATS(nmicrouptime); 99TC_STATS(nbintime); TC_STATS(nnanotime); TC_STATS(nmicrotime); 100TC_STATS(ngetbinuptime); TC_STATS(ngetnanouptime); TC_STATS(ngetmicrouptime); 101TC_STATS(ngetbintime); TC_STATS(ngetnanotime); TC_STATS(ngetmicrotime); 102 103#undef TC_STATS 104 105static void tc_windup(void); 106 107/* Get delta hardware ticks relative to our timehands */ 108 109static __inline u_int 110tc_delta(struct timehands *th) 111{ 112 struct timecounter *tc; 113 114 tc = th->th_counter; 115 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 116 tc->tc_counter_mask); 117} 118 119/*- 120 * Functions for reading the time. We have to loop until we are sure that 121 * the timehands we operated on was not updated under our feet. 122 * See comment in <sys/time.h> for description of these 12 functions. 123 */ 124 125void 126binuptime(struct bintime *bt) 127{ 128 struct timehands *th; 129 u_int gen; 130 131 nbinuptime++; 132 do { 133 th = timehands; 134 gen = th->th_generation; 135 *bt = th->th_offset; 136 bintime_addx(bt, th->th_scale * tc_delta(th)); 137 } while (gen == 0 || gen != th->th_generation); 138} 139 140void 141nanouptime(struct timespec *ts) 142{ 143 struct bintime bt; 144 145 nnanouptime++; 146 binuptime(&bt); 147 bintime2timespec(&bt, ts); 148} 149 150void 151microuptime(struct timeval *tv) 152{ 153 struct bintime bt; 154 155 nmicrouptime++; 156 binuptime(&bt); 157 bintime2timeval(&bt, tv); 158} 159 160void 161bintime(struct bintime *bt) 162{ 163 164 nbintime++; 165 binuptime(bt); 166 bintime_add(bt, &boottimebin); 167} 168 169void 170nanotime(struct timespec *ts) 171{ 172 struct bintime bt; 173 174 nnanotime++; 175 bintime(&bt); 176 bintime2timespec(&bt, ts); 177} 178 179void 180microtime(struct timeval *tv) 181{ 182 struct bintime bt; 183 184 nmicrotime++; 185 bintime(&bt); 186 bintime2timeval(&bt, tv); 187} 188 189void 190getbinuptime(struct bintime *bt) 191{ 192 struct timehands *th; 193 u_int gen; 194 195 ngetbinuptime++; 196 do { 197 th = timehands; 198 gen = th->th_generation; 199 *bt = th->th_offset; 200 } while (gen == 0 || gen != th->th_generation); 201} 202 203void 204getnanouptime(struct timespec *tsp) 205{ 206 struct timehands *th; 207 u_int gen; 208 209 ngetnanouptime++; 210 do { 211 th = timehands; 212 gen = th->th_generation; 213 bintime2timespec(&th->th_offset, tsp); 214 } while (gen == 0 || gen != th->th_generation); 215} 216 217void 218getmicrouptime(struct timeval *tvp) 219{ 220 struct timehands *th; 221 u_int gen; 222 223 ngetmicrouptime++; 224 do { 225 th = timehands; 226 gen = th->th_generation; 227 bintime2timeval(&th->th_offset, tvp); 228 } while (gen == 0 || gen != th->th_generation); 229} 230 231void 232getbintime(struct bintime *bt) 233{ 234 struct timehands *th; 235 u_int gen; 236 237 ngetbintime++; 238 do { 239 th = timehands; 240 gen = th->th_generation; 241 *bt = th->th_offset; 242 } while (gen == 0 || gen != th->th_generation); 243 bintime_add(bt, &boottimebin); 244} 245 246void 247getnanotime(struct timespec *tsp) 248{ 249 struct timehands *th; 250 u_int gen; 251 252 ngetnanotime++; 253 do { 254 th = timehands; 255 gen = th->th_generation; 256 *tsp = th->th_nanotime; 257 } while (gen == 0 || gen != th->th_generation); 258} 259 260void 261getmicrotime(struct timeval *tvp) 262{ 263 struct timehands *th; 264 u_int gen; 265 266 ngetmicrotime++; 267 do { 268 th = timehands; 269 gen = th->th_generation; 270 *tvp = th->th_microtime; 271 } while (gen == 0 || gen != th->th_generation); 272} 273 274/*- 275 * Initialize a new timecounter. 276 * We should really try to rank the timecounters and intelligently determine 277 * if the new timecounter is better than the current one. This is subject 278 * to further study. For now always use the new timecounter. 279 */ 280 281void 282tc_init(struct timecounter *tc) 283{ 284 285 tc->tc_next = timecounters; 286 timecounters = tc; 287 printf("Timecounter \"%s\" frequency %lu Hz\n", 288 tc->tc_name, (u_long)tc->tc_frequency); 289 tc->tc_get_timecount(tc); 290 tc->tc_get_timecount(tc); 291 timecounter = tc; 292} 293 294/* Report frequency of the current timecounter. */ 295 296u_int32_t 297tc_getfrequency(void) 298{ 299 300 return (timehands->th_counter->tc_frequency); 301} 302 303/*- 304 * Step our concept of GMT. This is done by modifying our estimate of 305 * when we booted. XXX: needs futher work. 306 */ 307void 308tc_setclock(struct timespec *ts) 309{ 310 struct timespec ts2; 311 312 nanouptime(&ts2); 313 boottime.tv_sec = ts->tv_sec - ts2.tv_sec; 314 boottime.tv_usec = (ts->tv_nsec - ts2.tv_nsec) / 1000; 315 if (boottime.tv_usec < 0) { 316 boottime.tv_usec += 1000000; 317 boottime.tv_sec--; 318 } 319 timeval2bintime(&boottime, &boottimebin); 320 /* fiddle all the little crinkly bits around the fiords... */ 321 tc_windup(); 322} 323 324/*- 325 * tc_windup() will initialize the next struct timehands in the ring and make 326 * it the active timehands. Along the way we might switch to a different 327 * timecounter and/or do seconds processing in NTP. Slightly magic. 328 */ 329 330static void 331tc_windup(void) 332{ 333 struct timehands *th, *tho; 334 struct bintime bt; 335 u_int ogen, delta, ncount; 336 int i; 337 u_int64_t scale; 338 339 ncount = 0; /* GCC is lame */ 340 341 /*- 342 * Make the next timehands a copy of the current one, but do not 343 * overwrite the generation or next pointer. While we update 344 * the contents, the generation must be zero. 345 */ 346 tho = timehands; 347 th = tho->th_next; 348 ogen = th->th_generation; 349 th->th_generation = 0; 350 bcopy(tho, th, __offsetof(struct timehands, th_generation)); 351 352 /*- 353 * Capture a timecounter delta on the current timecounter and if 354 * changing timecounters, a counter value from the new timecounter. 355 * Update the offset fields accordingly. 356 */ 357 delta = tc_delta(th); 358 if (th->th_counter != timecounter) 359 ncount = timecounter->tc_get_timecount(timecounter); 360 th->th_offset_count += delta; 361 th->th_offset_count &= th->th_counter->tc_counter_mask; 362 bintime_addx(&th->th_offset, th->th_scale * delta); 363 364 /*- 365 * Hardware latching timecounters may not generate interrupts on 366 * PPS events, so instead we poll them. There is a finite risk that 367 * the hardware might capture a count which is later than the one we 368 * got above, and therefore possibly in the next NTP second which might 369 * have a different rate than the current NTP second. It doesn't 370 * matter in practice. 371 */ 372 if (tho->th_counter->tc_poll_pps) 373 tho->th_counter->tc_poll_pps(tho->th_counter); 374 375 /*- 376 * Deal with NTP second processing. The for() loop probably doesn't 377 * do anything normally, but in a few extreme situations it might 378 * keep timecounters sane if timeouts are not run for several seconds. 379 */ 380 for (i = th->th_offset.sec - tho->th_offset.sec; i > 0; i--) 381 ntp_update_second(&th->th_adjustment, &th->th_offset.sec); 382 383 /* Now is a good time to change timecounters. */ 384 if (th->th_counter != timecounter) { 385 th->th_counter = timecounter; 386 th->th_offset_count = ncount; 387 } 388 389 /*- 390 * Recalculate the scaling factor. We want the number of 1/2^64 391 * fractions of a second per period of the hardware counter, taking 392 * into account the th_adjustment factor which the NTP PLL/adjtime(2) 393 * processing provides us with. 394 * 395 * The th_adjustment is nanoseconds per second with 32 bit binary 396 * fraction and want 64 bit binary fraction of second: 397 * 398 * x = a * 2^32 / 10^9 = a * 4.294967296 399 * 400 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int 401 * we can only multiply by about 850 without overflowing, but that 402 * leaves suitably precise fractions for multiply before divide. 403 * 404 * Divide before multiply with a fraction of 2199/512 results in a 405 * systematic undercompensation of 10PPM of th_adjustment. On a 406 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. 407 * 408 * We happily sacrifice the lowest of the 64 bits of our result 409 * to the goddess of code clarity. 410 */ 411 412 scale = 1ULL << 63; 413 scale += (th->th_adjustment / 1024) * 2199; 414 scale /= th->th_counter->tc_frequency; 415 th->th_scale = scale * 2; 416 417 /* Update the GMT timestamps used for the get*() functions. */ 418 bt = th->th_offset; 419 bintime_add(&bt, &boottimebin); 420 bintime2timeval(&bt, &th->th_microtime); 421 bintime2timespec(&bt, &th->th_nanotime); 422 423 /*- 424 * Now that the struct timehands is against consistent, set the new 425 * generation number, making sure to not make it zero. 426 */ 427 if (++ogen == 0) 428 ogen++; 429 th->th_generation = ogen; 430 431 /* Go live on the new struct timehands */ 432 time_second = th->th_microtime.tv_sec; 433 timehands = th; 434} 435 436/* Report or change active timecounter hardware. */ 437 438static int 439sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) 440{ 441 char newname[32]; 442 struct timecounter *newtc, *tc; 443 int error; 444 445 tc = timecounter; 446 strncpy(newname, tc->tc_name, sizeof(newname)); 447 error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); 448 if (error != 0 && req->newptr == NULL && !strcmp(newname, tc->tc_name)) 449 return(error); 450 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { 451 if (strcmp(newname, newtc->tc_name)) 452 continue; 453 /* Warm up new timecounter. */ 454 (void)newtc->tc_get_timecount(newtc); 455 (void)newtc->tc_get_timecount(newtc); 456 timecounter = newtc; 457 return (0); 458 } 459 return (EINVAL); 460} 461 462SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 463 0, 0, sysctl_kern_timecounter_hardware, "A", ""); 464 465/*- 466 * RFC 2783 PPS-API implementation. 467 */ 468 469int 470pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) 471{ 472 pps_params_t *app; 473 struct pps_fetch_args *fapi; 474#ifdef PPS_SYNC 475 struct pps_kcbind_args *kapi; 476#endif 477 478 switch (cmd) { 479 case PPS_IOC_CREATE: 480 return (0); 481 case PPS_IOC_DESTROY: 482 return (0); 483 case PPS_IOC_SETPARAMS: 484 app = (pps_params_t *)data; 485 if (app->mode & ~pps->ppscap) 486 return (EINVAL); 487 pps->ppsparam = *app; 488 return (0); 489 case PPS_IOC_GETPARAMS: 490 app = (pps_params_t *)data; 491 *app = pps->ppsparam; 492 app->api_version = PPS_API_VERS_1; 493 return (0); 494 case PPS_IOC_GETCAP: 495 *(int*)data = pps->ppscap; 496 return (0); 497 case PPS_IOC_FETCH: 498 fapi = (struct pps_fetch_args *)data; 499 if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) 500 return (EINVAL); 501 if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) 502 return (EOPNOTSUPP); 503 pps->ppsinfo.current_mode = pps->ppsparam.mode; 504 fapi->pps_info_buf = pps->ppsinfo; 505 return (0); 506 case PPS_IOC_KCBIND: 507#ifdef PPS_SYNC 508 kapi = (struct pps_kcbind_args *)data; 509 /* XXX Only root should be able to do this */ 510 if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) 511 return (EINVAL); 512 if (kapi->kernel_consumer != PPS_KC_HARDPPS) 513 return (EINVAL); 514 if (kapi->edge & ~pps->ppscap) 515 return (EINVAL); 516 pps->kcmode = kapi->edge; 517 return (0); 518#else 519 return (EOPNOTSUPP); 520#endif 521 default: 522 return (ENOTTY); 523 } 524} 525 526void 527pps_init(struct pps_state *pps) 528{ 529 pps->ppscap |= PPS_TSFMT_TSPEC; 530 if (pps->ppscap & PPS_CAPTUREASSERT) 531 pps->ppscap |= PPS_OFFSETASSERT; 532 if (pps->ppscap & PPS_CAPTURECLEAR) 533 pps->ppscap |= PPS_OFFSETCLEAR; 534} 535 536void 537pps_capture(struct pps_state *pps) 538{ 539 struct timehands *th; 540 541 th = timehands; 542 pps->capgen = th->th_generation; 543 pps->capth = th; 544 pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); 545 if (pps->capgen != th->th_generation) 546 pps->capgen = 0; 547} 548 549void 550pps_event(struct pps_state *pps, int event) 551{ 552 struct timespec ts, *tsp, *osp; 553 u_int tcount, *pcount; 554 struct bintime bt; 555 int foff, fhard; 556 pps_seq_t *pseq; 557 558 /* If the timecounter were wound up, bail. */ 559 if (!pps->capgen || pps->capgen != pps->capth->th_generation) 560 return; 561 562 /* Things would be easier with arrays... */ 563 if (event == PPS_CAPTUREASSERT) { 564 tsp = &pps->ppsinfo.assert_timestamp; 565 osp = &pps->ppsparam.assert_offset; 566 foff = pps->ppsparam.mode & PPS_OFFSETASSERT; 567 fhard = pps->kcmode & PPS_CAPTUREASSERT; 568 pcount = &pps->ppscount[0]; 569 pseq = &pps->ppsinfo.assert_sequence; 570 } else { 571 tsp = &pps->ppsinfo.clear_timestamp; 572 osp = &pps->ppsparam.clear_offset; 573 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; 574 fhard = pps->kcmode & PPS_CAPTURECLEAR; 575 pcount = &pps->ppscount[1]; 576 pseq = &pps->ppsinfo.clear_sequence; 577 } 578 579 /*- 580 * If the timecounter changed, we cannot compare the count values, so 581 * we have to drop the rest of the PPS-stuff until the next event. 582 */ 583 if (pps->ppstc != pps->capth->th_counter) { 584 pps->ppstc = pps->capth->th_counter; 585 *pcount = pps->capcount; 586 pps->ppscount[2] = pps->capcount; 587 return; 588 } 589 590 /* Nothing really happened */ 591 if (*pcount == pps->capcount) 592 return; 593 594 /* Convert the count to timespec */ 595 tcount = pps->capcount - pps->capth->th_offset_count; 596 tcount &= pps->capth->th_counter->tc_counter_mask; 597 bt = pps->capth->th_offset; 598 bintime_addx(&bt, pps->capth->th_scale * tcount); 599 bintime2timespec(&bt, &ts); 600 601 /* If the timecounter were wound up, bail. */ 602 if (pps->capgen != pps->capth->th_generation) 603 return; 604 605 *pcount = pps->capcount; 606 (*pseq)++; 607 *tsp = ts; 608 609 if (foff) { 610 timespecadd(tsp, osp); 611 if (tsp->tv_nsec < 0) { 612 tsp->tv_nsec += 1000000000; 613 tsp->tv_sec -= 1; 614 } 615 } 616#ifdef PPS_SYNC 617 if (fhard) { 618 /*- 619 * Feed the NTP PLL/FLL. 620 * The FLL wants to know how many nanoseconds elapsed since 621 * the previous event. 622 * I have never been able to convince myself that this code 623 * is actually correct: Using th_scale is bound to contain 624 * a phase correction component from userland, when running 625 * as FLL, so the number hardpps() gets is not meaningful IMO. 626 */ 627 tcount = pps->capcount - pps->ppscount[2]; 628 pps->ppscount[2] = pps->capcount; 629 tcount &= pps->capth->th_counter->tc_counter_mask; 630 bt.sec = 0; 631 bt.frac = 0; 632 bintime_addx(&bt, pps->capth->th_scale * tcount); 633 bintime2timespec(&bt, &ts); 634 hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); 635 } 636#endif 637} 638 639/*- 640 * Timecounters need to be updated every so often to prevent the hardware 641 * counter from overflowing. Updating also recalculates the cached values 642 * used by the get*() family of functions, so their precision depends on 643 * the update frequency. 644 * Don't update faster than approx once per millisecond, if people want 645 * better timestamps they should use the non-"get" functions. 646 */ 647 648static int tc_tick; 649SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tick, 0, ""); 650 651static void 652tc_ticktock(void *dummy) 653{ 654 655 tc_windup(); 656 timeout(tc_ticktock, NULL, tc_tick); 657} 658 659static void 660inittimecounter(void *dummy) 661{ 662 u_int p; 663 664 if (hz > 1000) 665 tc_tick = (hz + 500) / 1000; 666 else 667 tc_tick = 1; 668 p = (tc_tick * 1000000) / hz; 669 printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); 670 tc_ticktock(NULL); 671} 672 673SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_FIRST, inittimecounter, NULL) 674