1/*- 2 * SPDX-License-Identifier: Beerware 3 * 4 * ---------------------------------------------------------------------------- 5 * "THE BEER-WARE LICENSE" (Revision 42): 6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7 * can do whatever you want with this stuff. If we meet some day, and you think 8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9 * ---------------------------------------------------------------------------- 10 * 11 * Copyright (c) 2011, 2015, 2016 The FreeBSD Foundation 12 * All rights reserved. 13 * 14 * Portions of this software were developed by Julien Ridoux at the University 15 * of Melbourne under sponsorship from the FreeBSD Foundation. 16 * 17 * Portions of this software were developed by Konstantin Belousov 18 * under sponsorship from the FreeBSD Foundation. 19 */ 20 21#include <sys/cdefs.h> 22__FBSDID("$FreeBSD$"); 23 24#include "opt_ntp.h" 25#include "opt_ffclock.h" 26 27#include <sys/param.h> 28#include <sys/kernel.h> 29#include <sys/limits.h> 30#include <sys/lock.h> 31#include <sys/mutex.h> 32#include <sys/proc.h> 33#include <sys/sbuf.h> 34#include <sys/sleepqueue.h> 35#include <sys/sysctl.h> 36#include <sys/syslog.h> 37#include <sys/systm.h> 38#include <sys/timeffc.h> 39#include <sys/timepps.h> 40#include <sys/timetc.h> 41#include <sys/timex.h> 42#include <sys/vdso.h> 43 44/* 45 * A large step happens on boot. This constant detects such steps. 46 * It is relatively small so that ntp_update_second gets called enough 47 * in the typical 'missed a couple of seconds' case, but doesn't loop 48 * forever when the time step is large. 49 */ 50#define LARGE_STEP 200 51 52/* 53 * Implement a dummy timecounter which we can use until we get a real one 54 * in the air. This allows the console and other early stuff to use 55 * time services. 
56 */ 57 58static u_int 59dummy_get_timecount(struct timecounter *tc) 60{ 61 static u_int now; 62 63 return (++now); 64} 65 66static struct timecounter dummy_timecounter = { 67 dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 68}; 69 70struct timehands { 71 /* These fields must be initialized by the driver. */ 72 struct timecounter *th_counter; 73 int64_t th_adjustment; 74 uint64_t th_scale; 75 u_int th_large_delta; 76 u_int th_offset_count; 77 struct bintime th_offset; 78 struct bintime th_bintime; 79 struct timeval th_microtime; 80 struct timespec th_nanotime; 81 struct bintime th_boottime; 82 /* Fields not to be copied in tc_windup start with th_generation. */ 83 u_int th_generation; 84 struct timehands *th_next; 85}; 86 87static struct timehands ths[16] = { 88 [0] = { 89 .th_counter = &dummy_timecounter, 90 .th_scale = (uint64_t)-1 / 1000000, 91 .th_large_delta = 1000000, 92 .th_offset = { .sec = 1 }, 93 .th_generation = 1, 94 }, 95}; 96 97static struct timehands *volatile timehands = &ths[0]; 98struct timecounter *timecounter = &dummy_timecounter; 99static struct timecounter *timecounters = &dummy_timecounter; 100 101int tc_min_ticktock_freq = 1; 102 103volatile time_t time_second = 1; 104volatile time_t time_uptime = 1; 105 106static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS); 107SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD, 108 NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime"); 109 110SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); 111static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, ""); 112 113static int timestepwarnings; 114SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, 115 ×tepwarnings, 0, "Log time steps"); 116 117static int timehands_count = 2; 118SYSCTL_INT(_kern_timecounter, OID_AUTO, timehands_count, 119 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, 120 &timehands_count, 0, "Count of timehands in rotation"); 121 122struct bintime bt_timethreshold; 123struct bintime 
bt_tickthreshold; 124sbintime_t sbt_timethreshold; 125sbintime_t sbt_tickthreshold; 126struct bintime tc_tick_bt; 127sbintime_t tc_tick_sbt; 128int tc_precexp; 129int tc_timepercentage = TC_DEFAULTPERC; 130static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS); 131SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation, 132 CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, 133 sysctl_kern_timecounter_adjprecision, "I", 134 "Allowed time interval deviation in percents"); 135 136volatile int rtc_generation = 1; 137 138static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ 139static char tc_from_tunable[16]; 140 141static void tc_windup(struct bintime *new_boottimebin); 142static void cpu_tick_calibrate(int); 143 144void dtrace_getnanotime(struct timespec *tsp); 145 146static int 147sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) 148{ 149 struct timeval boottime; 150 151 getboottime(&boottime); 152 153#ifndef __mips__ 154#ifdef SCTL_MASK32 155 int tv[2]; 156 157 if (req->flags & SCTL_MASK32) { 158 tv[0] = boottime.tv_sec; 159 tv[1] = boottime.tv_usec; 160 return (SYSCTL_OUT(req, tv, sizeof(tv))); 161 } 162#endif 163#endif 164 return (SYSCTL_OUT(req, &boottime, sizeof(boottime))); 165} 166 167static int 168sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS) 169{ 170 u_int ncount; 171 struct timecounter *tc = arg1; 172 173 ncount = tc->tc_get_timecount(tc); 174 return (sysctl_handle_int(oidp, &ncount, 0, req)); 175} 176 177static int 178sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS) 179{ 180 uint64_t freq; 181 struct timecounter *tc = arg1; 182 183 freq = tc->tc_frequency; 184 return (sysctl_handle_64(oidp, &freq, 0, req)); 185} 186 187/* 188 * Return the difference between the timehands' counter value now and what 189 * was when we copied it to the timehands' offset_count. 
190 */ 191static __inline u_int 192tc_delta(struct timehands *th) 193{ 194 struct timecounter *tc; 195 196 tc = th->th_counter; 197 return ((tc->tc_get_timecount(tc) - th->th_offset_count) & 198 tc->tc_counter_mask); 199} 200 201/* 202 * Functions for reading the time. We have to loop until we are sure that 203 * the timehands that we operated on was not updated under our feet. See 204 * the comment in <sys/time.h> for a description of these 12 functions. 205 */ 206 207static __inline void 208bintime_off(struct bintime *bt, u_int off) 209{ 210 struct timehands *th; 211 struct bintime *btp; 212 uint64_t scale, x; 213 u_int delta, gen, large_delta; 214 215 do { 216 th = timehands; 217 gen = atomic_load_acq_int(&th->th_generation); 218 btp = (struct bintime *)((vm_offset_t)th + off); 219 *bt = *btp; 220 scale = th->th_scale; 221 delta = tc_delta(th); 222 large_delta = th->th_large_delta; 223 atomic_thread_fence_acq(); 224 } while (gen == 0 || gen != th->th_generation); 225 226 if (__predict_false(delta >= large_delta)) { 227 /* Avoid overflow for scale * delta. 
*/ 228 x = (scale >> 32) * delta; 229 bt->sec += x >> 32; 230 bintime_addx(bt, x << 32); 231 bintime_addx(bt, (scale & 0xffffffff) * delta); 232 } else { 233 bintime_addx(bt, scale * delta); 234 } 235} 236#define GETTHBINTIME(dst, member) \ 237do { \ 238/* \ 239 _Static_assert(_Generic(((struct timehands *)NULL)->member, \ 240 struct bintime: 1, default: 0) == 1, \ 241 "struct timehands member is not of struct bintime type"); \ 242*/ \ 243 bintime_off(dst, __offsetof(struct timehands, member)); \ 244} while (0) 245 246static __inline void 247getthmember(void *out, size_t out_size, u_int off) 248{ 249 struct timehands *th; 250 u_int gen; 251 252 do { 253 th = timehands; 254 gen = atomic_load_acq_int(&th->th_generation); 255 memcpy(out, (char *)th + off, out_size); 256 atomic_thread_fence_acq(); 257 } while (gen == 0 || gen != th->th_generation); 258} 259#define GETTHMEMBER(dst, member) \ 260do { \ 261/* \ 262 _Static_assert(_Generic(*dst, \ 263 __typeof(((struct timehands *)NULL)->member): 1, \ 264 default: 0) == 1, \ 265 "*dst and struct timehands member have different types"); \ 266*/ \ 267 getthmember(dst, sizeof(*dst), __offsetof(struct timehands, \ 268 member)); \ 269} while (0) 270 271#ifdef FFCLOCK 272void 273fbclock_binuptime(struct bintime *bt) 274{ 275 276 GETTHBINTIME(bt, th_offset); 277} 278 279void 280fbclock_nanouptime(struct timespec *tsp) 281{ 282 struct bintime bt; 283 284 fbclock_binuptime(&bt); 285 bintime2timespec(&bt, tsp); 286} 287 288void 289fbclock_microuptime(struct timeval *tvp) 290{ 291 struct bintime bt; 292 293 fbclock_binuptime(&bt); 294 bintime2timeval(&bt, tvp); 295} 296 297void 298fbclock_bintime(struct bintime *bt) 299{ 300 301 GETTHBINTIME(bt, th_bintime); 302} 303 304void 305fbclock_nanotime(struct timespec *tsp) 306{ 307 struct bintime bt; 308 309 fbclock_bintime(&bt); 310 bintime2timespec(&bt, tsp); 311} 312 313void 314fbclock_microtime(struct timeval *tvp) 315{ 316 struct bintime bt; 317 318 fbclock_bintime(&bt); 319 
bintime2timeval(&bt, tvp); 320} 321 322void 323fbclock_getbinuptime(struct bintime *bt) 324{ 325 326 GETTHMEMBER(bt, th_offset); 327} 328 329void 330fbclock_getnanouptime(struct timespec *tsp) 331{ 332 struct bintime bt; 333 334 GETTHMEMBER(&bt, th_offset); 335 bintime2timespec(&bt, tsp); 336} 337 338void 339fbclock_getmicrouptime(struct timeval *tvp) 340{ 341 struct bintime bt; 342 343 GETTHMEMBER(&bt, th_offset); 344 bintime2timeval(&bt, tvp); 345} 346 347void 348fbclock_getbintime(struct bintime *bt) 349{ 350 351 GETTHMEMBER(bt, th_bintime); 352} 353 354void 355fbclock_getnanotime(struct timespec *tsp) 356{ 357 358 GETTHMEMBER(tsp, th_nanotime); 359} 360 361void 362fbclock_getmicrotime(struct timeval *tvp) 363{ 364 365 GETTHMEMBER(tvp, th_microtime); 366} 367#else /* !FFCLOCK */ 368 369void 370binuptime(struct bintime *bt) 371{ 372 373 GETTHBINTIME(bt, th_offset); 374} 375 376void 377nanouptime(struct timespec *tsp) 378{ 379 struct bintime bt; 380 381 binuptime(&bt); 382 bintime2timespec(&bt, tsp); 383} 384 385void 386microuptime(struct timeval *tvp) 387{ 388 struct bintime bt; 389 390 binuptime(&bt); 391 bintime2timeval(&bt, tvp); 392} 393 394void 395bintime(struct bintime *bt) 396{ 397 398 GETTHBINTIME(bt, th_bintime); 399} 400 401void 402nanotime(struct timespec *tsp) 403{ 404 struct bintime bt; 405 406 bintime(&bt); 407 bintime2timespec(&bt, tsp); 408} 409 410void 411microtime(struct timeval *tvp) 412{ 413 struct bintime bt; 414 415 bintime(&bt); 416 bintime2timeval(&bt, tvp); 417} 418 419void 420getbinuptime(struct bintime *bt) 421{ 422 423 GETTHMEMBER(bt, th_offset); 424} 425 426void 427getnanouptime(struct timespec *tsp) 428{ 429 struct bintime bt; 430 431 GETTHMEMBER(&bt, th_offset); 432 bintime2timespec(&bt, tsp); 433} 434 435void 436getmicrouptime(struct timeval *tvp) 437{ 438 struct bintime bt; 439 440 GETTHMEMBER(&bt, th_offset); 441 bintime2timeval(&bt, tvp); 442} 443 444void 445getbintime(struct bintime *bt) 446{ 447 448 GETTHMEMBER(bt, 
th_bintime); 449} 450 451void 452getnanotime(struct timespec *tsp) 453{ 454 455 GETTHMEMBER(tsp, th_nanotime); 456} 457 458void 459getmicrotime(struct timeval *tvp) 460{ 461 462 GETTHMEMBER(tvp, th_microtime); 463} 464#endif /* FFCLOCK */ 465 466void 467getboottime(struct timeval *boottime) 468{ 469 struct bintime boottimebin; 470 471 getboottimebin(&boottimebin); 472 bintime2timeval(&boottimebin, boottime); 473} 474 475void 476getboottimebin(struct bintime *boottimebin) 477{ 478 479 GETTHMEMBER(boottimebin, th_boottime); 480} 481 482#ifdef FFCLOCK 483/* 484 * Support for feed-forward synchronization algorithms. This is heavily inspired 485 * by the timehands mechanism but kept independent from it. *_windup() functions 486 * have some connection to avoid accessing the timecounter hardware more than 487 * necessary. 488 */ 489 490/* Feed-forward clock estimates kept updated by the synchronization daemon. */ 491struct ffclock_estimate ffclock_estimate; 492struct bintime ffclock_boottime; /* Feed-forward boot time estimate. */ 493uint32_t ffclock_status; /* Feed-forward clock status. */ 494int8_t ffclock_updated; /* New estimates are available. */ 495struct mtx ffclock_mtx; /* Mutex on ffclock_estimate. 
*/ 496 497struct fftimehands { 498 struct ffclock_estimate cest; 499 struct bintime tick_time; 500 struct bintime tick_time_lerp; 501 ffcounter tick_ffcount; 502 uint64_t period_lerp; 503 volatile uint8_t gen; 504 struct fftimehands *next; 505}; 506 507#define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) 508 509static struct fftimehands ffth[10]; 510static struct fftimehands *volatile fftimehands = ffth; 511 512static void 513ffclock_init(void) 514{ 515 struct fftimehands *cur; 516 struct fftimehands *last; 517 518 memset(ffth, 0, sizeof(ffth)); 519 520 last = ffth + NUM_ELEMENTS(ffth) - 1; 521 for (cur = ffth; cur < last; cur++) 522 cur->next = cur + 1; 523 last->next = ffth; 524 525 ffclock_updated = 0; 526 ffclock_status = FFCLOCK_STA_UNSYNC; 527 mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF); 528} 529 530/* 531 * Reset the feed-forward clock estimates. Called from inittodr() to get things 532 * kick started and uses the timecounter nominal frequency as a first period 533 * estimate. Note: this function may be called several time just after boot. 534 * Note: this is the only function that sets the value of boot time for the 535 * monotonic (i.e. uptime) version of the feed-forward clock. 
536 */ 537void 538ffclock_reset_clock(struct timespec *ts) 539{ 540 struct timecounter *tc; 541 struct ffclock_estimate cest; 542 543 tc = timehands->th_counter; 544 memset(&cest, 0, sizeof(struct ffclock_estimate)); 545 546 timespec2bintime(ts, &ffclock_boottime); 547 timespec2bintime(ts, &(cest.update_time)); 548 ffclock_read_counter(&cest.update_ffcount); 549 cest.leapsec_next = 0; 550 cest.period = ((1ULL << 63) / tc->tc_frequency) << 1; 551 cest.errb_abs = 0; 552 cest.errb_rate = 0; 553 cest.status = FFCLOCK_STA_UNSYNC; 554 cest.leapsec_total = 0; 555 cest.leapsec = 0; 556 557 mtx_lock(&ffclock_mtx); 558 bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate)); 559 ffclock_updated = INT8_MAX; 560 mtx_unlock(&ffclock_mtx); 561 562 printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name, 563 (unsigned long long)tc->tc_frequency, (long)ts->tv_sec, 564 (unsigned long)ts->tv_nsec); 565} 566 567/* 568 * Sub-routine to convert a time interval measured in RAW counter units to time 569 * in seconds stored in bintime format. 570 * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be 571 * larger than the max value of u_int (on 32 bit architecture). Loop to consume 572 * extra cycles. 573 */ 574static void 575ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt) 576{ 577 struct bintime bt2; 578 ffcounter delta, delta_max; 579 580 delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1; 581 bintime_clear(bt); 582 do { 583 if (ffdelta > delta_max) 584 delta = delta_max; 585 else 586 delta = ffdelta; 587 bt2.sec = 0; 588 bt2.frac = period; 589 bintime_mul(&bt2, (unsigned int)delta); 590 bintime_add(bt, &bt2); 591 ffdelta -= delta; 592 } while (ffdelta > 0); 593} 594 595/* 596 * Update the fftimehands. 597 * Push the tick ffcount and time(s) forward based on current clock estimate. 
598 * The conversion from ffcounter to bintime relies on the difference clock 599 * principle, whose accuracy relies on computing small time intervals. If a new 600 * clock estimate has been passed by the synchronisation daemon, make it 601 * current, and compute the linear interpolation for monotonic time if needed. 602 */ 603static void 604ffclock_windup(unsigned int delta) 605{ 606 struct ffclock_estimate *cest; 607 struct fftimehands *ffth; 608 struct bintime bt, gap_lerp; 609 ffcounter ffdelta; 610 uint64_t frac; 611 unsigned int polling; 612 uint8_t forward_jump, ogen; 613 614 /* 615 * Pick the next timehand, copy current ffclock estimates and move tick 616 * times and counter forward. 617 */ 618 forward_jump = 0; 619 ffth = fftimehands->next; 620 ogen = ffth->gen; 621 ffth->gen = 0; 622 cest = &ffth->cest; 623 bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate)); 624 ffdelta = (ffcounter)delta; 625 ffth->period_lerp = fftimehands->period_lerp; 626 627 ffth->tick_time = fftimehands->tick_time; 628 ffclock_convert_delta(ffdelta, cest->period, &bt); 629 bintime_add(&ffth->tick_time, &bt); 630 631 ffth->tick_time_lerp = fftimehands->tick_time_lerp; 632 ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt); 633 bintime_add(&ffth->tick_time_lerp, &bt); 634 635 ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta; 636 637 /* 638 * Assess the status of the clock, if the last update is too old, it is 639 * likely the synchronisation daemon is dead and the clock is free 640 * running. 641 */ 642 if (ffclock_updated == 0) { 643 ffdelta = ffth->tick_ffcount - cest->update_ffcount; 644 ffclock_convert_delta(ffdelta, cest->period, &bt); 645 if (bt.sec > 2 * FFCLOCK_SKM_SCALE) 646 ffclock_status |= FFCLOCK_STA_UNSYNC; 647 } 648 649 /* 650 * If available, grab updated clock estimates and make them current. 651 * Recompute time at this tick using the updated estimates. 
The clock 652 * estimates passed the feed-forward synchronisation daemon may result 653 * in time conversion that is not monotonically increasing (just after 654 * the update). time_lerp is a particular linear interpolation over the 655 * synchronisation algo polling period that ensures monotonicity for the 656 * clock ids requesting it. 657 */ 658 if (ffclock_updated > 0) { 659 bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate)); 660 ffdelta = ffth->tick_ffcount - cest->update_ffcount; 661 ffth->tick_time = cest->update_time; 662 ffclock_convert_delta(ffdelta, cest->period, &bt); 663 bintime_add(&ffth->tick_time, &bt); 664 665 /* ffclock_reset sets ffclock_updated to INT8_MAX */ 666 if (ffclock_updated == INT8_MAX) 667 ffth->tick_time_lerp = ffth->tick_time; 668 669 if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >)) 670 forward_jump = 1; 671 else 672 forward_jump = 0; 673 674 bintime_clear(&gap_lerp); 675 if (forward_jump) { 676 gap_lerp = ffth->tick_time; 677 bintime_sub(&gap_lerp, &ffth->tick_time_lerp); 678 } else { 679 gap_lerp = ffth->tick_time_lerp; 680 bintime_sub(&gap_lerp, &ffth->tick_time); 681 } 682 683 /* 684 * The reset from the RTC clock may be far from accurate, and 685 * reducing the gap between real time and interpolated time 686 * could take a very long time if the interpolated clock insists 687 * on strict monotonicity. The clock is reset under very strict 688 * conditions (kernel time is known to be wrong and 689 * synchronization daemon has been restarted recently. 690 * ffclock_boottime absorbs the jump to ensure boot time is 691 * correct and uptime functions stay consistent. 
692 */ 693 if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) && 694 ((cest->status & FFCLOCK_STA_UNSYNC) == 0) && 695 ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) { 696 if (forward_jump) 697 bintime_add(&ffclock_boottime, &gap_lerp); 698 else 699 bintime_sub(&ffclock_boottime, &gap_lerp); 700 ffth->tick_time_lerp = ffth->tick_time; 701 bintime_clear(&gap_lerp); 702 } 703 704 ffclock_status = cest->status; 705 ffth->period_lerp = cest->period; 706 707 /* 708 * Compute corrected period used for the linear interpolation of 709 * time. The rate of linear interpolation is capped to 5000PPM 710 * (5ms/s). 711 */ 712 if (bintime_isset(&gap_lerp)) { 713 ffdelta = cest->update_ffcount; 714 ffdelta -= fftimehands->cest.update_ffcount; 715 ffclock_convert_delta(ffdelta, cest->period, &bt); 716 polling = bt.sec; 717 bt.sec = 0; 718 bt.frac = 5000000 * (uint64_t)18446744073LL; 719 bintime_mul(&bt, polling); 720 if (bintime_cmp(&gap_lerp, &bt, >)) 721 gap_lerp = bt; 722 723 /* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */ 724 frac = 0; 725 if (gap_lerp.sec > 0) { 726 frac -= 1; 727 frac /= ffdelta / gap_lerp.sec; 728 } 729 frac += gap_lerp.frac / ffdelta; 730 731 if (forward_jump) 732 ffth->period_lerp += frac; 733 else 734 ffth->period_lerp -= frac; 735 } 736 737 ffclock_updated = 0; 738 } 739 if (++ogen == 0) 740 ogen = 1; 741 ffth->gen = ogen; 742 fftimehands = ffth; 743} 744 745/* 746 * Adjust the fftimehands when the timecounter is changed. Stating the obvious, 747 * the old and new hardware counter cannot be read simultaneously. tc_windup() 748 * does read the two counters 'back to back', but a few cycles are effectively 749 * lost, and not accumulated in tick_ffcount. This is a fairly radical 750 * operation for a feed-forward synchronization daemon, and it is its job to not 751 * pushing irrelevant data to the kernel. 
Because there is no locking here, 752 * simply force to ignore pending or next update to give daemon a chance to 753 * realize the counter has changed. 754 */ 755static void 756ffclock_change_tc(struct timehands *th) 757{ 758 struct fftimehands *ffth; 759 struct ffclock_estimate *cest; 760 struct timecounter *tc; 761 uint8_t ogen; 762 763 tc = th->th_counter; 764 ffth = fftimehands->next; 765 ogen = ffth->gen; 766 ffth->gen = 0; 767 768 cest = &ffth->cest; 769 bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate)); 770 cest->period = ((1ULL << 63) / tc->tc_frequency ) << 1; 771 cest->errb_abs = 0; 772 cest->errb_rate = 0; 773 cest->status |= FFCLOCK_STA_UNSYNC; 774 775 ffth->tick_ffcount = fftimehands->tick_ffcount; 776 ffth->tick_time_lerp = fftimehands->tick_time_lerp; 777 ffth->tick_time = fftimehands->tick_time; 778 ffth->period_lerp = cest->period; 779 780 /* Do not lock but ignore next update from synchronization daemon. */ 781 ffclock_updated--; 782 783 if (++ogen == 0) 784 ogen = 1; 785 ffth->gen = ogen; 786 fftimehands = ffth; 787} 788 789/* 790 * Retrieve feed-forward counter and time of last kernel tick. 791 */ 792void 793ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags) 794{ 795 struct fftimehands *ffth; 796 uint8_t gen; 797 798 /* 799 * No locking but check generation has not changed. Also need to make 800 * sure ffdelta is positive, i.e. ffcount > tick_ffcount. 801 */ 802 do { 803 ffth = fftimehands; 804 gen = ffth->gen; 805 if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) 806 *bt = ffth->tick_time_lerp; 807 else 808 *bt = ffth->tick_time; 809 *ffcount = ffth->tick_ffcount; 810 } while (gen == 0 || gen != ffth->gen); 811} 812 813/* 814 * Absolute clock conversion. Low level function to convert ffcounter to 815 * bintime. The ffcounter is converted using the current ffclock period estimate 816 * or the "interpolated period" to ensure monotonicity. 
817 * NOTE: this conversion may have been deferred, and the clock updated since the 818 * hardware counter has been read. 819 */ 820void 821ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags) 822{ 823 struct fftimehands *ffth; 824 struct bintime bt2; 825 ffcounter ffdelta; 826 uint8_t gen; 827 828 /* 829 * No locking but check generation has not changed. Also need to make 830 * sure ffdelta is positive, i.e. ffcount > tick_ffcount. 831 */ 832 do { 833 ffth = fftimehands; 834 gen = ffth->gen; 835 if (ffcount > ffth->tick_ffcount) 836 ffdelta = ffcount - ffth->tick_ffcount; 837 else 838 ffdelta = ffth->tick_ffcount - ffcount; 839 840 if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) { 841 *bt = ffth->tick_time_lerp; 842 ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2); 843 } else { 844 *bt = ffth->tick_time; 845 ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2); 846 } 847 848 if (ffcount > ffth->tick_ffcount) 849 bintime_add(bt, &bt2); 850 else 851 bintime_sub(bt, &bt2); 852 } while (gen == 0 || gen != ffth->gen); 853} 854 855/* 856 * Difference clock conversion. 857 * Low level function to Convert a time interval measured in RAW counter units 858 * into bintime. The difference clock allows measuring small intervals much more 859 * reliably than the absolute clock. 860 */ 861void 862ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt) 863{ 864 struct fftimehands *ffth; 865 uint8_t gen; 866 867 /* No locking but check generation has not changed. */ 868 do { 869 ffth = fftimehands; 870 gen = ffth->gen; 871 ffclock_convert_delta(ffdelta, ffth->cest.period, bt); 872 } while (gen == 0 || gen != ffth->gen); 873} 874 875/* 876 * Access to current ffcounter value. 
877 */ 878void 879ffclock_read_counter(ffcounter *ffcount) 880{ 881 struct timehands *th; 882 struct fftimehands *ffth; 883 unsigned int gen, delta; 884 885 /* 886 * ffclock_windup() called from tc_windup(), safe to rely on 887 * th->th_generation only, for correct delta and ffcounter. 888 */ 889 do { 890 th = timehands; 891 gen = atomic_load_acq_int(&th->th_generation); 892 ffth = fftimehands; 893 delta = tc_delta(th); 894 *ffcount = ffth->tick_ffcount; 895 atomic_thread_fence_acq(); 896 } while (gen == 0 || gen != th->th_generation); 897 898 *ffcount += delta; 899} 900 901void 902binuptime(struct bintime *bt) 903{ 904 905 binuptime_fromclock(bt, sysclock_active); 906} 907 908void 909nanouptime(struct timespec *tsp) 910{ 911 912 nanouptime_fromclock(tsp, sysclock_active); 913} 914 915void 916microuptime(struct timeval *tvp) 917{ 918 919 microuptime_fromclock(tvp, sysclock_active); 920} 921 922void 923bintime(struct bintime *bt) 924{ 925 926 bintime_fromclock(bt, sysclock_active); 927} 928 929void 930nanotime(struct timespec *tsp) 931{ 932 933 nanotime_fromclock(tsp, sysclock_active); 934} 935 936void 937microtime(struct timeval *tvp) 938{ 939 940 microtime_fromclock(tvp, sysclock_active); 941} 942 943void 944getbinuptime(struct bintime *bt) 945{ 946 947 getbinuptime_fromclock(bt, sysclock_active); 948} 949 950void 951getnanouptime(struct timespec *tsp) 952{ 953 954 getnanouptime_fromclock(tsp, sysclock_active); 955} 956 957void 958getmicrouptime(struct timeval *tvp) 959{ 960 961 getmicrouptime_fromclock(tvp, sysclock_active); 962} 963 964void 965getbintime(struct bintime *bt) 966{ 967 968 getbintime_fromclock(bt, sysclock_active); 969} 970 971void 972getnanotime(struct timespec *tsp) 973{ 974 975 getnanotime_fromclock(tsp, sysclock_active); 976} 977 978void 979getmicrotime(struct timeval *tvp) 980{ 981 982 getmicrouptime_fromclock(tvp, sysclock_active); 983} 984 985#endif /* FFCLOCK */ 986 987/* 988 * This is a clone of getnanotime and used for walltimestamps. 
989 * The dtrace_ prefix prevents fbt from creating probes for 990 * it so walltimestamp can be safely used in all fbt probes. 991 */ 992void 993dtrace_getnanotime(struct timespec *tsp) 994{ 995 996 GETTHMEMBER(tsp, th_nanotime); 997} 998 999/* 1000 * System clock currently providing time to the system. Modifiable via sysctl 1001 * when the FFCLOCK option is defined. 1002 */ 1003int sysclock_active = SYSCLOCK_FBCK; 1004 1005/* Internal NTP status and error estimates. */ 1006extern int time_status; 1007extern long time_esterror; 1008 1009/* 1010 * Take a snapshot of sysclock data which can be used to compare system clocks 1011 * and generate timestamps after the fact. 1012 */ 1013void 1014sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast) 1015{ 1016 struct fbclock_info *fbi; 1017 struct timehands *th; 1018 struct bintime bt; 1019 unsigned int delta, gen; 1020#ifdef FFCLOCK 1021 ffcounter ffcount; 1022 struct fftimehands *ffth; 1023 struct ffclock_info *ffi; 1024 struct ffclock_estimate cest; 1025 1026 ffi = &clock_snap->ff_info; 1027#endif 1028 1029 fbi = &clock_snap->fb_info; 1030 delta = 0; 1031 1032 do { 1033 th = timehands; 1034 gen = atomic_load_acq_int(&th->th_generation); 1035 fbi->th_scale = th->th_scale; 1036 fbi->tick_time = th->th_offset; 1037#ifdef FFCLOCK 1038 ffth = fftimehands; 1039 ffi->tick_time = ffth->tick_time_lerp; 1040 ffi->tick_time_lerp = ffth->tick_time_lerp; 1041 ffi->period = ffth->cest.period; 1042 ffi->period_lerp = ffth->period_lerp; 1043 clock_snap->ffcount = ffth->tick_ffcount; 1044 cest = ffth->cest; 1045#endif 1046 if (!fast) 1047 delta = tc_delta(th); 1048 atomic_thread_fence_acq(); 1049 } while (gen == 0 || gen != th->th_generation); 1050 1051 clock_snap->delta = delta; 1052 clock_snap->sysclock_active = sysclock_active; 1053 1054 /* Record feedback clock status and error. */ 1055 clock_snap->fb_info.status = time_status; 1056 /* XXX: Very crude estimate of feedback clock error. 
*/ 1057 bt.sec = time_esterror / 1000000; 1058 bt.frac = ((time_esterror - bt.sec) * 1000000) * 1059 (uint64_t)18446744073709ULL; 1060 clock_snap->fb_info.error = bt; 1061 1062#ifdef FFCLOCK 1063 if (!fast) 1064 clock_snap->ffcount += delta; 1065 1066 /* Record feed-forward clock leap second adjustment. */ 1067 ffi->leapsec_adjustment = cest.leapsec_total; 1068 if (clock_snap->ffcount > cest.leapsec_next) 1069 ffi->leapsec_adjustment -= cest.leapsec; 1070 1071 /* Record feed-forward clock status and error. */ 1072 clock_snap->ff_info.status = cest.status; 1073 ffcount = clock_snap->ffcount - cest.update_ffcount; 1074 ffclock_convert_delta(ffcount, cest.period, &bt); 1075 /* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */ 1076 bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL); 1077 /* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */ 1078 bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL); 1079 clock_snap->ff_info.error = bt; 1080#endif 1081} 1082 1083/* 1084 * Convert a sysclock snapshot into a struct bintime based on the specified 1085 * clock source and flags. 1086 */ 1087int 1088sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt, 1089 int whichclock, uint32_t flags) 1090{ 1091 struct bintime boottimebin; 1092#ifdef FFCLOCK 1093 struct bintime bt2; 1094 uint64_t period; 1095#endif 1096 1097 switch (whichclock) { 1098 case SYSCLOCK_FBCK: 1099 *bt = cs->fb_info.tick_time; 1100 1101 /* If snapshot was created with !fast, delta will be >0. 
*/ 1102 if (cs->delta > 0) 1103 bintime_addx(bt, cs->fb_info.th_scale * cs->delta); 1104 1105 if ((flags & FBCLOCK_UPTIME) == 0) { 1106 getboottimebin(&boottimebin); 1107 bintime_add(bt, &boottimebin); 1108 } 1109 break; 1110#ifdef FFCLOCK 1111 case SYSCLOCK_FFWD: 1112 if (flags & FFCLOCK_LERP) { 1113 *bt = cs->ff_info.tick_time_lerp; 1114 period = cs->ff_info.period_lerp; 1115 } else { 1116 *bt = cs->ff_info.tick_time; 1117 period = cs->ff_info.period; 1118 } 1119 1120 /* If snapshot was created with !fast, delta will be >0. */ 1121 if (cs->delta > 0) { 1122 ffclock_convert_delta(cs->delta, period, &bt2); 1123 bintime_add(bt, &bt2); 1124 } 1125 1126 /* Leap second adjustment. */ 1127 if (flags & FFCLOCK_LEAPSEC) 1128 bt->sec -= cs->ff_info.leapsec_adjustment; 1129 1130 /* Boot time adjustment, for uptime/monotonic clocks. */ 1131 if (flags & FFCLOCK_UPTIME) 1132 bintime_sub(bt, &ffclock_boottime); 1133 break; 1134#endif 1135 default: 1136 return (EINVAL); 1137 break; 1138 } 1139 1140 return (0); 1141} 1142 1143/* 1144 * Initialize a new timecounter and possibly use it. 1145 */ 1146void 1147tc_init(struct timecounter *tc) 1148{ 1149 u_int u; 1150 struct sysctl_oid *tc_root; 1151 1152 u = tc->tc_frequency / tc->tc_counter_mask; 1153 /* XXX: We need some margin here, 10% is a guess */ 1154 u *= 11; 1155 u /= 10; 1156 if (u > hz && tc->tc_quality >= 0) { 1157 tc->tc_quality = -2000; 1158 if (bootverbose) { 1159 printf("Timecounter \"%s\" frequency %ju Hz", 1160 tc->tc_name, (uintmax_t)tc->tc_frequency); 1161 printf(" -- Insufficient hz, needs at least %u\n", u); 1162 } 1163 } else if (tc->tc_quality >= 0 || bootverbose) { 1164 printf("Timecounter \"%s\" frequency %ju Hz quality %d\n", 1165 tc->tc_name, (uintmax_t)tc->tc_frequency, 1166 tc->tc_quality); 1167 } 1168 1169 tc->tc_next = timecounters; 1170 timecounters = tc; 1171 /* 1172 * Set up sysctl tree for this counter. 
1173 */ 1174 tc_root = SYSCTL_ADD_NODE_WITH_LABEL(NULL, 1175 SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name, 1176 CTLFLAG_RW, 0, "timecounter description", "timecounter"); 1177 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1178 "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0, 1179 "mask for implemented bits"); 1180 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1181 "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc), 1182 sysctl_kern_timecounter_get, "IU", "current timecounter value"); 1183 SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1184 "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc), 1185 sysctl_kern_timecounter_freq, "QU", "timecounter frequency"); 1186 SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, 1187 "quality", CTLFLAG_RD, &(tc->tc_quality), 0, 1188 "goodness of time counter"); 1189 /* 1190 * Do not automatically switch if the current tc was specifically 1191 * chosen. Never automatically use a timecounter with negative quality. 1192 * Even though we run on the dummy counter, switching here may be 1193 * worse since this timecounter may not be monotonic. 1194 */ 1195 if (tc_chosen) 1196 return; 1197 if (tc->tc_quality < 0) 1198 return; 1199 if (tc_from_tunable[0] != '\0' && 1200 strcmp(tc->tc_name, tc_from_tunable) == 0) { 1201 tc_chosen = 1; 1202 tc_from_tunable[0] = '\0'; 1203 } else { 1204 if (tc->tc_quality < timecounter->tc_quality) 1205 return; 1206 if (tc->tc_quality == timecounter->tc_quality && 1207 tc->tc_frequency < timecounter->tc_frequency) 1208 return; 1209 } 1210 (void)tc->tc_get_timecount(tc); 1211 timecounter = tc; 1212} 1213 1214/* Report the frequency of the current timecounter. 
 */
uint64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

/*
 * Return true (and clear td_rtcgen) if 'td' went to sleep before the most
 * recent real-time clock step; used as the predicate for waking such
 * sleepers after tc_setclock().
 */
static bool
sleeping_on_old_rtc(struct thread *td)
{

	/*
	 * td_rtcgen is modified by curthread when it is running,
	 * and by other threads in this function.  By finding the thread
	 * on a sleepqueue and holding the lock on the sleepqueue
	 * chain, we guarantee that the thread is not running and that
	 * modifying td_rtcgen is safe.  Setting td_rtcgen to zero informs
	 * the thread that it was woken due to a real-time clock adjustment.
	 * (The declaration of td_rtcgen refers to this comment.)
	 */
	if (td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation) {
		td->td_rtcgen = 0;
		return (true);
	}
	return (false);
}

/* Spin mutex serializing tc_windup() callers (tc_setclock vs. tc_ticktock). */
static struct mtx tc_setclock_mtx;
MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);

/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec tbef, taft;
	struct bintime bt, bt2;

	timespec2bintime(ts, &bt);
	nanotime(&tbef);
	mtx_lock_spin(&tc_setclock_mtx);
	/* Reset cpu tick calibration state; the clock is about to jump. */
	cpu_tick_calibrate(1);
	/* New boottime = requested UTC - current uptime. */
	binuptime(&bt2);
	bintime_sub(&bt, &bt2);

	/* XXX fiddle all the little crinkly bits around the fiords... */
	tc_windup(&bt);
	mtx_unlock_spin(&tc_setclock_mtx);

	/* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */
	atomic_add_rel_int(&rtc_generation, 2);
	/* Wake threads whose absolute-time sleeps referenced the old UTC. */
	sleepq_chains_remove_matching(sleeping_on_old_rtc);
	if (timestepwarnings) {
		nanotime(&taft);
		log(LOG_INFO,
		    "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n",
		    (intmax_t)tbef.tv_sec, tbef.tv_nsec,
		    (intmax_t)taft.tv_sec, taft.tv_nsec,
		    (intmax_t)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 *
 * new_boottimebin, if non-NULL, replaces the boot time estimate (used by
 * tc_setclock() to step UTC).  Caller holds tc_setclock_mtx.
 */
static void
tc_windup(struct bintime *new_boottimebin)
{
	struct bintime bt;
	struct timehands *th, *tho;
	uint64_t scale;
	u_int delta, ncount, ogen;
	int i;
	time_t t;

	/*
	 * Make the next timehands a copy of the current one, but do
	 * not overwrite the generation or next pointer.  While we
	 * update the contents, the generation must be zero.  We need
	 * to ensure that the zero generation is visible before the
	 * data updates become visible, which requires release fence.
	 * For similar reasons, re-reading of the generation after the
	 * data is read should use acquire fence.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	atomic_thread_fence_rel();
	/* Copy only the fields before th_generation (see struct layout). */
	memcpy(th, tho, offsetof(struct timehands, th_generation));
	if (new_boottimebin != NULL)
		th->th_boottime = *new_boottimebin;

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
#ifdef FFCLOCK
	ffclock_windup(delta);
#endif
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	while (delta > th->th_counter->tc_frequency) {
		/* Eat complete unadjusted seconds. */
		delta -= th->th_counter->tc_frequency;
		th->th_offset.sec++;
	}
	if ((delta > th->th_counter->tc_frequency / 2) &&
	    (th->th_scale * delta < ((uint64_t)1 << 63))) {
		/* The product th_scale * delta just barely overflows. */
		th->th_offset.sec++;
	}
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &th->th_boottime);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		/* A leap second changed bt.sec; fold it into boottime. */
		if (bt.sec != t)
			th->th_boottime.sec += bt.sec - t;
	}
	/* Update the UTC timestamps used by the get*() functions. */
	th->th_bintime = bt;
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
#ifndef __arm__
		/* Track counters that stop in C2+ sleep states. */
		if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep++;
		if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0)
			cpu_disable_c2_sleep--;
#endif
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		/* Tick often enough that the counter wraps at most 1/3. */
		tc_min_ticktock_freq = max(1, timecounter->tc_frequency /
		    (((uint64_t)timecounter->tc_counter_mask + 1) / 3));
#ifdef FFCLOCK
		ffclock_change_tc(th);
#endif
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, that
	 * leaves no suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	scale = (uint64_t)1 << 63;
	scale += (th->th_adjustment / 1024) * 2199;
	scale /= th->th_counter->tc_frequency;
	th->th_scale = scale * 2;
	th->th_large_delta = MIN(((uint64_t)1 << 63) / scale, UINT_MAX);

	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.
	 */
	if (++ogen == 0)
		ogen = 1;
	atomic_store_rel_int(&th->th_generation, ogen);

	/* Go live with the new struct timehands. */
#ifdef FFCLOCK
	switch (sysclock_active) {
	case SYSCLOCK_FBCK:
#endif
		time_second = th->th_microtime.tv_sec;
		time_uptime = th->th_offset.sec;
#ifdef FFCLOCK
		break;
	case SYSCLOCK_FFWD:
		time_second = fftimehands->tick_time_lerp.sec;
		time_uptime = fftimehands->tick_time_lerp.sec - ffclock_boottime.sec;
		break;
	}
#endif

	timehands = th;
	timekeep_push_vdso();
}

/* Report or change the active timecounter hardware. */
static int
sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS)
{
	char newname[32];
	struct timecounter *newtc, *tc;
	int error;

	tc = timecounter;
	strlcpy(newname, tc->tc_name, sizeof(newname));

	error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	/* Record that the tc in use now was specifically chosen. */
	tc_chosen = 1;
	/* Writing the current name is a no-op. */
	if (strcmp(newname, tc->tc_name) == 0)
		return (0);
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;

		/* Warm up new timecounter.
		 */
		(void)newtc->tc_get_timecount(newtc);

		timecounter = newtc;

		/*
		 * The vdso timehands update is deferred until the next
		 * 'tc_windup()'.
		 *
		 * This is prudent given that 'timekeep_push_vdso()' does not
		 * use any locking and that it can be called in hard interrupt
		 * context via 'tc_windup()'.
		 */
		return (0);
	}
	/* No registered counter matched the requested name. */
	return (EINVAL);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware,
    CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 0, 0,
    sysctl_kern_timecounter_hardware, "A",
    "Timecounter hardware selected");

/* Report the available timecounter hardware. */
static int
sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	struct timecounter *tc;
	int error;

	/* Emit a space-separated list of "name(quality)" entries. */
	sbuf_new_for_sysctl(&sb, NULL, 0, req);
	for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc != timecounters)
			sbuf_putc(&sb, ' ');
		sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality);
	}
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error);
}

SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD,
    0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected");

/*
 * RFC 2783 PPS-API implementation.
 */

/*
 * Return true if the driver is aware of the abi version extensions in the
 * pps_state structure, and it supports at least the given abi version number.
 */
static inline int
abi_aware(struct pps_state *pps, int vers)
{

	return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers);
}

/*
 * Fetch the current PPS info for 'pps', optionally blocking until the next
 * assert or clear event.  Implements the PPS_IOC_FETCH ioctl.
 */
static int
pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps)
{
	int err, timo;
	pps_seq_t aseq, cseq;
	struct timeval tv;

	if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC)
		return (EINVAL);

	/*
	 * If no timeout is requested, immediately return whatever values were
	 * most recently captured.  If timeout seconds is -1, that's a request
	 * to block without a timeout.  WITNESS won't let us sleep forever
	 * without a lock (we really don't need a lock), so just repeatedly
	 * sleep a long time.
	 */
	if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) {
		if (fapi->timeout.tv_sec == -1)
			timo = 0x7fffffff;
		else {
			tv.tv_sec = fapi->timeout.tv_sec;
			tv.tv_usec = fapi->timeout.tv_nsec / 1000;
			timo = tvtohz(&tv);
		}
		/* Snapshot both sequence numbers; a change in either wakes us. */
		aseq = atomic_load_int(&pps->ppsinfo.assert_sequence);
		cseq = atomic_load_int(&pps->ppsinfo.clear_sequence);
		while (aseq == atomic_load_int(&pps->ppsinfo.assert_sequence) &&
		    cseq == atomic_load_int(&pps->ppsinfo.clear_sequence)) {
			/* Sleep on the driver's mutex if it advertised one. */
			if (abi_aware(pps, 1) && pps->driver_mtx != NULL) {
				if (pps->flags & PPSFLAG_MTX_SPIN) {
					err = msleep_spin(pps, pps->driver_mtx,
					    "ppsfch", timo);
				} else {
					err = msleep(pps, pps->driver_mtx, PCATCH,
					    "ppsfch", timo);
				}
			} else {
				err = tsleep(pps, PCATCH, "ppsfch", timo);
			}
			if (err == EWOULDBLOCK) {
				if (fapi->timeout.tv_sec == -1) {
					/* "Infinite" wait: keep sleeping. */
					continue;
				} else {
					return (ETIMEDOUT);
				}
			} else if (err != 0) {
				return (err);
			}
		}
	}

	pps->ppsinfo.current_mode = pps->ppsparam.mode;
	fapi->pps_info_buf = pps->ppsinfo;

	return (0);
}

/*
 * RFC 2783 PPS-API ioctl dispatcher, called by PPS-capable drivers.
 */
int
pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps)
{
	pps_params_t *app;
	struct pps_fetch_args *fapi;
#ifdef FFCLOCK
	struct pps_fetch_ffc_args *fapi_ffc;
#endif
#ifdef PPS_SYNC
	struct pps_kcbind_args *kapi;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl"));
	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		/* Reject modes the driver did not advertise in ppscap. */
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
#ifdef FFCLOCK
		/* Ensure only a single clock is selected for ffc timestamp. */
		if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK)
			return (EINVAL);
#endif
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		fapi = (struct pps_fetch_args *)data;
		return (pps_fetch(fapi, pps));
#ifdef FFCLOCK
	case PPS_IOC_FETCH_FFCOUNTER:
		fapi_ffc = (struct pps_fetch_ffc_args *)data;
		if (fapi_ffc->tsformat && fapi_ffc->tsformat !=
		    PPS_TSFMT_TSPEC)
			return (EINVAL);
		/* Blocking fetch is not supported for the ffcounter path. */
		if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec)
			return (EOPNOTSUPP);
		pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode;
		fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc;
		/* Overwrite timestamps if feedback clock selected.
		 */
		switch (pps->ppsparam.mode & PPS_TSCLK_MASK) {
		case PPS_TSCLK_FBCK:
			fapi_ffc->pps_info_buf_ffc.assert_timestamp =
			    pps->ppsinfo.assert_timestamp;
			fapi_ffc->pps_info_buf_ffc.clear_timestamp =
			    pps->ppsinfo.clear_timestamp;
			break;
		case PPS_TSCLK_FFWD:
			break;
		default:
			break;
		}
		return (0);
#endif /* FFCLOCK */
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		kapi = (struct pps_kcbind_args *)data;
		/* XXX Only root should be able to do this */
		if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC)
			return (EINVAL);
		if (kapi->kernel_consumer != PPS_KC_HARDPPS)
			return (EINVAL);
		if (kapi->edge & ~pps->ppscap)
			return (EINVAL);
		/* Keep the ABI flag; replace only the edge selection. */
		pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) |
		    (pps->kcmode & KCMODE_ABIFLAG);
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (ENOIOCTL);
	}
}

/*
 * Initialize a driver's pps_state: derive the full capability mask from
 * what the driver set in ppscap before calling us.
 */
void
pps_init(struct pps_state *pps)
{
	pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
#ifdef FFCLOCK
	pps->ppscap |= PPS_TSCLK_MASK;
#endif
	/* Cleared here; pps_init_abi() sets it for ABI-aware drivers. */
	pps->kcmode &= ~KCMODE_ABIFLAG;
}

/*
 * pps_init() variant for drivers that declare their ABI version in
 * pps->driver_abi; records mutual ABI awareness (see abi_aware()).
 */
void
pps_init_abi(struct pps_state *pps)
{

	pps_init(pps);
	if (pps->driver_abi > 0) {
		pps->kcmode |= KCMODE_ABIFLAG;
		pps->kernel_abi = PPS_ABI_VERSION;
	}
}

/*
 * Capture the raw counter value at a PPS edge, typically from interrupt
 * context.  The conversion to a timestamp happens later in pps_event().
 */
void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
	th = timehands;
	pps->capgen = atomic_load_acq_int(&th->th_generation);
	pps->capth = th;
#ifdef FFCLOCK
	pps->capffth = fftimehands;
#endif
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	/* Re-check generation: zero capgen marks the capture invalid. */
	atomic_thread_fence_acq();
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}

/*
 * Process a previously captured PPS edge: convert the captured counter
 * value to timestamps, advance the sequence numbers, optionally apply the
 * user offset, optionally feed hardpps(), and wake pps_fetch() sleepers.
 * 'event' is PPS_CAPTUREASSERT or PPS_CAPTURECLEAR.
 */
void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff;
	pps_seq_t *pseq;
#ifdef FFCLOCK
	struct timespec *tsp_ffc;
	pps_seq_t *pseq_ffc;
	ffcounter *ffcount;
#endif
#ifdef PPS_SYNC
	int fhard;
#endif

	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
	/* Nothing to do if not currently set to capture this event type. */
	if ((event & pps->ppsparam.mode) == 0)
		return;
	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen !=
	    atomic_load_acq_int(&pps->capth->th_generation))
		return;

	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
#endif
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.assert_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.assert_sequence;
#endif
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
#ifdef PPS_SYNC
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
#endif
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
#ifdef FFCLOCK
		ffcount = &pps->ppsinfo_ffc.clear_ffcount;
		tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp;
		pseq_ffc = &pps->ppsinfo_ffc.clear_sequence;
#endif
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	tcount &= pps->capth->th_counter->tc_counter_mask;
	bt = pps->capth->th_bintime;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	atomic_thread_fence_acq();
	if (pps->capgen != pps->capth->th_generation)
		return;

	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		/* Apply user offset and renormalize the nanoseconds. */
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}

#ifdef FFCLOCK
	*ffcount = pps->capffth->tick_ffcount + tcount;
	bt = pps->capffth->tick_time;
	/*
	 * NOTE(review): ffclock_convert_delta() appears to overwrite bt with
	 * the converted delta before tick_time is added back below, making
	 * the assignment above redundant — confirm against its definition.
	 */
	ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt);
	bintime_add(&bt, &pps->capffth->tick_time);
	bintime2timespec(&bt, &ts);
	(*pseq_ffc)++;
	*tsp_ffc = ts;
#endif

#ifdef PPS_SYNC
	if (fhard) {
		uint64_t scale;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
		/* Unadjusted scale: 2^64 / tc_frequency (cf. tc_windup()). */
		scale = (uint64_t)1 << 63;
		scale /= pps->capth->th_counter->tc_frequency;
		scale *= 2;
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif

	/* Wakeup anyone sleeping in pps_fetch(). */
	wakeup(pps);
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0,
    "Approximate number of hardclock ticks in a millisecond");

/*
 * Called from hardclock(); winds up the timehands every tc_tick ticks.
 * Skips the update if the spin lock is contended (tc_setclock() in
 * progress) rather than waiting for it.
 */
void
tc_ticktock(int cnt)
{
	static int count;

	if (mtx_trylock_spin(&tc_setclock_mtx)) {
		count += cnt;
		if (count >= tc_tick) {
			count = 0;
			tc_windup(NULL);
		}
		mtx_unlock_spin(&tc_setclock_mtx);
	}
}

/*
 * Recompute the precision thresholds (bt/sbt_timethreshold and
 * bt/sbt_tickthreshold) from tc_timepercentage; a non-positive
 * percentage disables them by setting effectively infinite thresholds.
 */
static void __inline
tc_adjprecision(void)
{
	int t;

	if (tc_timepercentage > 0) {
		t = (99 + tc_timepercentage) / tc_timepercentage;
		tc_precexp = fls(t + (t >> 1)) - 1;
		FREQ2BT(hz / tc_tick, &bt_timethreshold);
		FREQ2BT(hz, &bt_tickthreshold);
		bintime_shift(&bt_timethreshold, tc_precexp);
		bintime_shift(&bt_tickthreshold, tc_precexp);
	} else {
		tc_precexp = 31;
		bt_timethreshold.sec = INT_MAX;
		bt_timethreshold.frac = ~(uint64_t)0;
		bt_tickthreshold = bt_timethreshold;
	}
	sbt_timethreshold = bttosbt(bt_timethreshold);
	sbt_tickthreshold = bttosbt(bt_tickthreshold);
}

/* Sysctl handler: set tc_timepercentage and recompute the thresholds. */
static int
sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = tc_timepercentage;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	tc_timepercentage = val;
	/* During early boot inittimecounter() will do the recalculation. */
	if (cold)
		goto done;
	tc_adjprecision();
done:
	return (0);
}

/* Set up the requested number of timehands.
 */
static void
inittimehands(void *dummy)
{
	struct timehands *thp;
	int i;

	TUNABLE_INT_FETCH("kern.timecounter.timehands_count",
	    &timehands_count);
	/* Clamp the tunable to [1, nitems(ths)]. */
	if (timehands_count < 1)
		timehands_count = 1;
	if (timehands_count > nitems(ths))
		timehands_count = nitems(ths);
	/* Link the first timehands_count entries into a circular ring. */
	for (i = 1, thp = &ths[0]; i < timehands_count; thp = &ths[i++])
		thp->th_next = &ths[i];
	thp->th_next = &ths[0];

	TUNABLE_STR_FETCH("kern.timecounter.hardware", tc_from_tunable,
	    sizeof(tc_from_tunable));
}
SYSINIT(timehands, SI_SUB_TUNABLES, SI_ORDER_ANY, inittimehands, NULL);

static void
inittimecounter(void *dummy)
{
	u_int p;
	int tick_rate;

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	tc_adjprecision();
	FREQ2BT(hz, &tick_bt);
	tick_sbt = bttosbt(tick_bt);
	tick_rate = hz / tc_tick;
	FREQ2BT(tick_rate, &tc_tick_bt);
	tc_tick_sbt = bttosbt(tc_tick_bt);
	p = (tc_tick * 1000000) / hz;
	printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);

#ifdef FFCLOCK
	ffclock_init();
#endif

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	mtx_lock_spin(&tc_setclock_mtx);
	tc_windup(NULL);
	mtx_unlock_spin(&tc_setclock_mtx);
}

SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);

/* Cpu tick handling -------------------------------------------------*/

static int cpu_tick_variable;
static uint64_t cpu_tick_frequency;

DPCPU_DEFINE_STATIC(uint64_t, tc_cpu_ticks_base);
DPCPU_DEFINE_STATIC(unsigned, tc_cpu_ticks_last);

/*
 * Fallback cpu_ticks implementation built on the active timecounter.
 * Extends the (possibly narrow) hardware counter to 64 bits by detecting
 * wraps per-CPU; runs in a critical section to keep the per-CPU state
 * consistent.
 */
static uint64_t
tc_cpu_ticks(void)
{
	struct timecounter *tc;
	uint64_t res, *base;
	unsigned u, *last;

	critical_enter();
	base = DPCPU_PTR(tc_cpu_ticks_base);
	last = DPCPU_PTR(tc_cpu_ticks_last);
	tc = timehands->th_counter;
	u = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
	/* Counter went backwards: it wrapped, advance the 64-bit base. */
	if (u < *last)
		*base += (uint64_t)tc->tc_counter_mask + 1;
	*last = u;
	res = u + *base;
	critical_exit();
	return (res);
}

/*
 * Trigger a calibration pass once every 16 seconds of uptime; called
 * periodically (see the comment below).
 */
void
cpu_tick_calibration(void)
{
	static time_t last_calib;

	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
		cpu_tick_calibrate(0);
		last_calib = time_uptime;
	}
}

/*
 * This function gets called every 16 seconds on only one designated
 * CPU in the system from hardclock() via cpu_tick_calibration()().
 *
 * Whenever the real time clock is stepped we get called with reset=1
 * to make sure we handle suspend/resume and similar events correctly.
2020 */ 2021 2022static void 2023cpu_tick_calibrate(int reset) 2024{ 2025 static uint64_t c_last; 2026 uint64_t c_this, c_delta; 2027 static struct bintime t_last; 2028 struct bintime t_this, t_delta; 2029 uint32_t divi; 2030 2031 if (reset) { 2032 /* The clock was stepped, abort & reset */ 2033 t_last.sec = 0; 2034 return; 2035 } 2036 2037 /* we don't calibrate fixed rate cputicks */ 2038 if (!cpu_tick_variable) 2039 return; 2040 2041 getbinuptime(&t_this); 2042 c_this = cpu_ticks(); 2043 if (t_last.sec != 0) { 2044 c_delta = c_this - c_last; 2045 t_delta = t_this; 2046 bintime_sub(&t_delta, &t_last); 2047 /* 2048 * Headroom: 2049 * 2^(64-20) / 16[s] = 2050 * 2^(44) / 16[s] = 2051 * 17.592.186.044.416 / 16 = 2052 * 1.099.511.627.776 [Hz] 2053 */ 2054 divi = t_delta.sec << 20; 2055 divi |= t_delta.frac >> (64 - 20); 2056 c_delta <<= 20; 2057 c_delta /= divi; 2058 if (c_delta > cpu_tick_frequency) { 2059 if (0 && bootverbose) 2060 printf("cpu_tick increased to %ju Hz\n", 2061 c_delta); 2062 cpu_tick_frequency = c_delta; 2063 } 2064 } 2065 c_last = c_this; 2066 t_last = t_this; 2067} 2068 2069void 2070set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var) 2071{ 2072 2073 if (func == NULL) { 2074 cpu_ticks = tc_cpu_ticks; 2075 } else { 2076 cpu_tick_frequency = freq; 2077 cpu_tick_variable = var; 2078 cpu_ticks = func; 2079 } 2080} 2081 2082uint64_t 2083cpu_tickrate(void) 2084{ 2085 2086 if (cpu_ticks == tc_cpu_ticks) 2087 return (tc_getfrequency()); 2088 return (cpu_tick_frequency); 2089} 2090 2091/* 2092 * We need to be slightly careful converting cputicks to microseconds. 2093 * There is plenty of margin in 64 bits of microseconds (half a million 2094 * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply 2095 * before divide conversion (to retain precision) we find that the 2096 * margin shrinks to 1.5 hours (one millionth of 146y). 
 * With a three prong approach we never lose significant bits, no
 * matter what the cputick rate and length of timeinterval is.
 */

uint64_t
cputick2usec(uint64_t tick)
{

	/* Pick the largest scaling that cannot overflow 64 bits. */
	if (tick > 18446744073709551LL)		/* floor(2^64 / 1000) */
		return (tick / (cpu_tickrate() / 1000000LL));
	else if (tick > 18446744073709LL)	/* floor(2^64 / 1000000) */
		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
	else
		return ((tick * 1000000LL) / cpu_tickrate());
}

cpu_tick_f *cpu_ticks = tc_cpu_ticks;

/* Non-zero (default) allows userland to use the vdso fast time path. */
static int vdso_th_enable = 1;
static int
sysctl_fast_gettime(SYSCTL_HANDLER_ARGS)
{
	int old_vdso_th_enable, error;

	old_vdso_th_enable = vdso_th_enable;
	error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req);
	if (error != 0)
		return (error);
	vdso_th_enable = old_vdso_th_enable;
	return (0);
}
SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day");

/*
 * Export the current timehands to the vdso shared page.  Returns non-zero
 * when the fast userland path is usable (requires both vdso_th_enable and
 * the counter's own tc_fill_vdso_timehands hook, if any, to agree).
 */
uint32_t
tc_fill_vdso_timehands(struct vdso_timehands *vdso_th)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	vdso_th->th_scale = th->th_scale;
	vdso_th->th_offset_count = th->th_offset_count;
	vdso_th->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th->th_offset = th->th_offset;
	vdso_th->th_boottime = th->th_boottime;
	if (th->th_counter->tc_fill_vdso_timehands != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands(vdso_th,
		    th->th_counter);
	} else
		enabled = 0;
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}

#ifdef COMPAT_FREEBSD32
/*
 * 32-bit compat variant of tc_fill_vdso_timehands(); 64-bit fields are
 * stored into pairs of 32-bit words in the compat layout.
 */
uint32_t
tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32)
{
	struct timehands *th;
	uint32_t enabled;

	th = timehands;
	*(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale;
	vdso_th32->th_offset_count = th->th_offset_count;
	vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask;
	vdso_th32->th_offset.sec = th->th_offset.sec;
	*(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac;
	vdso_th32->th_boottime.sec = th->th_boottime.sec;
	*(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac;
	if (th->th_counter->tc_fill_vdso_timehands32 != NULL) {
		enabled = th->th_counter->tc_fill_vdso_timehands32(vdso_th32,
		    th->th_counter);
	} else
		enabled = 0;
	/* The global sysctl switch overrides any per-counter decision. */
	if (!vdso_th_enable)
		enabled = 0;
	return (enabled);
}
#endif

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/* DDB "show timecounter": dump the active counter and timehands state. */
DB_SHOW_COMMAND(timecounter, db_show_timecounter)
{
	struct timehands *th;
	struct timecounter *tc;
	u_int val1, val2;

	th = timehands;
	tc = th->th_counter;
	/* Two reads separated by a compiler barrier show the counter moving. */
	val1 = tc->tc_get_timecount(tc);
	__compiler_membar();
	val2 = tc->tc_get_timecount(tc);

	db_printf("timecounter %p %s\n", tc, tc->tc_name);
	db_printf("  mask %#x freq %ju qual %d flags %#x priv %p\n",
	    tc->tc_counter_mask, (uintmax_t)tc->tc_frequency, tc->tc_quality,
	    tc->tc_flags, tc->tc_priv);
	db_printf("  val %#x %#x\n", val1, val2);
	db_printf("timehands adj %#jx scale %#jx ldelta %d off_cnt %d gen %d\n",
	    (uintmax_t)th->th_adjustment, (uintmax_t)th->th_scale,
	    th->th_large_delta, th->th_offset_count, th->th_generation);
	db_printf("  offset %jd %jd boottime %jd %jd\n",
	    (intmax_t)th->th_offset.sec, (uintmax_t)th->th_offset.frac,
	    (intmax_t)th->th_boottime.sec, (uintmax_t)th->th_boottime.frac);
}
#endif