/* tsc.c revision 249625 */
/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
254Srgrimes */ 264Srgrimes 27115683Sobrien#include <sys/cdefs.h> 28115683Sobrien__FBSDID("$FreeBSD: head/sys/x86/x86/tsc.c 249625 2013-04-18 17:07:04Z mav $"); 29115683Sobrien 30237433Skib#include "opt_compat.h" 3116299Spst#include "opt_clock.h" 3213228Swollman 332056Swollman#include <sys/param.h> 34167905Snjl#include <sys/bus.h> 35167905Snjl#include <sys/cpu.h> 36221214Sjkim#include <sys/limits.h> 37167905Snjl#include <sys/malloc.h> 382056Swollman#include <sys/systm.h> 39113348Sdes#include <sys/sysctl.h> 402056Swollman#include <sys/time.h> 4158377Sphk#include <sys/timetc.h> 422056Swollman#include <sys/kernel.h> 4385835Siwasaki#include <sys/power.h> 44113348Sdes#include <sys/smp.h> 45237433Skib#include <sys/vdso.h> 464180Sbde#include <machine/clock.h> 47216272Sjkim#include <machine/cputypes.h> 4832054Sphk#include <machine/md_var.h> 4932054Sphk#include <machine/specialreg.h> 5015508Sbde 51167905Snjl#include "cpufreq_if.h" 52167905Snjl 53216163Sjkimuint64_t tsc_freq; 54184102Sjkimint tsc_is_invariant; 55220579Sjkimint tsc_perf_stat; 56220579Sjkim 57167905Snjlstatic eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; 581390Ssos 59184102SjkimSYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN, 60184108Sjkim &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant"); 61184108SjkimTUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant); 62184102Sjkim 63113348Sdes#ifdef SMP 64249324Sneelint smp_tsc; 65121307SsilbySYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, 66113348Sdes "Indicates whether the TSC is safe to use in SMP mode"); 67113348SdesTUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc); 68249625Smav 69249625Smavint smp_tsc_adjust = 0; 70249625SmavSYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN, 71249625Smav &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP"); 72249625SmavTUNABLE_INT("kern.timecounter.smp_tsc_adjust", &smp_tsc_adjust); 73113348Sdes#endif 
74113348Sdes 75246212Skibstatic int tsc_shift = 1; 76246212SkibSYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN, 77246212Skib &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency"); 78246212SkibTUNABLE_INT("kern.timecounter.tsc_shift", &tsc_shift); 79246212Skib 80219473Sjkimstatic int tsc_disabled; 81219473SjkimSYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0, 82219473Sjkim "Disable x86 Time Stamp Counter"); 83219473SjkimTUNABLE_INT("machdep.disable_tsc", &tsc_disabled); 84219473Sjkim 85220577Sjkimstatic int tsc_skip_calibration; 86220577SjkimSYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN, 87220577Sjkim &tsc_skip_calibration, 0, "Disable TSC frequency calibration"); 88220577SjkimTUNABLE_INT("machdep.disable_tsc_calibration", &tsc_skip_calibration); 89220577Sjkim 90167905Snjlstatic void tsc_freq_changed(void *arg, const struct cf_level *level, 91167905Snjl int status); 92167905Snjlstatic void tsc_freq_changing(void *arg, const struct cf_level *level, 93167905Snjl int *status); 94222866Sjkimstatic unsigned tsc_get_timecount(struct timecounter *tc); 95238973Skibstatic inline unsigned tsc_get_timecount_low(struct timecounter *tc); 96238973Skibstatic unsigned tsc_get_timecount_lfence(struct timecounter *tc); 97238973Skibstatic unsigned tsc_get_timecount_low_lfence(struct timecounter *tc); 98238973Skibstatic unsigned tsc_get_timecount_mfence(struct timecounter *tc); 99238973Skibstatic unsigned tsc_get_timecount_low_mfence(struct timecounter *tc); 100167905Snjlstatic void tsc_levels_changed(void *arg, int unit); 10117353Sbde 10240610Sphkstatic struct timecounter tsc_timecounter = { 10333690Sphk tsc_get_timecount, /* get_timecount */ 10436741Sphk 0, /* no poll_pps */ 105167905Snjl ~0u, /* counter_mask */ 10633690Sphk 0, /* frequency */ 107167905Snjl "TSC", /* name */ 108118987Sphk 800, /* quality (adjusted in code) */ 10933690Sphk}; 11033690Sphk 111221214Sjkim#define VMW_HVMAGIC 0x564d5868 
112221214Sjkim#define VMW_HVPORT 0x5658 113221214Sjkim#define VMW_HVCMD_GETVERSION 10 114221214Sjkim#define VMW_HVCMD_GETHZ 45 115221214Sjkim 116221214Sjkimstatic __inline void 117221214Sjkimvmware_hvcall(u_int cmd, u_int *p) 118221214Sjkim{ 119221214Sjkim 120221331Sjkim __asm __volatile("inl %w3, %0" 121221214Sjkim : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) 122221214Sjkim : "0" (VMW_HVMAGIC), "1" (UINT_MAX), "2" (cmd), "3" (VMW_HVPORT) 123221214Sjkim : "memory"); 124221214Sjkim} 125221214Sjkim 126221214Sjkimstatic int 127221214Sjkimtsc_freq_vmware(void) 128221214Sjkim{ 129221214Sjkim char hv_sig[13]; 130221214Sjkim u_int regs[4]; 131221214Sjkim char *p; 132221214Sjkim u_int hv_high; 133221214Sjkim int i; 134221214Sjkim 135221214Sjkim /* 136221214Sjkim * [RFC] CPUID usage for interaction between Hypervisors and Linux. 137221214Sjkim * http://lkml.org/lkml/2008/10/1/246 138221214Sjkim * 139221214Sjkim * KB1009458: Mechanisms to determine if software is running in 140221214Sjkim * a VMware virtual machine 141221214Sjkim * http://kb.vmware.com/kb/1009458 142221214Sjkim */ 143221214Sjkim hv_high = 0; 144221214Sjkim if ((cpu_feature2 & CPUID2_HV) != 0) { 145221214Sjkim do_cpuid(0x40000000, regs); 146221214Sjkim hv_high = regs[0]; 147221214Sjkim for (i = 1, p = hv_sig; i < 4; i++, p += sizeof(regs) / 4) 148221214Sjkim memcpy(p, ®s[i], sizeof(regs[i])); 149221214Sjkim *p = '\0'; 150221214Sjkim if (bootverbose) { 151221214Sjkim /* 152221214Sjkim * HV vendor ID string 153221214Sjkim * ------------+-------------- 154221214Sjkim * KVM "KVMKVMKVM" 155221214Sjkim * Microsoft "Microsoft Hv" 156221214Sjkim * VMware "VMwareVMware" 157221214Sjkim * Xen "XenVMMXenVMM" 158221214Sjkim */ 159221214Sjkim printf("Hypervisor: Origin = \"%s\"\n", hv_sig); 160221214Sjkim } 161221214Sjkim if (strncmp(hv_sig, "VMwareVMware", 12) != 0) 162221214Sjkim return (0); 163221214Sjkim } else { 164221214Sjkim p = getenv("smbios.system.serial"); 165221214Sjkim if (p == NULL) 166221214Sjkim 
return (0); 167221214Sjkim if (strncmp(p, "VMware-", 7) != 0 && 168221214Sjkim strncmp(p, "VMW", 3) != 0) { 169221214Sjkim freeenv(p); 170221214Sjkim return (0); 171221214Sjkim } 172221214Sjkim freeenv(p); 173221214Sjkim vmware_hvcall(VMW_HVCMD_GETVERSION, regs); 174221214Sjkim if (regs[1] != VMW_HVMAGIC) 175221214Sjkim return (0); 176221214Sjkim } 177221214Sjkim if (hv_high >= 0x40000010) { 178221214Sjkim do_cpuid(0x40000010, regs); 179221214Sjkim tsc_freq = regs[0] * 1000; 180221214Sjkim } else { 181221214Sjkim vmware_hvcall(VMW_HVCMD_GETHZ, regs); 182221214Sjkim if (regs[1] != UINT_MAX) 183221214Sjkim tsc_freq = regs[0] | ((uint64_t)regs[1] << 32); 184221214Sjkim } 185221214Sjkim tsc_is_invariant = 1; 186221214Sjkim return (1); 187221214Sjkim} 188221214Sjkim 189220577Sjkimstatic void 190220577Sjkimtsc_freq_intel(void) 1911390Ssos{ 192220577Sjkim char brand[48]; 193220577Sjkim u_int regs[4]; 194220577Sjkim uint64_t freq; 195220577Sjkim char *p; 196220577Sjkim u_int i; 1971390Ssos 198220577Sjkim /* 199220577Sjkim * Intel Processor Identification and the CPUID Instruction 200220577Sjkim * Application Note 485. 
201220577Sjkim * http://www.intel.com/assets/pdf/appnote/241618.pdf 202220577Sjkim */ 203220577Sjkim if (cpu_exthigh >= 0x80000004) { 204220577Sjkim p = brand; 205220577Sjkim for (i = 0x80000002; i < 0x80000005; i++) { 206220577Sjkim do_cpuid(i, regs); 207220577Sjkim memcpy(p, regs, sizeof(regs)); 208220577Sjkim p += sizeof(regs); 209220577Sjkim } 210220577Sjkim p = NULL; 211220577Sjkim for (i = 0; i < sizeof(brand) - 1; i++) 212220577Sjkim if (brand[i] == 'H' && brand[i + 1] == 'z') 213220577Sjkim p = brand + i; 214220577Sjkim if (p != NULL) { 215220577Sjkim p -= 5; 216220577Sjkim switch (p[4]) { 217220577Sjkim case 'M': 218220577Sjkim i = 1; 219220577Sjkim break; 220220577Sjkim case 'G': 221220577Sjkim i = 1000; 222220577Sjkim break; 223220577Sjkim case 'T': 224220577Sjkim i = 1000000; 225220577Sjkim break; 226220577Sjkim default: 227220577Sjkim return; 228220577Sjkim } 229220577Sjkim#define C2D(c) ((c) - '0') 230220577Sjkim if (p[1] == '.') { 231220577Sjkim freq = C2D(p[0]) * 1000; 232220577Sjkim freq += C2D(p[2]) * 100; 233220577Sjkim freq += C2D(p[3]) * 10; 234220577Sjkim freq *= i * 1000; 235220577Sjkim } else { 236220577Sjkim freq = C2D(p[0]) * 1000; 237220577Sjkim freq += C2D(p[1]) * 100; 238220577Sjkim freq += C2D(p[2]) * 10; 239220577Sjkim freq += C2D(p[3]); 240220577Sjkim freq *= i * 1000000; 241220577Sjkim } 242220577Sjkim#undef C2D 243220577Sjkim tsc_freq = freq; 244220577Sjkim } 245220577Sjkim } 246220577Sjkim} 24732054Sphk 248220577Sjkimstatic void 249220577Sjkimprobe_tsc_freq(void) 250220577Sjkim{ 251220579Sjkim u_int regs[4]; 252220577Sjkim uint64_t tsc1, tsc2; 25315508Sbde 254221214Sjkim if (cpu_high >= 6) { 255221214Sjkim do_cpuid(6, regs); 256221214Sjkim if ((regs[2] & CPUID_PERF_STAT) != 0) { 257221214Sjkim /* 258221214Sjkim * XXX Some emulators expose host CPUID without actual 259221214Sjkim * support for these MSRs. We must test whether they 260221214Sjkim * really work. 
261221214Sjkim */ 262221214Sjkim wrmsr(MSR_MPERF, 0); 263221214Sjkim wrmsr(MSR_APERF, 0); 264221214Sjkim DELAY(10); 265221214Sjkim if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0) 266221214Sjkim tsc_perf_stat = 1; 267221214Sjkim } 268221214Sjkim } 269221214Sjkim 270221214Sjkim if (tsc_freq_vmware()) 271221214Sjkim return; 272221214Sjkim 273216272Sjkim switch (cpu_vendor_id) { 274216272Sjkim case CPU_VENDOR_AMD: 275219469Sjkim if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || 276219469Sjkim (vm_guest == VM_GUEST_NO && 277219469Sjkim CPUID_TO_FAMILY(cpu_id) >= 0x10)) 278216272Sjkim tsc_is_invariant = 1; 279238973Skib if (cpu_feature & CPUID_SSE2) { 280238973Skib tsc_timecounter.tc_get_timecount = 281238973Skib tsc_get_timecount_mfence; 282238973Skib } 283216272Sjkim break; 284216272Sjkim case CPU_VENDOR_INTEL: 285219469Sjkim if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || 286219469Sjkim (vm_guest == VM_GUEST_NO && 287219469Sjkim ((CPUID_TO_FAMILY(cpu_id) == 0x6 && 288216272Sjkim CPUID_TO_MODEL(cpu_id) >= 0xe) || 289216272Sjkim (CPUID_TO_FAMILY(cpu_id) == 0xf && 290219469Sjkim CPUID_TO_MODEL(cpu_id) >= 0x3)))) 291216272Sjkim tsc_is_invariant = 1; 292238973Skib if (cpu_feature & CPUID_SSE2) { 293238973Skib tsc_timecounter.tc_get_timecount = 294238973Skib tsc_get_timecount_lfence; 295238973Skib } 296216272Sjkim break; 297216272Sjkim case CPU_VENDOR_CENTAUR: 298219469Sjkim if (vm_guest == VM_GUEST_NO && 299219469Sjkim CPUID_TO_FAMILY(cpu_id) == 0x6 && 300216272Sjkim CPUID_TO_MODEL(cpu_id) >= 0xf && 301216272Sjkim (rdmsr(0x1203) & 0x100000000ULL) == 0) 302216272Sjkim tsc_is_invariant = 1; 303238973Skib if (cpu_feature & CPUID_SSE2) { 304238973Skib tsc_timecounter.tc_get_timecount = 305238973Skib tsc_get_timecount_lfence; 306238973Skib } 307216272Sjkim break; 308216272Sjkim } 309216272Sjkim 310220577Sjkim if (tsc_skip_calibration) { 311220577Sjkim if (cpu_vendor_id == CPU_VENDOR_INTEL) 312220577Sjkim tsc_freq_intel(); 313220577Sjkim return; 314220577Sjkim } 315220577Sjkim 
316220577Sjkim if (bootverbose) 317220577Sjkim printf("Calibrating TSC clock ... "); 318220577Sjkim tsc1 = rdtsc(); 319220577Sjkim DELAY(1000000); 320220577Sjkim tsc2 = rdtsc(); 321220577Sjkim tsc_freq = tsc2 - tsc1; 322220577Sjkim if (bootverbose) 323220577Sjkim printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq); 324220577Sjkim} 325220577Sjkim 326220577Sjkimvoid 327220577Sjkiminit_TSC(void) 328220577Sjkim{ 329220577Sjkim 330220577Sjkim if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) 331220577Sjkim return; 332220577Sjkim 333220577Sjkim probe_tsc_freq(); 334220577Sjkim 335167905Snjl /* 336216274Sjkim * Inform CPU accounting about our boot-time clock rate. This will 337216274Sjkim * be updated if someone loads a cpufreq driver after boot that 338216274Sjkim * discovers a new max frequency. 339167905Snjl */ 340220577Sjkim if (tsc_freq != 0) 341221178Sjkim set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant); 342167905Snjl 343216274Sjkim if (tsc_is_invariant) 344216274Sjkim return; 345216274Sjkim 346167905Snjl /* Register to find out about changes in CPU frequency. */ 347184108Sjkim tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, 348184108Sjkim tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); 349167905Snjl tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, 350167905Snjl tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); 351167905Snjl tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, 352167905Snjl tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); 353118550Sphk} 35434617Sphk 355221703Sjkim#ifdef SMP 356221703Sjkim 357238973Skib/* 358238973Skib * RDTSC is not a serializing instruction, and does not drain 359238973Skib * instruction stream, so we need to drain the stream before executing 360238973Skib * it. It could be fixed by use of RDTSCP, except the instruction is 361238973Skib * not available everywhere. 362238973Skib * 363238973Skib * Use CPUID for draining in the boot-time SMP constistency test. 
The 364238973Skib * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel 365238973Skib * and VIA) when SSE2 is present, and nothing on older machines which 366238973Skib * also do not issue RDTSC prematurely. There, testing for SSE2 and 367238975Skib * vendor is too cumbersome, and we learn about TSC presence from CPUID. 368238973Skib * 369238973Skib * Do not use do_cpuid(), since we do not need CPUID results, which 370238973Skib * have to be written into memory with do_cpuid(). 371238973Skib */ 372238973Skib#define TSC_READ(x) \ 373238973Skibstatic void \ 374238973Skibtsc_read_##x(void *arg) \ 375238973Skib{ \ 376239133Sjimharris uint64_t *tsc = arg; \ 377238973Skib u_int cpu = PCPU_GET(cpuid); \ 378238973Skib \ 379238973Skib __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx"); \ 380239133Sjimharris tsc[cpu * 3 + x] = rdtsc(); \ 381221703Sjkim} 382221703SjkimTSC_READ(0) 383221703SjkimTSC_READ(1) 384221703SjkimTSC_READ(2) 385221703Sjkim#undef TSC_READ 386221703Sjkim 387221703Sjkim#define N 1000 388221703Sjkim 389221703Sjkimstatic void 390221703Sjkimcomp_smp_tsc(void *arg) 391221703Sjkim{ 392239133Sjimharris uint64_t *tsc; 393239133Sjimharris int64_t d1, d2; 394221703Sjkim u_int cpu = PCPU_GET(cpuid); 395221703Sjkim u_int i, j, size; 396221703Sjkim 397221703Sjkim size = (mp_maxid + 1) * 3; 398221703Sjkim for (i = 0, tsc = arg; i < N; i++, tsc += size) 399221703Sjkim CPU_FOREACH(j) { 400221703Sjkim if (j == cpu) 401221703Sjkim continue; 402221703Sjkim d1 = tsc[cpu * 3 + 1] - tsc[j * 3]; 403221703Sjkim d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1]; 404221703Sjkim if (d1 <= 0 || d2 <= 0) { 405221703Sjkim smp_tsc = 0; 406221703Sjkim return; 407221703Sjkim } 408221703Sjkim } 409221703Sjkim} 410221703Sjkim 411249625Smavstatic void 412249625Smavadj_smp_tsc(void *arg) 413249625Smav{ 414249625Smav uint64_t *tsc; 415249625Smav int64_t d, min, max; 416249625Smav u_int cpu = PCPU_GET(cpuid); 417249625Smav u_int first, i, size; 418249625Smav 419249625Smav first = 
CPU_FIRST(); 420249625Smav if (cpu == first) 421249625Smav return; 422249625Smav min = INT64_MIN; 423249625Smav max = INT64_MAX; 424249625Smav size = (mp_maxid + 1) * 3; 425249625Smav for (i = 0, tsc = arg; i < N; i++, tsc += size) { 426249625Smav d = tsc[first * 3] - tsc[cpu * 3 + 1]; 427249625Smav if (d > min) 428249625Smav min = d; 429249625Smav d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2]; 430249625Smav if (d > min) 431249625Smav min = d; 432249625Smav d = tsc[first * 3 + 1] - tsc[cpu * 3]; 433249625Smav if (d < max) 434249625Smav max = d; 435249625Smav d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1]; 436249625Smav if (d < max) 437249625Smav max = d; 438249625Smav } 439249625Smav if (min > max) 440249625Smav return; 441249625Smav d = min / 2 + max / 2; 442249625Smav __asm __volatile ( 443249625Smav "movl $0x10, %%ecx\n\t" 444249625Smav "rdmsr\n\t" 445249625Smav "addl %%edi, %%eax\n\t" 446249625Smav "adcl %%esi, %%edx\n\t" 447249625Smav "wrmsr\n" 448249625Smav : /* No output */ 449249625Smav : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32)) 450249625Smav : "ax", "cx", "dx", "cc" 451249625Smav ); 452249625Smav} 453249625Smav 454221703Sjkimstatic int 455246212Skibtest_tsc(void) 456221703Sjkim{ 457239133Sjimharris uint64_t *data, *tsc; 458249625Smav u_int i, size, adj; 459221703Sjkim 460246212Skib if ((!smp_tsc && !tsc_is_invariant) || vm_guest) 461221703Sjkim return (-100); 462221703Sjkim size = (mp_maxid + 1) * 3; 463221703Sjkim data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK); 464249625Smav adj = 0; 465249625Smavretry: 466221703Sjkim for (i = 0, tsc = data; i < N; i++, tsc += size) 467221703Sjkim smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc); 468221703Sjkim smp_tsc = 1; /* XXX */ 469221703Sjkim smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc, 470221703Sjkim smp_no_rendevous_barrier, data); 471249625Smav if (!smp_tsc && adj < smp_tsc_adjust) { 472249625Smav adj++; 473249625Smav smp_rendezvous(smp_no_rendevous_barrier, adj_smp_tsc, 474249625Smav 
smp_no_rendevous_barrier, data); 475249625Smav goto retry; 476249625Smav } 477221703Sjkim free(data, M_TEMP); 478221703Sjkim if (bootverbose) 479249625Smav printf("SMP: %sed TSC synchronization test%s\n", 480249625Smav smp_tsc ? "pass" : "fail", 481249625Smav adj > 0 ? " after adjustment" : ""); 482222869Sjkim if (smp_tsc && tsc_is_invariant) { 483222869Sjkim switch (cpu_vendor_id) { 484222869Sjkim case CPU_VENDOR_AMD: 485222869Sjkim /* 486222869Sjkim * Starting with Family 15h processors, TSC clock 487222869Sjkim * source is in the north bridge. Check whether 488222869Sjkim * we have a single-socket/multi-core platform. 489222869Sjkim * XXX Need more work for complex cases. 490222869Sjkim */ 491222869Sjkim if (CPUID_TO_FAMILY(cpu_id) < 0x15 || 492222869Sjkim (amd_feature2 & AMDID2_CMP) == 0 || 493222869Sjkim smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1) 494222869Sjkim break; 495222869Sjkim return (1000); 496222869Sjkim case CPU_VENDOR_INTEL: 497222869Sjkim /* 498222869Sjkim * XXX Assume Intel platforms have synchronized TSCs. 499222869Sjkim */ 500222869Sjkim return (1000); 501222869Sjkim } 502222869Sjkim return (800); 503222869Sjkim } 504222869Sjkim return (-100); 505221703Sjkim} 506221703Sjkim 507221703Sjkim#undef N 508221703Sjkim 509246212Skib#else 510246212Skib 511246212Skib/* 512246212Skib * The function is not called, it is provided to avoid linking failure 513246212Skib * on uniprocessor kernel. 
514246212Skib */ 515246212Skibstatic int 516246212Skibtest_tsc(void) 517246212Skib{ 518246212Skib 519246212Skib return (0); 520246212Skib} 521246212Skib 522221703Sjkim#endif /* SMP */ 523221703Sjkim 524221703Sjkimstatic void 525118550Sphkinit_TSC_tc(void) 526118550Sphk{ 527222866Sjkim uint64_t max_freq; 528222866Sjkim int shift; 529209103Smav 530219673Sjkim if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) 531209103Smav return; 532209103Smav 53334617Sphk /* 534222866Sjkim * Limit timecounter frequency to fit in an int and prevent it from 535222866Sjkim * overflowing too fast. 536222866Sjkim */ 537222866Sjkim max_freq = UINT_MAX; 538222866Sjkim 539222866Sjkim /* 540160964Syar * We can not use the TSC if we support APM. Precise timekeeping 54149186Smsmith * on an APM'ed machine is at best a fools pursuit, since 54234617Sphk * any and all of the time spent in various SMM code can't 54334617Sphk * be reliably accounted for. Reading the RTC is your only 544160964Syar * source of reliable time info. The i8254 loses too, of course, 54534617Sphk * but we need to have some kind of time... 54649186Smsmith * We don't know at this point whether APM is going to be used 54749186Smsmith * or not, nor when it might be activated. Play it safe. 54834617Sphk */ 54985835Siwasaki if (power_pm_get_type() == POWER_PM_TYPE_APM) { 550118987Sphk tsc_timecounter.tc_quality = -1000; 55185835Siwasaki if (bootverbose) 552110370Sphk printf("TSC timecounter disabled: APM enabled.\n"); 553221703Sjkim goto init; 55464031Sphk } 55534617Sphk 556223426Sjkim /* 557223426Sjkim * We cannot use the TSC if it stops incrementing in deep sleep. 558223426Sjkim * Currently only Intel CPUs are known for this problem unless 559223426Sjkim * the invariant TSC bit is set. 
560223426Sjkim */ 561223426Sjkim if (cpu_can_deep_sleep && cpu_vendor_id == CPU_VENDOR_INTEL && 562223426Sjkim (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) { 563223426Sjkim tsc_timecounter.tc_quality = -1000; 564224042Sjkim tsc_timecounter.tc_flags |= TC_FLAGS_C3STOP; 565223426Sjkim if (bootverbose) 566223426Sjkim printf("TSC timecounter disabled: C3 enabled.\n"); 567223426Sjkim goto init; 568223426Sjkim } 569223426Sjkim 570118987Sphk /* 571246212Skib * We can not use the TSC in SMP mode unless the TSCs on all CPUs 572246212Skib * are synchronized. If the user is sure that the system has 573246212Skib * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a 574246212Skib * non-zero value. The TSC seems unreliable in virtualized SMP 575225069Ssilby * environments, so it is set to a negative quality in those cases. 576118987Sphk */ 577246212Skib if (mp_ncpus > 1) 578246212Skib tsc_timecounter.tc_quality = test_tsc(); 579246212Skib else if (tsc_is_invariant) 580222869Sjkim tsc_timecounter.tc_quality = 1000; 581246212Skib max_freq >>= tsc_shift; 582222869Sjkim 583221703Sjkiminit: 584246212Skib for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++) 585222866Sjkim ; 586246212Skib if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) { 587246212Skib if (cpu_vendor_id == CPU_VENDOR_AMD) { 588246212Skib tsc_timecounter.tc_get_timecount = shift > 0 ? 589246212Skib tsc_get_timecount_low_mfence : 590246212Skib tsc_get_timecount_mfence; 591246212Skib } else { 592246212Skib tsc_timecounter.tc_get_timecount = shift > 0 ? 593246212Skib tsc_get_timecount_low_lfence : 594246212Skib tsc_get_timecount_lfence; 595246212Skib } 596246212Skib } else { 597246212Skib tsc_timecounter.tc_get_timecount = shift > 0 ? 
598246212Skib tsc_get_timecount_low : tsc_get_timecount; 599246212Skib } 600222866Sjkim if (shift > 0) { 601222866Sjkim tsc_timecounter.tc_name = "TSC-low"; 602222866Sjkim if (bootverbose) 603222884Sjkim printf("TSC timecounter discards lower %d bit(s)\n", 604222866Sjkim shift); 605222866Sjkim } 606219461Sjkim if (tsc_freq != 0) { 607222866Sjkim tsc_timecounter.tc_frequency = tsc_freq >> shift; 608222866Sjkim tsc_timecounter.tc_priv = (void *)(intptr_t)shift; 60958377Sphk tc_init(&tsc_timecounter); 61033690Sphk } 6114Srgrimes} 612221703SjkimSYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL); 6134Srgrimes 614167905Snjl/* 615167905Snjl * When cpufreq levels change, find out about the (new) max frequency. We 616167905Snjl * use this to update CPU accounting in case it got a lower estimate at boot. 617167905Snjl */ 618167905Snjlstatic void 619167905Snjltsc_levels_changed(void *arg, int unit) 620167905Snjl{ 621167905Snjl device_t cf_dev; 622167905Snjl struct cf_level *levels; 623167905Snjl int count, error; 624167905Snjl uint64_t max_freq; 625167905Snjl 626167905Snjl /* Only use values from the first CPU, assuming all are equal. */ 627167905Snjl if (unit != 0) 628167905Snjl return; 629167905Snjl 630167905Snjl /* Find the appropriate cpufreq device instance. */ 631167905Snjl cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); 632167905Snjl if (cf_dev == NULL) { 633167905Snjl printf("tsc_levels_changed() called but no cpufreq device?\n"); 634167905Snjl return; 635167905Snjl } 636167905Snjl 637167905Snjl /* Get settings from the device and find the max frequency. 
*/ 638167905Snjl count = 64; 639167905Snjl levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); 640167905Snjl if (levels == NULL) 641167905Snjl return; 642167905Snjl error = CPUFREQ_LEVELS(cf_dev, levels, &count); 643167905Snjl if (error == 0 && count != 0) { 644167905Snjl max_freq = (uint64_t)levels[0].total_set.freq * 1000000; 645167905Snjl set_cputicker(rdtsc, max_freq, 1); 646167905Snjl } else 647167905Snjl printf("tsc_levels_changed: no max freq found\n"); 648167905Snjl free(levels, M_TEMP); 649167905Snjl} 650167905Snjl 651167905Snjl/* 652167905Snjl * If the TSC timecounter is in use, veto the pending change. It may be 653167905Snjl * possible in the future to handle a dynamically-changing timecounter rate. 654167905Snjl */ 655167905Snjlstatic void 656167905Snjltsc_freq_changing(void *arg, const struct cf_level *level, int *status) 657167905Snjl{ 658167905Snjl 659216274Sjkim if (*status != 0 || timecounter != &tsc_timecounter) 660167905Snjl return; 661167905Snjl 662167905Snjl printf("timecounter TSC must not be in use when " 663184102Sjkim "changing frequencies; change denied\n"); 664167905Snjl *status = EBUSY; 665167905Snjl} 666167905Snjl 667167905Snjl/* Update TSC freq with the value indicated by the caller. */ 668167905Snjlstatic void 669167905Snjltsc_freq_changed(void *arg, const struct cf_level *level, int status) 670167905Snjl{ 671220433Sjkim uint64_t freq; 672216276Sjkim 673216276Sjkim /* If there was an error during the transition, don't do anything. */ 674219473Sjkim if (tsc_disabled || status != 0) 675167905Snjl return; 676167905Snjl 677167905Snjl /* Total setting for this level gives the new frequency in MHz. 
*/ 678220433Sjkim freq = (uint64_t)level->total_set.freq * 1000000; 679220433Sjkim atomic_store_rel_64(&tsc_freq, freq); 680222866Sjkim tsc_timecounter.tc_frequency = 681222866Sjkim freq >> (int)(intptr_t)tsc_timecounter.tc_priv; 682167905Snjl} 683167905Snjl 68415508Sbdestatic int 68562573Sphksysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) 68615508Sbde{ 68715508Sbde int error; 688110039Sphk uint64_t freq; 68915508Sbde 690220433Sjkim freq = atomic_load_acq_64(&tsc_freq); 691220433Sjkim if (freq == 0) 69215508Sbde return (EOPNOTSUPP); 693217616Smdf error = sysctl_handle_64(oidp, &freq, 0, req); 694219700Sjkim if (error == 0 && req->newptr != NULL) { 695220433Sjkim atomic_store_rel_64(&tsc_freq, freq); 696222884Sjkim atomic_store_rel_64(&tsc_timecounter.tc_frequency, 697222884Sjkim freq >> (int)(intptr_t)tsc_timecounter.tc_priv); 698219700Sjkim } 69915508Sbde return (error); 70015508Sbde} 70115508Sbde 702217616SmdfSYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW, 703220613Sjkim 0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency"); 70433690Sphk 705220632Sjkimstatic u_int 706222866Sjkimtsc_get_timecount(struct timecounter *tc __unused) 70733690Sphk{ 708220632Sjkim 709220632Sjkim return (rdtsc32()); 71033690Sphk} 711222866Sjkim 712238973Skibstatic inline u_int 713222884Sjkimtsc_get_timecount_low(struct timecounter *tc) 714222866Sjkim{ 715223211Sjkim uint32_t rv; 716222866Sjkim 717223211Sjkim __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" 718238973Skib : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); 719223211Sjkim return (rv); 720222866Sjkim} 721237433Skib 722238973Skibstatic u_int 723238973Skibtsc_get_timecount_lfence(struct timecounter *tc __unused) 724238973Skib{ 725238973Skib 726238973Skib lfence(); 727238973Skib return (rdtsc32()); 728238973Skib} 729238973Skib 730238973Skibstatic u_int 731238973Skibtsc_get_timecount_low_lfence(struct timecounter *tc) 732238973Skib{ 733238973Skib 734238973Skib lfence(); 735238973Skib return 
(tsc_get_timecount_low(tc)); 736238973Skib} 737238973Skib 738238973Skibstatic u_int 739238973Skibtsc_get_timecount_mfence(struct timecounter *tc __unused) 740238973Skib{ 741238973Skib 742238973Skib mfence(); 743238973Skib return (rdtsc32()); 744238973Skib} 745238973Skib 746238973Skibstatic u_int 747238973Skibtsc_get_timecount_low_mfence(struct timecounter *tc) 748238973Skib{ 749238973Skib 750238973Skib mfence(); 751238973Skib return (tsc_get_timecount_low(tc)); 752238973Skib} 753238973Skib 754237433Skibuint32_t 755237433Skibcpu_fill_vdso_timehands(struct vdso_timehands *vdso_th) 756237433Skib{ 757237433Skib 758237433Skib vdso_th->th_x86_shift = (int)(intptr_t)timecounter->tc_priv; 759237433Skib bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); 760237433Skib return (timecounter == &tsc_timecounter); 761237433Skib} 762237433Skib 763237433Skib#ifdef COMPAT_FREEBSD32 764237433Skibuint32_t 765237433Skibcpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32) 766237433Skib{ 767237433Skib 768237433Skib vdso_th32->th_x86_shift = (int)(intptr_t)timecounter->tc_priv; 769237433Skib bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res)); 770237433Skib return (timecounter == &tsc_timecounter); 771237433Skib} 772237433Skib#endif 773