/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
254Srgrimes */ 264Srgrimes 27115683Sobrien#include <sys/cdefs.h> 28115683Sobrien__FBSDID("$FreeBSD: stable/11/sys/x86/x86/tsc.c 363433 2020-07-22 21:17:02Z jkim $"); 29115683Sobrien 30237433Skib#include "opt_compat.h" 3116299Spst#include "opt_clock.h" 3213228Swollman 332056Swollman#include <sys/param.h> 34167905Snjl#include <sys/bus.h> 35167905Snjl#include <sys/cpu.h> 36221214Sjkim#include <sys/limits.h> 37167905Snjl#include <sys/malloc.h> 382056Swollman#include <sys/systm.h> 39113348Sdes#include <sys/sysctl.h> 402056Swollman#include <sys/time.h> 4158377Sphk#include <sys/timetc.h> 422056Swollman#include <sys/kernel.h> 4385835Siwasaki#include <sys/power.h> 44113348Sdes#include <sys/smp.h> 45237433Skib#include <sys/vdso.h> 464180Sbde#include <machine/clock.h> 47216272Sjkim#include <machine/cputypes.h> 4832054Sphk#include <machine/md_var.h> 4932054Sphk#include <machine/specialreg.h> 50273800Sjhb#include <x86/vmware.h> 51305866Skib#include <dev/acpica/acpi_hpet.h> 5215508Sbde 53167905Snjl#include "cpufreq_if.h" 54167905Snjl 55216163Sjkimuint64_t tsc_freq; 56184102Sjkimint tsc_is_invariant; 57220579Sjkimint tsc_perf_stat; 58220579Sjkim 59167905Snjlstatic eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; 601390Ssos 61184102SjkimSYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN, 62184108Sjkim &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant"); 63184102Sjkim 64113348Sdes#ifdef SMP 65249324Sneelint smp_tsc; 66121307SsilbySYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, 67113348Sdes "Indicates whether the TSC is safe to use in SMP mode"); 68249625Smav 69249625Smavint smp_tsc_adjust = 0; 70249625SmavSYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN, 71249625Smav &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP"); 72113348Sdes#endif 73113348Sdes 74246212Skibstatic int tsc_shift = 1; 75246212SkibSYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN, 
76246212Skib &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency"); 77246212Skib 78219473Sjkimstatic int tsc_disabled; 79219473SjkimSYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0, 80219473Sjkim "Disable x86 Time Stamp Counter"); 81219473Sjkim 82220577Sjkimstatic int tsc_skip_calibration; 83220577SjkimSYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN, 84220577Sjkim &tsc_skip_calibration, 0, "Disable TSC frequency calibration"); 85220577Sjkim 86167905Snjlstatic void tsc_freq_changed(void *arg, const struct cf_level *level, 87167905Snjl int status); 88167905Snjlstatic void tsc_freq_changing(void *arg, const struct cf_level *level, 89167905Snjl int *status); 90222866Sjkimstatic unsigned tsc_get_timecount(struct timecounter *tc); 91238973Skibstatic inline unsigned tsc_get_timecount_low(struct timecounter *tc); 92238973Skibstatic unsigned tsc_get_timecount_lfence(struct timecounter *tc); 93238973Skibstatic unsigned tsc_get_timecount_low_lfence(struct timecounter *tc); 94238973Skibstatic unsigned tsc_get_timecount_mfence(struct timecounter *tc); 95238973Skibstatic unsigned tsc_get_timecount_low_mfence(struct timecounter *tc); 96167905Snjlstatic void tsc_levels_changed(void *arg, int unit); 97305866Skibstatic uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, 98305866Skib struct timecounter *tc); 99305866Skib#ifdef COMPAT_FREEBSD32 100305866Skibstatic uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32, 101305866Skib struct timecounter *tc); 102305866Skib#endif 10317353Sbde 10440610Sphkstatic struct timecounter tsc_timecounter = { 105305866Skib .tc_get_timecount = tsc_get_timecount, 106305866Skib .tc_counter_mask = ~0u, 107305866Skib .tc_name = "TSC", 108305866Skib .tc_quality = 800, /* adjusted in code */ 109305866Skib .tc_fill_vdso_timehands = x86_tsc_vdso_timehands, 110305866Skib#ifdef COMPAT_FREEBSD32 111305866Skib .tc_fill_vdso_timehands32 = x86_tsc_vdso_timehands32, 
112305866Skib#endif 11333690Sphk}; 11433690Sphk 115273800Sjhbstatic void 116221214Sjkimtsc_freq_vmware(void) 117221214Sjkim{ 118221214Sjkim u_int regs[4]; 119221214Sjkim 120221214Sjkim if (hv_high >= 0x40000010) { 121221214Sjkim do_cpuid(0x40000010, regs); 122221214Sjkim tsc_freq = regs[0] * 1000; 123221214Sjkim } else { 124221214Sjkim vmware_hvcall(VMW_HVCMD_GETHZ, regs); 125221214Sjkim if (regs[1] != UINT_MAX) 126221214Sjkim tsc_freq = regs[0] | ((uint64_t)regs[1] << 32); 127221214Sjkim } 128221214Sjkim tsc_is_invariant = 1; 129221214Sjkim} 130221214Sjkim 131333161Skib/* 132333161Skib * Calculate TSC frequency using information from the CPUID leaf 0x15 133353007Skib * 'Time Stamp Counter and Nominal Core Crystal Clock'. If leaf 0x15 134353007Skib * is not functional, as it is on Skylake/Kabylake, try 0x16 'Processor 135353007Skib * Frequency Information'. Leaf 0x16 is described in the SDM as 136353007Skib * informational only, but if 0x15 did not work, and TSC calibration 137353007Skib * is disabled, it is the best we can get at all. It should still be 138333161Skib * an improvement over the parsing of the CPU model name in 139333161Skib * tsc_freq_intel(), when available. 
140333161Skib */ 141333161Skibstatic bool 142333161Skibtsc_freq_cpuid(void) 143333161Skib{ 144333161Skib u_int regs[4]; 145333161Skib 146333161Skib if (cpu_high < 0x15) 147333161Skib return (false); 148333161Skib do_cpuid(0x15, regs); 149353007Skib if (regs[0] != 0 && regs[1] != 0 && regs[2] != 0) { 150353007Skib tsc_freq = (uint64_t)regs[2] * regs[1] / regs[0]; 151353007Skib return (true); 152353007Skib } 153353007Skib 154353007Skib if (cpu_high < 0x16) 155333161Skib return (false); 156353007Skib do_cpuid(0x16, regs); 157353007Skib if (regs[0] != 0) { 158353007Skib tsc_freq = (uint64_t)regs[0] * 1000000; 159353007Skib return (true); 160353007Skib } 161353007Skib 162353007Skib return (false); 163333161Skib} 164333161Skib 165220577Sjkimstatic void 166220577Sjkimtsc_freq_intel(void) 1671390Ssos{ 168220577Sjkim char brand[48]; 169220577Sjkim u_int regs[4]; 170220577Sjkim uint64_t freq; 171220577Sjkim char *p; 172220577Sjkim u_int i; 1731390Ssos 174220577Sjkim /* 175220577Sjkim * Intel Processor Identification and the CPUID Instruction 176220577Sjkim * Application Note 485. 
177220577Sjkim * http://www.intel.com/assets/pdf/appnote/241618.pdf 178220577Sjkim */ 179220577Sjkim if (cpu_exthigh >= 0x80000004) { 180220577Sjkim p = brand; 181220577Sjkim for (i = 0x80000002; i < 0x80000005; i++) { 182220577Sjkim do_cpuid(i, regs); 183220577Sjkim memcpy(p, regs, sizeof(regs)); 184220577Sjkim p += sizeof(regs); 185220577Sjkim } 186220577Sjkim p = NULL; 187220577Sjkim for (i = 0; i < sizeof(brand) - 1; i++) 188220577Sjkim if (brand[i] == 'H' && brand[i + 1] == 'z') 189220577Sjkim p = brand + i; 190220577Sjkim if (p != NULL) { 191220577Sjkim p -= 5; 192220577Sjkim switch (p[4]) { 193220577Sjkim case 'M': 194220577Sjkim i = 1; 195220577Sjkim break; 196220577Sjkim case 'G': 197220577Sjkim i = 1000; 198220577Sjkim break; 199220577Sjkim case 'T': 200220577Sjkim i = 1000000; 201220577Sjkim break; 202220577Sjkim default: 203220577Sjkim return; 204220577Sjkim } 205220577Sjkim#define C2D(c) ((c) - '0') 206220577Sjkim if (p[1] == '.') { 207220577Sjkim freq = C2D(p[0]) * 1000; 208220577Sjkim freq += C2D(p[2]) * 100; 209220577Sjkim freq += C2D(p[3]) * 10; 210220577Sjkim freq *= i * 1000; 211220577Sjkim } else { 212220577Sjkim freq = C2D(p[0]) * 1000; 213220577Sjkim freq += C2D(p[1]) * 100; 214220577Sjkim freq += C2D(p[2]) * 10; 215220577Sjkim freq += C2D(p[3]); 216220577Sjkim freq *= i * 1000000; 217220577Sjkim } 218220577Sjkim#undef C2D 219220577Sjkim tsc_freq = freq; 220220577Sjkim } 221220577Sjkim } 222220577Sjkim} 22332054Sphk 224220577Sjkimstatic void 225220577Sjkimprobe_tsc_freq(void) 226220577Sjkim{ 227220579Sjkim u_int regs[4]; 228220577Sjkim uint64_t tsc1, tsc2; 22915508Sbde 230221214Sjkim if (cpu_high >= 6) { 231221214Sjkim do_cpuid(6, regs); 232221214Sjkim if ((regs[2] & CPUID_PERF_STAT) != 0) { 233221214Sjkim /* 234221214Sjkim * XXX Some emulators expose host CPUID without actual 235221214Sjkim * support for these MSRs. We must test whether they 236221214Sjkim * really work. 
237221214Sjkim */ 238221214Sjkim wrmsr(MSR_MPERF, 0); 239221214Sjkim wrmsr(MSR_APERF, 0); 240221214Sjkim DELAY(10); 241221214Sjkim if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0) 242221214Sjkim tsc_perf_stat = 1; 243221214Sjkim } 244221214Sjkim } 245221214Sjkim 246273800Sjhb if (vm_guest == VM_GUEST_VMWARE) { 247273800Sjhb tsc_freq_vmware(); 248221214Sjkim return; 249273800Sjhb } 250221214Sjkim 251216272Sjkim switch (cpu_vendor_id) { 252216272Sjkim case CPU_VENDOR_AMD: 253219469Sjkim if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || 254219469Sjkim (vm_guest == VM_GUEST_NO && 255219469Sjkim CPUID_TO_FAMILY(cpu_id) >= 0x10)) 256216272Sjkim tsc_is_invariant = 1; 257238973Skib if (cpu_feature & CPUID_SSE2) { 258238973Skib tsc_timecounter.tc_get_timecount = 259238973Skib tsc_get_timecount_mfence; 260238973Skib } 261216272Sjkim break; 262216272Sjkim case CPU_VENDOR_INTEL: 263219469Sjkim if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || 264219469Sjkim (vm_guest == VM_GUEST_NO && 265219469Sjkim ((CPUID_TO_FAMILY(cpu_id) == 0x6 && 266216272Sjkim CPUID_TO_MODEL(cpu_id) >= 0xe) || 267216272Sjkim (CPUID_TO_FAMILY(cpu_id) == 0xf && 268219469Sjkim CPUID_TO_MODEL(cpu_id) >= 0x3)))) 269216272Sjkim tsc_is_invariant = 1; 270238973Skib if (cpu_feature & CPUID_SSE2) { 271238973Skib tsc_timecounter.tc_get_timecount = 272238973Skib tsc_get_timecount_lfence; 273238973Skib } 274216272Sjkim break; 275216272Sjkim case CPU_VENDOR_CENTAUR: 276219469Sjkim if (vm_guest == VM_GUEST_NO && 277219469Sjkim CPUID_TO_FAMILY(cpu_id) == 0x6 && 278216272Sjkim CPUID_TO_MODEL(cpu_id) >= 0xf && 279216272Sjkim (rdmsr(0x1203) & 0x100000000ULL) == 0) 280216272Sjkim tsc_is_invariant = 1; 281238973Skib if (cpu_feature & CPUID_SSE2) { 282238973Skib tsc_timecounter.tc_get_timecount = 283238973Skib tsc_get_timecount_lfence; 284238973Skib } 285216272Sjkim break; 286216272Sjkim } 287216272Sjkim 288220577Sjkim if (tsc_skip_calibration) { 289333161Skib if (tsc_freq_cpuid()) 290333161Skib ; 291333161Skib else if 
(cpu_vendor_id == CPU_VENDOR_INTEL) 292220577Sjkim tsc_freq_intel(); 293333161Skib } else { 294333161Skib if (bootverbose) 295333161Skib printf("Calibrating TSC clock ... "); 296333161Skib tsc1 = rdtsc(); 297333161Skib DELAY(1000000); 298333161Skib tsc2 = rdtsc(); 299333161Skib tsc_freq = tsc2 - tsc1; 300220577Sjkim } 301220577Sjkim if (bootverbose) 302220577Sjkim printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq); 303220577Sjkim} 304220577Sjkim 305220577Sjkimvoid 306220577Sjkiminit_TSC(void) 307220577Sjkim{ 308220577Sjkim 309220577Sjkim if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) 310220577Sjkim return; 311220577Sjkim 312271082Sjhb#ifdef __i386__ 313271082Sjhb /* The TSC is known to be broken on certain CPUs. */ 314271082Sjhb switch (cpu_vendor_id) { 315271082Sjhb case CPU_VENDOR_AMD: 316271082Sjhb switch (cpu_id & 0xFF0) { 317271082Sjhb case 0x500: 318271082Sjhb /* K5 Model 0 */ 319271082Sjhb return; 320271082Sjhb } 321271082Sjhb break; 322271082Sjhb case CPU_VENDOR_CENTAUR: 323271082Sjhb switch (cpu_id & 0xff0) { 324271082Sjhb case 0x540: 325271082Sjhb /* 326271082Sjhb * http://www.centtech.com/c6_data_sheet.pdf 327271082Sjhb * 328271082Sjhb * I-12 RDTSC may return incoherent values in EDX:EAX 329271082Sjhb * I-13 RDTSC hangs when certain event counters are used 330271082Sjhb */ 331271082Sjhb return; 332271082Sjhb } 333271082Sjhb break; 334271082Sjhb case CPU_VENDOR_NSC: 335271082Sjhb switch (cpu_id & 0xff0) { 336271082Sjhb case 0x540: 337271082Sjhb if ((cpu_id & CPUID_STEPPING) == 0) 338271082Sjhb return; 339271082Sjhb break; 340271082Sjhb } 341271082Sjhb break; 342271082Sjhb } 343271082Sjhb#endif 344271082Sjhb 345220577Sjkim probe_tsc_freq(); 346220577Sjkim 347167905Snjl /* 348216274Sjkim * Inform CPU accounting about our boot-time clock rate. This will 349216274Sjkim * be updated if someone loads a cpufreq driver after boot that 350216274Sjkim * discovers a new max frequency. 
351167905Snjl */ 352220577Sjkim if (tsc_freq != 0) 353221178Sjkim set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant); 354167905Snjl 355216274Sjkim if (tsc_is_invariant) 356216274Sjkim return; 357216274Sjkim 358167905Snjl /* Register to find out about changes in CPU frequency. */ 359184108Sjkim tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, 360184108Sjkim tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); 361167905Snjl tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, 362167905Snjl tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); 363167905Snjl tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, 364167905Snjl tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); 365118550Sphk} 36634617Sphk 367221703Sjkim#ifdef SMP 368221703Sjkim 369238973Skib/* 370238973Skib * RDTSC is not a serializing instruction, and does not drain 371238973Skib * instruction stream, so we need to drain the stream before executing 372238973Skib * it. It could be fixed by use of RDTSCP, except the instruction is 373238973Skib * not available everywhere. 374238973Skib * 375238973Skib * Use CPUID for draining in the boot-time SMP constistency test. The 376238973Skib * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel 377238973Skib * and VIA) when SSE2 is present, and nothing on older machines which 378238973Skib * also do not issue RDTSC prematurely. There, testing for SSE2 and 379238975Skib * vendor is too cumbersome, and we learn about TSC presence from CPUID. 380238973Skib * 381238973Skib * Do not use do_cpuid(), since we do not need CPUID results, which 382238973Skib * have to be written into memory with do_cpuid(). 
383238973Skib */ 384238973Skib#define TSC_READ(x) \ 385238973Skibstatic void \ 386238973Skibtsc_read_##x(void *arg) \ 387238973Skib{ \ 388239133Sjimharris uint64_t *tsc = arg; \ 389238973Skib u_int cpu = PCPU_GET(cpuid); \ 390238973Skib \ 391238973Skib __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx"); \ 392239133Sjimharris tsc[cpu * 3 + x] = rdtsc(); \ 393221703Sjkim} 394221703SjkimTSC_READ(0) 395221703SjkimTSC_READ(1) 396221703SjkimTSC_READ(2) 397221703Sjkim#undef TSC_READ 398221703Sjkim 399221703Sjkim#define N 1000 400221703Sjkim 401221703Sjkimstatic void 402221703Sjkimcomp_smp_tsc(void *arg) 403221703Sjkim{ 404239133Sjimharris uint64_t *tsc; 405239133Sjimharris int64_t d1, d2; 406221703Sjkim u_int cpu = PCPU_GET(cpuid); 407221703Sjkim u_int i, j, size; 408221703Sjkim 409221703Sjkim size = (mp_maxid + 1) * 3; 410221703Sjkim for (i = 0, tsc = arg; i < N; i++, tsc += size) 411221703Sjkim CPU_FOREACH(j) { 412221703Sjkim if (j == cpu) 413221703Sjkim continue; 414221703Sjkim d1 = tsc[cpu * 3 + 1] - tsc[j * 3]; 415221703Sjkim d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1]; 416221703Sjkim if (d1 <= 0 || d2 <= 0) { 417221703Sjkim smp_tsc = 0; 418221703Sjkim return; 419221703Sjkim } 420221703Sjkim } 421221703Sjkim} 422221703Sjkim 423249625Smavstatic void 424249625Smavadj_smp_tsc(void *arg) 425249625Smav{ 426249625Smav uint64_t *tsc; 427249625Smav int64_t d, min, max; 428249625Smav u_int cpu = PCPU_GET(cpuid); 429249625Smav u_int first, i, size; 430249625Smav 431249625Smav first = CPU_FIRST(); 432249625Smav if (cpu == first) 433249625Smav return; 434249625Smav min = INT64_MIN; 435249625Smav max = INT64_MAX; 436249625Smav size = (mp_maxid + 1) * 3; 437249625Smav for (i = 0, tsc = arg; i < N; i++, tsc += size) { 438249625Smav d = tsc[first * 3] - tsc[cpu * 3 + 1]; 439249625Smav if (d > min) 440249625Smav min = d; 441249625Smav d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2]; 442249625Smav if (d > min) 443249625Smav min = d; 444249625Smav d = tsc[first * 3 + 1] - tsc[cpu * 3]; 
445249625Smav if (d < max) 446249625Smav max = d; 447249625Smav d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1]; 448249625Smav if (d < max) 449249625Smav max = d; 450249625Smav } 451249625Smav if (min > max) 452249625Smav return; 453249625Smav d = min / 2 + max / 2; 454249625Smav __asm __volatile ( 455249625Smav "movl $0x10, %%ecx\n\t" 456249625Smav "rdmsr\n\t" 457249625Smav "addl %%edi, %%eax\n\t" 458249625Smav "adcl %%esi, %%edx\n\t" 459249625Smav "wrmsr\n" 460249625Smav : /* No output */ 461249625Smav : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32)) 462249625Smav : "ax", "cx", "dx", "cc" 463249625Smav ); 464249625Smav} 465249625Smav 466221703Sjkimstatic int 467335657Savgtest_tsc(int adj_max_count) 468221703Sjkim{ 469239133Sjimharris uint64_t *data, *tsc; 470249625Smav u_int i, size, adj; 471221703Sjkim 472246212Skib if ((!smp_tsc && !tsc_is_invariant) || vm_guest) 473221703Sjkim return (-100); 474221703Sjkim size = (mp_maxid + 1) * 3; 475221703Sjkim data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK); 476249625Smav adj = 0; 477249625Smavretry: 478221703Sjkim for (i = 0, tsc = data; i < N; i++, tsc += size) 479221703Sjkim smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc); 480221703Sjkim smp_tsc = 1; /* XXX */ 481328386Spkelsey smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc, 482328386Spkelsey smp_no_rendezvous_barrier, data); 483335657Savg if (!smp_tsc && adj < adj_max_count) { 484249625Smav adj++; 485328386Spkelsey smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc, 486328386Spkelsey smp_no_rendezvous_barrier, data); 487249625Smav goto retry; 488249625Smav } 489221703Sjkim free(data, M_TEMP); 490221703Sjkim if (bootverbose) 491249625Smav printf("SMP: %sed TSC synchronization test%s\n", 492249625Smav smp_tsc ? "pass" : "fail", 493249625Smav adj > 0 ? 
" after adjustment" : ""); 494222869Sjkim if (smp_tsc && tsc_is_invariant) { 495222869Sjkim switch (cpu_vendor_id) { 496222869Sjkim case CPU_VENDOR_AMD: 497222869Sjkim /* 498363433Sjkim * Processor Programming Reference (PPR) for AMD 499363433Sjkim * Family 17h states that the TSC uses a common 500363433Sjkim * reference for all sockets, cores and threads. 501363433Sjkim */ 502363433Sjkim if (CPUID_TO_FAMILY(cpu_id) >= 0x17) 503363433Sjkim return (1000); 504363433Sjkim /* 505222869Sjkim * Starting with Family 15h processors, TSC clock 506222869Sjkim * source is in the north bridge. Check whether 507222869Sjkim * we have a single-socket/multi-core platform. 508222869Sjkim * XXX Need more work for complex cases. 509222869Sjkim */ 510222869Sjkim if (CPUID_TO_FAMILY(cpu_id) < 0x15 || 511222869Sjkim (amd_feature2 & AMDID2_CMP) == 0 || 512222869Sjkim smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1) 513222869Sjkim break; 514222869Sjkim return (1000); 515222869Sjkim case CPU_VENDOR_INTEL: 516222869Sjkim /* 517222869Sjkim * XXX Assume Intel platforms have synchronized TSCs. 518222869Sjkim */ 519222869Sjkim return (1000); 520222869Sjkim } 521222869Sjkim return (800); 522222869Sjkim } 523222869Sjkim return (-100); 524221703Sjkim} 525221703Sjkim 526221703Sjkim#undef N 527221703Sjkim 528221703Sjkim#endif /* SMP */ 529221703Sjkim 530221703Sjkimstatic void 531118550Sphkinit_TSC_tc(void) 532118550Sphk{ 533222866Sjkim uint64_t max_freq; 534222866Sjkim int shift; 535209103Smav 536219673Sjkim if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) 537209103Smav return; 538209103Smav 53934617Sphk /* 540222866Sjkim * Limit timecounter frequency to fit in an int and prevent it from 541222866Sjkim * overflowing too fast. 542222866Sjkim */ 543222866Sjkim max_freq = UINT_MAX; 544222866Sjkim 545222866Sjkim /* 546160964Syar * We can not use the TSC if we support APM. 
Precise timekeeping 54749186Smsmith * on an APM'ed machine is at best a fools pursuit, since 54834617Sphk * any and all of the time spent in various SMM code can't 54934617Sphk * be reliably accounted for. Reading the RTC is your only 550160964Syar * source of reliable time info. The i8254 loses too, of course, 55134617Sphk * but we need to have some kind of time... 55249186Smsmith * We don't know at this point whether APM is going to be used 55349186Smsmith * or not, nor when it might be activated. Play it safe. 55434617Sphk */ 55585835Siwasaki if (power_pm_get_type() == POWER_PM_TYPE_APM) { 556118987Sphk tsc_timecounter.tc_quality = -1000; 55785835Siwasaki if (bootverbose) 558110370Sphk printf("TSC timecounter disabled: APM enabled.\n"); 559221703Sjkim goto init; 56064031Sphk } 56134617Sphk 562223426Sjkim /* 563276724Sjhb * Intel CPUs without a C-state invariant TSC can stop the TSC 564277900Sjhb * in either C2 or C3. Disable use of C2 and C3 while using 565277900Sjhb * the TSC as the timecounter. The timecounter can be changed 566277900Sjhb * to enable C2 and C3. 567277900Sjhb * 568277900Sjhb * Note that the TSC is used as the cputicker for computing 569277900Sjhb * thread runtime regardless of the timecounter setting, so 570277900Sjhb * using an alternate timecounter and enabling C2 or C3 can 571277900Sjhb * result incorrect runtimes for kernel idle threads (but not 572277900Sjhb * for any non-idle threads). 573223426Sjkim */ 574314999Skib if (cpu_vendor_id == CPU_VENDOR_INTEL && 575223426Sjkim (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) { 576276724Sjhb tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP; 577223426Sjkim if (bootverbose) 578277900Sjhb printf("TSC timecounter disables C2 and C3.\n"); 579223426Sjkim } 580223426Sjkim 581118987Sphk /* 582246212Skib * We can not use the TSC in SMP mode unless the TSCs on all CPUs 583246212Skib * are synchronized. 
If the user is sure that the system has 584246212Skib * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a 585246212Skib * non-zero value. The TSC seems unreliable in virtualized SMP 586225069Ssilby * environments, so it is set to a negative quality in those cases. 587118987Sphk */ 588335657Savg#ifdef SMP 589246212Skib if (mp_ncpus > 1) 590335657Savg tsc_timecounter.tc_quality = test_tsc(smp_tsc_adjust); 591335657Savg else 592335657Savg#endif /* SMP */ 593335657Savg if (tsc_is_invariant) 594222869Sjkim tsc_timecounter.tc_quality = 1000; 595246212Skib max_freq >>= tsc_shift; 596222869Sjkim 597221703Sjkiminit: 598246212Skib for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++) 599222866Sjkim ; 600246212Skib if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) { 601246212Skib if (cpu_vendor_id == CPU_VENDOR_AMD) { 602246212Skib tsc_timecounter.tc_get_timecount = shift > 0 ? 603246212Skib tsc_get_timecount_low_mfence : 604246212Skib tsc_get_timecount_mfence; 605246212Skib } else { 606246212Skib tsc_timecounter.tc_get_timecount = shift > 0 ? 607246212Skib tsc_get_timecount_low_lfence : 608246212Skib tsc_get_timecount_lfence; 609246212Skib } 610246212Skib } else { 611246212Skib tsc_timecounter.tc_get_timecount = shift > 0 ? 
612246212Skib tsc_get_timecount_low : tsc_get_timecount; 613246212Skib } 614222866Sjkim if (shift > 0) { 615222866Sjkim tsc_timecounter.tc_name = "TSC-low"; 616222866Sjkim if (bootverbose) 617222884Sjkim printf("TSC timecounter discards lower %d bit(s)\n", 618222866Sjkim shift); 619222866Sjkim } 620219461Sjkim if (tsc_freq != 0) { 621222866Sjkim tsc_timecounter.tc_frequency = tsc_freq >> shift; 622222866Sjkim tsc_timecounter.tc_priv = (void *)(intptr_t)shift; 62358377Sphk tc_init(&tsc_timecounter); 62433690Sphk } 6254Srgrimes} 626221703SjkimSYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL); 6274Srgrimes 628335657Savgvoid 629335657Savgresume_TSC(void) 630335657Savg{ 631335657Savg#ifdef SMP 632335657Savg int quality; 633335657Savg 634335657Savg /* If TSC was not good on boot, it is unlikely to become good now. */ 635335657Savg if (tsc_timecounter.tc_quality < 0) 636335657Savg return; 637335657Savg /* Nothing to do with UP. */ 638335657Savg if (mp_ncpus < 2) 639335657Savg return; 640335657Savg 641335657Savg /* 642335657Savg * If TSC was good, a single synchronization should be enough, 643335657Savg * but honour smp_tsc_adjust if it's set. 644335657Savg */ 645335657Savg quality = test_tsc(MAX(smp_tsc_adjust, 1)); 646335657Savg if (quality != tsc_timecounter.tc_quality) { 647335657Savg printf("TSC timecounter quality changed: %d -> %d\n", 648335657Savg tsc_timecounter.tc_quality, quality); 649335657Savg tsc_timecounter.tc_quality = quality; 650335657Savg } 651335657Savg#endif /* SMP */ 652335657Savg} 653335657Savg 654167905Snjl/* 655167905Snjl * When cpufreq levels change, find out about the (new) max frequency. We 656167905Snjl * use this to update CPU accounting in case it got a lower estimate at boot. 
657167905Snjl */ 658167905Snjlstatic void 659167905Snjltsc_levels_changed(void *arg, int unit) 660167905Snjl{ 661167905Snjl device_t cf_dev; 662167905Snjl struct cf_level *levels; 663167905Snjl int count, error; 664167905Snjl uint64_t max_freq; 665167905Snjl 666167905Snjl /* Only use values from the first CPU, assuming all are equal. */ 667167905Snjl if (unit != 0) 668167905Snjl return; 669167905Snjl 670167905Snjl /* Find the appropriate cpufreq device instance. */ 671167905Snjl cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); 672167905Snjl if (cf_dev == NULL) { 673167905Snjl printf("tsc_levels_changed() called but no cpufreq device?\n"); 674167905Snjl return; 675167905Snjl } 676167905Snjl 677167905Snjl /* Get settings from the device and find the max frequency. */ 678167905Snjl count = 64; 679167905Snjl levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); 680167905Snjl if (levels == NULL) 681167905Snjl return; 682167905Snjl error = CPUFREQ_LEVELS(cf_dev, levels, &count); 683167905Snjl if (error == 0 && count != 0) { 684167905Snjl max_freq = (uint64_t)levels[0].total_set.freq * 1000000; 685167905Snjl set_cputicker(rdtsc, max_freq, 1); 686167905Snjl } else 687167905Snjl printf("tsc_levels_changed: no max freq found\n"); 688167905Snjl free(levels, M_TEMP); 689167905Snjl} 690167905Snjl 691167905Snjl/* 692167905Snjl * If the TSC timecounter is in use, veto the pending change. It may be 693167905Snjl * possible in the future to handle a dynamically-changing timecounter rate. 
694167905Snjl */ 695167905Snjlstatic void 696167905Snjltsc_freq_changing(void *arg, const struct cf_level *level, int *status) 697167905Snjl{ 698167905Snjl 699216274Sjkim if (*status != 0 || timecounter != &tsc_timecounter) 700167905Snjl return; 701167905Snjl 702167905Snjl printf("timecounter TSC must not be in use when " 703184102Sjkim "changing frequencies; change denied\n"); 704167905Snjl *status = EBUSY; 705167905Snjl} 706167905Snjl 707167905Snjl/* Update TSC freq with the value indicated by the caller. */ 708167905Snjlstatic void 709167905Snjltsc_freq_changed(void *arg, const struct cf_level *level, int status) 710167905Snjl{ 711220433Sjkim uint64_t freq; 712216276Sjkim 713216276Sjkim /* If there was an error during the transition, don't do anything. */ 714219473Sjkim if (tsc_disabled || status != 0) 715167905Snjl return; 716167905Snjl 717167905Snjl /* Total setting for this level gives the new frequency in MHz. */ 718220433Sjkim freq = (uint64_t)level->total_set.freq * 1000000; 719220433Sjkim atomic_store_rel_64(&tsc_freq, freq); 720222866Sjkim tsc_timecounter.tc_frequency = 721222866Sjkim freq >> (int)(intptr_t)tsc_timecounter.tc_priv; 722167905Snjl} 723167905Snjl 72415508Sbdestatic int 72562573Sphksysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) 72615508Sbde{ 72715508Sbde int error; 728110039Sphk uint64_t freq; 72915508Sbde 730220433Sjkim freq = atomic_load_acq_64(&tsc_freq); 731220433Sjkim if (freq == 0) 73215508Sbde return (EOPNOTSUPP); 733217616Smdf error = sysctl_handle_64(oidp, &freq, 0, req); 734219700Sjkim if (error == 0 && req->newptr != NULL) { 735220433Sjkim atomic_store_rel_64(&tsc_freq, freq); 736222884Sjkim atomic_store_rel_64(&tsc_timecounter.tc_frequency, 737222884Sjkim freq >> (int)(intptr_t)tsc_timecounter.tc_priv); 738219700Sjkim } 73915508Sbde return (error); 74015508Sbde} 74115508Sbde 742217616SmdfSYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW, 743220613Sjkim 0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter 
frequency"); 74433690Sphk 745220632Sjkimstatic u_int 746222866Sjkimtsc_get_timecount(struct timecounter *tc __unused) 74733690Sphk{ 748220632Sjkim 749220632Sjkim return (rdtsc32()); 75033690Sphk} 751222866Sjkim 752238973Skibstatic inline u_int 753222884Sjkimtsc_get_timecount_low(struct timecounter *tc) 754222866Sjkim{ 755223211Sjkim uint32_t rv; 756222866Sjkim 757223211Sjkim __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" 758238973Skib : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); 759223211Sjkim return (rv); 760222866Sjkim} 761237433Skib 762238973Skibstatic u_int 763238973Skibtsc_get_timecount_lfence(struct timecounter *tc __unused) 764238973Skib{ 765238973Skib 766238973Skib lfence(); 767238973Skib return (rdtsc32()); 768238973Skib} 769238973Skib 770238973Skibstatic u_int 771238973Skibtsc_get_timecount_low_lfence(struct timecounter *tc) 772238973Skib{ 773238973Skib 774238973Skib lfence(); 775238973Skib return (tsc_get_timecount_low(tc)); 776238973Skib} 777238973Skib 778238973Skibstatic u_int 779238973Skibtsc_get_timecount_mfence(struct timecounter *tc __unused) 780238973Skib{ 781238973Skib 782238973Skib mfence(); 783238973Skib return (rdtsc32()); 784238973Skib} 785238973Skib 786238973Skibstatic u_int 787238973Skibtsc_get_timecount_low_mfence(struct timecounter *tc) 788238973Skib{ 789238973Skib 790238973Skib mfence(); 791238973Skib return (tsc_get_timecount_low(tc)); 792238973Skib} 793238973Skib 794305866Skibstatic uint32_t 795305866Skibx86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) 796237433Skib{ 797237433Skib 798305866Skib vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC; 799277406Sneel vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv; 800305866Skib vdso_th->th_x86_hpet_idx = 0xffffffff; 801237433Skib bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); 802305866Skib return (1); 803237433Skib} 804237433Skib 805237433Skib#ifdef COMPAT_FREEBSD32 806305866Skibstatic uint32_t 807305866Skibx86_tsc_vdso_timehands32(struct vdso_timehands32 
*vdso_th32, 808277406Sneel struct timecounter *tc) 809237433Skib{ 810237433Skib 811305866Skib vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC; 812277406Sneel vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv; 813305866Skib vdso_th32->th_x86_hpet_idx = 0xffffffff; 814237433Skib bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res)); 815305866Skib return (1); 816237433Skib} 817237433Skib#endif 818