/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/x86/x86/tsc.c 363433 2020-07-22 21:17:02Z jkim $");

#include "opt_compat.h"
#include "opt_clock.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
#include <sys/power.h>
#include <sys/smp.h>
#include <sys/vdso.h>
#include <machine/clock.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <x86/vmware.h>
#include <dev/acpica/acpi_hpet.h>

#include "cpufreq_if.h"

uint64_t	tsc_freq;
int		tsc_is_invariant;
int		tsc_perf_stat;

static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;

SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");

#ifdef SMP
int	smp_tsc;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
    "Indicates whether the TSC is safe to use in SMP mode");

int	smp_tsc_adjust = 0;
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN,
    &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP");
#endif

static int	tsc_shift = 1;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN,
    &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency");

static int	tsc_disabled;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
    "Disable x86 Time Stamp Counter");

static int	tsc_skip_calibration;
SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");

static void tsc_freq_changed(void *arg, const struct cf_level *level,
    int status);
static void tsc_freq_changing(void *arg, const struct cf_level *level,
    int *status);
static unsigned tsc_get_timecount(struct timecounter *tc);
static inline unsigned tsc_get_timecount_low(struct timecounter *tc);
static unsigned tsc_get_timecount_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc);
static unsigned tsc_get_timecount_mfence(struct timecounter *tc);
static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc);
static void tsc_levels_changed(void *arg, int unit);
static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc);
#endif

static struct timecounter tsc_timecounter = {
	.tc_get_timecount =		tsc_get_timecount,
	.tc_counter_mask =		~0u,
	.tc_name =			"TSC",
	.tc_quality =			800,	/* adjusted in code */
	.tc_fill_vdso_timehands =	x86_tsc_vdso_timehands,
#ifdef COMPAT_FREEBSD32
	.tc_fill_vdso_timehands32 =	x86_tsc_vdso_timehands32,
#endif
};

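/*
 * VMware exposes the TSC frequency through a pseudo-CPUID leaf: if the
 * hypervisor implements leaf 0x40000010, EAX holds the TSC rate in kHz.
 * Older products are asked via the GETHZ backdoor hypercall instead,
 * which reports the rate in Hz in EBX:EAX and sets EBX to UINT_MAX when
 * the value is unavailable.
 */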
static void
tsc_freq_vmware(void)
{
	u_int regs[4];

	if (hv_high >= 0x40000010) {
		do_cpuid(0x40000010, regs);
		tsc_freq = (uint64_t)regs[0] * 1000;
	} else {
		vmware_hvcall(VMW_HVCMD_GETHZ, regs);
		if (regs[1] != UINT_MAX)
			tsc_freq = regs[0] | ((uint64_t)regs[1] << 32);
	}
	tsc_is_invariant = 1;
}

/*
 * Calculate the TSC frequency using information from the CPUID leaf 0x15,
 * 'Time Stamp Counter and Nominal Core Crystal Clock'.  If leaf 0x15 is
 * not functional, as is the case on Skylake/Kabylake, try leaf 0x16,
 * 'Processor Frequency Information'.  Leaf 0x16 is described in the SDM
 * as informational only, but if leaf 0x15 did not work and TSC
 * calibration is disabled, it is the best information we can get.  It
 * should still be an improvement over parsing the CPU model name in
 * tsc_freq_intel(), when available.
 */
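/*
 * Worked example with hypothetical leaf 0x15 values: EAX = 2 (ratio
 * denominator), EBX = 84 (ratio numerator) and ECX = 24000000 (a 24 MHz
 * core crystal) give tsc_freq = 24000000 * 84 / 2 = 1008000000 Hz.
 */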
static bool
tsc_freq_cpuid(void)
{
	u_int regs[4];

	if (cpu_high < 0x15)
		return (false);
	do_cpuid(0x15, regs);
	if (regs[0] != 0 && regs[1] != 0 && regs[2] != 0) {
		tsc_freq = (uint64_t)regs[2] * regs[1] / regs[0];
		return (true);
	}

	if (cpu_high < 0x16)
		return (false);
	do_cpuid(0x16, regs);
	if (regs[0] != 0) {
		tsc_freq = (uint64_t)regs[0] * 1000000;
		return (true);
	}

	return (false);
}

static void
tsc_freq_intel(void)
{
	char brand[48];
	u_int regs[4];
	uint64_t freq;
	char *p;
	u_int i;

	/*
	 * Intel Processor Identification and the CPUID Instruction
	 * Application Note 485.
	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
	 */
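	/*
	 * The brand string ends in a frequency field such as "2.80GHz"
	 * (hypothetical example).  Scan for the "Hz" suffix, back up five
	 * characters to the start of the digit field, and let p[4] select
	 * the unit multiplier.  For "2.80GHz":
	 * freq = (2 * 1000 + 8 * 100 + 0 * 10) * 1000 * 1000
	 *      = 2800000000 Hz.
	 */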
	if (cpu_exthigh >= 0x80000004) {
		p = brand;
		for (i = 0x80000002; i < 0x80000005; i++) {
			do_cpuid(i, regs);
			memcpy(p, regs, sizeof(regs));
			p += sizeof(regs);
		}
		p = NULL;
		for (i = 0; i < sizeof(brand) - 1; i++)
			if (brand[i] == 'H' && brand[i + 1] == 'z')
				p = brand + i;
		if (p != NULL) {
			p -= 5;
			switch (p[4]) {
			case 'M':
				i = 1;
				break;
			case 'G':
				i = 1000;
				break;
			case 'T':
				i = 1000000;
				break;
			default:
				return;
			}
#define	C2D(c)	((c) - '0')
			if (p[1] == '.') {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[2]) * 100;
				freq += C2D(p[3]) * 10;
				freq *= i * 1000;
			} else {
				freq = C2D(p[0]) * 1000;
				freq += C2D(p[1]) * 100;
				freq += C2D(p[2]) * 10;
				freq += C2D(p[3]);
				freq *= i * 1000000;
			}
#undef C2D
			tsc_freq = freq;
		}
	}
}

static void
probe_tsc_freq(void)
{
	u_int regs[4];
	uint64_t tsc1, tsc2;

	if (cpu_high >= 6) {
		do_cpuid(6, regs);
		if ((regs[2] & CPUID_PERF_STAT) != 0) {
			/*
			 * XXX Some emulators expose host CPUID without actual
			 * support for these MSRs.  We must test whether they
			 * really work.
			 */
			wrmsr(MSR_MPERF, 0);
			wrmsr(MSR_APERF, 0);
			DELAY(10);
			if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
				tsc_perf_stat = 1;
		}
	}

	if (vm_guest == VM_GUEST_VMWARE) {
		tsc_freq_vmware();
		return;
	}

	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_mfence;
		}
		break;
	case CPU_VENDOR_INTEL:
		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
		    (vm_guest == VM_GUEST_NO &&
		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		if (vm_guest == VM_GUEST_NO &&
		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
			tsc_is_invariant = 1;
		if (cpu_feature & CPUID_SSE2) {
			tsc_timecounter.tc_get_timecount =
			    tsc_get_timecount_lfence;
		}
		break;
	}

	if (tsc_skip_calibration) {
		if (tsc_freq_cpuid())
			;
		else if (cpu_vendor_id == CPU_VENDOR_INTEL)
			tsc_freq_intel();
	} else {
		if (bootverbose)
			printf("Calibrating TSC clock ... ");
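		/*
		 * DELAY() takes microseconds, so this busy-waits roughly
		 * one second; the difference between the two raw TSC reads
		 * is then the frequency in Hz.
		 */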
		tsc1 = rdtsc();
		DELAY(1000000);
		tsc2 = rdtsc();
		tsc_freq = tsc2 - tsc1;
	}
	if (bootverbose)
		printf("TSC clock: %ju Hz\n", (uintmax_t)tsc_freq);
}

void
init_TSC(void)
{

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

#ifdef __i386__
	/* The TSC is known to be broken on certain CPUs. */
	switch (cpu_vendor_id) {
	case CPU_VENDOR_AMD:
		switch (cpu_id & 0xFF0) {
		case 0x500:
			/* K5 Model 0 */
			return;
		}
		break;
	case CPU_VENDOR_CENTAUR:
		switch (cpu_id & 0xff0) {
		case 0x540:
			/*
			 * http://www.centtech.com/c6_data_sheet.pdf
			 *
			 * I-12 RDTSC may return incoherent values in EDX:EAX
			 * I-13 RDTSC hangs when certain event counters are used
			 */
			return;
		}
		break;
	case CPU_VENDOR_NSC:
		switch (cpu_id & 0xff0) {
		case 0x540:
			if ((cpu_id & CPUID_STEPPING) == 0)
				return;
			break;
		}
		break;
	}
#endif

	probe_tsc_freq();

	/*
	 * Inform CPU accounting about our boot-time clock rate.  This will
	 * be updated if someone loads a cpufreq driver after boot that
	 * discovers a new max frequency.
	 */
	if (tsc_freq != 0)
		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);

	if (tsc_is_invariant)
		return;

	/* Register to find out about changes in CPU frequency. */
	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
}

#ifdef SMP

/*
 * RDTSC is not a serializing instruction, and does not drain the
 * instruction stream, so we need to drain the stream before executing
 * it.  This could be fixed by using RDTSCP, except that instruction is
 * not available everywhere.
 *
 * Use CPUID for draining in the boot-time SMP consistency test.  The
 * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel
 * and VIA) when SSE2 is present, and nothing on older machines, which
 * also do not issue RDTSC prematurely.  There, testing for SSE2 and
 * the vendor is too cumbersome, and we learn about TSC presence from
 * CPUID.
 *
 * Do not use do_cpuid(), since we do not need the CPUID results, which
 * have to be written into memory with do_cpuid().
 */
#define	TSC_READ(x)							\
static void								\
tsc_read_##x(void *arg)							\
{									\
	uint64_t *tsc = arg;						\
	u_int cpu = PCPU_GET(cpuid);					\
									\
	__asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx");	\
	tsc[cpu * 3 + x] = rdtsc();					\
}
TSC_READ(0)
TSC_READ(1)
TSC_READ(2)
#undef TSC_READ

#define	N	1000

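/*
 * Each rendezvous round leaves three TSC samples per CPU in the array,
 * at tsc[cpu * 3 + 0..2], with the rendezvous synchronizing the CPUs
 * between passes.  Pass x + 1 on any CPU therefore happens after pass x
 * on every CPU, so coherent counters require each CPU's later sample to
 * exceed every other CPU's earlier one; comp_smp_tsc() clears smp_tsc
 * when that ordering is violated.
 */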
static void
comp_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d1, d2;
	u_int cpu = PCPU_GET(cpuid);
	u_int i, j, size;

	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size)
		CPU_FOREACH(j) {
			if (j == cpu)
				continue;
			d1 = tsc[cpu * 3 + 1] - tsc[j * 3];
			d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1];
			if (d1 <= 0 || d2 <= 0) {
				smp_tsc = 0;
				return;
			}
		}
}

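/*
 * Estimate the offset between the first CPU's TSC and this CPU's from
 * the interleaved samples: pairs of reads with a known real-time order
 * bound the difference from below and from above.  If the bounds are
 * consistent, add their midpoint to this CPU's TSC with a direct WRMSR
 * to MSR 0x10 (the TSC itself), nudging it toward the first CPU.
 */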
static void
adj_smp_tsc(void *arg)
{
	uint64_t *tsc;
	int64_t d, min, max;
	u_int cpu = PCPU_GET(cpuid);
	u_int first, i, size;

	first = CPU_FIRST();
	if (cpu == first)
		return;
	min = INT64_MIN;
	max = INT64_MAX;
	size = (mp_maxid + 1) * 3;
	for (i = 0, tsc = arg; i < N; i++, tsc += size) {
		d = tsc[first * 3] - tsc[cpu * 3 + 1];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2];
		if (d > min)
			min = d;
		d = tsc[first * 3 + 1] - tsc[cpu * 3];
		if (d < max)
			max = d;
		d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1];
		if (d < max)
			max = d;
	}
	if (min > max)
		return;
	d = min / 2 + max / 2;
	__asm __volatile (
		"movl $0x10, %%ecx\n\t"
		"rdmsr\n\t"
		"addl %%edi, %%eax\n\t"
		"adcl %%esi, %%edx\n\t"
		"wrmsr\n"
		: /* No output */
		: "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32))
		: "ax", "cx", "dx", "cc"
	);
}

static int
test_tsc(int adj_max_count)
{
	uint64_t *data, *tsc;
	u_int i, size, adj;

	if ((!smp_tsc && !tsc_is_invariant) || vm_guest)
		return (-100);
	size = (mp_maxid + 1) * 3;
	data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK);
	adj = 0;
retry:
	for (i = 0, tsc = data; i < N; i++, tsc += size)
		smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
	smp_tsc = 1;	/* XXX */
	smp_rendezvous(smp_no_rendezvous_barrier, comp_smp_tsc,
	    smp_no_rendezvous_barrier, data);
	if (!smp_tsc && adj < adj_max_count) {
		adj++;
		smp_rendezvous(smp_no_rendezvous_barrier, adj_smp_tsc,
		    smp_no_rendezvous_barrier, data);
		goto retry;
	}
	free(data, M_TEMP);
	if (bootverbose)
		printf("SMP: %sed TSC synchronization test%s\n",
		    smp_tsc ? "pass" : "fail",
		    adj > 0 ? " after adjustment" : "");
	if (smp_tsc && tsc_is_invariant) {
		switch (cpu_vendor_id) {
		case CPU_VENDOR_AMD:
			/*
			 * Processor Programming Reference (PPR) for AMD
			 * Family 17h states that the TSC uses a common
			 * reference for all sockets, cores and threads.
			 */
			if (CPUID_TO_FAMILY(cpu_id) >= 0x17)
				return (1000);
			/*
			 * Starting with Family 15h processors, TSC clock
			 * source is in the north bridge.  Check whether
			 * we have a single-socket/multi-core platform.
			 * XXX Need more work for complex cases.
			 */
			if (CPUID_TO_FAMILY(cpu_id) < 0x15 ||
			    (amd_feature2 & AMDID2_CMP) == 0 ||
			    smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1)
				break;
			return (1000);
		case CPU_VENDOR_INTEL:
			/*
			 * XXX Assume Intel platforms have synchronized TSCs.
			 */
			return (1000);
		}
		return (800);
	}
	return (-100);
}

#undef N

#endif /* SMP */

static void
init_TSC_tc(void)
{
	uint64_t max_freq;
	int shift;

	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
		return;

	/*
	 * Limit timecounter frequency to fit in an int and prevent it from
	 * overflowing too fast.
	 */
	max_freq = UINT_MAX;

	/*
	 * We cannot use the TSC if we support APM.  Precise timekeeping
	 * on an APM'ed machine is at best a fool's pursuit, since
	 * any and all of the time spent in various SMM code can't
	 * be reliably accounted for.  Reading the RTC is your only
	 * source of reliable time info.  The i8254 loses too, of course,
	 * but we need to have some kind of time...
	 * We don't know at this point whether APM is going to be used
	 * or not, nor when it might be activated.  Play it safe.
	 */
	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
		tsc_timecounter.tc_quality = -1000;
		if (bootverbose)
			printf("TSC timecounter disabled: APM enabled.\n");
		goto init;
	}

	/*
	 * Intel CPUs without a C-state invariant TSC can stop the TSC
	 * in either C2 or C3.  Disable use of C2 and C3 while using
	 * the TSC as the timecounter.  The timecounter can be changed
	 * to enable C2 and C3.
	 *
	 * Note that the TSC is used as the cputicker for computing
	 * thread runtime regardless of the timecounter setting, so
	 * using an alternate timecounter and enabling C2 or C3 can
	 * result in incorrect runtimes for kernel idle threads (but not
	 * for any non-idle threads).
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
	    (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) {
		tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP;
		if (bootverbose)
			printf("TSC timecounter disables C2 and C3.\n");
	}

	/*
	 * We cannot use the TSC in SMP mode unless the TSCs on all CPUs
	 * are synchronized.  If the user is sure that the system has
	 * synchronized TSCs, set the kern.timecounter.smp_tsc tunable to a
	 * non-zero value.  The TSC seems unreliable in virtualized SMP
	 * environments, so it is set to a negative quality in those cases.
	 */
#ifdef SMP
	if (mp_ncpus > 1)
		tsc_timecounter.tc_quality = test_tsc(smp_tsc_adjust);
	else
#endif /* SMP */
	if (tsc_is_invariant)
		tsc_timecounter.tc_quality = 1000;
	max_freq >>= tsc_shift;

init:
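	/*
	 * Find the smallest shift that brings the counter frequency at or
	 * below max_freq; the discarded low-order bits merely reduce the
	 * timecounter resolution.
	 */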
	for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++)
		;
	if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) {
		if (cpu_vendor_id == CPU_VENDOR_AMD) {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_mfence :
			    tsc_get_timecount_mfence;
		} else {
			tsc_timecounter.tc_get_timecount = shift > 0 ?
			    tsc_get_timecount_low_lfence :
			    tsc_get_timecount_lfence;
		}
	} else {
		tsc_timecounter.tc_get_timecount = shift > 0 ?
		    tsc_get_timecount_low : tsc_get_timecount;
	}
	if (shift > 0) {
		tsc_timecounter.tc_name = "TSC-low";
		if (bootverbose)
			printf("TSC timecounter discards lower %d bit(s)\n",
			    shift);
	}
	if (tsc_freq != 0) {
		tsc_timecounter.tc_frequency = tsc_freq >> shift;
		tsc_timecounter.tc_priv = (void *)(intptr_t)shift;
		tc_init(&tsc_timecounter);
	}
}
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);

void
resume_TSC(void)
{
#ifdef SMP
	int quality;

	/* If TSC was not good on boot, it is unlikely to become good now. */
	if (tsc_timecounter.tc_quality < 0)
		return;
	/* Nothing to do on UP. */
	if (mp_ncpus < 2)
		return;

	/*
	 * If TSC was good, a single synchronization should be enough,
	 * but honour smp_tsc_adjust if it's set.
	 */
	quality = test_tsc(MAX(smp_tsc_adjust, 1));
	if (quality != tsc_timecounter.tc_quality) {
		printf("TSC timecounter quality changed: %d -> %d\n",
		    tsc_timecounter.tc_quality, quality);
		tsc_timecounter.tc_quality = quality;
	}
#endif /* SMP */
}

/*
 * When cpufreq levels change, find out about the (new) max frequency.  We
 * use this to update CPU accounting in case it got a lower estimate at boot.
 */
static void
tsc_levels_changed(void *arg, int unit)
{
	device_t cf_dev;
	struct cf_level *levels;
	int count, error;
	uint64_t max_freq;

	/* Only use values from the first CPU, assuming all are equal. */
	if (unit != 0)
		return;

	/* Find the appropriate cpufreq device instance. */
	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
	if (cf_dev == NULL) {
		printf("tsc_levels_changed() called but no cpufreq device?\n");
		return;
	}

	/* Get settings from the device and find the max frequency. */
	count = 64;
	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
	if (levels == NULL)
		return;
	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
	if (error == 0 && count != 0) {
		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
		set_cputicker(rdtsc, max_freq, 1);
	} else
		printf("tsc_levels_changed: no max freq found\n");
	free(levels, M_TEMP);
}

/*
 * If the TSC timecounter is in use, veto the pending change.  It may be
 * possible in the future to handle a dynamically-changing timecounter rate.
 */
static void
tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
{

	if (*status != 0 || timecounter != &tsc_timecounter)
		return;

	printf("timecounter TSC must not be in use when "
	    "changing frequencies; change denied\n");
	*status = EBUSY;
}

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	uint64_t freq;

	/* If there was an error during the transition, don't do anything. */
	if (tsc_disabled || status != 0)
		return;

	/* Total setting for this level gives the new frequency in MHz. */
	freq = (uint64_t)level->total_set.freq * 1000000;
	atomic_store_rel_64(&tsc_freq, freq);
	tsc_timecounter.tc_frequency =
	    freq >> (int)(intptr_t)tsc_timecounter.tc_priv;
}

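/*
 * Backs machdep.tsc_freq: reading returns the currently recorded TSC
 * frequency, and writing a new value (for example,
 * `sysctl machdep.tsc_freq=2400000000`) updates both tsc_freq and the
 * timecounter frequency, scaled by the TSC-low shift.
 */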
static int
sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
{
	int error;
	uint64_t freq;

	freq = atomic_load_acq_64(&tsc_freq);
	if (freq == 0)
		return (EOPNOTSUPP);
	error = sysctl_handle_64(oidp, &freq, 0, req);
	if (error == 0 && req->newptr != NULL) {
		atomic_store_rel_64(&tsc_freq, freq);
		atomic_store_rel_64(&tsc_timecounter.tc_frequency,
		    freq >> (int)(intptr_t)tsc_timecounter.tc_priv);
	}
	return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");

static u_int
tsc_get_timecount(struct timecounter *tc __unused)
{

	return (rdtsc32());
}

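/*
 * RDTSC leaves the 64-bit counter in EDX:EAX.  SHRD shifts EAX right by
 * tc_priv bits while filling the vacated high bits from EDX, so the
 * result is bits [shift, shift + 31] of the full counter.
 */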
static inline u_int
tsc_get_timecount_low(struct timecounter *tc)
{
	uint32_t rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx");
	return (rv);
}

static u_int
tsc_get_timecount_lfence(struct timecounter *tc __unused)
{

	lfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_lfence(struct timecounter *tc)
{

	lfence();
	return (tsc_get_timecount_low(tc));
}

static u_int
tsc_get_timecount_mfence(struct timecounter *tc __unused)
{

	mfence();
	return (rdtsc32());
}

static u_int
tsc_get_timecount_low_mfence(struct timecounter *tc)
{

	mfence();
	return (tsc_get_timecount_low(tc));
}

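/*
 * Export the TSC parameters through the shared page so the vDSO can
 * compute timecounts entirely in userspace: th_algo selects the
 * RDTSC-based algorithm and th_x86_shift replays the TSC-low shift.
 */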
static uint32_t
x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
{

	vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC;
	vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv;
	vdso_th->th_x86_hpet_idx = 0xffffffff;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
	return (1);
}

#ifdef COMPAT_FREEBSD32
static uint32_t
x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32,
    struct timecounter *tc)
{

	vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC;
	vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv;
	vdso_th32->th_x86_hpet_idx = 0xffffffff;
	bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res));
	return (1);
}
#endif
