tsc.c revision 221178
/*-
 * Copyright (c) 1998-2003 Poul-Henning Kamp
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
264Srgrimes
27115683Sobrien#include <sys/cdefs.h>
28115683Sobrien__FBSDID("$FreeBSD: head/sys/x86/x86/tsc.c 221178 2011-04-28 17:56:02Z jkim $");
29115683Sobrien
3016299Spst#include "opt_clock.h"
3113228Swollman
322056Swollman#include <sys/param.h>
33167905Snjl#include <sys/bus.h>
34167905Snjl#include <sys/cpu.h>
35167905Snjl#include <sys/malloc.h>
362056Swollman#include <sys/systm.h>
37113348Sdes#include <sys/sysctl.h>
382056Swollman#include <sys/time.h>
3958377Sphk#include <sys/timetc.h>
402056Swollman#include <sys/kernel.h>
4185835Siwasaki#include <sys/power.h>
42113348Sdes#include <sys/smp.h>
434180Sbde#include <machine/clock.h>
44216272Sjkim#include <machine/cputypes.h>
4532054Sphk#include <machine/md_var.h>
4632054Sphk#include <machine/specialreg.h>
4715508Sbde
48167905Snjl#include "cpufreq_if.h"
49167905Snjl
50216163Sjkimuint64_t	tsc_freq;
51184102Sjkimint		tsc_is_invariant;
52220579Sjkimint		tsc_perf_stat;
53220579Sjkim
54167905Snjlstatic eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
551390Ssos
56184102SjkimSYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
57184108Sjkim    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
58184108SjkimTUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
59184102Sjkim
60113348Sdes#ifdef SMP
61113348Sdesstatic int	smp_tsc;
62121307SsilbySYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
63113348Sdes    "Indicates whether the TSC is safe to use in SMP mode");
64113348SdesTUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
65113348Sdes#endif
66113348Sdes
67219473Sjkimstatic int	tsc_disabled;
68219473SjkimSYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
69219473Sjkim    "Disable x86 Time Stamp Counter");
70219473SjkimTUNABLE_INT("machdep.disable_tsc", &tsc_disabled);
71219473Sjkim
72220577Sjkimstatic int	tsc_skip_calibration;
73220577SjkimSYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
74220577Sjkim    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");
75220577SjkimTUNABLE_INT("machdep.disable_tsc_calibration", &tsc_skip_calibration);
76220577Sjkim
77167905Snjlstatic void tsc_freq_changed(void *arg, const struct cf_level *level,
78167905Snjl    int status);
79167905Snjlstatic void tsc_freq_changing(void *arg, const struct cf_level *level,
80167905Snjl    int *status);
8192765Salfredstatic	unsigned tsc_get_timecount(struct timecounter *tc);
82167905Snjlstatic void tsc_levels_changed(void *arg, int unit);
8317353Sbde
8440610Sphkstatic struct timecounter tsc_timecounter = {
8533690Sphk	tsc_get_timecount,	/* get_timecount */
8636741Sphk	0,			/* no poll_pps */
87167905Snjl	~0u,			/* counter_mask */
8833690Sphk	0,			/* frequency */
89167905Snjl	"TSC",			/* name */
90118987Sphk	800,			/* quality (adjusted in code) */
9133690Sphk};
9233690Sphk
93220577Sjkimstatic void
94220577Sjkimtsc_freq_intel(void)
951390Ssos{
96220577Sjkim	char brand[48];
97220577Sjkim	u_int regs[4];
98220577Sjkim	uint64_t freq;
99220577Sjkim	char *p;
100220577Sjkim	u_int i;
1011390Ssos
102220577Sjkim	/*
103220577Sjkim	 * Intel Processor Identification and the CPUID Instruction
104220577Sjkim	 * Application Note 485.
105220577Sjkim	 *
106220577Sjkim	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
107220577Sjkim	 */
108220577Sjkim	if (cpu_exthigh >= 0x80000004) {
109220577Sjkim		p = brand;
110220577Sjkim		for (i = 0x80000002; i < 0x80000005; i++) {
111220577Sjkim			do_cpuid(i, regs);
112220577Sjkim			memcpy(p, regs, sizeof(regs));
113220577Sjkim			p += sizeof(regs);
114220577Sjkim		}
115220577Sjkim		p = NULL;
116220577Sjkim		for (i = 0; i < sizeof(brand) - 1; i++)
117220577Sjkim			if (brand[i] == 'H' && brand[i + 1] == 'z')
118220577Sjkim				p = brand + i;
119220577Sjkim		if (p != NULL) {
120220577Sjkim			p -= 5;
121220577Sjkim			switch (p[4]) {
122220577Sjkim			case 'M':
123220577Sjkim				i = 1;
124220577Sjkim				break;
125220577Sjkim			case 'G':
126220577Sjkim				i = 1000;
127220577Sjkim				break;
128220577Sjkim			case 'T':
129220577Sjkim				i = 1000000;
130220577Sjkim				break;
131220577Sjkim			default:
132220577Sjkim				return;
133220577Sjkim			}
134220577Sjkim#define	C2D(c)	((c) - '0')
135220577Sjkim			if (p[1] == '.') {
136220577Sjkim				freq = C2D(p[0]) * 1000;
137220577Sjkim				freq += C2D(p[2]) * 100;
138220577Sjkim				freq += C2D(p[3]) * 10;
139220577Sjkim				freq *= i * 1000;
140220577Sjkim			} else {
141220577Sjkim				freq = C2D(p[0]) * 1000;
142220577Sjkim				freq += C2D(p[1]) * 100;
143220577Sjkim				freq += C2D(p[2]) * 10;
144220577Sjkim				freq += C2D(p[3]);
145220577Sjkim				freq *= i * 1000000;
146220577Sjkim			}
147220577Sjkim#undef C2D
148220577Sjkim			tsc_freq = freq;
149220577Sjkim		}
150220577Sjkim	}
151220577Sjkim}
15232054Sphk
153220577Sjkimstatic void
154220577Sjkimprobe_tsc_freq(void)
155220577Sjkim{
156220579Sjkim	u_int regs[4];
157220577Sjkim	uint64_t tsc1, tsc2;
15815508Sbde
159216272Sjkim	switch (cpu_vendor_id) {
160216272Sjkim	case CPU_VENDOR_AMD:
161219469Sjkim		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
162219469Sjkim		    (vm_guest == VM_GUEST_NO &&
163219469Sjkim		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
164216272Sjkim			tsc_is_invariant = 1;
165216272Sjkim		break;
166216272Sjkim	case CPU_VENDOR_INTEL:
167219469Sjkim		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
168219469Sjkim		    (vm_guest == VM_GUEST_NO &&
169219469Sjkim		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
170216272Sjkim		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
171216272Sjkim		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
172219469Sjkim		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
173216272Sjkim			tsc_is_invariant = 1;
174216272Sjkim		break;
175216272Sjkim	case CPU_VENDOR_CENTAUR:
176219469Sjkim		if (vm_guest == VM_GUEST_NO &&
177219469Sjkim		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
178216272Sjkim		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
179216272Sjkim		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
180216272Sjkim			tsc_is_invariant = 1;
181216272Sjkim		break;
182216272Sjkim	}
183216272Sjkim
184220579Sjkim	if (cpu_high >= 6) {
185220579Sjkim		do_cpuid(6, regs);
186220637Sjkim		if ((regs[2] & CPUID_PERF_STAT) != 0) {
187220637Sjkim			/*
188220637Sjkim			 * XXX Some emulators expose host CPUID without actual
189220637Sjkim			 * support for these MSRs.  We must test whether they
190220637Sjkim			 * really work.
191220637Sjkim			 */
192220637Sjkim			wrmsr(MSR_MPERF, 0);
193220637Sjkim			wrmsr(MSR_APERF, 0);
194220637Sjkim			DELAY(10);
195220637Sjkim			if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
196220637Sjkim				tsc_perf_stat = 1;
197220637Sjkim		}
198220579Sjkim	}
199220579Sjkim
200220577Sjkim	if (tsc_skip_calibration) {
201220577Sjkim		if (cpu_vendor_id == CPU_VENDOR_INTEL)
202220577Sjkim			tsc_freq_intel();
203220577Sjkim		return;
204220577Sjkim	}
205220577Sjkim
206220577Sjkim	if (bootverbose)
207220577Sjkim	        printf("Calibrating TSC clock ... ");
208220577Sjkim	tsc1 = rdtsc();
209220577Sjkim	DELAY(1000000);
210220577Sjkim	tsc2 = rdtsc();
211220577Sjkim	tsc_freq = tsc2 - tsc1;
212220577Sjkim	if (bootverbose)
213220577Sjkim		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
214220577Sjkim}
215220577Sjkim
216220577Sjkimvoid
217220577Sjkiminit_TSC(void)
218220577Sjkim{
219220577Sjkim
220220577Sjkim	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
221220577Sjkim		return;
222220577Sjkim
223220577Sjkim	probe_tsc_freq();
224220577Sjkim
225167905Snjl	/*
226216274Sjkim	 * Inform CPU accounting about our boot-time clock rate.  This will
227216274Sjkim	 * be updated if someone loads a cpufreq driver after boot that
228216274Sjkim	 * discovers a new max frequency.
229167905Snjl	 */
230220577Sjkim	if (tsc_freq != 0)
231221178Sjkim		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);
232167905Snjl
233216274Sjkim	if (tsc_is_invariant)
234216274Sjkim		return;
235216274Sjkim
236167905Snjl	/* Register to find out about changes in CPU frequency. */
237184108Sjkim	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
238184108Sjkim	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
239167905Snjl	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
240167905Snjl	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
241167905Snjl	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
242167905Snjl	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
243118550Sphk}
24434617Sphk
245118550Sphkvoid
246118550Sphkinit_TSC_tc(void)
247118550Sphk{
248209103Smav
249219673Sjkim	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
250209103Smav		return;
251209103Smav
25234617Sphk	/*
253160964Syar	 * We can not use the TSC if we support APM.  Precise timekeeping
25449186Smsmith	 * on an APM'ed machine is at best a fools pursuit, since
25534617Sphk	 * any and all of the time spent in various SMM code can't
25634617Sphk	 * be reliably accounted for.  Reading the RTC is your only
257160964Syar	 * source of reliable time info.  The i8254 loses too, of course,
25834617Sphk	 * but we need to have some kind of time...
25949186Smsmith	 * We don't know at this point whether APM is going to be used
26049186Smsmith	 * or not, nor when it might be activated.  Play it safe.
26134617Sphk	 */
26285835Siwasaki	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
263118987Sphk		tsc_timecounter.tc_quality = -1000;
26485835Siwasaki		if (bootverbose)
265110370Sphk			printf("TSC timecounter disabled: APM enabled.\n");
26664031Sphk	}
26734617Sphk
268118987Sphk#ifdef SMP
269118987Sphk	/*
270118987Sphk	 * We can not use the TSC in SMP mode unless the TSCs on all CPUs
271118987Sphk	 * are somehow synchronized.  Some hardware configurations do
272118987Sphk	 * this, but we have no way of determining whether this is the
273118987Sphk	 * case, so we do not use the TSC in multi-processor systems
274118987Sphk	 * unless the user indicated (by setting kern.timecounter.smp_tsc
275118987Sphk	 * to 1) that he believes that his TSCs are synchronized.
276118987Sphk	 */
277118987Sphk	if (mp_ncpus > 1 && !smp_tsc)
278118987Sphk		tsc_timecounter.tc_quality = -100;
279118987Sphk#endif
280118987Sphk
281219461Sjkim	if (tsc_freq != 0) {
28240610Sphk		tsc_timecounter.tc_frequency = tsc_freq;
28358377Sphk		tc_init(&tsc_timecounter);
28433690Sphk	}
2854Srgrimes}
2864Srgrimes
287167905Snjl/*
288167905Snjl * When cpufreq levels change, find out about the (new) max frequency.  We
289167905Snjl * use this to update CPU accounting in case it got a lower estimate at boot.
290167905Snjl */
291167905Snjlstatic void
292167905Snjltsc_levels_changed(void *arg, int unit)
293167905Snjl{
294167905Snjl	device_t cf_dev;
295167905Snjl	struct cf_level *levels;
296167905Snjl	int count, error;
297167905Snjl	uint64_t max_freq;
298167905Snjl
299167905Snjl	/* Only use values from the first CPU, assuming all are equal. */
300167905Snjl	if (unit != 0)
301167905Snjl		return;
302167905Snjl
303167905Snjl	/* Find the appropriate cpufreq device instance. */
304167905Snjl	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
305167905Snjl	if (cf_dev == NULL) {
306167905Snjl		printf("tsc_levels_changed() called but no cpufreq device?\n");
307167905Snjl		return;
308167905Snjl	}
309167905Snjl
310167905Snjl	/* Get settings from the device and find the max frequency. */
311167905Snjl	count = 64;
312167905Snjl	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
313167905Snjl	if (levels == NULL)
314167905Snjl		return;
315167905Snjl	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
316167905Snjl	if (error == 0 && count != 0) {
317167905Snjl		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
318167905Snjl		set_cputicker(rdtsc, max_freq, 1);
319167905Snjl	} else
320167905Snjl		printf("tsc_levels_changed: no max freq found\n");
321167905Snjl	free(levels, M_TEMP);
322167905Snjl}
323167905Snjl
324167905Snjl/*
325167905Snjl * If the TSC timecounter is in use, veto the pending change.  It may be
326167905Snjl * possible in the future to handle a dynamically-changing timecounter rate.
327167905Snjl */
328167905Snjlstatic void
329167905Snjltsc_freq_changing(void *arg, const struct cf_level *level, int *status)
330167905Snjl{
331167905Snjl
332216274Sjkim	if (*status != 0 || timecounter != &tsc_timecounter)
333167905Snjl		return;
334167905Snjl
335167905Snjl	printf("timecounter TSC must not be in use when "
336184102Sjkim	    "changing frequencies; change denied\n");
337167905Snjl	*status = EBUSY;
338167905Snjl}
339167905Snjl
340167905Snjl/* Update TSC freq with the value indicated by the caller. */
341167905Snjlstatic void
342167905Snjltsc_freq_changed(void *arg, const struct cf_level *level, int status)
343167905Snjl{
344220433Sjkim	uint64_t freq;
345216276Sjkim
346216276Sjkim	/* If there was an error during the transition, don't do anything. */
347219473Sjkim	if (tsc_disabled || status != 0)
348167905Snjl		return;
349167905Snjl
350167905Snjl	/* Total setting for this level gives the new frequency in MHz. */
351220433Sjkim	freq = (uint64_t)level->total_set.freq * 1000000;
352220433Sjkim	atomic_store_rel_64(&tsc_freq, freq);
353220433Sjkim	atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq);
354167905Snjl}
355167905Snjl
35615508Sbdestatic int
35762573Sphksysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
35815508Sbde{
35915508Sbde	int error;
360110039Sphk	uint64_t freq;
36115508Sbde
362220433Sjkim	freq = atomic_load_acq_64(&tsc_freq);
363220433Sjkim	if (freq == 0)
36415508Sbde		return (EOPNOTSUPP);
365217616Smdf	error = sysctl_handle_64(oidp, &freq, 0, req);
366219700Sjkim	if (error == 0 && req->newptr != NULL) {
367220433Sjkim		atomic_store_rel_64(&tsc_freq, freq);
368220433Sjkim		atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq);
369219700Sjkim	}
37015508Sbde	return (error);
37115508Sbde}
37215508Sbde
373217616SmdfSYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
374220613Sjkim    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
37533690Sphk
376220632Sjkimstatic u_int
37736719Sphktsc_get_timecount(struct timecounter *tc)
37833690Sphk{
379220632Sjkim
380220632Sjkim	return (rdtsc32());
38133690Sphk}
382