tsc.c revision 221178
1/*-
2 * Copyright (c) 1998-2003 Poul-Henning Kamp
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/x86/x86/tsc.c 221178 2011-04-28 17:56:02Z jkim $");
29
30#include "opt_clock.h"
31
32#include <sys/param.h>
33#include <sys/bus.h>
34#include <sys/cpu.h>
35#include <sys/malloc.h>
36#include <sys/systm.h>
37#include <sys/sysctl.h>
38#include <sys/time.h>
39#include <sys/timetc.h>
40#include <sys/kernel.h>
41#include <sys/power.h>
42#include <sys/smp.h>
43#include <machine/clock.h>
44#include <machine/cputypes.h>
45#include <machine/md_var.h>
46#include <machine/specialreg.h>
47
48#include "cpufreq_if.h"
49
50uint64_t	tsc_freq;
51int		tsc_is_invariant;
52int		tsc_perf_stat;
53
54static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
55
56SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
57    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
58TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
59
60#ifdef SMP
61static int	smp_tsc;
62SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
63    "Indicates whether the TSC is safe to use in SMP mode");
64TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
65#endif
66
67static int	tsc_disabled;
68SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0,
69    "Disable x86 Time Stamp Counter");
70TUNABLE_INT("machdep.disable_tsc", &tsc_disabled);
71
72static int	tsc_skip_calibration;
73SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN,
74    &tsc_skip_calibration, 0, "Disable TSC frequency calibration");
75TUNABLE_INT("machdep.disable_tsc_calibration", &tsc_skip_calibration);
76
77static void tsc_freq_changed(void *arg, const struct cf_level *level,
78    int status);
79static void tsc_freq_changing(void *arg, const struct cf_level *level,
80    int *status);
81static	unsigned tsc_get_timecount(struct timecounter *tc);
82static void tsc_levels_changed(void *arg, int unit);
83
84static struct timecounter tsc_timecounter = {
85	tsc_get_timecount,	/* get_timecount */
86	0,			/* no poll_pps */
87	~0u,			/* counter_mask */
88	0,			/* frequency */
89	"TSC",			/* name */
90	800,			/* quality (adjusted in code) */
91};
92
93static void
94tsc_freq_intel(void)
95{
96	char brand[48];
97	u_int regs[4];
98	uint64_t freq;
99	char *p;
100	u_int i;
101
102	/*
103	 * Intel Processor Identification and the CPUID Instruction
104	 * Application Note 485.
105	 *
106	 * http://www.intel.com/assets/pdf/appnote/241618.pdf
107	 */
108	if (cpu_exthigh >= 0x80000004) {
109		p = brand;
110		for (i = 0x80000002; i < 0x80000005; i++) {
111			do_cpuid(i, regs);
112			memcpy(p, regs, sizeof(regs));
113			p += sizeof(regs);
114		}
115		p = NULL;
116		for (i = 0; i < sizeof(brand) - 1; i++)
117			if (brand[i] == 'H' && brand[i + 1] == 'z')
118				p = brand + i;
119		if (p != NULL) {
120			p -= 5;
121			switch (p[4]) {
122			case 'M':
123				i = 1;
124				break;
125			case 'G':
126				i = 1000;
127				break;
128			case 'T':
129				i = 1000000;
130				break;
131			default:
132				return;
133			}
134#define	C2D(c)	((c) - '0')
135			if (p[1] == '.') {
136				freq = C2D(p[0]) * 1000;
137				freq += C2D(p[2]) * 100;
138				freq += C2D(p[3]) * 10;
139				freq *= i * 1000;
140			} else {
141				freq = C2D(p[0]) * 1000;
142				freq += C2D(p[1]) * 100;
143				freq += C2D(p[2]) * 10;
144				freq += C2D(p[3]);
145				freq *= i * 1000000;
146			}
147#undef C2D
148			tsc_freq = freq;
149		}
150	}
151}
152
153static void
154probe_tsc_freq(void)
155{
156	u_int regs[4];
157	uint64_t tsc1, tsc2;
158
159	switch (cpu_vendor_id) {
160	case CPU_VENDOR_AMD:
161		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
162		    (vm_guest == VM_GUEST_NO &&
163		    CPUID_TO_FAMILY(cpu_id) >= 0x10))
164			tsc_is_invariant = 1;
165		break;
166	case CPU_VENDOR_INTEL:
167		if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 ||
168		    (vm_guest == VM_GUEST_NO &&
169		    ((CPUID_TO_FAMILY(cpu_id) == 0x6 &&
170		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
171		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
172		    CPUID_TO_MODEL(cpu_id) >= 0x3))))
173			tsc_is_invariant = 1;
174		break;
175	case CPU_VENDOR_CENTAUR:
176		if (vm_guest == VM_GUEST_NO &&
177		    CPUID_TO_FAMILY(cpu_id) == 0x6 &&
178		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
179		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
180			tsc_is_invariant = 1;
181		break;
182	}
183
184	if (cpu_high >= 6) {
185		do_cpuid(6, regs);
186		if ((regs[2] & CPUID_PERF_STAT) != 0) {
187			/*
188			 * XXX Some emulators expose host CPUID without actual
189			 * support for these MSRs.  We must test whether they
190			 * really work.
191			 */
192			wrmsr(MSR_MPERF, 0);
193			wrmsr(MSR_APERF, 0);
194			DELAY(10);
195			if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0)
196				tsc_perf_stat = 1;
197		}
198	}
199
200	if (tsc_skip_calibration) {
201		if (cpu_vendor_id == CPU_VENDOR_INTEL)
202			tsc_freq_intel();
203		return;
204	}
205
206	if (bootverbose)
207	        printf("Calibrating TSC clock ... ");
208	tsc1 = rdtsc();
209	DELAY(1000000);
210	tsc2 = rdtsc();
211	tsc_freq = tsc2 - tsc1;
212	if (bootverbose)
213		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
214}
215
216void
217init_TSC(void)
218{
219
220	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
221		return;
222
223	probe_tsc_freq();
224
225	/*
226	 * Inform CPU accounting about our boot-time clock rate.  This will
227	 * be updated if someone loads a cpufreq driver after boot that
228	 * discovers a new max frequency.
229	 */
230	if (tsc_freq != 0)
231		set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);
232
233	if (tsc_is_invariant)
234		return;
235
236	/* Register to find out about changes in CPU frequency. */
237	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
238	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
239	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
240	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
241	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
242	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
243}
244
245void
246init_TSC_tc(void)
247{
248
249	if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled)
250		return;
251
252	/*
253	 * We can not use the TSC if we support APM.  Precise timekeeping
254	 * on an APM'ed machine is at best a fools pursuit, since
255	 * any and all of the time spent in various SMM code can't
256	 * be reliably accounted for.  Reading the RTC is your only
257	 * source of reliable time info.  The i8254 loses too, of course,
258	 * but we need to have some kind of time...
259	 * We don't know at this point whether APM is going to be used
260	 * or not, nor when it might be activated.  Play it safe.
261	 */
262	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
263		tsc_timecounter.tc_quality = -1000;
264		if (bootverbose)
265			printf("TSC timecounter disabled: APM enabled.\n");
266	}
267
268#ifdef SMP
269	/*
270	 * We can not use the TSC in SMP mode unless the TSCs on all CPUs
271	 * are somehow synchronized.  Some hardware configurations do
272	 * this, but we have no way of determining whether this is the
273	 * case, so we do not use the TSC in multi-processor systems
274	 * unless the user indicated (by setting kern.timecounter.smp_tsc
275	 * to 1) that he believes that his TSCs are synchronized.
276	 */
277	if (mp_ncpus > 1 && !smp_tsc)
278		tsc_timecounter.tc_quality = -100;
279#endif
280
281	if (tsc_freq != 0) {
282		tsc_timecounter.tc_frequency = tsc_freq;
283		tc_init(&tsc_timecounter);
284	}
285}
286
287/*
288 * When cpufreq levels change, find out about the (new) max frequency.  We
289 * use this to update CPU accounting in case it got a lower estimate at boot.
290 */
291static void
292tsc_levels_changed(void *arg, int unit)
293{
294	device_t cf_dev;
295	struct cf_level *levels;
296	int count, error;
297	uint64_t max_freq;
298
299	/* Only use values from the first CPU, assuming all are equal. */
300	if (unit != 0)
301		return;
302
303	/* Find the appropriate cpufreq device instance. */
304	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
305	if (cf_dev == NULL) {
306		printf("tsc_levels_changed() called but no cpufreq device?\n");
307		return;
308	}
309
310	/* Get settings from the device and find the max frequency. */
311	count = 64;
312	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
313	if (levels == NULL)
314		return;
315	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
316	if (error == 0 && count != 0) {
317		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
318		set_cputicker(rdtsc, max_freq, 1);
319	} else
320		printf("tsc_levels_changed: no max freq found\n");
321	free(levels, M_TEMP);
322}
323
324/*
325 * If the TSC timecounter is in use, veto the pending change.  It may be
326 * possible in the future to handle a dynamically-changing timecounter rate.
327 */
328static void
329tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
330{
331
332	if (*status != 0 || timecounter != &tsc_timecounter)
333		return;
334
335	printf("timecounter TSC must not be in use when "
336	    "changing frequencies; change denied\n");
337	*status = EBUSY;
338}
339
340/* Update TSC freq with the value indicated by the caller. */
341static void
342tsc_freq_changed(void *arg, const struct cf_level *level, int status)
343{
344	uint64_t freq;
345
346	/* If there was an error during the transition, don't do anything. */
347	if (tsc_disabled || status != 0)
348		return;
349
350	/* Total setting for this level gives the new frequency in MHz. */
351	freq = (uint64_t)level->total_set.freq * 1000000;
352	atomic_store_rel_64(&tsc_freq, freq);
353	atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq);
354}
355
356static int
357sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
358{
359	int error;
360	uint64_t freq;
361
362	freq = atomic_load_acq_64(&tsc_freq);
363	if (freq == 0)
364		return (EOPNOTSUPP);
365	error = sysctl_handle_64(oidp, &freq, 0, req);
366	if (error == 0 && req->newptr != NULL) {
367		atomic_store_rel_64(&tsc_freq, freq);
368		atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq);
369	}
370	return (error);
371}
372
373SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW,
374    0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency");
375
376static u_int
377tsc_get_timecount(struct timecounter *tc)
378{
379
380	return (rdtsc32());
381}
382