tsc.c revision 216279
111397Sswallace/*-
211397Sswallace * Copyright (c) 1998-2003 Poul-Henning Kamp
311397Sswallace * All rights reserved.
411397Sswallace *
5123786Speter * Redistribution and use in source and binary forms, with or without
6123786Speter * modification, are permitted provided that the following conditions
711397Sswallace * are met:
811397Sswallace * 1. Redistributions of source code must retain the above copyright
911397Sswallace *    notice, this list of conditions and the following disclaimer.
1011397Sswallace * 2. Redistributions in binary form must reproduce the above copyright
1111397Sswallace *    notice, this list of conditions and the following disclaimer in the
1211397Sswallace *    documentation and/or other materials provided with the distribution.
1311397Sswallace *
1411397Sswallace * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1511397Sswallace * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1660290Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1760290Sbde * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1811397Sswallace * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1911397Sswallace * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2011397Sswallace * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2111397Sswallace * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2211397Sswallace * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2311397Sswallace * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2411397Sswallace * SUCH DAMAGE.
2511397Sswallace */
2611397Sswallace
2760290Sbde#include <sys/cdefs.h>
2811397Sswallace__FBSDID("$FreeBSD: head/sys/i386/i386/tsc.c 216279 2010-12-07 23:19:49Z jkim $");
2911397Sswallace
3060290Sbde#include "opt_clock.h"
3160290Sbde
3260290Sbde#include <sys/param.h>
3311397Sswallace#include <sys/bus.h>
3411397Sswallace#include <sys/cpu.h>
3511397Sswallace#include <sys/malloc.h>
3611397Sswallace#include <sys/systm.h>
3711397Sswallace#include <sys/sysctl.h>
3811397Sswallace#include <sys/time.h>
3911397Sswallace#include <sys/timetc.h>
4011397Sswallace#include <sys/kernel.h>
4111397Sswallace#include <sys/power.h>
4211397Sswallace#include <sys/smp.h>
4311397Sswallace#include <machine/clock.h>
4411397Sswallace#include <machine/cputypes.h>
4511397Sswallace#include <machine/md_var.h>
4611397Sswallace#include <machine/specialreg.h>
4711397Sswallace
4811397Sswallace#include "cpufreq_if.h"
4911397Sswallace
5011397Sswallaceuint64_t	tsc_freq;
5111397Sswallaceint		tsc_is_broken;
5211397Sswallaceint		tsc_is_invariant;
5311397Sswallaceint		tsc_present;
5411397Sswallacestatic eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
5511397Sswallace
5660290SbdeSYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
5760290Sbde    &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant");
5811397SswallaceTUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
5960290Sbde
6060290Sbde#ifdef SMP
6160290Sbdestatic int	smp_tsc;
6260290SbdeSYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
6360290Sbde    "Indicates whether the TSC is safe to use in SMP mode");
6460290SbdeTUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
6560290Sbde#endif
6660290Sbde
/* cpufreq event handlers (registered in init_TSC()). */
static void	tsc_freq_changing(void *arg, const struct cf_level *level,
		    int *status);
static void	tsc_freq_changed(void *arg, const struct cf_level *level,
		    int status);
static void	tsc_levels_changed(void *arg, int unit);

/* Timecounter read method. */
static unsigned	tsc_get_timecount(struct timecounter *tc);
7311397Sswallace
7411397Sswallacestatic struct timecounter tsc_timecounter = {
7560290Sbde	tsc_get_timecount,	/* get_timecount */
7660290Sbde	0,			/* no poll_pps */
7711397Sswallace	~0u,			/* counter_mask */
78	0,			/* frequency */
79	"TSC",			/* name */
80	800,			/* quality (adjusted in code) */
81};
82
83void
84init_TSC(void)
85{
86	u_int64_t tscval[2];
87
88	if (cpu_feature & CPUID_TSC)
89		tsc_present = 1;
90	else
91		tsc_present = 0;
92
93	if (!tsc_present)
94		return;
95
96	if (bootverbose)
97	        printf("Calibrating TSC clock ... ");
98
99	tscval[0] = rdtsc();
100	DELAY(1000000);
101	tscval[1] = rdtsc();
102
103	tsc_freq = tscval[1] - tscval[0];
104	if (bootverbose)
105		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
106
107	switch (cpu_vendor_id) {
108	case CPU_VENDOR_AMD:
109		if ((amd_pminfo & AMDPM_TSC_INVARIANT) ||
110		    CPUID_TO_FAMILY(cpu_id) >= 0x10 || cpu_id == 0x60fb2)
111			tsc_is_invariant = 1;
112		break;
113	case CPU_VENDOR_INTEL:
114		if ((amd_pminfo & AMDPM_TSC_INVARIANT) ||
115		    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
116		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
117		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
118		    CPUID_TO_MODEL(cpu_id) >= 0x3))
119			tsc_is_invariant = 1;
120		break;
121	case CPU_VENDOR_CENTAUR:
122		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
123		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
124		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
125			tsc_is_invariant = 1;
126		break;
127	}
128
129	/*
130	 * Inform CPU accounting about our boot-time clock rate.  This will
131	 * be updated if someone loads a cpufreq driver after boot that
132	 * discovers a new max frequency.
133	 */
134	set_cputicker(rdtsc, tsc_freq, 1);
135
136	if (tsc_is_invariant)
137		return;
138
139	/* Register to find out about changes in CPU frequency. */
140	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
141	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
142	tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change,
143	    tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST);
144	tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed,
145	    tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY);
146}
147
148void
149init_TSC_tc(void)
150{
151
152	if (!tsc_present)
153		return;
154
155	/*
156	 * We can not use the TSC if we support APM.  Precise timekeeping
157	 * on an APM'ed machine is at best a fools pursuit, since
158	 * any and all of the time spent in various SMM code can't
159	 * be reliably accounted for.  Reading the RTC is your only
160	 * source of reliable time info.  The i8254 loses too, of course,
161	 * but we need to have some kind of time...
162	 * We don't know at this point whether APM is going to be used
163	 * or not, nor when it might be activated.  Play it safe.
164	 */
165	if (power_pm_get_type() == POWER_PM_TYPE_APM) {
166		tsc_timecounter.tc_quality = -1000;
167		if (bootverbose)
168			printf("TSC timecounter disabled: APM enabled.\n");
169	}
170
171#ifdef SMP
172	/*
173	 * We can not use the TSC in SMP mode unless the TSCs on all CPUs
174	 * are somehow synchronized.  Some hardware configurations do
175	 * this, but we have no way of determining whether this is the
176	 * case, so we do not use the TSC in multi-processor systems
177	 * unless the user indicated (by setting kern.timecounter.smp_tsc
178	 * to 1) that he believes that his TSCs are synchronized.
179	 */
180	if (mp_ncpus > 1 && !smp_tsc)
181		tsc_timecounter.tc_quality = -100;
182#endif
183
184	if (tsc_freq != 0 && !tsc_is_broken) {
185		tsc_timecounter.tc_frequency = tsc_freq;
186		tc_init(&tsc_timecounter);
187	}
188}
189
190/*
191 * When cpufreq levels change, find out about the (new) max frequency.  We
192 * use this to update CPU accounting in case it got a lower estimate at boot.
193 */
194static void
195tsc_levels_changed(void *arg, int unit)
196{
197	device_t cf_dev;
198	struct cf_level *levels;
199	int count, error;
200	uint64_t max_freq;
201
202	/* Only use values from the first CPU, assuming all are equal. */
203	if (unit != 0)
204		return;
205
206	/* Find the appropriate cpufreq device instance. */
207	cf_dev = devclass_get_device(devclass_find("cpufreq"), unit);
208	if (cf_dev == NULL) {
209		printf("tsc_levels_changed() called but no cpufreq device?\n");
210		return;
211	}
212
213	/* Get settings from the device and find the max frequency. */
214	count = 64;
215	levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT);
216	if (levels == NULL)
217		return;
218	error = CPUFREQ_LEVELS(cf_dev, levels, &count);
219	if (error == 0 && count != 0) {
220		max_freq = (uint64_t)levels[0].total_set.freq * 1000000;
221		set_cputicker(rdtsc, max_freq, 1);
222	} else
223		printf("tsc_levels_changed: no max freq found\n");
224	free(levels, M_TEMP);
225}
226
227/*
228 * If the TSC timecounter is in use, veto the pending change.  It may be
229 * possible in the future to handle a dynamically-changing timecounter rate.
230 */
231static void
232tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
233{
234
235	if (*status != 0 || timecounter != &tsc_timecounter)
236		return;
237
238	printf("timecounter TSC must not be in use when "
239	    "changing frequencies; change denied\n");
240	*status = EBUSY;
241}
242
243/* Update TSC freq with the value indicated by the caller. */
244static void
245tsc_freq_changed(void *arg, const struct cf_level *level, int status)
246{
247
248	/* If there was an error during the transition, don't do anything. */
249	if (status != 0)
250		return;
251
252	/* Total setting for this level gives the new frequency in MHz. */
253	tsc_freq = (uint64_t)level->total_set.freq * 1000000;
254	tsc_timecounter.tc_frequency = tsc_freq;
255}
256
257static int
258sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS)
259{
260	int error;
261	uint64_t freq;
262
263	if (tsc_timecounter.tc_frequency == 0)
264		return (EOPNOTSUPP);
265	freq = tsc_freq;
266	error = sysctl_handle_quad(oidp, &freq, 0, req);
267	if (error == 0 && req->newptr != NULL) {
268		tsc_freq = freq;
269		tsc_timecounter.tc_frequency = tsc_freq;
270	}
271	return (error);
272}
273
274SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_QUAD | CTLFLAG_RW,
275    0, 0, sysctl_machdep_tsc_freq, "QU", "");
276
/*
 * Timecounter read method: the current TSC value narrowed to unsigned.
 * 'tc' is unused.
 */
static unsigned
tsc_get_timecount(struct timecounter *tc)
{

	return ((unsigned)rdtsc());
}
282