prof_machdep.c revision 219461
/*-
 * Copyright (c) 1996 Bruce D. Evans.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
26202878Srdivacky
27193323Sed#include <sys/cdefs.h>
28193323Sed__FBSDID("$FreeBSD: head/sys/i386/isa/prof_machdep.c 219461 2011-03-10 20:02:58Z jkim $");
29193323Sed
30193323Sed#ifdef GUPROF
31202878Srdivacky#include "opt_i586_guprof.h"
32202878Srdivacky#include "opt_perfmon.h"
33193323Sed
34193323Sed#include <sys/param.h>
35193323Sed#include <sys/systm.h>
36193323Sed#include <sys/bus.h>
37193323Sed#include <sys/cpu.h>
38193323Sed#include <sys/eventhandler.h>
39193323Sed#include <sys/gmon.h>
40193323Sed#include <sys/kernel.h>
41193323Sed#include <sys/smp.h>
42193323Sed#include <sys/sysctl.h>
43193323Sed
44193323Sed#include <machine/clock.h>
45193323Sed#include <machine/perfmon.h>
46193323Sed#include <machine/timerreg.h>
47193323Sed
/*
 * Clock sources that cputime() can sample.  cputime_clock holds one of
 * these values; the machdep.cputime_clock sysctl reads and writes it.
 */
#define	CPUTIME_CLOCK_UNINITIALIZED	0
#define	CPUTIME_CLOCK_I8254		1
#define	CPUTIME_CLOCK_TSC		2
#define	CPUTIME_CLOCK_I586_PMC		3
/* Left shift applied to raw i8254 counts and to the i8254 profrate. */
#define	CPUTIME_CLOCK_I8254_SHIFT	7

int	cputime_bias = 1;	/* initialize for locality of reference */

/* Selected clock source; startguprof() picks one if still uninitialized. */
static int	cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
/* Counter configuration word handed to perfmon_setup(). */
static u_int	cputime_clock_pmc_conf = I586_PMC_GUPROF;
/* Nonzero while startguprof() has a performance counter running. */
static int	cputime_clock_pmc_init;
/* Copy of the gmonparam taken by startguprof(), restored by stopguprof(). */
static struct gmonparam saved_gmp;
#endif
#if defined(I586_CPU) || defined(I686_CPU)
/* Nonzero while profiling runs off the TSC; read by tsc_freq_changed(). */
static int	cputime_prof_active;
#endif
65193323Sed#endif /* GUPROF */
66193323Sed
67193323Sed#ifdef __GNUCLIKE_ASM
/*
 * Profiling entry stubs __mcount and .mcount.
 *
 * Both return immediately when the word at _gmonparam+GM_STATE equals
 * GMON_PROF_OFF.  Otherwise they load frompc into %edx (from 4(%esp) for
 * __mcount, from 4(%ebp) for .mcount) and their own return address into
 * %eax, save the flags, push %eax and then %edx, disable interrupts and
 * call mcount.  The two pushed words are dropped and the saved flags are
 * restored before returning.
 */
__asm("								\n\
GM_STATE	=	0					\n\
GMON_PROF_OFF	=	3					\n\
								\n\
	.text							\n\
	.p2align 4,0x90						\n\
	.globl	__mcount					\n\
	.type	__mcount,@function				\n\
__mcount:							\n\
	#							\n\
	# Check that we are profiling.  Do it early for speed.	\n\
	#							\n\
	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
	je	.mcount_exit					\n\
	#							\n\
	# __mcount is the same as [.]mcount except the caller	\n\
	# hasn't changed the stack except to call here, so the	\n\
	# caller's raddr is above our raddr.			\n\
	#							\n\
	movl	4(%esp),%edx					\n\
	jmp	.got_frompc					\n\
								\n\
	.p2align 4,0x90						\n\
	.globl	.mcount						\n\
.mcount:							\n\
	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
	je	.mcount_exit					\n\
	#							\n\
	# The caller's stack frame has already been built, so	\n\
	# %ebp is the caller's frame pointer.  The caller's	\n\
	# raddr is in the caller's frame following the caller's	\n\
	# caller's frame pointer.				\n\
	#							\n\
	movl	4(%ebp),%edx					\n\
.got_frompc:							\n\
	#							\n\
	# Our raddr is the caller's pc.				\n\
	#							\n\
	movl	(%esp),%eax					\n\
								\n\
	pushfl							\n\
	pushl	%eax						\n\
	pushl	%edx						\n\
	cli							\n\
	call	mcount						\n\
	addl	$8,%esp						\n\
	popfl							\n\
.mcount_exit:							\n\
	ret	$0						\n\
");
118202878Srdivacky#else /* !__GNUCLIKE_ASM */
119202878Srdivacky#error "this file needs to be ported to your compiler"
120202878Srdivacky#endif /* __GNUCLIKE_ASM */
121202878Srdivacky
122202878Srdivacky#ifdef GUPROF
/*
 * [.]mexitcount saves the return register(s), loads selfpc and calls
 * mexitcount(selfpc) to do the work.  Someday it should be in a machine
 * dependent file together with cputime(), __mcount and [.]mcount.  cputime()
 * can't just be put in machdep.c because it has to be compiled without -pg.
 *
 * The stub does nothing unless the word at _gmonparam+GM_STATE equals
 * GMON_PROF_HIRES.  %edx and %eax are saved around the call, and the call
 * itself runs with interrupts disabled; the saved flags are restored
 * afterwards.
 */
#ifdef __GNUCLIKE_ASM
__asm("								\n\
	.text							\n\
#								\n\
# Dummy label to be seen when gprof -u hides [.]mexitcount.	\n\
#								\n\
	.p2align 4,0x90						\n\
	.globl	__mexitcount					\n\
	.type	__mexitcount,@function				\n\
__mexitcount:							\n\
	nop							\n\
								\n\
GMON_PROF_HIRES	=	4					\n\
								\n\
	.p2align 4,0x90						\n\
	.globl	.mexitcount					\n\
.mexitcount:							\n\
	cmpl	$GMON_PROF_HIRES,_gmonparam+GM_STATE		\n\
	jne	.mexitcount_exit				\n\
	pushl	%edx						\n\
	pushl	%eax						\n\
	movl	8(%esp),%eax					\n\
	pushfl							\n\
	pushl	%eax						\n\
	cli							\n\
	call	mexitcount					\n\
	addl	$4,%esp						\n\
	popfl							\n\
	popl	%eax						\n\
	popl	%edx						\n\
.mexitcount_exit:						\n\
	ret	$0						\n\
");
#endif /* __GNUCLIKE_ASM */
163202878Srdivacky
164202878Srdivacky/*
165202878Srdivacky * Return the time elapsed since the last call.  The units are machine-
166202878Srdivacky * dependent.
167202878Srdivacky */
168202878Srdivackyint
169202878Srdivackycputime()
170202878Srdivacky{
171202878Srdivacky	u_int count;
172202878Srdivacky	int delta;
173202878Srdivacky#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) && \
174202878Srdivacky    defined(PERFMON) && defined(I586_PMC_GUPROF)
175202878Srdivacky	u_quad_t event_count;
176202878Srdivacky#endif
177202878Srdivacky	u_char high, low;
178202878Srdivacky	static u_int prev_count;
179202878Srdivacky
180202878Srdivacky#if defined(I586_CPU) || defined(I686_CPU)
181202878Srdivacky	if (cputime_clock == CPUTIME_CLOCK_TSC) {
182202878Srdivacky		/*
183202878Srdivacky		 * Scale the TSC a little to make cputime()'s frequency
184202878Srdivacky		 * fit in an int, assuming that the TSC frequency fits
185202878Srdivacky		 * in a u_int.  Use a fixed scale since dynamic scaling
186202878Srdivacky		 * would be slower and we can't really use the low bit
187202878Srdivacky		 * of precision.
188202878Srdivacky		 */
189202878Srdivacky		count = (u_int)rdtsc() & ~1u;
190202878Srdivacky		delta = (int)(count - prev_count) >> 1;
191202878Srdivacky		prev_count = count;
192202878Srdivacky		return (delta);
193202878Srdivacky	}
194202878Srdivacky#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
195202878Srdivacky	if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
196202878Srdivacky		/*
197202878Srdivacky		 * XXX permon_read() should be inlined so that the
198202878Srdivacky		 * perfmon module doesn't need to be compiled with
199202878Srdivacky		 * profiling disabled and so that it is fast.
200193323Sed		 */
201193323Sed		perfmon_read(0, &event_count);
202193323Sed
203193323Sed		count = (u_int)event_count;
204193323Sed		delta = (int)(count - prev_count);
205193323Sed		prev_count = count;
206193323Sed		return (delta);
207193323Sed	}
208193323Sed#endif /* PERFMON && I586_PMC_GUPROF && !SMP */
209193323Sed#endif /* I586_CPU || I686_CPU */
210193323Sed
211193323Sed	/*
212193323Sed	 * Read the current value of the 8254 timer counter 0.
213193323Sed	 */
214193323Sed	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
215193323Sed	low = inb(TIMER_CNTR0);
216193323Sed	high = inb(TIMER_CNTR0);
217193323Sed	count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
218193323Sed
219193323Sed	/*
220193323Sed	 * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
221193323Sed	 * While profiling is enabled, this routine is called at least twice
222193323Sed	 * per timer reset (for mcounting and mexitcounting hardclock()),
223193323Sed	 * so at most one reset has occurred since the last call, and one
224193323Sed	 * has occurred iff the current count is larger than the previous
225193323Sed	 * count.  This allows counter underflow to be detected faster
226193323Sed	 * than in microtime().
227193323Sed	 */
228193323Sed	delta = prev_count - count;
229193323Sed	prev_count = count;
230193323Sed	if ((int) delta <= 0)
231193323Sed		return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT));
232193323Sed	return (delta);
233193323Sed}
234193323Sed
235193323Sedstatic int
236193323Sedsysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS)
237193323Sed{
238193323Sed	int clock;
239193323Sed	int error;
240193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF)
241193323Sed	int event;
242193323Sed	struct pmc pmc;
243193323Sed#endif
244193323Sed
245193323Sed	clock = cputime_clock;
246193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF)
247193323Sed	if (clock == CPUTIME_CLOCK_I586_PMC) {
248193323Sed		pmc.pmc_val = cputime_clock_pmc_conf;
249193323Sed		clock += pmc.pmc_event;
250193323Sed	}
251193323Sed#endif
252193323Sed	error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req);
253193323Sed	if (error == 0 && req->newptr != NULL) {
254193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF)
255193323Sed		if (clock >= CPUTIME_CLOCK_I586_PMC) {
256193323Sed			event = clock - CPUTIME_CLOCK_I586_PMC;
257193323Sed			if (event >= 256)
258193323Sed				return (EINVAL);
259193323Sed			pmc.pmc_num = 0;
260193323Sed			pmc.pmc_event = event;
261193323Sed			pmc.pmc_unit = 0;
262193323Sed			pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR;
263193323Sed			pmc.pmc_mask = 0;
264193323Sed			cputime_clock_pmc_conf = pmc.pmc_val;
265193323Sed			cputime_clock = CPUTIME_CLOCK_I586_PMC;
266193323Sed		} else
267193323Sed#endif
268193323Sed		{
269193323Sed			if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC)
270193323Sed				return (EINVAL);
271193323Sed			cputime_clock = clock;
272193323Sed		}
273193323Sed	}
274193323Sed	return (error);
275193323Sed}
276193323Sed
277193323SedSYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW,
278193323Sed	    0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", "");
279198090Srdivacky
280198090Srdivacky/*
281198090Srdivacky * The start and stop routines need not be here since we turn off profiling
282198090Srdivacky * before calling them.  They are here for convenience.
283198090Srdivacky */
284193323Sed
285193323Sedvoid
286193323Sedstartguprof(gp)
287193323Sed	struct gmonparam *gp;
288193323Sed{
289193323Sed	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
290193323Sed		cputime_clock = CPUTIME_CLOCK_I8254;
291193323Sed#if defined(I586_CPU) || defined(I686_CPU)
292193323Sed		if (tsc_freq != 0 && mp_ncpus == 1)
293193323Sed			cputime_clock = CPUTIME_CLOCK_TSC;
294193323Sed#endif
295193323Sed	}
296193323Sed	gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT;
297193323Sed#if defined(I586_CPU) || defined(I686_CPU)
298193323Sed	if (cputime_clock == CPUTIME_CLOCK_TSC) {
299193323Sed		gp->profrate = tsc_freq >> 1;
300193323Sed		cputime_prof_active = 1;
301193323Sed	}
302193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF)
303193323Sed	else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
304193323Sed		if (perfmon_avail() &&
305193323Sed		    perfmon_setup(0, cputime_clock_pmc_conf) == 0) {
306193323Sed			if (perfmon_start(0) != 0)
307198090Srdivacky				perfmon_fini(0);
308193323Sed			else {
309193323Sed				/* XXX 1 event == 1 us. */
310193323Sed				gp->profrate = 1000000;
311193323Sed
312193323Sed				saved_gmp = *gp;
313193323Sed
314193323Sed				/* Zap overheads.  They are invalid. */
315193323Sed				gp->cputime_overhead = 0;
316193323Sed				gp->mcount_overhead = 0;
317193323Sed				gp->mcount_post_overhead = 0;
318193323Sed				gp->mcount_pre_overhead = 0;
319193323Sed				gp->mexitcount_overhead = 0;
320193323Sed				gp->mexitcount_post_overhead = 0;
321198090Srdivacky				gp->mexitcount_pre_overhead = 0;
322193323Sed
323193323Sed				cputime_clock_pmc_init = TRUE;
324193323Sed			}
325193323Sed		}
326193323Sed	}
327193323Sed#endif /* PERFMON && I586_PMC_GUPROF */
328193323Sed#endif /* I586_CPU || I686_CPU */
329193323Sed	cputime_bias = 0;
330193323Sed	cputime();
331193323Sed}
332193323Sed
/*
 * Undo what startguprof() set up.
 *
 * If a performance counter was started, the gmonparam saved by
 * startguprof() is copied back over *gp and the counter is released.
 * On I586_CPU/I686_CPU kernels the TSC-profiling flag is cleared.
 */
void
stopguprof(struct gmonparam *gp)
{
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (cputime_clock_pmc_init) {
		*gp = saved_gmp;
		perfmon_fini(0);
		cputime_clock_pmc_init = FALSE;
	}
#endif
#if defined(I586_CPU) || defined(I686_CPU)
	/*
	 * Clear the flag unconditionally: the sysctl may have switched
	 * cputime_clock away from the TSC since startguprof() set it, and
	 * a stale 1 would produce a bogus frequency-change warning later.
	 */
	cputime_prof_active = 0;
#endif
}
349193323Sed
350193323Sed#if defined(I586_CPU) || defined(I686_CPU)
351193323Sed/* If the cpu frequency changed while profiling, report a warning. */
352193323Sedstatic void
353193323Sedtsc_freq_changed(void *arg, const struct cf_level *level, int status)
354193323Sed{
355193323Sed
356198090Srdivacky	/*
357202878Srdivacky	 * If there was an error during the transition or
358202878Srdivacky	 * TSC is P-state invariant, don't do anything.
359193323Sed	 */
360193323Sed	if (status != 0 || tsc_is_invariant)
361193323Sed		return;
362193323Sed	if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC)
363193323Sed		printf("warning: cpu freq changed while profiling active\n");
364193323Sed}
365193323Sed
/* Run tsc_freq_changed() after every cpufreq transition. */
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_ANY);
368193323Sed#endif /* I586_CPU || I686_CPU */
369193323Sed
370193323Sed#endif /* GUPROF */
371193323Sed