119000Sbde/*-
219000Sbde * Copyright (c) 1996 Bruce D. Evans.
319000Sbde * All rights reserved.
415146Swollman *
519000Sbde * Redistribution and use in source and binary forms, with or without
619000Sbde * modification, are permitted provided that the following conditions
719000Sbde * are met:
819000Sbde * 1. Redistributions of source code must retain the above copyright
919000Sbde *    notice, this list of conditions and the following disclaimer.
1019000Sbde * 2. Redistributions in binary form must reproduce the above copyright
1119000Sbde *    notice, this list of conditions and the following disclaimer in the
1219000Sbde *    documentation and/or other materials provided with the distribution.
1319000Sbde *
1419000Sbde * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1519000Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1619000Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1719000Sbde * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1819000Sbde * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1919000Sbde * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2019000Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2119000Sbde * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2219000Sbde * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2319000Sbde * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2419000Sbde * SUCH DAMAGE.
2515146Swollman */
2619000Sbde
27115703Sobrien#include <sys/cdefs.h>
28115703Sobrien__FBSDID("$FreeBSD$");
29115703Sobrien
3019000Sbde#ifdef GUPROF
3119000Sbde#include "opt_i586_guprof.h"
3219000Sbde#include "opt_perfmon.h"
3319000Sbde
34163727Sbde#include <sys/param.h>
35163727Sbde#include <sys/systm.h>
36167905Snjl#include <sys/bus.h>
37167905Snjl#include <sys/cpu.h>
38167905Snjl#include <sys/eventhandler.h>
3919000Sbde#include <sys/gmon.h>
4031395Sbde#include <sys/kernel.h>
41163756Sbde#include <sys/smp.h>
4231395Sbde#include <sys/sysctl.h>
4319000Sbde
4413107Sbde#include <machine/clock.h>
4519000Sbde#include <machine/perfmon.h>
46163727Sbde#include <machine/timerreg.h>
4719000Sbde
/*
 * Clock sources that cputime() can sample.  cputime_clock holds one of
 * these values.  Through the machdep.cputime_clock sysctl, a value at or
 * above CPUTIME_CLOCK_I586_PMC additionally carries a PMC event number as
 * an offset from CPUTIME_CLOCK_I586_PMC.
 */
enum {
	CPUTIME_CLOCK_UNINITIALIZED = 0,	/* no source chosen yet */
	CPUTIME_CLOCK_I8254 = 1,
	CPUTIME_CLOCK_TSC = 2,
	CPUTIME_CLOCK_I586_PMC = 3
};

/* Left shift applied to raw i8254 counts and to the i8254 frequency. */
enum { CPUTIME_CLOCK_I8254_SHIFT = 7 };

/* The nonzero initializer is deliberate: initialize for locality of reference. */
int	cputime_bias = 1;

/* Currently selected clock source (one of CPUTIME_CLOCK_*). */
static int	cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;

#if defined(I586_CPU) || defined(I686_CPU)
/* Set by startguprof() when profiling runs off the TSC. */
static int	cputime_prof_active;
#endif

#if defined(PERFMON) && defined(I586_PMC_GUPROF)
/* Copy of *gp taken by startguprof() and restored by stopguprof(). */
static struct gmonparam saved_gmp;
/* TRUE once startguprof() has set up and started PMC 0. */
static int	cputime_clock_pmc_init;
/* PMC configuration word handed to perfmon_setup(). */
static u_int	cputime_clock_pmc_conf = I586_PMC_GUPROF;
#endif
6519000Sbde#endif /* GUPROF */
6613107Sbde
67143063Sjoerg#ifdef __GNUCLIKE_ASM
/*
 * Function-entry profiling stubs for i386, written in assembly.
 *
 * Both entry points return immediately when the word at
 * _gmonparam+GM_STATE equals GMON_PROF_OFF.  Otherwise they load the
 * caller's return address into %edx and their own return address (the
 * caller's pc) into %eax, save the flags, push %eax then %edx as the
 * arguments, disable interrupts, call mcount, drop the two arguments and
 * restore the flags.
 *
 *   __mcount  the caller has not changed the stack except to call here,
 *             so the caller's return address is just above ours.
 *   .mcount   the caller's stack frame has already been built, so %ebp
 *             is the caller's frame pointer.
 */
__asm(
    "GM_STATE\t=\t0\n"
    "GMON_PROF_OFF\t=\t3\n"
    "\n"
    "\t.text\n"
    "\t.p2align 4,0x90\n"
    "\t.globl\t__mcount\n"
    "\t.type\t__mcount,@function\n"
    "__mcount:\n"
    /* Check that we are profiling.  Do it early for speed. */
    "\tcmpl\t$GMON_PROF_OFF,_gmonparam+GM_STATE\n"
    "\tje\t.mcount_exit\n"
    /*
     * __mcount is the same as [.]mcount except the caller hasn't
     * changed the stack except to call here, so the caller's raddr
     * is above our raddr.
     */
    "\tmovl\t4(%esp),%edx\n"
    "\tjmp\t.got_frompc\n"
    "\n"
    "\t.p2align 4,0x90\n"
    "\t.globl\t.mcount\n"
    ".mcount:\n"
    "\tcmpl\t$GMON_PROF_OFF,_gmonparam+GM_STATE\n"
    "\tje\t.mcount_exit\n"
    /*
     * The caller's stack frame has already been built, so %ebp is
     * the caller's frame pointer.  The caller's raddr is in the
     * caller's frame following the caller's caller's frame pointer.
     */
    "\tmovl\t4(%ebp),%edx\n"
    ".got_frompc:\n"
    /* Our raddr is the caller's pc. */
    "\tmovl\t(%esp),%eax\n"
    "\n"
    "\tpushfl\n"
    "\tpushl\t%eax\n"
    "\tpushl\t%edx\n"
    "\tcli\n"
    "\tcall\tmcount\n"
    "\taddl\t$8,%esp\n"
    "\tpopfl\n"
    ".mcount_exit:\n"
    "\tret\t$0\n"
);
118143063Sjoerg#else /* !__GNUCLIKE_ASM */
119163727Sbde#error "this file needs to be ported to your compiler"
120143063Sjoerg#endif /* __GNUCLIKE_ASM */
12113107Sbde
12213107Sbde#ifdef GUPROF
12313107Sbde/*
12446548Sbde * [.]mexitcount saves the return register(s), loads selfpc and calls
12513107Sbde * mexitcount(selfpc) to do the work.  Someday it should be in a machine
12646548Sbde * dependent file together with cputime(), __mcount and [.]mcount.  cputime()
12713107Sbde * can't just be put in machdep.c because it has to be compiled without -pg.
12813107Sbde */
129143063Sjoerg#ifdef __GNUCLIKE_ASM
/*
 * Function-exit profiling stubs for i386, written in assembly.
 *
 * __mexitcount is only a dummy label (a single nop) to be seen when
 * gprof -u hides [.]mexitcount.
 *
 * .mexitcount does nothing unless the word at _gmonparam+GM_STATE equals
 * GMON_PROF_HIRES.  When it does, the stub saves %edx and %eax, fetches
 * its own return address from 8(%esp) (past the two saved registers),
 * saves the flags, disables interrupts, calls mexitcount with that
 * address as the single argument, and then restores flags and registers.
 * GM_STATE is the assembler symbol assigned in the __mcount asm statement
 * earlier in this file.
 */
__asm(
    "\t.text\n"
    /* Dummy label to be seen when gprof -u hides [.]mexitcount. */
    "\t.p2align 4,0x90\n"
    "\t.globl\t__mexitcount\n"
    "\t.type\t__mexitcount,@function\n"
    "__mexitcount:\n"
    "\tnop\n"
    "\n"
    "GMON_PROF_HIRES\t=\t4\n"
    "\n"
    "\t.p2align 4,0x90\n"
    "\t.globl\t.mexitcount\n"
    ".mexitcount:\n"
    "\tcmpl\t$GMON_PROF_HIRES,_gmonparam+GM_STATE\n"
    "\tjne\t.mexitcount_exit\n"
    "\tpushl\t%edx\n"
    "\tpushl\t%eax\n"
    "\tmovl\t8(%esp),%eax\n"
    "\tpushfl\n"
    "\tpushl\t%eax\n"
    "\tcli\n"
    "\tcall\tmexitcount\n"
    "\taddl\t$4,%esp\n"
    "\tpopfl\n"
    "\tpopl\t%eax\n"
    "\tpopl\t%edx\n"
    ".mexitcount_exit:\n"
    "\tret\t$0\n"
);
162143063Sjoerg#endif /* __GNUCLIKE_ASM */
16313107Sbde
16413107Sbde/*
16513107Sbde * Return the time elapsed since the last call.  The units are machine-
16613107Sbde * dependent.
16713107Sbde */
16819000Sbdeint
16913107Sbdecputime()
17013107Sbde{
17113107Sbde	u_int count;
17219000Sbde	int delta;
173220433Sjkim#if (defined(I586_CPU) || defined(I686_CPU)) && \
174220433Sjkim    defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
17519000Sbde	u_quad_t event_count;
17619000Sbde#endif
17719000Sbde	u_char high, low;
17813107Sbde	static u_int prev_count;
17913107Sbde
180163756Sbde#if defined(I586_CPU) || defined(I686_CPU)
18132005Sphk	if (cputime_clock == CPUTIME_CLOCK_TSC) {
182129744Sbde		/*
183129744Sbde		 * Scale the TSC a little to make cputime()'s frequency
184129744Sbde		 * fit in an int, assuming that the TSC frequency fits
185129744Sbde		 * in a u_int.  Use a fixed scale since dynamic scaling
186129744Sbde		 * would be slower and we can't really use the low bit
187129744Sbde		 * of precision.
188129744Sbde		 */
189129744Sbde		count = (u_int)rdtsc() & ~1u;
190129744Sbde		delta = (int)(count - prev_count) >> 1;
19119000Sbde		prev_count = count;
19219000Sbde		return (delta);
19319000Sbde	}
194163756Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
19519000Sbde	if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
19619000Sbde		/*
19719000Sbde		 * XXX permon_read() should be inlined so that the
19819000Sbde		 * perfmon module doesn't need to be compiled with
19919000Sbde		 * profiling disabled and so that it is fast.
20019000Sbde		 */
20119000Sbde		perfmon_read(0, &event_count);
20219000Sbde
20319000Sbde		count = (u_int)event_count;
20419000Sbde		delta = (int)(count - prev_count);
20519000Sbde		prev_count = count;
20619000Sbde		return (delta);
20719000Sbde	}
208163756Sbde#endif /* PERFMON && I586_PMC_GUPROF && !SMP */
209163756Sbde#endif /* I586_CPU || I686_CPU */
21019000Sbde
21113107Sbde	/*
21213107Sbde	 * Read the current value of the 8254 timer counter 0.
21313107Sbde	 */
21413107Sbde	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
21513107Sbde	low = inb(TIMER_CNTR0);
21619000Sbde	high = inb(TIMER_CNTR0);
21719000Sbde	count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
21813107Sbde
21913107Sbde	/*
22013107Sbde	 * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
22113107Sbde	 * While profiling is enabled, this routine is called at least twice
22213107Sbde	 * per timer reset (for mcounting and mexitcounting hardclock()),
22313107Sbde	 * so at most one reset has occurred since the last call, and one
22413107Sbde	 * has occurred iff the current count is larger than the previous
22513107Sbde	 * count.  This allows counter underflow to be detected faster
22613107Sbde	 * than in microtime().
22713107Sbde	 */
22813107Sbde	delta = prev_count - count;
22913107Sbde	prev_count = count;
23013107Sbde	if ((int) delta <= 0)
231177631Sphk		return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT));
23213107Sbde	return (delta);
23313107Sbde}
23419000Sbde
23531395Sbdestatic int
23662573Sphksysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS)
23731395Sbde{
23831395Sbde	int clock;
23941794Sbde	int error;
24041794Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF)
24131395Sbde	int event;
24231395Sbde	struct pmc pmc;
24341794Sbde#endif
24431395Sbde
24531395Sbde	clock = cputime_clock;
24631395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF)
24731395Sbde	if (clock == CPUTIME_CLOCK_I586_PMC) {
24831395Sbde		pmc.pmc_val = cputime_clock_pmc_conf;
24931395Sbde		clock += pmc.pmc_event;
25031395Sbde	}
25131395Sbde#endif
25231395Sbde	error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req);
25331395Sbde	if (error == 0 && req->newptr != NULL) {
25431395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF)
25531395Sbde		if (clock >= CPUTIME_CLOCK_I586_PMC) {
25631395Sbde			event = clock - CPUTIME_CLOCK_I586_PMC;
25731395Sbde			if (event >= 256)
25831395Sbde				return (EINVAL);
25931395Sbde			pmc.pmc_num = 0;
26031395Sbde			pmc.pmc_event = event;
26131395Sbde			pmc.pmc_unit = 0;
26231395Sbde			pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR;
26331395Sbde			pmc.pmc_mask = 0;
26431395Sbde			cputime_clock_pmc_conf = pmc.pmc_val;
26531395Sbde			cputime_clock = CPUTIME_CLOCK_I586_PMC;
26631395Sbde		} else
26731395Sbde#endif
26831395Sbde		{
26931395Sbde			if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC)
27031395Sbde				return (EINVAL);
27131395Sbde			cputime_clock = clock;
27231395Sbde		}
27331395Sbde	}
27431395Sbde	return (error);
27531395Sbde}
27631395Sbde
27731395SbdeSYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW,
27831395Sbde	    0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", "");
27931395Sbde
28019000Sbde/*
28119000Sbde * The start and stop routines need not be here since we turn off profiling
28219000Sbde * before calling them.  They are here for convenience.
28319000Sbde */
28419000Sbde
28519000Sbdevoid
28619000Sbdestartguprof(gp)
28719000Sbde	struct gmonparam *gp;
28819000Sbde{
289220433Sjkim#if defined(I586_CPU) || defined(I686_CPU)
290220433Sjkim	uint64_t freq;
291220433Sjkim
292220433Sjkim	freq = atomic_load_acq_64(&tsc_freq);
29319000Sbde	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
294220433Sjkim		if (freq != 0 && mp_ncpus == 1)
29532005Sphk			cputime_clock = CPUTIME_CLOCK_TSC;
296220433Sjkim		else
297220433Sjkim			cputime_clock = CPUTIME_CLOCK_I8254;
29819000Sbde	}
299167905Snjl	if (cputime_clock == CPUTIME_CLOCK_TSC) {
300220433Sjkim		gp->profrate = freq >> 1;
301167905Snjl		cputime_prof_active = 1;
302220433Sjkim	} else
303220433Sjkim		gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT;
30431395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF)
305220433Sjkim	if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
30619000Sbde		if (perfmon_avail() &&
30719000Sbde		    perfmon_setup(0, cputime_clock_pmc_conf) == 0) {
30819000Sbde			if (perfmon_start(0) != 0)
30919000Sbde				perfmon_fini(0);
31019000Sbde			else {
31119000Sbde				/* XXX 1 event == 1 us. */
31219000Sbde				gp->profrate = 1000000;
31319000Sbde
31419000Sbde				saved_gmp = *gp;
31519000Sbde
31619000Sbde				/* Zap overheads.  They are invalid. */
31719000Sbde				gp->cputime_overhead = 0;
31819000Sbde				gp->mcount_overhead = 0;
31919000Sbde				gp->mcount_post_overhead = 0;
32019000Sbde				gp->mcount_pre_overhead = 0;
32119000Sbde				gp->mexitcount_overhead = 0;
32219000Sbde				gp->mexitcount_post_overhead = 0;
32319000Sbde				gp->mexitcount_pre_overhead = 0;
32419000Sbde
32519000Sbde				cputime_clock_pmc_init = TRUE;
32619000Sbde			}
32719000Sbde		}
32819000Sbde	}
32931395Sbde#endif /* PERFMON && I586_PMC_GUPROF */
330220433Sjkim#else /* !(I586_CPU || I686_CPU) */
331220433Sjkim	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED)
332220433Sjkim		cputime_clock = CPUTIME_CLOCK_I8254;
333220433Sjkim	gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT;
334163756Sbde#endif /* I586_CPU || I686_CPU */
33519000Sbde	cputime_bias = 0;
33619000Sbde	cputime();
33719000Sbde}
33819000Sbde
/*
 * Undo what startguprof() set up.
 *
 * If the PMC was started, restore *gp from the copy saved at start time,
 * release PMC 0 and clear the "PMC initialized" flag.  The "profiling
 * off the TSC" flag is cleared unconditionally: testing cputime_clock
 * here would leave the flag stale if the clock source was switched
 * through the sysctl while profiling was running.
 */
void
stopguprof(struct gmonparam *gp)
{
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (cputime_clock_pmc_init) {
		*gp = saved_gmp;
		perfmon_fini(0);
		cputime_clock_pmc_init = FALSE;
	}
#endif
#if defined(I586_CPU) || defined(I686_CPU)
	cputime_prof_active = 0;
#endif
}
355167905Snjl
356167905Snjl#if defined(I586_CPU) || defined(I686_CPU)
357167905Snjl/* If the cpu frequency changed while profiling, report a warning. */
358167905Snjlstatic void
359167905Snjltsc_freq_changed(void *arg, const struct cf_level *level, int status)
360167905Snjl{
361167905Snjl
362184102Sjkim	/*
363184102Sjkim	 * If there was an error during the transition or
364184102Sjkim	 * TSC is P-state invariant, don't do anything.
365184102Sjkim	 */
366184102Sjkim	if (status != 0 || tsc_is_invariant)
367167905Snjl		return;
368167905Snjl	if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC)
369167905Snjl		printf("warning: cpu freq changed while profiling active\n");
370167905Snjl}
371167905Snjl
372167905SnjlEVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
373167905Snjl    EVENTHANDLER_PRI_ANY);
374167905Snjl#endif /* I586_CPU || I686_CPU */
375167905Snjl
37613107Sbde#endif /* GUPROF */
377