prof_machdep.c revision 19269
/*-
 * Copyright (c) 1996 Bruce D. Evans.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$Id: prof_machdep.c,v 1.3 1996/10/17 19:32:10 bde Exp $
 */
2819000Sbde
2919000Sbde#ifdef GUPROF
3019000Sbde#include "opt_cpu.h"
3119000Sbde#include "opt_i586_guprof.h"
3219000Sbde#include "opt_perfmon.h"
3319000Sbde
3413107Sbde#include <sys/param.h>
3513107Sbde#include <sys/systm.h>
3619000Sbde#include <sys/gmon.h>
3719000Sbde
3813107Sbde#include <machine/clock.h>
3919000Sbde#include <machine/perfmon.h>
4019000Sbde#include <machine/profile.h>
4119000Sbde#endif
4219000Sbde
4319269Sasami#ifdef PC98
4419269Sasami#include <pc98/pc98/pc98.h>
4519269Sasami#else
4613107Sbde#include <i386/isa/isa.h>
4719269Sasami#endif
4813107Sbde#include <i386/isa/timerreg.h>
4913107Sbde
#ifdef GUPROF
/*
 * Clock sources that cputime() can read.  cputime_clock holds one of these
 * values; it stays CPUTIME_CLOCK_UNINITIALIZED until startguprof() picks a
 * source.
 */
#define	CPUTIME_CLOCK_UNINITIALIZED	0
#define	CPUTIME_CLOCK_I8254		1
#define	CPUTIME_CLOCK_I586_CTR		2
#define	CPUTIME_CLOCK_I586_PMC		3

/*
 * Left shift applied to i8254 counter readings, to the i8254 wrap-around
 * adjustment in cputime(), and to timer_freq when startguprof() reports it
 * as the profiling rate.
 */
#define	CPUTIME_CLOCK_I8254_SHIFT	7

int	cputime_bias = 1;	/* initialize for locality of reference */

static int	cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
#ifdef I586_PMC_GUPROF
/* Configuration value handed to perfmon_setup() for counter 0. */
static u_int	cputime_clock_pmc_conf = I586_PMC_GUPROF;
/* Set TRUE by startguprof() once counter 0 is running; cleared by stopguprof(). */
static int	cputime_clock_pmc_init;
/* Copy of the caller's gmonparam taken just before its overheads are zeroed. */
static struct gmonparam saved_gmp;
#endif
#endif /* GUPROF */
6613107Sbde
#ifdef __GNUC__
/*
 * Assembler entry points __mcount and mcount.
 *
 * Both return immediately when the word at __gmonparam+GM_STATE equals
 * GMON_PROF_OFF.  Otherwise they load the caller's return address (frompc)
 * into %edx and their own return address (selfpc) into %eax, save the flags,
 * disable interrupts, call _mcount with the two addresses on the stack
 * (selfpc pushed first, frompc last), pop the arguments and restore the flags.
 * %eax and %edx are overwritten by the hook itself.
 *
 * GM_STATE and GMON_PROF_OFF are hand-written assembler constants.
 * NOTE(review): presumably they mirror the layout/values in <sys/gmon.h>;
 * confirm they stay in sync.
 *
 * The body is written as concatenated one-line string literals because a
 * string literal may not contain a raw newline, and __asm__ is used so the
 * keyword is available in strict ISO modes as well.
 */
__asm__(
	"GM_STATE\t=\t0\n"
	"GMON_PROF_OFF\t=\t3\n"
	"\n"
	"\t.text\n"
	"\t.align\t4,0x90\n"
	"\t.globl\t__mcount\n"
	"__mcount:\n"
	/*
	 * Check that we are profiling.  Do it early for speed.
	 */
	"\tcmpl\t$GMON_PROF_OFF,__gmonparam+GM_STATE\n"
	"\tje\tLmcount_exit\n"
	/*
	 * __mcount is the same as mcount except the caller hasn't changed
	 * the stack except to call here, so the caller's raddr is above
	 * our raddr.
	 */
	"\tmovl\t4(%esp),%edx\n"
	"\tjmp\tLgot_frompc\n"
	"\n"
	"\t.align\t4,0x90\n"
	"\t.globl\tmcount\n"
	"mcount:\n"
	"\tcmpl\t$GMON_PROF_OFF,__gmonparam+GM_STATE\n"
	"\tje\tLmcount_exit\n"
	/*
	 * The caller's stack frame has already been built, so %ebp is
	 * the caller's frame pointer.  The caller's raddr is in the
	 * caller's frame following the caller's caller's frame pointer.
	 */
	"\tmovl\t4(%ebp),%edx\n"
	"Lgot_frompc:\n"
	/*
	 * Our raddr is the caller's pc.
	 */
	"\tmovl\t(%esp),%eax\n"
	"\n"
	/* Save flags, then run _mcount(frompc, selfpc) with interrupts off. */
	"\tpushfl\n"
	"\tpushl\t%eax\n"
	"\tpushl\t%edx\n"
	"\tcli\n"
	"\tcall\t_mcount\n"
	"\taddl\t$8,%esp\n"
	"\tpopfl\n"
	"Lmcount_exit:\n"
	"\tret\n"
);
#else /* !__GNUC__ */
#error "prof_machdep.c needs GNU C file-scope asm"
#endif /* __GNUC__ */
11913107Sbde
12013107Sbde#ifdef GUPROF
/*
 * mexitcount saves the return register(s), loads selfpc and calls
 * mexitcount(selfpc) (the assembler symbol _mexitcount) to do the work.
 * Someday it should be in a machine dependent file together with cputime(),
 * __mcount and mcount.  cputime() can't just be put in machdep.c because it
 * has to be compiled without -pg.
 *
 * The hook does nothing unless the word at __gmonparam+GM_STATE equals
 * GMON_PROF_HIRES.  When it does, %edx and %eax are pushed, the hook's own
 * return address (now at 8(%esp)) is loaded as selfpc, the flags are saved,
 * interrupts are disabled for the call, and everything is restored in
 * reverse order afterwards.
 *
 * GM_STATE is not defined here; it comes from the assembler block that
 * defines __mcount/mcount earlier in this file.
 *
 * The body is written as concatenated one-line string literals because a
 * string literal may not contain a raw newline, and __asm__ is used so the
 * keyword is available in strict ISO modes as well.
 */
#ifdef __GNUC__
__asm__(
	"\t.text\n"
	/*
	 * Dummy label to be seen when gprof -u hides mexitcount.
	 */
	"\t.align\t4,0x90\n"
	"\t.globl\t__mexitcount\n"
	"__mexitcount:\n"
	"\tnop\n"
	"\n"
	"GMON_PROF_HIRES\t=\t4\n"
	"\n"
	"\t.align\t4,0x90\n"
	"\t.globl\tmexitcount\n"
	"mexitcount:\n"
	"\tcmpl\t$GMON_PROF_HIRES,__gmonparam+GM_STATE\n"
	"\tjne\tLmexitcount_exit\n"
	/* Preserve the return registers across the call. */
	"\tpushl\t%edx\n"
	"\tpushl\t%eax\n"
	/* Two words were pushed, so our return address is at 8(%esp). */
	"\tmovl\t8(%esp),%eax\n"
	"\tpushfl\n"
	"\tpushl\t%eax\n"
	"\tcli\n"
	"\tcall\t_mexitcount\n"
	"\taddl\t$4,%esp\n"
	"\tpopfl\n"
	"\tpopl\t%eax\n"
	"\tpopl\t%edx\n"
	"Lmexitcount_exit:\n"
	"\tret\n"
);
#else /* !__GNUC__ */
#error "prof_machdep.c needs GNU C file-scope asm"
#endif /* __GNUC__ */
16213107Sbde
16313107Sbde/*
16413107Sbde * Return the time elapsed since the last call.  The units are machine-
16513107Sbde * dependent.
16613107Sbde */
16719000Sbdeint
16813107Sbdecputime()
16913107Sbde{
17013107Sbde	u_int count;
17119000Sbde	int delta;
17219000Sbde#ifdef I586_PMC_GUPROF
17319000Sbde	u_quad_t event_count;
17419000Sbde#endif
17519000Sbde	u_char high, low;
17613107Sbde	static u_int prev_count;
17713107Sbde
17819000Sbde#if defined(I586_CPU) || defined(I686_CPU)
17919000Sbde	if (cputime_clock == CPUTIME_CLOCK_I586_CTR) {
18019000Sbde		count = (u_int)rdtsc();
18119000Sbde		delta = (int)(count - prev_count);
18219000Sbde		prev_count = count;
18319000Sbde		return (delta);
18419000Sbde	}
18519000Sbde#ifdef I586_PMC_GUPROF
18619000Sbde	if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
18719000Sbde		/*
18819000Sbde		 * XXX permon_read() should be inlined so that the
18919000Sbde		 * perfmon module doesn't need to be compiled with
19019000Sbde		 * profiling disabled and so that it is fast.
19119000Sbde		 */
19219000Sbde		perfmon_read(0, &event_count);
19319000Sbde
19419000Sbde		count = (u_int)event_count;
19519000Sbde		delta = (int)(count - prev_count);
19619000Sbde		prev_count = count;
19719000Sbde		return (delta);
19819000Sbde	}
19919000Sbde#endif /* I586_PMC_GUPROF */
20019000Sbde#endif /* I586_CPU or I686_CPU */
20119000Sbde
20213107Sbde	/*
20313107Sbde	 * Read the current value of the 8254 timer counter 0.
20413107Sbde	 */
20513107Sbde	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
20613107Sbde	low = inb(TIMER_CNTR0);
20719000Sbde	high = inb(TIMER_CNTR0);
20819000Sbde	count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
20913107Sbde
21013107Sbde	/*
21113107Sbde	 * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
21213107Sbde	 * While profiling is enabled, this routine is called at least twice
21313107Sbde	 * per timer reset (for mcounting and mexitcounting hardclock()),
21413107Sbde	 * so at most one reset has occurred since the last call, and one
21513107Sbde	 * has occurred iff the current count is larger than the previous
21613107Sbde	 * count.  This allows counter underflow to be detected faster
21713107Sbde	 * than in microtime().
21813107Sbde	 */
21913107Sbde	delta = prev_count - count;
22013107Sbde	prev_count = count;
22113107Sbde	if ((int) delta <= 0)
22219000Sbde		return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT));
22313107Sbde	return (delta);
22413107Sbde}
22519000Sbde
22619000Sbde/*
22719000Sbde * The start and stop routines need not be here since we turn off profiling
22819000Sbde * before calling them.  They are here for convenience.
22919000Sbde */
23019000Sbde
23119000Sbdevoid
23219000Sbdestartguprof(gp)
23319000Sbde	struct gmonparam *gp;
23419000Sbde{
23519000Sbde	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
23619000Sbde		cputime_clock = CPUTIME_CLOCK_I8254;
23719000Sbde#if defined(I586_CPU) || defined(I686_CPU)
23819000Sbde		if (i586_ctr_freq != 0)
23919000Sbde			cputime_clock = CPUTIME_CLOCK_I586_CTR;
24019000Sbde#endif
24119000Sbde	}
24219000Sbde	gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT;
24319000Sbde#if defined(I586_CPU) || defined(I686_CPU)
24419000Sbde	if (cputime_clock == CPUTIME_CLOCK_I586_CTR)
24519000Sbde		gp->profrate = i586_ctr_freq;
24619000Sbde#ifdef I586_PMC_GUPROF
24719000Sbde	else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
24819000Sbde		if (perfmon_avail() &&
24919000Sbde		    perfmon_setup(0, cputime_clock_pmc_conf) == 0) {
25019000Sbde			if (perfmon_start(0) != 0)
25119000Sbde				perfmon_fini(0);
25219000Sbde			else {
25319000Sbde				/* XXX 1 event == 1 us. */
25419000Sbde				gp->profrate = 1000000;
25519000Sbde
25619000Sbde				saved_gmp = *gp;
25719000Sbde
25819000Sbde				/* Zap overheads.  They are invalid. */
25919000Sbde				gp->cputime_overhead = 0;
26019000Sbde				gp->mcount_overhead = 0;
26119000Sbde				gp->mcount_post_overhead = 0;
26219000Sbde				gp->mcount_pre_overhead = 0;
26319000Sbde				gp->mexitcount_overhead = 0;
26419000Sbde				gp->mexitcount_post_overhead = 0;
26519000Sbde				gp->mexitcount_pre_overhead = 0;
26619000Sbde
26719000Sbde				cputime_clock_pmc_init = TRUE;
26819000Sbde			}
26919000Sbde		}
27019000Sbde	}
27119000Sbde#endif /* I586_PMC_GUPROF */
27219000Sbde#endif /* I586_CPU or I686_CPU */
27319000Sbde	cputime_bias = 0;
27419000Sbde	cputime();
27519000Sbde}
27619000Sbde
/*
 * Undo what startguprof() did for the performance-monitoring counter.
 *
 * If startguprof() marked counter 0 as running, *gp is restored from the
 * copy saved at that time, the counter is released with perfmon_fini() and
 * the flag is cleared.  Otherwise, or when the guard below is not
 * satisfied, this function does nothing and gp is not touched.
 *
 * NOTE(review): saved_gmp and cputime_clock_pmc_init are declared under
 * I586_PMC_GUPROF alone, while this guard also requires PERFMON; confirm
 * that the two conditions are meant to differ.
 *
 * gp: profiling parameters to restore.
 */
void
stopguprof(struct gmonparam *gp)
{
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (cputime_clock_pmc_init) {
		*gp = saved_gmp;
		perfmon_fini(0);
		cputime_clock_pmc_init = FALSE;
	}
#endif
}
28919000Sbde
29019000Sbde#else /* !GUPROF */
#ifdef __GNUC__
/*
 * Without GUPROF, mexitcount is a global label whose whole body is a single
 * ret; presumably it exists so that references to mexitcount still resolve
 * when high-resolution profiling is not configured.
 *
 * The body is written as concatenated one-line string literals because a
 * string literal may not contain a raw newline, and __asm__ is used so the
 * keyword is available in strict ISO modes as well.
 */
__asm__(
	"\t.text\n"
	"\t.align\t4,0x90\n"
	"\t.globl\tmexitcount\n"
	"mexitcount:\n"
	"\tret\n"
);
#else /* !__GNUC__ */
#error "prof_machdep.c needs GNU C file-scope asm"
#endif /* __GNUC__ */
30213107Sbde#endif /* GUPROF */
303