prof_machdep.c revision 19269
119000Sbde/*- 219000Sbde * Copyright (c) 1996 Bruce D. Evans. 319000Sbde * All rights reserved. 415146Swollman * 519000Sbde * Redistribution and use in source and binary forms, with or without 619000Sbde * modification, are permitted provided that the following conditions 719000Sbde * are met: 819000Sbde * 1. Redistributions of source code must retain the above copyright 919000Sbde * notice, this list of conditions and the following disclaimer. 1019000Sbde * 2. Redistributions in binary form must reproduce the above copyright 1119000Sbde * notice, this list of conditions and the following disclaimer in the 1219000Sbde * documentation and/or other materials provided with the distribution. 1319000Sbde * 1419000Sbde * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1519000Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1619000Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1719000Sbde * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1819000Sbde * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1919000Sbde * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2019000Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2119000Sbde * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2219000Sbde * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2319000Sbde * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2419000Sbde * SUCH DAMAGE. 2519000Sbde * 2619269Sasami * $Id: prof_machdep.c,v 1.3 1996/10/17 19:32:10 bde Exp $ 2715146Swollman */ 2819000Sbde 2919000Sbde#ifdef GUPROF 3019000Sbde#include "opt_cpu.h" 3119000Sbde#include "opt_i586_guprof.h" 3219000Sbde#include "opt_perfmon.h" 3319000Sbde 3413107Sbde#include <sys/param.h> 3513107Sbde#include <sys/systm.h> 3619000Sbde#include <sys/gmon.h> 3719000Sbde 3813107Sbde#include <machine/clock.h> 3919000Sbde#include <machine/perfmon.h> 4019000Sbde#include <machine/profile.h> 4119000Sbde#endif 4219000Sbde 4319269Sasami#ifdef PC98 4419269Sasami#include <pc98/pc98/pc98.h> 4519269Sasami#else 4613107Sbde#include <i386/isa/isa.h> 4719269Sasami#endif 4813107Sbde#include <i386/isa/timerreg.h> 4913107Sbde 5013107Sbde#ifdef GUPROF 5119000Sbde#define CPUTIME_CLOCK_UNINITIALIZED 0 5219000Sbde#define CPUTIME_CLOCK_I8254 1 5319000Sbde#define CPUTIME_CLOCK_I586_CTR 2 5419000Sbde#define CPUTIME_CLOCK_I586_PMC 3 5519000Sbde#define CPUTIME_CLOCK_I8254_SHIFT 7 5619000Sbde 5719000Sbdeint cputime_bias = 1; /* initialize for locality of reference */ 5819000Sbde 5919000Sbdestatic int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; 6019000Sbde#ifdef I586_PMC_GUPROF 6119000Sbdestatic u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; 6219000Sbdestatic int cputime_clock_pmc_init; 6319000Sbdestatic struct gmonparam saved_gmp; 6413107Sbde#endif 6519000Sbde#endif /* GUPROF */ 6613107Sbde 6713107Sbde#ifdef __GNUC__ 6813107Sbdeasm(" 6913107SbdeGM_STATE = 0 7013107SbdeGMON_PROF_OFF = 3 7113107Sbde 7213107Sbde .text 7313107Sbde .align 4,0x90 7413107Sbde .globl __mcount 7513107Sbde__mcount: 7613107Sbde # 7713107Sbde # Check that we are profiling. Do it early for speed. 7813107Sbde # 7913107Sbde cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE 8013107Sbde je Lmcount_exit 8113107Sbde # 8213107Sbde # __mcount is the same as mcount except the caller hasn't changed 8313107Sbde # the stack except to call here, so the caller's raddr is above 8413107Sbde # our raddr. 8513107Sbde # 8613107Sbde movl 4(%esp),%edx 8713107Sbde jmp Lgot_frompc 8813107Sbde 8913107Sbde .align 4,0x90 9013107Sbde .globl mcount 9113107Sbdemcount: 9213107Sbde cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE 9313107Sbde je Lmcount_exit 9413107Sbde # 9513107Sbde # The caller's stack frame has already been built, so %ebp is 9613107Sbde # the caller's frame pointer. The caller's raddr is in the 9713107Sbde # caller's frame following the caller's caller's frame pointer. 9813107Sbde # 9913107Sbde movl 4(%ebp),%edx 10013107SbdeLgot_frompc: 10113107Sbde # 10213107Sbde # Our raddr is the caller's pc. 10313107Sbde # 10413107Sbde movl (%esp),%eax 10513107Sbde 10619000Sbde pushfl 10713107Sbde pushl %eax 10813107Sbde pushl %edx 10913107Sbde cli 11013107Sbde call _mcount 11113107Sbde addl $8,%esp 11219000Sbde popfl 11313107SbdeLmcount_exit: 11413107Sbde ret 11513107Sbde"); 11613107Sbde#else /* !__GNUC__ */ 11713107Sbde#error 11813107Sbde#endif /* __GNUC__ */ 11913107Sbde 12013107Sbde#ifdef GUPROF 12113107Sbde/* 12213107Sbde * mexitcount saves the return register(s), loads selfpc and calls 12313107Sbde * mexitcount(selfpc) to do the work. Someday it should be in a machine 12413107Sbde * dependent file together with cputime(), __mcount and mcount. cputime() 12513107Sbde * can't just be put in machdep.c because it has to be compiled without -pg. 12613107Sbde */ 12713107Sbde#ifdef __GNUC__ 12813107Sbdeasm(" 12913107Sbde .text 13013107Sbde# 13113107Sbde# Dummy label to be seen when gprof -u hides mexitcount. 13213107Sbde# 13313107Sbde .align 4,0x90 13413107Sbde .globl __mexitcount 13513107Sbde__mexitcount: 13613107Sbde nop 13713107Sbde 13813107SbdeGMON_PROF_HIRES = 4 13913107Sbde 14013107Sbde .align 4,0x90 14113107Sbde .globl mexitcount 14213107Sbdemexitcount: 14313107Sbde cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE 14413107Sbde jne Lmexitcount_exit 14513107Sbde pushl %edx 14613107Sbde pushl %eax 14713107Sbde movl 8(%esp),%eax 14819000Sbde pushfl 14913107Sbde pushl %eax 15013107Sbde cli 15113107Sbde call _mexitcount 15213107Sbde addl $4,%esp 15319000Sbde popfl 15413107Sbde popl %eax 15513107Sbde popl %edx 15613107SbdeLmexitcount_exit: 15713107Sbde ret 15813107Sbde"); 15913107Sbde#else /* !__GNUC__ */ 16013107Sbde#error 16113107Sbde#endif /* __GNUC__ */ 16213107Sbde 16313107Sbde/* 16413107Sbde * Return the time elapsed since the last call. The units are machine- 16513107Sbde * dependent. 16613107Sbde */ 16719000Sbdeint 16813107Sbdecputime() 16913107Sbde{ 17013107Sbde u_int count; 17119000Sbde int delta; 17219000Sbde#ifdef I586_PMC_GUPROF 17319000Sbde u_quad_t event_count; 17419000Sbde#endif 17519000Sbde u_char high, low; 17613107Sbde static u_int prev_count; 17713107Sbde 17819000Sbde#if defined(I586_CPU) || defined(I686_CPU) 17919000Sbde if (cputime_clock == CPUTIME_CLOCK_I586_CTR) { 18019000Sbde count = (u_int)rdtsc(); 18119000Sbde delta = (int)(count - prev_count); 18219000Sbde prev_count = count; 18319000Sbde return (delta); 18419000Sbde } 18519000Sbde#ifdef I586_PMC_GUPROF 18619000Sbde if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 18719000Sbde /* 18819000Sbde * XXX permon_read() should be inlined so that the 18919000Sbde * perfmon module doesn't need to be compiled with 19019000Sbde * profiling disabled and so that it is fast. 19119000Sbde */ 19219000Sbde perfmon_read(0, &event_count); 19319000Sbde 19419000Sbde count = (u_int)event_count; 19519000Sbde delta = (int)(count - prev_count); 19619000Sbde prev_count = count; 19719000Sbde return (delta); 19819000Sbde } 19919000Sbde#endif /* I586_PMC_GUPROF */ 20019000Sbde#endif /* I586_CPU or I686_CPU */ 20119000Sbde 20213107Sbde /* 20313107Sbde * Read the current value of the 8254 timer counter 0. 20413107Sbde */ 20513107Sbde outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); 20613107Sbde low = inb(TIMER_CNTR0); 20719000Sbde high = inb(TIMER_CNTR0); 20819000Sbde count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; 20913107Sbde 21013107Sbde /* 21113107Sbde * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 21213107Sbde * While profiling is enabled, this routine is called at least twice 21313107Sbde * per timer reset (for mcounting and mexitcounting hardclock()), 21413107Sbde * so at most one reset has occurred since the last call, and one 21513107Sbde * has occurred iff the current count is larger than the previous 21613107Sbde * count. This allows counter underflow to be detected faster 21713107Sbde * than in microtime(). 21813107Sbde */ 21913107Sbde delta = prev_count - count; 22013107Sbde prev_count = count; 22113107Sbde if ((int) delta <= 0) 22219000Sbde return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); 22313107Sbde return (delta); 22413107Sbde} 22519000Sbde 22619000Sbde/* 22719000Sbde * The start and stop routines need not be here since we turn off profiling 22819000Sbde * before calling them. They are here for convenience. 22919000Sbde */ 23019000Sbde 23119000Sbdevoid 23219000Sbdestartguprof(gp) 23319000Sbde struct gmonparam *gp; 23419000Sbde{ 23519000Sbde if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { 23619000Sbde cputime_clock = CPUTIME_CLOCK_I8254; 23719000Sbde#if defined(I586_CPU) || defined(I686_CPU) 23819000Sbde if (i586_ctr_freq != 0) 23919000Sbde cputime_clock = CPUTIME_CLOCK_I586_CTR; 24019000Sbde#endif 24119000Sbde } 24219000Sbde gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; 24319000Sbde#if defined(I586_CPU) || defined(I686_CPU) 24419000Sbde if (cputime_clock == CPUTIME_CLOCK_I586_CTR) 24519000Sbde gp->profrate = i586_ctr_freq; 24619000Sbde#ifdef I586_PMC_GUPROF 24719000Sbde else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 24819000Sbde if (perfmon_avail() && 24919000Sbde perfmon_setup(0, cputime_clock_pmc_conf) == 0) { 25019000Sbde if (perfmon_start(0) != 0) 25119000Sbde perfmon_fini(0); 25219000Sbde else { 25319000Sbde /* XXX 1 event == 1 us. */ 25419000Sbde gp->profrate = 1000000; 25519000Sbde 25619000Sbde saved_gmp = *gp; 25719000Sbde 25819000Sbde /* Zap overheads. They are invalid. */ 25919000Sbde gp->cputime_overhead = 0; 26019000Sbde gp->mcount_overhead = 0; 26119000Sbde gp->mcount_post_overhead = 0; 26219000Sbde gp->mcount_pre_overhead = 0; 26319000Sbde gp->mexitcount_overhead = 0; 26419000Sbde gp->mexitcount_post_overhead = 0; 26519000Sbde gp->mexitcount_pre_overhead = 0; 26619000Sbde 26719000Sbde cputime_clock_pmc_init = TRUE; 26819000Sbde } 26919000Sbde } 27019000Sbde } 27119000Sbde#endif /* I586_PMC_GUPROF */ 27219000Sbde#endif /* I586_CPU or I686_CPU */ 27319000Sbde cputime_bias = 0; 27419000Sbde cputime(); 27519000Sbde} 27619000Sbde 27719000Sbdevoid 27819000Sbdestopguprof(gp) 27919000Sbde struct gmonparam *gp; 28019000Sbde{ 28119000Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 28219000Sbde if (cputime_clock_pmc_init) { 28319000Sbde *gp = saved_gmp; 28419000Sbde perfmon_fini(0); 28519000Sbde cputime_clock_pmc_init = FALSE; 28619000Sbde } 28719000Sbde#endif 28819000Sbde} 28919000Sbde 29019000Sbde#else /* !GUPROF */ 29113107Sbde#ifdef __GNUC__ 29213107Sbdeasm(" 29313107Sbde .text 29413107Sbde .align 4,0x90 29513107Sbde .globl mexitcount 29613107Sbdemexitcount: 29713107Sbde ret 29813107Sbde"); 29913107Sbde#else /* !__GNUC__ */ 30013107Sbde#error 30113107Sbde#endif /* __GNUC__ */ 30213107Sbde#endif /* GUPROF */ 303