119000Sbde/*- 219000Sbde * Copyright (c) 1996 Bruce D. Evans. 319000Sbde * All rights reserved. 415146Swollman * 519000Sbde * Redistribution and use in source and binary forms, with or without 619000Sbde * modification, are permitted provided that the following conditions 719000Sbde * are met: 819000Sbde * 1. Redistributions of source code must retain the above copyright 919000Sbde * notice, this list of conditions and the following disclaimer. 1019000Sbde * 2. Redistributions in binary form must reproduce the above copyright 1119000Sbde * notice, this list of conditions and the following disclaimer in the 1219000Sbde * documentation and/or other materials provided with the distribution. 1319000Sbde * 1419000Sbde * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1519000Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1619000Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1719000Sbde * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1819000Sbde * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1919000Sbde * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2019000Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2119000Sbde * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2219000Sbde * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2319000Sbde * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2419000Sbde * SUCH DAMAGE. 2515146Swollman */ 2619000Sbde 27115703Sobrien#include <sys/cdefs.h> 28115703Sobrien__FBSDID("$FreeBSD$"); 29115703Sobrien 3019000Sbde#ifdef GUPROF 3119000Sbde#include "opt_i586_guprof.h" 3219000Sbde#include "opt_perfmon.h" 3319000Sbde 34163727Sbde#include <sys/param.h> 35163727Sbde#include <sys/systm.h> 36167905Snjl#include <sys/bus.h> 37167905Snjl#include <sys/cpu.h> 38167905Snjl#include <sys/eventhandler.h> 3919000Sbde#include <sys/gmon.h> 4031395Sbde#include <sys/kernel.h> 41163756Sbde#include <sys/smp.h> 4231395Sbde#include <sys/sysctl.h> 4319000Sbde 4413107Sbde#include <machine/clock.h> 4519000Sbde#include <machine/perfmon.h> 46163727Sbde#include <machine/timerreg.h> 4719000Sbde 4819000Sbde#define CPUTIME_CLOCK_UNINITIALIZED 0 4919000Sbde#define CPUTIME_CLOCK_I8254 1 5032005Sphk#define CPUTIME_CLOCK_TSC 2 5119000Sbde#define CPUTIME_CLOCK_I586_PMC 3 5219000Sbde#define CPUTIME_CLOCK_I8254_SHIFT 7 5319000Sbde 5419000Sbdeint cputime_bias = 1; /* initialize for locality of reference */ 5519000Sbde 5619000Sbdestatic int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; 57163756Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 5819000Sbdestatic u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; 5919000Sbdestatic int cputime_clock_pmc_init; 6019000Sbdestatic struct gmonparam saved_gmp; 6113107Sbde#endif 62167905Snjl#if defined(I586_CPU) || defined(I686_CPU) 63167905Snjlstatic int cputime_prof_active; 64167905Snjl#endif 6519000Sbde#endif /* GUPROF */ 6613107Sbde 67143063Sjoerg#ifdef __GNUCLIKE_ASM 6835303Sbde__asm(" \n\ 6935303SbdeGM_STATE = 0 \n\ 7035303SbdeGMON_PROF_OFF = 3 \n\ 7135303Sbde \n\ 7235303Sbde .text \n\ 7350379Speter .p2align 4,0x90 \n\ 7435303Sbde .globl __mcount \n\ 7546548Sbde .type __mcount,@function \n\ 7635303Sbde__mcount: \n\ 7735303Sbde # \n\ 7835303Sbde # Check that we are profiling. Do it early for speed. \n\ 7935303Sbde # \n\ 80163726Sbde cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 81167905Snjl je .mcount_exit \n\ 82167905Snjl # \n\ 83167905Snjl # __mcount is the same as [.]mcount except the caller \n\ 84167905Snjl # hasn't changed the stack except to call here, so the \n\ 8535303Sbde # caller's raddr is above our raddr. \n\ 8635303Sbde # \n\ 87167905Snjl movl 4(%esp),%edx \n\ 88167905Snjl jmp .got_frompc \n\ 89167905Snjl \n\ 90167905Snjl .p2align 4,0x90 \n\ 91167905Snjl .globl .mcount \n\ 92163726Sbde.mcount: \n\ 93163726Sbde cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 9446548Sbde je .mcount_exit \n\ 9535303Sbde # \n\ 9635303Sbde # The caller's stack frame has already been built, so \n\ 9735303Sbde # %ebp is the caller's frame pointer. The caller's \n\ 9835303Sbde # raddr is in the caller's frame following the caller's \n\ 9935303Sbde # caller's frame pointer. \n\ 10035303Sbde # \n\ 10135303Sbde movl 4(%ebp),%edx \n\ 10246548Sbde.got_frompc: \n\ 10335303Sbde # \n\ 10435303Sbde # Our raddr is the caller's pc. \n\ 10535303Sbde # \n\ 10635303Sbde movl (%esp),%eax \n\ 10735303Sbde \n\ 10835303Sbde pushfl \n\ 10935303Sbde pushl %eax \n\ 11035303Sbde pushl %edx \n\ 11135303Sbde cli \n\ 112163726Sbde call mcount \n\ 11335303Sbde addl $8,%esp \n\ 11435303Sbde popfl \n\ 11546548Sbde.mcount_exit: \n\ 116174067Sbde ret $0 \n\ 11713107Sbde"); 118143063Sjoerg#else /* !__GNUCLIKE_ASM */ 119163727Sbde#error "this file needs to be ported to your compiler" 120143063Sjoerg#endif /* __GNUCLIKE_ASM */ 12113107Sbde 12213107Sbde#ifdef GUPROF 12313107Sbde/* 12446548Sbde * [.]mexitcount saves the return register(s), loads selfpc and calls 12513107Sbde * mexitcount(selfpc) to do the work. Someday it should be in a machine 12646548Sbde * dependent file together with cputime(), __mcount and [.]mcount. cputime() 12713107Sbde * can't just be put in machdep.c because it has to be compiled without -pg. 12813107Sbde */ 129143063Sjoerg#ifdef __GNUCLIKE_ASM 13035303Sbde__asm(" \n\ 13135303Sbde .text \n\ 13235303Sbde# \n\ 13346548Sbde# Dummy label to be seen when gprof -u hides [.]mexitcount. \n\ 13435303Sbde# \n\ 13550379Speter .p2align 4,0x90 \n\ 13635303Sbde .globl __mexitcount \n\ 13746548Sbde .type __mexitcount,@function \n\ 13835303Sbde__mexitcount: \n\ 13935303Sbde nop \n\ 14035303Sbde \n\ 14135303SbdeGMON_PROF_HIRES = 4 \n\ 14235303Sbde \n\ 14350379Speter .p2align 4,0x90 \n\ 144163726Sbde .globl .mexitcount \n\ 145163726Sbde.mexitcount: \n\ 146163726Sbde cmpl $GMON_PROF_HIRES,_gmonparam+GM_STATE \n\ 14746548Sbde jne .mexitcount_exit \n\ 14835303Sbde pushl %edx \n\ 14935303Sbde pushl %eax \n\ 15035303Sbde movl 8(%esp),%eax \n\ 15135303Sbde pushfl \n\ 15235303Sbde pushl %eax \n\ 15335303Sbde cli \n\ 154163726Sbde call mexitcount \n\ 15535303Sbde addl $4,%esp \n\ 15635303Sbde popfl \n\ 15735303Sbde popl %eax \n\ 15835303Sbde popl %edx \n\ 15946548Sbde.mexitcount_exit: \n\ 160174067Sbde ret $0 \n\ 16113107Sbde"); 162143063Sjoerg#endif /* __GNUCLIKE_ASM */ 16313107Sbde 16413107Sbde/* 16513107Sbde * Return the time elapsed since the last call. The units are machine- 16613107Sbde * dependent. 16713107Sbde */ 16819000Sbdeint 16913107Sbdecputime() 17013107Sbde{ 17113107Sbde u_int count; 17219000Sbde int delta; 173220433Sjkim#if (defined(I586_CPU) || defined(I686_CPU)) && \ 174220433Sjkim defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP) 17519000Sbde u_quad_t event_count; 17619000Sbde#endif 17719000Sbde u_char high, low; 17813107Sbde static u_int prev_count; 17913107Sbde 180163756Sbde#if defined(I586_CPU) || defined(I686_CPU) 18132005Sphk if (cputime_clock == CPUTIME_CLOCK_TSC) { 182129744Sbde /* 183129744Sbde * Scale the TSC a little to make cputime()'s frequency 184129744Sbde * fit in an int, assuming that the TSC frequency fits 185129744Sbde * in a u_int. Use a fixed scale since dynamic scaling 186129744Sbde * would be slower and we can't really use the low bit 187129744Sbde * of precision. 188129744Sbde */ 189129744Sbde count = (u_int)rdtsc() & ~1u; 190129744Sbde delta = (int)(count - prev_count) >> 1; 19119000Sbde prev_count = count; 19219000Sbde return (delta); 19319000Sbde } 194163756Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP) 19519000Sbde if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 19619000Sbde /* 19719000Sbde * XXX permon_read() should be inlined so that the 19819000Sbde * perfmon module doesn't need to be compiled with 19919000Sbde * profiling disabled and so that it is fast. 20019000Sbde */ 20119000Sbde perfmon_read(0, &event_count); 20219000Sbde 20319000Sbde count = (u_int)event_count; 20419000Sbde delta = (int)(count - prev_count); 20519000Sbde prev_count = count; 20619000Sbde return (delta); 20719000Sbde } 208163756Sbde#endif /* PERFMON && I586_PMC_GUPROF && !SMP */ 209163756Sbde#endif /* I586_CPU || I686_CPU */ 21019000Sbde 21113107Sbde /* 21213107Sbde * Read the current value of the 8254 timer counter 0. 21313107Sbde */ 21413107Sbde outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); 21513107Sbde low = inb(TIMER_CNTR0); 21619000Sbde high = inb(TIMER_CNTR0); 21719000Sbde count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; 21813107Sbde 21913107Sbde /* 22013107Sbde * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 22113107Sbde * While profiling is enabled, this routine is called at least twice 22213107Sbde * per timer reset (for mcounting and mexitcounting hardclock()), 22313107Sbde * so at most one reset has occurred since the last call, and one 22413107Sbde * has occurred iff the current count is larger than the previous 22513107Sbde * count. This allows counter underflow to be detected faster 22613107Sbde * than in microtime(). 22713107Sbde */ 22813107Sbde delta = prev_count - count; 22913107Sbde prev_count = count; 23013107Sbde if ((int) delta <= 0) 231177631Sphk return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT)); 23213107Sbde return (delta); 23313107Sbde} 23419000Sbde 23531395Sbdestatic int 23662573Sphksysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) 23731395Sbde{ 23831395Sbde int clock; 23941794Sbde int error; 24041794Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 24131395Sbde int event; 24231395Sbde struct pmc pmc; 24341794Sbde#endif 24431395Sbde 24531395Sbde clock = cputime_clock; 24631395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 24731395Sbde if (clock == CPUTIME_CLOCK_I586_PMC) { 24831395Sbde pmc.pmc_val = cputime_clock_pmc_conf; 24931395Sbde clock += pmc.pmc_event; 25031395Sbde } 25131395Sbde#endif 25231395Sbde error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); 25331395Sbde if (error == 0 && req->newptr != NULL) { 25431395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 25531395Sbde if (clock >= CPUTIME_CLOCK_I586_PMC) { 25631395Sbde event = clock - CPUTIME_CLOCK_I586_PMC; 25731395Sbde if (event >= 256) 25831395Sbde return (EINVAL); 25931395Sbde pmc.pmc_num = 0; 26031395Sbde pmc.pmc_event = event; 26131395Sbde pmc.pmc_unit = 0; 26231395Sbde pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; 26331395Sbde pmc.pmc_mask = 0; 26431395Sbde cputime_clock_pmc_conf = pmc.pmc_val; 26531395Sbde cputime_clock = CPUTIME_CLOCK_I586_PMC; 26631395Sbde } else 26731395Sbde#endif 26831395Sbde { 26931395Sbde if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) 27031395Sbde return (EINVAL); 27131395Sbde cputime_clock = clock; 27231395Sbde } 27331395Sbde } 27431395Sbde return (error); 27531395Sbde} 27631395Sbde 27731395SbdeSYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 27831395Sbde 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); 27931395Sbde 28019000Sbde/* 28119000Sbde * The start and stop routines need not be here since we turn off profiling 28219000Sbde * before calling them. They are here for convenience. 28319000Sbde */ 28419000Sbde 28519000Sbdevoid 28619000Sbdestartguprof(gp) 28719000Sbde struct gmonparam *gp; 28819000Sbde{ 289220433Sjkim#if defined(I586_CPU) || defined(I686_CPU) 290220433Sjkim uint64_t freq; 291220433Sjkim 292220433Sjkim freq = atomic_load_acq_64(&tsc_freq); 29319000Sbde if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { 294220433Sjkim if (freq != 0 && mp_ncpus == 1) 29532005Sphk cputime_clock = CPUTIME_CLOCK_TSC; 296220433Sjkim else 297220433Sjkim cputime_clock = CPUTIME_CLOCK_I8254; 29819000Sbde } 299167905Snjl if (cputime_clock == CPUTIME_CLOCK_TSC) { 300220433Sjkim gp->profrate = freq >> 1; 301167905Snjl cputime_prof_active = 1; 302220433Sjkim } else 303220433Sjkim gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT; 30431395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 305220433Sjkim if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 30619000Sbde if (perfmon_avail() && 30719000Sbde perfmon_setup(0, cputime_clock_pmc_conf) == 0) { 30819000Sbde if (perfmon_start(0) != 0) 30919000Sbde perfmon_fini(0); 31019000Sbde else { 31119000Sbde /* XXX 1 event == 1 us. */ 31219000Sbde gp->profrate = 1000000; 31319000Sbde 31419000Sbde saved_gmp = *gp; 31519000Sbde 31619000Sbde /* Zap overheads. They are invalid. */ 31719000Sbde gp->cputime_overhead = 0; 31819000Sbde gp->mcount_overhead = 0; 31919000Sbde gp->mcount_post_overhead = 0; 32019000Sbde gp->mcount_pre_overhead = 0; 32119000Sbde gp->mexitcount_overhead = 0; 32219000Sbde gp->mexitcount_post_overhead = 0; 32319000Sbde gp->mexitcount_pre_overhead = 0; 32419000Sbde 32519000Sbde cputime_clock_pmc_init = TRUE; 32619000Sbde } 32719000Sbde } 32819000Sbde } 32931395Sbde#endif /* PERFMON && I586_PMC_GUPROF */ 330220433Sjkim#else /* !(I586_CPU || I686_CPU) */ 331220433Sjkim if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) 332220433Sjkim cputime_clock = CPUTIME_CLOCK_I8254; 333220433Sjkim gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT; 334163756Sbde#endif /* I586_CPU || I686_CPU */ 33519000Sbde cputime_bias = 0; 33619000Sbde cputime(); 33719000Sbde} 33819000Sbde 33919000Sbdevoid 34019000Sbdestopguprof(gp) 34119000Sbde struct gmonparam *gp; 34219000Sbde{ 34319000Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 34419000Sbde if (cputime_clock_pmc_init) { 34519000Sbde *gp = saved_gmp; 34619000Sbde perfmon_fini(0); 34719000Sbde cputime_clock_pmc_init = FALSE; 34819000Sbde } 34919000Sbde#endif 350167905Snjl#if defined(I586_CPU) || defined(I686_CPU) 351167905Snjl if (cputime_clock == CPUTIME_CLOCK_TSC) 352167905Snjl cputime_prof_active = 0; 353167905Snjl#endif 35419000Sbde} 355167905Snjl 356167905Snjl#if defined(I586_CPU) || defined(I686_CPU) 357167905Snjl/* If the cpu frequency changed while profiling, report a warning. */ 358167905Snjlstatic void 359167905Snjltsc_freq_changed(void *arg, const struct cf_level *level, int status) 360167905Snjl{ 361167905Snjl 362184102Sjkim /* 363184102Sjkim * If there was an error during the transition or 364184102Sjkim * TSC is P-state invariant, don't do anything. 365184102Sjkim */ 366184102Sjkim if (status != 0 || tsc_is_invariant) 367167905Snjl return; 368167905Snjl if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC) 369167905Snjl printf("warning: cpu freq changed while profiling active\n"); 370167905Snjl} 371167905Snjl 372167905SnjlEVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 373167905Snjl EVENTHANDLER_PRI_ANY); 374167905Snjl#endif /* I586_CPU || I686_CPU */ 375167905Snjl 37613107Sbde#endif /* GUPROF */ 377