119000Sbde/*- 219000Sbde * Copyright (c) 1996 Bruce D. Evans. 319000Sbde * All rights reserved. 415146Swollman * 519000Sbde * Redistribution and use in source and binary forms, with or without 619000Sbde * modification, are permitted provided that the following conditions 719000Sbde * are met: 819000Sbde * 1. Redistributions of source code must retain the above copyright 919000Sbde * notice, this list of conditions and the following disclaimer. 1019000Sbde * 2. Redistributions in binary form must reproduce the above copyright 1119000Sbde * notice, this list of conditions and the following disclaimer in the 1219000Sbde * documentation and/or other materials provided with the distribution. 1319000Sbde * 1419000Sbde * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1519000Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1619000Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1719000Sbde * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1819000Sbde * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1919000Sbde * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2019000Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2119000Sbde * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2219000Sbde * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2319000Sbde * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2419000Sbde * SUCH DAMAGE. 2515146Swollman */ 2619000Sbde 27115703Sobrien#include <sys/cdefs.h> 28115703Sobrien__FBSDID("$FreeBSD$"); 29115703Sobrien 3019000Sbde#ifdef GUPROF 31129625Sbde#if 0 3219000Sbde#include "opt_i586_guprof.h" 3319000Sbde#include "opt_perfmon.h" 34129625Sbde#endif 3519000Sbde 3613107Sbde#include <sys/param.h> 3713107Sbde#include <sys/systm.h> 38167905Snjl#include <sys/bus.h> 39167905Snjl#include <sys/cpu.h> 40167905Snjl#include <sys/eventhandler.h> 4119000Sbde#include <sys/gmon.h> 4231395Sbde#include <sys/kernel.h> 43163756Sbde#include <sys/smp.h> 4431395Sbde#include <sys/sysctl.h> 4519000Sbde 4613107Sbde#include <machine/clock.h> 47129625Sbde#if 0 4819000Sbde#include <machine/perfmon.h> 49129625Sbde#endif 50146211Snyan#include <machine/timerreg.h> 5146548Sbde 5219000Sbde#define CPUTIME_CLOCK_UNINITIALIZED 0 5319000Sbde#define CPUTIME_CLOCK_I8254 1 5432005Sphk#define CPUTIME_CLOCK_TSC 2 5519000Sbde#define CPUTIME_CLOCK_I586_PMC 3 5619000Sbde#define CPUTIME_CLOCK_I8254_SHIFT 7 5719000Sbde 5819000Sbdeint cputime_bias = 1; /* initialize for locality of reference */ 5919000Sbde 6019000Sbdestatic int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; 61163756Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 6219000Sbdestatic u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; 6319000Sbdestatic int cputime_clock_pmc_init; 6419000Sbdestatic struct gmonparam saved_gmp; 6513107Sbde#endif 66167905Snjlstatic int cputime_prof_active; 6719000Sbde#endif /* GUPROF */ 6813107Sbde 69143063Sjoerg#ifdef __GNUCLIKE_ASM 7035303Sbde__asm(" \n\ 7135303SbdeGM_STATE = 0 \n\ 7235303SbdeGMON_PROF_OFF = 3 \n\ 7335303Sbde \n\ 7435303Sbde .text \n\ 7550379Speter .p2align 4,0x90 \n\ 7635303Sbde .globl __mcount \n\ 7746548Sbde .type __mcount,@function \n\ 7835303Sbde__mcount: \n\ 7935303Sbde # \n\ 8035303Sbde # Check that we are profiling. Do it early for speed. \n\ 8135303Sbde # \n\ 82163726Sbde cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 83174066Sbde je .mcount_exit \n\ 84174066Sbde # \n\ 85174066Sbde # __mcount is the same as [.]mcount except the caller \n\ 86174066Sbde # hasn't changed the stack except to call here, so the \n\ 8735303Sbde # caller's raddr is above our raddr. \n\ 8835303Sbde # \n\ 89129625Sbde pushq %rax \n\ 90129625Sbde pushq %rdx \n\ 91129625Sbde pushq %rcx \n\ 92129625Sbde pushq %rsi \n\ 93129625Sbde pushq %rdi \n\ 94129625Sbde pushq %r8 \n\ 95129625Sbde pushq %r9 \n\ 96129625Sbde movq 7*8+8(%rsp),%rdi \n\ 97174066Sbde jmp .got_frompc \n\ 98174066Sbde \n\ 99174066Sbde .p2align 4,0x90 \n\ 100174066Sbde .globl .mcount \n\ 101163726Sbde.mcount: \n\ 102163726Sbde cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 10346548Sbde je .mcount_exit \n\ 10435303Sbde # \n\ 10535303Sbde # The caller's stack frame has already been built, so \n\ 106129625Sbde # %rbp is the caller's frame pointer. The caller's \n\ 10735303Sbde # raddr is in the caller's frame following the caller's \n\ 10835303Sbde # caller's frame pointer. \n\ 10935303Sbde # \n\ 110129625Sbde pushq %rax \n\ 111129625Sbde pushq %rdx \n\ 112129625Sbde pushq %rcx \n\ 113129625Sbde pushq %rsi \n\ 114129625Sbde pushq %rdi \n\ 115129625Sbde pushq %r8 \n\ 116129625Sbde pushq %r9 \n\ 117129625Sbde movq 8(%rbp),%rdi \n\ 11846548Sbde.got_frompc: \n\ 11935303Sbde # \n\ 12035303Sbde # Our raddr is the caller's pc. \n\ 12135303Sbde # \n\ 122129625Sbde movq 7*8(%rsp),%rsi \n\ 12335303Sbde \n\ 124129625Sbde pushfq \n\ 12535303Sbde cli \n\ 126163726Sbde call mcount \n\ 127129625Sbde popfq \n\ 128129625Sbde popq %r9 \n\ 129129625Sbde popq %r8 \n\ 130129625Sbde popq %rdi \n\ 131129625Sbde popq %rsi \n\ 132129625Sbde popq %rcx \n\ 133129625Sbde popq %rdx \n\ 134129625Sbde popq %rax \n\ 13546548Sbde.mcount_exit: \n\ 136174067Sbde ret $0 \n\ 13713107Sbde"); 138143063Sjoerg#else /* !__GNUCLIKE_ASM */ 139163727Sbde#error "this file needs to be ported to your compiler" 140143063Sjoerg#endif /* __GNUCLIKE_ASM */ 14113107Sbde 14213107Sbde#ifdef GUPROF 14313107Sbde/* 14446548Sbde * [.]mexitcount saves the return register(s), loads selfpc and calls 14513107Sbde * mexitcount(selfpc) to do the work. Someday it should be in a machine 14646548Sbde * dependent file together with cputime(), __mcount and [.]mcount. cputime() 14713107Sbde * can't just be put in machdep.c because it has to be compiled without -pg. 14813107Sbde */ 149143063Sjoerg#ifdef __GNUCLIKE_ASM 15035303Sbde__asm(" \n\ 15135303Sbde .text \n\ 15235303Sbde# \n\ 15346548Sbde# Dummy label to be seen when gprof -u hides [.]mexitcount. \n\ 15435303Sbde# \n\ 15550379Speter .p2align 4,0x90 \n\ 15635303Sbde .globl __mexitcount \n\ 15746548Sbde .type __mexitcount,@function \n\ 15835303Sbde__mexitcount: \n\ 15935303Sbde nop \n\ 16035303Sbde \n\ 16135303SbdeGMON_PROF_HIRES = 4 \n\ 16235303Sbde \n\ 16350379Speter .p2align 4,0x90 \n\ 164163726Sbde .globl .mexitcount \n\ 165163726Sbde.mexitcount: \n\ 166163726Sbde cmpl $GMON_PROF_HIRES,_gmonparam+GM_STATE \n\ 16746548Sbde jne .mexitcount_exit \n\ 168129625Sbde pushq %rax \n\ 169129625Sbde pushq %rdx \n\ 170129625Sbde pushq %rcx \n\ 171129625Sbde pushq %rsi \n\ 172129625Sbde pushq %rdi \n\ 173129625Sbde pushq %r8 \n\ 174129625Sbde pushq %r9 \n\ 175129625Sbde movq 7*8(%rsp),%rdi \n\ 176129625Sbde pushfq \n\ 17735303Sbde cli \n\ 178163726Sbde call mexitcount \n\ 179129625Sbde popfq \n\ 180129625Sbde popq %r9 \n\ 181129625Sbde popq %r8 \n\ 182129625Sbde popq %rdi \n\ 183129625Sbde popq %rsi \n\ 184129625Sbde popq %rcx \n\ 185129625Sbde popq %rdx \n\ 186129625Sbde popq %rax \n\ 18746548Sbde.mexitcount_exit: \n\ 188174067Sbde ret $0 \n\ 18913107Sbde"); 190143063Sjoerg#endif /* __GNUCLIKE_ASM */ 19113107Sbde 19213107Sbde/* 19313107Sbde * Return the time elapsed since the last call. The units are machine- 19413107Sbde * dependent. 19513107Sbde */ 19619000Sbdeint 19713107Sbdecputime() 19813107Sbde{ 19913107Sbde u_int count; 20019000Sbde int delta; 201220429Sjkim#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP) 20219000Sbde u_quad_t event_count; 20319000Sbde#endif 20419000Sbde u_char high, low; 20513107Sbde static u_int prev_count; 20613107Sbde 20732005Sphk if (cputime_clock == CPUTIME_CLOCK_TSC) { 208129744Sbde /* 209129744Sbde * Scale the TSC a little to make cputime()'s frequency 210129744Sbde * fit in an int, assuming that the TSC frequency fits 211129744Sbde * in a u_int. Use a fixed scale since dynamic scaling 212129744Sbde * would be slower and we can't really use the low bit 213129744Sbde * of precision. 214129744Sbde */ 215129744Sbde count = (u_int)rdtsc() & ~1u; 216129744Sbde delta = (int)(count - prev_count) >> 1; 21719000Sbde prev_count = count; 21819000Sbde return (delta); 21919000Sbde } 220163756Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP) 22119000Sbde if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 22219000Sbde /* 22319000Sbde * XXX permon_read() should be inlined so that the 22419000Sbde * perfmon module doesn't need to be compiled with 22519000Sbde * profiling disabled and so that it is fast. 22619000Sbde */ 22719000Sbde perfmon_read(0, &event_count); 22819000Sbde 22919000Sbde count = (u_int)event_count; 23019000Sbde delta = (int)(count - prev_count); 23119000Sbde prev_count = count; 23219000Sbde return (delta); 23319000Sbde } 234163756Sbde#endif /* PERFMON && I586_PMC_GUPROF && !SMP */ 23519000Sbde 23613107Sbde /* 23713107Sbde * Read the current value of the 8254 timer counter 0. 23813107Sbde */ 23913107Sbde outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); 24013107Sbde low = inb(TIMER_CNTR0); 24119000Sbde high = inb(TIMER_CNTR0); 24219000Sbde count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; 24313107Sbde 24413107Sbde /* 24513107Sbde * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 24613107Sbde * While profiling is enabled, this routine is called at least twice 24713107Sbde * per timer reset (for mcounting and mexitcounting hardclock()), 24813107Sbde * so at most one reset has occurred since the last call, and one 24913107Sbde * has occurred iff the current count is larger than the previous 25013107Sbde * count. This allows counter underflow to be detected faster 25113107Sbde * than in microtime(). 25213107Sbde */ 25313107Sbde delta = prev_count - count; 25413107Sbde prev_count = count; 25513107Sbde if ((int) delta <= 0) 256177631Sphk return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT)); 25713107Sbde return (delta); 25813107Sbde} 25919000Sbde 26031395Sbdestatic int 26162573Sphksysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) 26231395Sbde{ 26331395Sbde int clock; 26441794Sbde int error; 26541794Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 26631395Sbde int event; 26731395Sbde struct pmc pmc; 26841794Sbde#endif 26931395Sbde 27031395Sbde clock = cputime_clock; 27131395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 27231395Sbde if (clock == CPUTIME_CLOCK_I586_PMC) { 27331395Sbde pmc.pmc_val = cputime_clock_pmc_conf; 27431395Sbde clock += pmc.pmc_event; 27531395Sbde } 27631395Sbde#endif 27731395Sbde error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); 27831395Sbde if (error == 0 && req->newptr != NULL) { 27931395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 28031395Sbde if (clock >= CPUTIME_CLOCK_I586_PMC) { 28131395Sbde event = clock - CPUTIME_CLOCK_I586_PMC; 28231395Sbde if (event >= 256) 28331395Sbde return (EINVAL); 28431395Sbde pmc.pmc_num = 0; 28531395Sbde pmc.pmc_event = event; 28631395Sbde pmc.pmc_unit = 0; 28731395Sbde pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; 28831395Sbde pmc.pmc_mask = 0; 28931395Sbde cputime_clock_pmc_conf = pmc.pmc_val; 29031395Sbde cputime_clock = CPUTIME_CLOCK_I586_PMC; 29131395Sbde } else 29231395Sbde#endif 29331395Sbde { 29431395Sbde if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) 29531395Sbde return (EINVAL); 29631395Sbde cputime_clock = clock; 29731395Sbde } 29831395Sbde } 29931395Sbde return (error); 30031395Sbde} 30131395Sbde 30231395SbdeSYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 30331395Sbde 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); 30431395Sbde 30519000Sbde/* 30619000Sbde * The start and stop routines need not be here since we turn off profiling 30719000Sbde * before calling them. They are here for convenience. 30819000Sbde */ 30919000Sbde 31019000Sbdevoid 31119000Sbdestartguprof(gp) 31219000Sbde struct gmonparam *gp; 31319000Sbde{ 314220433Sjkim uint64_t freq; 315220433Sjkim 316220433Sjkim freq = atomic_load_acq_64(&tsc_freq); 31719000Sbde if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { 318220433Sjkim if (freq != 0 && mp_ncpus == 1) 31932005Sphk cputime_clock = CPUTIME_CLOCK_TSC; 320220433Sjkim else 321220433Sjkim cputime_clock = CPUTIME_CLOCK_I8254; 32219000Sbde } 323167905Snjl if (cputime_clock == CPUTIME_CLOCK_TSC) { 324220433Sjkim gp->profrate = freq >> 1; 325167905Snjl cputime_prof_active = 1; 326220433Sjkim } else 327220433Sjkim gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT; 32831395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 329220433Sjkim if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 33019000Sbde if (perfmon_avail() && 33119000Sbde perfmon_setup(0, cputime_clock_pmc_conf) == 0) { 33219000Sbde if (perfmon_start(0) != 0) 33319000Sbde perfmon_fini(0); 33419000Sbde else { 33519000Sbde /* XXX 1 event == 1 us. */ 33619000Sbde gp->profrate = 1000000; 33719000Sbde 33819000Sbde saved_gmp = *gp; 33919000Sbde 34019000Sbde /* Zap overheads. They are invalid. */ 34119000Sbde gp->cputime_overhead = 0; 34219000Sbde gp->mcount_overhead = 0; 34319000Sbde gp->mcount_post_overhead = 0; 34419000Sbde gp->mcount_pre_overhead = 0; 34519000Sbde gp->mexitcount_overhead = 0; 34619000Sbde gp->mexitcount_post_overhead = 0; 34719000Sbde gp->mexitcount_pre_overhead = 0; 34819000Sbde 34919000Sbde cputime_clock_pmc_init = TRUE; 35019000Sbde } 35119000Sbde } 35219000Sbde } 35331395Sbde#endif /* PERFMON && I586_PMC_GUPROF */ 35419000Sbde cputime_bias = 0; 35519000Sbde cputime(); 35619000Sbde} 35719000Sbde 35819000Sbdevoid 35919000Sbdestopguprof(gp) 36019000Sbde struct gmonparam *gp; 36119000Sbde{ 36219000Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 36319000Sbde if (cputime_clock_pmc_init) { 36419000Sbde *gp = saved_gmp; 36519000Sbde perfmon_fini(0); 36619000Sbde cputime_clock_pmc_init = FALSE; 36719000Sbde } 36819000Sbde#endif 369167905Snjl if (cputime_clock == CPUTIME_CLOCK_TSC) 370167905Snjl cputime_prof_active = 0; 37119000Sbde} 372167905Snjl 373167905Snjl/* If the cpu frequency changed while profiling, report a warning. */ 374167905Snjlstatic void 375167905Snjltsc_freq_changed(void *arg, const struct cf_level *level, int status) 376167905Snjl{ 377167905Snjl 378184102Sjkim /* 379184102Sjkim * If there was an error during the transition or 380184102Sjkim * TSC is P-state invariant, don't do anything. 381184102Sjkim */ 382184102Sjkim if (status != 0 || tsc_is_invariant) 383167905Snjl return; 384167905Snjl if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC) 385167905Snjl printf("warning: cpu freq changed while profiling active\n"); 386167905Snjl} 387167905Snjl 388167905SnjlEVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 389167905Snjl EVENTHANDLER_PRI_ANY); 390167905Snjl 39113107Sbde#endif /* GUPROF */ 392