/* prof_machdep.c — from FreeBSD head, SVN revision 219461 */
/*-
 * Copyright (c) 1996 Bruce D. Evans.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
25202878Srdivacky */ 26202878Srdivacky 27193323Sed#include <sys/cdefs.h> 28193323Sed__FBSDID("$FreeBSD: head/sys/i386/isa/prof_machdep.c 219461 2011-03-10 20:02:58Z jkim $"); 29193323Sed 30193323Sed#ifdef GUPROF 31202878Srdivacky#include "opt_i586_guprof.h" 32202878Srdivacky#include "opt_perfmon.h" 33193323Sed 34193323Sed#include <sys/param.h> 35193323Sed#include <sys/systm.h> 36193323Sed#include <sys/bus.h> 37193323Sed#include <sys/cpu.h> 38193323Sed#include <sys/eventhandler.h> 39193323Sed#include <sys/gmon.h> 40193323Sed#include <sys/kernel.h> 41193323Sed#include <sys/smp.h> 42193323Sed#include <sys/sysctl.h> 43193323Sed 44193323Sed#include <machine/clock.h> 45193323Sed#include <machine/perfmon.h> 46193323Sed#include <machine/timerreg.h> 47193323Sed 48193323Sed#define CPUTIME_CLOCK_UNINITIALIZED 0 49193323Sed#define CPUTIME_CLOCK_I8254 1 50193323Sed#define CPUTIME_CLOCK_TSC 2 51193323Sed#define CPUTIME_CLOCK_I586_PMC 3 52193323Sed#define CPUTIME_CLOCK_I8254_SHIFT 7 53193323Sed 54193323Sedint cputime_bias = 1; /* initialize for locality of reference */ 55193323Sed 56193323Sedstatic int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; 57193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF) 58193323Sedstatic u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; 59193323Sedstatic int cputime_clock_pmc_init; 60193323Sedstatic struct gmonparam saved_gmp; 61193323Sed#endif 62193323Sed#if defined(I586_CPU) || defined(I686_CPU) 63193323Sedstatic int cputime_prof_active; 64193323Sed#endif 65193323Sed#endif /* GUPROF */ 66193323Sed 67193323Sed#ifdef __GNUCLIKE_ASM 68193323Sed__asm(" \n\ 69193323SedGM_STATE = 0 \n\ 70193323SedGMON_PROF_OFF = 3 \n\ 71193323Sed \n\ 72193323Sed .text \n\ 73193323Sed .p2align 4,0x90 \n\ 74193323Sed .globl __mcount \n\ 75193323Sed .type __mcount,@function \n\ 76193323Sed__mcount: \n\ 77193323Sed # \n\ 78193323Sed # Check that we are profiling. Do it early for speed. 
\n\ 79193323Sed # \n\ 80193323Sed cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 81193323Sed je .mcount_exit \n\ 82193323Sed # \n\ 83193323Sed # __mcount is the same as [.]mcount except the caller \n\ 84202878Srdivacky # hasn't changed the stack except to call here, so the \n\ 85202878Srdivacky # caller's raddr is above our raddr. \n\ 86202878Srdivacky # \n\ 87202878Srdivacky movl 4(%esp),%edx \n\ 88202878Srdivacky jmp .got_frompc \n\ 89202878Srdivacky \n\ 90202878Srdivacky .p2align 4,0x90 \n\ 91202878Srdivacky .globl .mcount \n\ 92202878Srdivacky.mcount: \n\ 93202878Srdivacky cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ 94202878Srdivacky je .mcount_exit \n\ 95202878Srdivacky # \n\ 96202878Srdivacky # The caller's stack frame has already been built, so \n\ 97202878Srdivacky # %ebp is the caller's frame pointer. The caller's \n\ 98202878Srdivacky # raddr is in the caller's frame following the caller's \n\ 99202878Srdivacky # caller's frame pointer. \n\ 100202878Srdivacky # \n\ 101202878Srdivacky movl 4(%ebp),%edx \n\ 102202878Srdivacky.got_frompc: \n\ 103202878Srdivacky # \n\ 104202878Srdivacky # Our raddr is the caller's pc. \n\ 105202878Srdivacky # \n\ 106202878Srdivacky movl (%esp),%eax \n\ 107202878Srdivacky \n\ 108202878Srdivacky pushfl \n\ 109202878Srdivacky pushl %eax \n\ 110202878Srdivacky pushl %edx \n\ 111202878Srdivacky cli \n\ 112202878Srdivacky call mcount \n\ 113202878Srdivacky addl $8,%esp \n\ 114202878Srdivacky popfl \n\ 115202878Srdivacky.mcount_exit: \n\ 116202878Srdivacky ret $0 \n\ 117202878Srdivacky"); 118202878Srdivacky#else /* !__GNUCLIKE_ASM */ 119202878Srdivacky#error "this file needs to be ported to your compiler" 120202878Srdivacky#endif /* __GNUCLIKE_ASM */ 121202878Srdivacky 122202878Srdivacky#ifdef GUPROF 123202878Srdivacky/* 124202878Srdivacky * [.]mexitcount saves the return register(s), loads selfpc and calls 125202878Srdivacky * mexitcount(selfpc) to do the work. 
Someday it should be in a machine 126202878Srdivacky * dependent file together with cputime(), __mcount and [.]mcount. cputime() 127202878Srdivacky * can't just be put in machdep.c because it has to be compiled without -pg. 128202878Srdivacky */ 129202878Srdivacky#ifdef __GNUCLIKE_ASM 130202878Srdivacky__asm(" \n\ 131202878Srdivacky .text \n\ 132202878Srdivacky# \n\ 133202878Srdivacky# Dummy label to be seen when gprof -u hides [.]mexitcount. \n\ 134202878Srdivacky# \n\ 135202878Srdivacky .p2align 4,0x90 \n\ 136202878Srdivacky .globl __mexitcount \n\ 137202878Srdivacky .type __mexitcount,@function \n\ 138202878Srdivacky__mexitcount: \n\ 139202878Srdivacky nop \n\ 140202878Srdivacky \n\ 141202878SrdivackyGMON_PROF_HIRES = 4 \n\ 142202878Srdivacky \n\ 143202878Srdivacky .p2align 4,0x90 \n\ 144202878Srdivacky .globl .mexitcount \n\ 145202878Srdivacky.mexitcount: \n\ 146202878Srdivacky cmpl $GMON_PROF_HIRES,_gmonparam+GM_STATE \n\ 147202878Srdivacky jne .mexitcount_exit \n\ 148202878Srdivacky pushl %edx \n\ 149202878Srdivacky pushl %eax \n\ 150202878Srdivacky movl 8(%esp),%eax \n\ 151202878Srdivacky pushfl \n\ 152202878Srdivacky pushl %eax \n\ 153202878Srdivacky cli \n\ 154202878Srdivacky call mexitcount \n\ 155202878Srdivacky addl $4,%esp \n\ 156202878Srdivacky popfl \n\ 157202878Srdivacky popl %eax \n\ 158202878Srdivacky popl %edx \n\ 159202878Srdivacky.mexitcount_exit: \n\ 160202878Srdivacky ret $0 \n\ 161202878Srdivacky"); 162202878Srdivacky#endif /* __GNUCLIKE_ASM */ 163202878Srdivacky 164202878Srdivacky/* 165202878Srdivacky * Return the time elapsed since the last call. The units are machine- 166202878Srdivacky * dependent. 
167202878Srdivacky */ 168202878Srdivackyint 169202878Srdivackycputime() 170202878Srdivacky{ 171202878Srdivacky u_int count; 172202878Srdivacky int delta; 173202878Srdivacky#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) && \ 174202878Srdivacky defined(PERFMON) && defined(I586_PMC_GUPROF) 175202878Srdivacky u_quad_t event_count; 176202878Srdivacky#endif 177202878Srdivacky u_char high, low; 178202878Srdivacky static u_int prev_count; 179202878Srdivacky 180202878Srdivacky#if defined(I586_CPU) || defined(I686_CPU) 181202878Srdivacky if (cputime_clock == CPUTIME_CLOCK_TSC) { 182202878Srdivacky /* 183202878Srdivacky * Scale the TSC a little to make cputime()'s frequency 184202878Srdivacky * fit in an int, assuming that the TSC frequency fits 185202878Srdivacky * in a u_int. Use a fixed scale since dynamic scaling 186202878Srdivacky * would be slower and we can't really use the low bit 187202878Srdivacky * of precision. 188202878Srdivacky */ 189202878Srdivacky count = (u_int)rdtsc() & ~1u; 190202878Srdivacky delta = (int)(count - prev_count) >> 1; 191202878Srdivacky prev_count = count; 192202878Srdivacky return (delta); 193202878Srdivacky } 194202878Srdivacky#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP) 195202878Srdivacky if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 196202878Srdivacky /* 197202878Srdivacky * XXX permon_read() should be inlined so that the 198202878Srdivacky * perfmon module doesn't need to be compiled with 199202878Srdivacky * profiling disabled and so that it is fast. 200193323Sed */ 201193323Sed perfmon_read(0, &event_count); 202193323Sed 203193323Sed count = (u_int)event_count; 204193323Sed delta = (int)(count - prev_count); 205193323Sed prev_count = count; 206193323Sed return (delta); 207193323Sed } 208193323Sed#endif /* PERFMON && I586_PMC_GUPROF && !SMP */ 209193323Sed#endif /* I586_CPU || I686_CPU */ 210193323Sed 211193323Sed /* 212193323Sed * Read the current value of the 8254 timer counter 0. 
213193323Sed */ 214193323Sed outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); 215193323Sed low = inb(TIMER_CNTR0); 216193323Sed high = inb(TIMER_CNTR0); 217193323Sed count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; 218193323Sed 219193323Sed /* 220193323Sed * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 221193323Sed * While profiling is enabled, this routine is called at least twice 222193323Sed * per timer reset (for mcounting and mexitcounting hardclock()), 223193323Sed * so at most one reset has occurred since the last call, and one 224193323Sed * has occurred iff the current count is larger than the previous 225193323Sed * count. This allows counter underflow to be detected faster 226193323Sed * than in microtime(). 227193323Sed */ 228193323Sed delta = prev_count - count; 229193323Sed prev_count = count; 230193323Sed if ((int) delta <= 0) 231193323Sed return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT)); 232193323Sed return (delta); 233193323Sed} 234193323Sed 235193323Sedstatic int 236193323Sedsysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) 237193323Sed{ 238193323Sed int clock; 239193323Sed int error; 240193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF) 241193323Sed int event; 242193323Sed struct pmc pmc; 243193323Sed#endif 244193323Sed 245193323Sed clock = cputime_clock; 246193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF) 247193323Sed if (clock == CPUTIME_CLOCK_I586_PMC) { 248193323Sed pmc.pmc_val = cputime_clock_pmc_conf; 249193323Sed clock += pmc.pmc_event; 250193323Sed } 251193323Sed#endif 252193323Sed error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); 253193323Sed if (error == 0 && req->newptr != NULL) { 254193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF) 255193323Sed if (clock >= CPUTIME_CLOCK_I586_PMC) { 256193323Sed event = clock - CPUTIME_CLOCK_I586_PMC; 257193323Sed if (event >= 256) 258193323Sed return (EINVAL); 259193323Sed pmc.pmc_num = 0; 260193323Sed pmc.pmc_event = event; 
261193323Sed pmc.pmc_unit = 0; 262193323Sed pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; 263193323Sed pmc.pmc_mask = 0; 264193323Sed cputime_clock_pmc_conf = pmc.pmc_val; 265193323Sed cputime_clock = CPUTIME_CLOCK_I586_PMC; 266193323Sed } else 267193323Sed#endif 268193323Sed { 269193323Sed if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) 270193323Sed return (EINVAL); 271193323Sed cputime_clock = clock; 272193323Sed } 273193323Sed } 274193323Sed return (error); 275193323Sed} 276193323Sed 277193323SedSYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 278193323Sed 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); 279198090Srdivacky 280198090Srdivacky/* 281198090Srdivacky * The start and stop routines need not be here since we turn off profiling 282198090Srdivacky * before calling them. They are here for convenience. 283198090Srdivacky */ 284193323Sed 285193323Sedvoid 286193323Sedstartguprof(gp) 287193323Sed struct gmonparam *gp; 288193323Sed{ 289193323Sed if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { 290193323Sed cputime_clock = CPUTIME_CLOCK_I8254; 291193323Sed#if defined(I586_CPU) || defined(I686_CPU) 292193323Sed if (tsc_freq != 0 && mp_ncpus == 1) 293193323Sed cputime_clock = CPUTIME_CLOCK_TSC; 294193323Sed#endif 295193323Sed } 296193323Sed gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT; 297193323Sed#if defined(I586_CPU) || defined(I686_CPU) 298193323Sed if (cputime_clock == CPUTIME_CLOCK_TSC) { 299193323Sed gp->profrate = tsc_freq >> 1; 300193323Sed cputime_prof_active = 1; 301193323Sed } 302193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF) 303193323Sed else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 304193323Sed if (perfmon_avail() && 305193323Sed perfmon_setup(0, cputime_clock_pmc_conf) == 0) { 306193323Sed if (perfmon_start(0) != 0) 307198090Srdivacky perfmon_fini(0); 308193323Sed else { 309193323Sed /* XXX 1 event == 1 us. 
*/ 310193323Sed gp->profrate = 1000000; 311193323Sed 312193323Sed saved_gmp = *gp; 313193323Sed 314193323Sed /* Zap overheads. They are invalid. */ 315193323Sed gp->cputime_overhead = 0; 316193323Sed gp->mcount_overhead = 0; 317193323Sed gp->mcount_post_overhead = 0; 318193323Sed gp->mcount_pre_overhead = 0; 319193323Sed gp->mexitcount_overhead = 0; 320193323Sed gp->mexitcount_post_overhead = 0; 321198090Srdivacky gp->mexitcount_pre_overhead = 0; 322193323Sed 323193323Sed cputime_clock_pmc_init = TRUE; 324193323Sed } 325193323Sed } 326193323Sed } 327193323Sed#endif /* PERFMON && I586_PMC_GUPROF */ 328193323Sed#endif /* I586_CPU || I686_CPU */ 329193323Sed cputime_bias = 0; 330193323Sed cputime(); 331193323Sed} 332193323Sed 333193323Sedvoid 334193323Sedstopguprof(gp) 335193323Sed struct gmonparam *gp; 336193323Sed{ 337193323Sed#if defined(PERFMON) && defined(I586_PMC_GUPROF) 338198090Srdivacky if (cputime_clock_pmc_init) { 339198090Srdivacky *gp = saved_gmp; 340198090Srdivacky perfmon_fini(0); 341198090Srdivacky cputime_clock_pmc_init = FALSE; 342198090Srdivacky } 343198090Srdivacky#endif 344198090Srdivacky#if defined(I586_CPU) || defined(I686_CPU) 345198090Srdivacky if (cputime_clock == CPUTIME_CLOCK_TSC) 346198090Srdivacky cputime_prof_active = 0; 347193323Sed#endif 348193323Sed} 349193323Sed 350193323Sed#if defined(I586_CPU) || defined(I686_CPU) 351193323Sed/* If the cpu frequency changed while profiling, report a warning. */ 352193323Sedstatic void 353193323Sedtsc_freq_changed(void *arg, const struct cf_level *level, int status) 354193323Sed{ 355193323Sed 356198090Srdivacky /* 357202878Srdivacky * If there was an error during the transition or 358202878Srdivacky * TSC is P-state invariant, don't do anything. 
359193323Sed */ 360193323Sed if (status != 0 || tsc_is_invariant) 361193323Sed return; 362193323Sed if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC) 363193323Sed printf("warning: cpu freq changed while profiling active\n"); 364193323Sed} 365193323Sed 366193323SedEVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 367193323Sed EVENTHANDLER_PRI_ANY); 368193323Sed#endif /* I586_CPU || I686_CPU */ 369193323Sed 370193323Sed#endif /* GUPROF */ 371193323Sed