prof_machdep.c revision 143063
119000Sbde/*- 219000Sbde * Copyright (c) 1996 Bruce D. Evans. 319000Sbde * All rights reserved. 415146Swollman * 519000Sbde * Redistribution and use in source and binary forms, with or without 619000Sbde * modification, are permitted provided that the following conditions 719000Sbde * are met: 819000Sbde * 1. Redistributions of source code must retain the above copyright 919000Sbde * notice, this list of conditions and the following disclaimer. 1019000Sbde * 2. Redistributions in binary form must reproduce the above copyright 1119000Sbde * notice, this list of conditions and the following disclaimer in the 1219000Sbde * documentation and/or other materials provided with the distribution. 1319000Sbde * 1419000Sbde * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1519000Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1619000Sbde * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1719000Sbde * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 1819000Sbde * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1919000Sbde * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2019000Sbde * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2119000Sbde * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2219000Sbde * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2319000Sbde * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2419000Sbde * SUCH DAMAGE. 2515146Swollman */ 2619000Sbde 27115703Sobrien#include <sys/cdefs.h> 28115703Sobrien__FBSDID("$FreeBSD: head/sys/amd64/amd64/prof_machdep.c 143063 2005-03-02 21:33:29Z joerg $"); 29115703Sobrien 3019000Sbde#ifdef GUPROF 31129625Sbde#if 0 3219000Sbde#include "opt_i586_guprof.h" 3319000Sbde#include "opt_perfmon.h" 34129625Sbde#endif 3519000Sbde 3613107Sbde#include <sys/param.h> 3713107Sbde#include <sys/systm.h> 3819000Sbde#include <sys/gmon.h> 3931395Sbde#include <sys/kernel.h> 4031395Sbde#include <sys/sysctl.h> 4119000Sbde 4213107Sbde#include <machine/clock.h> 43129625Sbde#if 0 4419000Sbde#include <machine/perfmon.h> 45129625Sbde#endif 4619000Sbde#include <machine/profile.h> 4746548Sbde#undef MCOUNT 4819000Sbde#endif 4919000Sbde 5046548Sbde#include <machine/asmacros.h> 5146548Sbde 5213107Sbde#include <i386/isa/isa.h> 5313107Sbde#include <i386/isa/timerreg.h> 5413107Sbde 5513107Sbde#ifdef GUPROF 5619000Sbde#define CPUTIME_CLOCK_UNINITIALIZED 0 5719000Sbde#define CPUTIME_CLOCK_I8254 1 5832005Sphk#define CPUTIME_CLOCK_TSC 2 5919000Sbde#define CPUTIME_CLOCK_I586_PMC 3 6019000Sbde#define CPUTIME_CLOCK_I8254_SHIFT 7 6119000Sbde 6219000Sbdeint cputime_bias = 1; /* initialize for locality of reference */ 6319000Sbde 6419000Sbdestatic int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; 6519000Sbde#ifdef I586_PMC_GUPROF 6619000Sbdestatic u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; 6719000Sbdestatic int cputime_clock_pmc_init; 6819000Sbdestatic struct gmonparam saved_gmp; 6913107Sbde#endif 7019000Sbde#endif /* GUPROF */ 7113107Sbde 72143063Sjoerg#ifdef __GNUCLIKE_ASM 7335303Sbde__asm(" \n\ 7435303SbdeGM_STATE = 0 \n\ 7535303SbdeGMON_PROF_OFF = 3 \n\ 7635303Sbde \n\ 7735303Sbde .text \n\ 7850379Speter .p2align 4,0x90 \n\ 7935303Sbde .globl __mcount \n\ 8046548Sbde .type __mcount,@function \n\ 8135303Sbde__mcount: \n\ 8235303Sbde # \n\ 8335303Sbde # Check that we are profiling. Do it early for speed. \n\ 8435303Sbde # \n\ 8546548Sbde cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ 8646548Sbde je .mcount_exit \n\ 8735303Sbde # \n\ 8846548Sbde # __mcount is the same as [.]mcount except the caller \n\ 8935303Sbde # hasn't changed the stack except to call here, so the \n\ 9035303Sbde # caller's raddr is above our raddr. \n\ 9135303Sbde # \n\ 92129625Sbde pushq %rax \n\ 93129625Sbde pushq %rdx \n\ 94129625Sbde pushq %rcx \n\ 95129625Sbde pushq %rsi \n\ 96129625Sbde pushq %rdi \n\ 97129625Sbde pushq %r8 \n\ 98129625Sbde pushq %r9 \n\ 99129625Sbde movq 7*8+8(%rsp),%rdi \n\ 10046548Sbde jmp .got_frompc \n\ 10135303Sbde \n\ 10250379Speter .p2align 4,0x90 \n\ 10346548Sbde .globl " __XSTRING(HIDENAME(mcount)) " \n\ 10446548Sbde" __XSTRING(HIDENAME(mcount)) ": \n\ 10599932Sbde .globl __cyg_profile_func_enter \n\ 10699932Sbde__cyg_profile_func_enter: \n\ 10746548Sbde cmpl $GMON_PROF_OFF," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ 10846548Sbde je .mcount_exit \n\ 10935303Sbde # \n\ 11035303Sbde # The caller's stack frame has already been built, so \n\ 111129625Sbde # %rbp is the caller's frame pointer. The caller's \n\ 11235303Sbde # raddr is in the caller's frame following the caller's \n\ 11335303Sbde # caller's frame pointer. \n\ 11435303Sbde # \n\ 115129625Sbde pushq %rax \n\ 116129625Sbde pushq %rdx \n\ 117129625Sbde pushq %rcx \n\ 118129625Sbde pushq %rsi \n\ 119129625Sbde pushq %rdi \n\ 120129625Sbde pushq %r8 \n\ 121129625Sbde pushq %r9 \n\ 122129625Sbde movq 8(%rbp),%rdi \n\ 12346548Sbde.got_frompc: \n\ 12435303Sbde # \n\ 12535303Sbde # Our raddr is the caller's pc. \n\ 12635303Sbde # \n\ 127129625Sbde movq 7*8(%rsp),%rsi \n\ 12835303Sbde \n\ 129129625Sbde pushfq \n\ 13035303Sbde cli \n\ 13146548Sbde call " __XSTRING(CNAME(mcount)) " \n\ 132129625Sbde popfq \n\ 133129625Sbde popq %r9 \n\ 134129625Sbde popq %r8 \n\ 135129625Sbde popq %rdi \n\ 136129625Sbde popq %rsi \n\ 137129625Sbde popq %rcx \n\ 138129625Sbde popq %rdx \n\ 139129625Sbde popq %rax \n\ 14046548Sbde.mcount_exit: \n\ 14135303Sbde ret \n\ 14213107Sbde"); 143143063Sjoerg#else /* !__GNUCLIKE_ASM */ 144143063Sjoerg#error this file needs to be ported to your compiler 145143063Sjoerg#endif /* __GNUCLIKE_ASM */ 14613107Sbde 14713107Sbde#ifdef GUPROF 14813107Sbde/* 14946548Sbde * [.]mexitcount saves the return register(s), loads selfpc and calls 15013107Sbde * mexitcount(selfpc) to do the work. Someday it should be in a machine 15146548Sbde * dependent file together with cputime(), __mcount and [.]mcount. cputime() 15213107Sbde * can't just be put in machdep.c because it has to be compiled without -pg. 15313107Sbde */ 154143063Sjoerg#ifdef __GNUCLIKE_ASM 15535303Sbde__asm(" \n\ 15635303Sbde .text \n\ 15735303Sbde# \n\ 15846548Sbde# Dummy label to be seen when gprof -u hides [.]mexitcount. \n\ 15935303Sbde# \n\ 16050379Speter .p2align 4,0x90 \n\ 16135303Sbde .globl __mexitcount \n\ 16246548Sbde .type __mexitcount,@function \n\ 16335303Sbde__mexitcount: \n\ 16435303Sbde nop \n\ 16535303Sbde \n\ 16635303SbdeGMON_PROF_HIRES = 4 \n\ 16735303Sbde \n\ 16850379Speter .p2align 4,0x90 \n\ 16946548Sbde .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ 17046548Sbde" __XSTRING(HIDENAME(mexitcount)) ": \n\ 17199932Sbde .globl __cyg_profile_func_exit \n\ 17299932Sbde__cyg_profile_func_exit: \n\ 17346548Sbde cmpl $GMON_PROF_HIRES," __XSTRING(CNAME(_gmonparam)) "+GM_STATE \n\ 17446548Sbde jne .mexitcount_exit \n\ 175129625Sbde pushq %rax \n\ 176129625Sbde pushq %rdx \n\ 177129625Sbde pushq %rcx \n\ 178129625Sbde pushq %rsi \n\ 179129625Sbde pushq %rdi \n\ 180129625Sbde pushq %r8 \n\ 181129625Sbde pushq %r9 \n\ 182129625Sbde movq 7*8(%rsp),%rdi \n\ 183129625Sbde pushfq \n\ 18435303Sbde cli \n\ 18546548Sbde call " __XSTRING(CNAME(mexitcount)) " \n\ 186129625Sbde popfq \n\ 187129625Sbde popq %r9 \n\ 188129625Sbde popq %r8 \n\ 189129625Sbde popq %rdi \n\ 190129625Sbde popq %rsi \n\ 191129625Sbde popq %rcx \n\ 192129625Sbde popq %rdx \n\ 193129625Sbde popq %rax \n\ 19446548Sbde.mexitcount_exit: \n\ 19535303Sbde ret \n\ 19613107Sbde"); 197143063Sjoerg#else /* !__GNUCLIKE_ASM */ 198143063Sjoerg#error this file needs to be ported to your compiler 199143063Sjoerg#endif /* __GNUCLIKE_ASM */ 20013107Sbde 20113107Sbde/* 20213107Sbde * Return the time elapsed since the last call. The units are machine- 20313107Sbde * dependent. 20413107Sbde */ 20519000Sbdeint 20613107Sbdecputime() 20713107Sbde{ 20813107Sbde u_int count; 20919000Sbde int delta; 21041794Sbde#if (defined(I586_CPU) || defined(I686_CPU)) && !defined(SMP) && \ 21141794Sbde defined(PERFMON) && defined(I586_PMC_GUPROF) 21219000Sbde u_quad_t event_count; 21319000Sbde#endif 21419000Sbde u_char high, low; 21513107Sbde static u_int prev_count; 21613107Sbde 217129625Sbde#ifndef SMP 21832005Sphk if (cputime_clock == CPUTIME_CLOCK_TSC) { 219129744Sbde /* 220129744Sbde * Scale the TSC a little to make cputime()'s frequency 221129744Sbde * fit in an int, assuming that the TSC frequency fits 222129744Sbde * in a u_int. Use a fixed scale since dynamic scaling 223129744Sbde * would be slower and we can't really use the low bit 224129744Sbde * of precision. 225129744Sbde */ 226129744Sbde count = (u_int)rdtsc() & ~1u; 227129744Sbde delta = (int)(count - prev_count) >> 1; 22819000Sbde prev_count = count; 22919000Sbde return (delta); 23019000Sbde } 23131395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 23219000Sbde if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 23319000Sbde /* 23419000Sbde * XXX permon_read() should be inlined so that the 23519000Sbde * perfmon module doesn't need to be compiled with 23619000Sbde * profiling disabled and so that it is fast. 23719000Sbde */ 23819000Sbde perfmon_read(0, &event_count); 23919000Sbde 24019000Sbde count = (u_int)event_count; 24119000Sbde delta = (int)(count - prev_count); 24219000Sbde prev_count = count; 24319000Sbde return (delta); 24419000Sbde } 24531395Sbde#endif /* PERFMON && I586_PMC_GUPROF */ 246129625Sbde#endif /* !SMP */ 24719000Sbde 24813107Sbde /* 24913107Sbde * Read the current value of the 8254 timer counter 0. 25013107Sbde */ 25113107Sbde outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); 25213107Sbde low = inb(TIMER_CNTR0); 25319000Sbde high = inb(TIMER_CNTR0); 25419000Sbde count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; 25513107Sbde 25613107Sbde /* 25713107Sbde * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 25813107Sbde * While profiling is enabled, this routine is called at least twice 25913107Sbde * per timer reset (for mcounting and mexitcounting hardclock()), 26013107Sbde * so at most one reset has occurred since the last call, and one 26113107Sbde * has occurred iff the current count is larger than the previous 26213107Sbde * count. This allows counter underflow to be detected faster 26313107Sbde * than in microtime(). 26413107Sbde */ 26513107Sbde delta = prev_count - count; 26613107Sbde prev_count = count; 26713107Sbde if ((int) delta <= 0) 26819000Sbde return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); 26913107Sbde return (delta); 27013107Sbde} 27119000Sbde 27231395Sbdestatic int 27362573Sphksysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS) 27431395Sbde{ 27531395Sbde int clock; 27641794Sbde int error; 27741794Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 27831395Sbde int event; 27931395Sbde struct pmc pmc; 28041794Sbde#endif 28131395Sbde 28231395Sbde clock = cputime_clock; 28331395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 28431395Sbde if (clock == CPUTIME_CLOCK_I586_PMC) { 28531395Sbde pmc.pmc_val = cputime_clock_pmc_conf; 28631395Sbde clock += pmc.pmc_event; 28731395Sbde } 28831395Sbde#endif 28931395Sbde error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req); 29031395Sbde if (error == 0 && req->newptr != NULL) { 29131395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 29231395Sbde if (clock >= CPUTIME_CLOCK_I586_PMC) { 29331395Sbde event = clock - CPUTIME_CLOCK_I586_PMC; 29431395Sbde if (event >= 256) 29531395Sbde return (EINVAL); 29631395Sbde pmc.pmc_num = 0; 29731395Sbde pmc.pmc_event = event; 29831395Sbde pmc.pmc_unit = 0; 29931395Sbde pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR; 30031395Sbde pmc.pmc_mask = 0; 30131395Sbde cputime_clock_pmc_conf = pmc.pmc_val; 30231395Sbde cputime_clock = CPUTIME_CLOCK_I586_PMC; 30331395Sbde } else 30431395Sbde#endif 30531395Sbde { 30631395Sbde if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC) 30731395Sbde return (EINVAL); 30831395Sbde cputime_clock = clock; 30931395Sbde } 31031395Sbde } 31131395Sbde return (error); 31231395Sbde} 31331395Sbde 31431395SbdeSYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW, 31531395Sbde 0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", ""); 31631395Sbde 31719000Sbde/* 31819000Sbde * The start and stop routines need not be here since we turn off profiling 31919000Sbde * before calling them. They are here for convenience. 32019000Sbde */ 32119000Sbde 32219000Sbdevoid 32319000Sbdestartguprof(gp) 32419000Sbde struct gmonparam *gp; 32519000Sbde{ 32619000Sbde if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { 32719000Sbde cputime_clock = CPUTIME_CLOCK_I8254; 328129625Sbde#ifndef SMP 32932005Sphk if (tsc_freq != 0) 33032005Sphk cputime_clock = CPUTIME_CLOCK_TSC; 33119000Sbde#endif 33219000Sbde } 33319000Sbde gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; 334129625Sbde#ifndef SMP 33532005Sphk if (cputime_clock == CPUTIME_CLOCK_TSC) 336129744Sbde gp->profrate = tsc_freq >> 1; 33731395Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 33819000Sbde else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { 33919000Sbde if (perfmon_avail() && 34019000Sbde perfmon_setup(0, cputime_clock_pmc_conf) == 0) { 34119000Sbde if (perfmon_start(0) != 0) 34219000Sbde perfmon_fini(0); 34319000Sbde else { 34419000Sbde /* XXX 1 event == 1 us. */ 34519000Sbde gp->profrate = 1000000; 34619000Sbde 34719000Sbde saved_gmp = *gp; 34819000Sbde 34919000Sbde /* Zap overheads. They are invalid. */ 35019000Sbde gp->cputime_overhead = 0; 35119000Sbde gp->mcount_overhead = 0; 35219000Sbde gp->mcount_post_overhead = 0; 35319000Sbde gp->mcount_pre_overhead = 0; 35419000Sbde gp->mexitcount_overhead = 0; 35519000Sbde gp->mexitcount_post_overhead = 0; 35619000Sbde gp->mexitcount_pre_overhead = 0; 35719000Sbde 35819000Sbde cputime_clock_pmc_init = TRUE; 35919000Sbde } 36019000Sbde } 36119000Sbde } 36231395Sbde#endif /* PERFMON && I586_PMC_GUPROF */ 363129625Sbde#endif /* !SMP */ 36419000Sbde cputime_bias = 0; 36519000Sbde cputime(); 36619000Sbde} 36719000Sbde 36819000Sbdevoid 36919000Sbdestopguprof(gp) 37019000Sbde struct gmonparam *gp; 37119000Sbde{ 37219000Sbde#if defined(PERFMON) && defined(I586_PMC_GUPROF) 37319000Sbde if (cputime_clock_pmc_init) { 37419000Sbde *gp = saved_gmp; 37519000Sbde perfmon_fini(0); 37619000Sbde cputime_clock_pmc_init = FALSE; 37719000Sbde } 37819000Sbde#endif 37919000Sbde} 38019000Sbde 38119000Sbde#else /* !GUPROF */ 382143063Sjoerg#ifdef __GNUCLIKE_ASM 38335303Sbde__asm(" \n\ 38435303Sbde .text \n\ 38550379Speter .p2align 4,0x90 \n\ 38646548Sbde .globl " __XSTRING(HIDENAME(mexitcount)) " \n\ 38746548Sbde" __XSTRING(HIDENAME(mexitcount)) ": \n\ 38835303Sbde ret \n\ 38913107Sbde"); 390143063Sjoerg#else /* !__GNUCLIKE_ASM */ 391143063Sjoerg#error this file needs to be ported to your compiler 392143063Sjoerg#endif /* __GNUCLIKE_ASM */ 39313107Sbde#endif /* GUPROF */ 394