/*-
 * Copyright (c) 1983, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if !defined(_KERNEL) && defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)mcount.c	8.1 (Berkeley) 6/4/93";
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/gmon.h>
#ifdef _KERNEL
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
void	bintr(void);
void	btrap(void);
void	eintr(void);
void	user(void);
#endif
#include <machine/atomic.h>

/*
 * mcount is called on entry to each function compiled with the profiling
 * switch set.  _mcount(), which is declared in a machine-dependent way
 * with _MCOUNT_DECL, does the actual work and is either inlined into a
 * C routine or called by an assembly stub.  In any case, this magic is
 * taken care of by the MCOUNT definition in <machine/profile.h>.
 *
 * _mcount updates data structures that represent traversals of the
 * program's call graph edges.  frompc and selfpc are the return
 * address and function address that represents the given call graph edge.
 *
 * Note: the original BSD code used the same variable (frompcindex) for
 * both frompcindex and frompc.  Any reasonable, modern compiler will
 * perform this optimization.
641573Srgrimes */ 65124180Snectar/* _mcount; may be static, inline, etc */ 66124180Snectar_MCOUNT_DECL(uintfptr_t frompc, uintfptr_t selfpc) 671573Srgrimes{ 6813107Sbde#ifdef GUPROF 6913107Sbde u_int delta; 7013107Sbde#endif 7190046Sobrien fptrdiff_t frompci; 7290046Sobrien u_short *frompcindex; 7390046Sobrien struct tostruct *top, *prevtop; 7490046Sobrien struct gmonparam *p; 7590046Sobrien long toindex; 7655206Speter#ifdef _KERNEL 7717879Sbde MCOUNT_DECL(s) 781573Srgrimes#endif 791573Srgrimes 801573Srgrimes p = &_gmonparam; 8113107Sbde#ifndef GUPROF /* XXX */ 821573Srgrimes /* 831573Srgrimes * check that we are profiling 841573Srgrimes * and that we aren't recursively invoked. 851573Srgrimes */ 861573Srgrimes if (p->state != GMON_PROF_ON) 871573Srgrimes return; 8813107Sbde#endif 8955206Speter#ifdef _KERNEL 9017879Sbde MCOUNT_ENTER(s); 911573Srgrimes#else 92145959Sdavidxu if (!atomic_cmpset_acq_int(&p->state, GMON_PROF_ON, GMON_PROF_BUSY)) 93145959Sdavidxu return; 941573Srgrimes#endif 9513107Sbde frompci = frompc - p->lowpc; 9613107Sbde 9755206Speter#ifdef _KERNEL 981573Srgrimes /* 9913107Sbde * When we are called from an exception handler, frompci may be 10013107Sbde * for a user address. Convert such frompci's to the index of 10113107Sbde * user() to merge all user counts. 10213107Sbde */ 10313107Sbde if (frompci >= p->textsize) { 10413107Sbde if (frompci + p->lowpc 10537629Sbde >= (uintfptr_t)(VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE)) 10613107Sbde goto done; 10737629Sbde frompci = (uintfptr_t)user - p->lowpc; 10813107Sbde if (frompci >= p->textsize) 10913107Sbde goto done; 11013107Sbde } 11155206Speter#endif 11213107Sbde 11313107Sbde#ifdef GUPROF 11413107Sbde if (p->state != GMON_PROF_HIRES) 11513107Sbde goto skip_guprof_stuff; 11613107Sbde /* 11713107Sbde * Look at the clock and add the count of clock cycles since the 11813107Sbde * clock was last looked at to a counter for frompc. 
This 11913107Sbde * solidifies the count for the function containing frompc and 12013107Sbde * effectively starts another clock for the current function. 12113107Sbde * The count for the new clock will be solidified when another 12213107Sbde * function call is made or the function returns. 12313107Sbde * 12413107Sbde * We use the usual sampling counters since they can be located 12513107Sbde * efficiently. 4-byte counters are usually necessary. 12613107Sbde * 12713107Sbde * There are many complications for subtracting the profiling 12813107Sbde * overheads from the counts for normal functions and adding 12913107Sbde * them to the counts for mcount(), mexitcount() and cputime(). 13013107Sbde * We attempt to handle fractional cycles, but the overheads 13113107Sbde * are usually underestimated because they are calibrated for 13213107Sbde * a simpler than usual setup. 13313107Sbde */ 13413107Sbde delta = cputime() - p->mcount_overhead; 13513107Sbde p->cputime_overhead_resid += p->cputime_overhead_frac; 13613107Sbde p->mcount_overhead_resid += p->mcount_overhead_frac; 13713107Sbde if ((int)delta < 0) 13813107Sbde *p->mcount_count += delta + p->mcount_overhead 13913107Sbde - p->cputime_overhead; 14013107Sbde else if (delta != 0) { 14113107Sbde if (p->cputime_overhead_resid >= CALIB_SCALE) { 14213107Sbde p->cputime_overhead_resid -= CALIB_SCALE; 14313107Sbde ++*p->cputime_count; 14413107Sbde --delta; 14513107Sbde } 14613107Sbde if (delta != 0) { 14713107Sbde if (p->mcount_overhead_resid >= CALIB_SCALE) { 14813107Sbde p->mcount_overhead_resid -= CALIB_SCALE; 14913107Sbde ++*p->mcount_count; 15013107Sbde --delta; 15113107Sbde } 15213107Sbde KCOUNT(p, frompci) += delta; 15313107Sbde } 15413107Sbde *p->mcount_count += p->mcount_overhead_sub; 15513107Sbde } 15613107Sbde *p->cputime_count += p->cputime_overhead; 15713107Sbdeskip_guprof_stuff: 15813107Sbde#endif /* GUPROF */ 15913107Sbde 16055206Speter#ifdef _KERNEL 16113107Sbde /* 16213107Sbde * When we are called from an 
exception handler, frompc is faked 16313107Sbde * to be for where the exception occurred. We've just solidified 16413107Sbde * the count for there. Now convert frompci to the index of btrap() 16513107Sbde * for trap handlers and bintr() for interrupt handlers to make 16613107Sbde * exceptions appear in the call graph as calls from btrap() and 16713107Sbde * bintr() instead of calls from all over. 16813107Sbde */ 16937629Sbde if ((uintfptr_t)selfpc >= (uintfptr_t)btrap 17037629Sbde && (uintfptr_t)selfpc < (uintfptr_t)eintr) { 17137629Sbde if ((uintfptr_t)selfpc >= (uintfptr_t)bintr) 17237629Sbde frompci = (uintfptr_t)bintr - p->lowpc; 17313107Sbde else 17437629Sbde frompci = (uintfptr_t)btrap - p->lowpc; 17513107Sbde } 17655206Speter#endif 17713107Sbde 17813107Sbde /* 17913107Sbde * check that frompc is a reasonable pc value. 1801573Srgrimes * for example: signal catchers get called from the stack, 1811573Srgrimes * not from text space. too bad. 1821573Srgrimes */ 18313107Sbde if (frompci >= p->textsize) 1841573Srgrimes goto done; 1851573Srgrimes 18613107Sbde frompcindex = &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))]; 1871573Srgrimes toindex = *frompcindex; 1881573Srgrimes if (toindex == 0) { 1891573Srgrimes /* 1901573Srgrimes * first time traversing this arc 1911573Srgrimes */ 1921573Srgrimes toindex = ++p->tos[0].link; 1931573Srgrimes if (toindex >= p->tolimit) 1941573Srgrimes /* halt further profiling */ 1951573Srgrimes goto overflow; 1961573Srgrimes 1971573Srgrimes *frompcindex = toindex; 1981573Srgrimes top = &p->tos[toindex]; 1991573Srgrimes top->selfpc = selfpc; 2001573Srgrimes top->count = 1; 2011573Srgrimes top->link = 0; 2021573Srgrimes goto done; 2031573Srgrimes } 2041573Srgrimes top = &p->tos[toindex]; 2051573Srgrimes if (top->selfpc == selfpc) { 2061573Srgrimes /* 2071573Srgrimes * arc at front of chain; usual case. 
2081573Srgrimes */ 2091573Srgrimes top->count++; 2101573Srgrimes goto done; 2111573Srgrimes } 2121573Srgrimes /* 2131573Srgrimes * have to go looking down chain for it. 2141573Srgrimes * top points to what we are looking at, 2151573Srgrimes * prevtop points to previous top. 2161573Srgrimes * we know it is not at the head of the chain. 2171573Srgrimes */ 2181573Srgrimes for (; /* goto done */; ) { 2191573Srgrimes if (top->link == 0) { 2201573Srgrimes /* 2211573Srgrimes * top is end of the chain and none of the chain 2221573Srgrimes * had top->selfpc == selfpc. 2231573Srgrimes * so we allocate a new tostruct 2241573Srgrimes * and link it to the head of the chain. 2251573Srgrimes */ 2261573Srgrimes toindex = ++p->tos[0].link; 2271573Srgrimes if (toindex >= p->tolimit) 2281573Srgrimes goto overflow; 2291573Srgrimes 2301573Srgrimes top = &p->tos[toindex]; 2311573Srgrimes top->selfpc = selfpc; 2321573Srgrimes top->count = 1; 2331573Srgrimes top->link = *frompcindex; 2341573Srgrimes *frompcindex = toindex; 2351573Srgrimes goto done; 2361573Srgrimes } 2371573Srgrimes /* 2381573Srgrimes * otherwise, check the next arc on the chain. 2391573Srgrimes */ 2401573Srgrimes prevtop = top; 2411573Srgrimes top = &p->tos[top->link]; 2421573Srgrimes if (top->selfpc == selfpc) { 2431573Srgrimes /* 2441573Srgrimes * there it is. 2451573Srgrimes * increment its count 2461573Srgrimes * move it to the head of the chain. 
2471573Srgrimes */ 2481573Srgrimes top->count++; 2491573Srgrimes toindex = prevtop->link; 2501573Srgrimes prevtop->link = top->link; 2511573Srgrimes top->link = *frompcindex; 2521573Srgrimes *frompcindex = toindex; 2531573Srgrimes goto done; 2541573Srgrimes } 2558870Srgrimes 2561573Srgrimes } 2571573Srgrimesdone: 25855206Speter#ifdef _KERNEL 25917879Sbde MCOUNT_EXIT(s); 2601573Srgrimes#else 261145959Sdavidxu atomic_store_rel_int(&p->state, GMON_PROF_ON); 2621573Srgrimes#endif 2631573Srgrimes return; 2641573Srgrimesoverflow: 265145959Sdavidxu atomic_store_rel_int(&p->state, GMON_PROF_ERROR); 26655206Speter#ifdef _KERNEL 26717879Sbde MCOUNT_EXIT(s); 2681573Srgrimes#endif 2691573Srgrimes return; 2701573Srgrimes} 2711573Srgrimes 2721573Srgrimes/* 2731573Srgrimes * Actual definition of mcount function. Defined in <machine/profile.h>, 2741573Srgrimes * which is included by <sys/gmon.h>. 2751573Srgrimes */ 2761573SrgrimesMCOUNT 27713107Sbde 27813107Sbde#ifdef GUPROF 27913107Sbdevoid 28013107Sbdemexitcount(selfpc) 28137629Sbde uintfptr_t selfpc; 28213107Sbde{ 28313107Sbde struct gmonparam *p; 28437629Sbde uintfptr_t selfpcdiff; 28513107Sbde 28613107Sbde p = &_gmonparam; 28737629Sbde selfpcdiff = selfpc - (uintfptr_t)p->lowpc; 28813107Sbde if (selfpcdiff < p->textsize) { 28913107Sbde u_int delta; 29013107Sbde 29113107Sbde /* 29213107Sbde * Solidify the count for the current function. 
29313107Sbde */ 29413107Sbde delta = cputime() - p->mexitcount_overhead; 29513107Sbde p->cputime_overhead_resid += p->cputime_overhead_frac; 29613107Sbde p->mexitcount_overhead_resid += p->mexitcount_overhead_frac; 29713107Sbde if ((int)delta < 0) 29813107Sbde *p->mexitcount_count += delta + p->mexitcount_overhead 29913107Sbde - p->cputime_overhead; 30013107Sbde else if (delta != 0) { 30113107Sbde if (p->cputime_overhead_resid >= CALIB_SCALE) { 30213107Sbde p->cputime_overhead_resid -= CALIB_SCALE; 30313107Sbde ++*p->cputime_count; 30413107Sbde --delta; 30513107Sbde } 30613107Sbde if (delta != 0) { 30713107Sbde if (p->mexitcount_overhead_resid 30813107Sbde >= CALIB_SCALE) { 30913107Sbde p->mexitcount_overhead_resid 31013107Sbde -= CALIB_SCALE; 31113107Sbde ++*p->mexitcount_count; 31213107Sbde --delta; 31313107Sbde } 31413107Sbde KCOUNT(p, selfpcdiff) += delta; 31513107Sbde } 31613107Sbde *p->mexitcount_count += p->mexitcount_overhead_sub; 31713107Sbde } 31813107Sbde *p->cputime_count += p->cputime_overhead; 31913107Sbde } 32013107Sbde} 32113107Sbde#endif /* GUPROF */ 322