/*-
 * Copyright (c) 1983, 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
291573Srgrimes
30136582Sobrien#if !defined(_KERNEL) && defined(LIBC_SCCS) && !defined(lint)
311573Srgrimesstatic char sccsid[] = "@(#)mcount.c	8.1 (Berkeley) 6/4/93";
321573Srgrimes#endif
3390046Sobrien#include <sys/cdefs.h>
3490046Sobrien__FBSDID("$FreeBSD$");
351573Srgrimes
361573Srgrimes#include <sys/param.h>
371573Srgrimes#include <sys/gmon.h>
3855206Speter#ifdef _KERNEL
3913107Sbde#include <sys/systm.h>
4013107Sbde#include <vm/vm.h>
4113107Sbde#include <vm/vm_param.h>
4213107Sbde#include <vm/pmap.h>
4390046Sobrienvoid	bintr(void);
4490046Sobrienvoid	btrap(void);
4590046Sobrienvoid	eintr(void);
4690046Sobrienvoid	user(void);
472800Spaul#endif
48145959Sdavidxu#include <machine/atomic.h>
491573Srgrimes
501573Srgrimes/*
511573Srgrimes * mcount is called on entry to each function compiled with the profiling
521573Srgrimes * switch set.  _mcount(), which is declared in a machine-dependent way
531573Srgrimes * with _MCOUNT_DECL, does the actual work and is either inlined into a
541573Srgrimes * C routine or called by an assembly stub.  In any case, this magic is
551573Srgrimes * taken care of by the MCOUNT definition in <machine/profile.h>.
561573Srgrimes *
571573Srgrimes * _mcount updates data structures that represent traversals of the
581573Srgrimes * program's call graph edges.  frompc and selfpc are the return
591573Srgrimes * address and function address that represents the given call graph edge.
608870Srgrimes *
611573Srgrimes * Note: the original BSD code used the same variable (frompcindex) for
621573Srgrimes * both frompcindex and frompc.  Any reasonable, modern compiler will
631573Srgrimes * perform this optimization.
641573Srgrimes */
65124180Snectar/* _mcount; may be static, inline, etc */
66124180Snectar_MCOUNT_DECL(uintfptr_t frompc, uintfptr_t selfpc)
671573Srgrimes{
6813107Sbde#ifdef GUPROF
6913107Sbde	u_int delta;
7013107Sbde#endif
7190046Sobrien	fptrdiff_t frompci;
7290046Sobrien	u_short *frompcindex;
7390046Sobrien	struct tostruct *top, *prevtop;
7490046Sobrien	struct gmonparam *p;
7590046Sobrien	long toindex;
7655206Speter#ifdef _KERNEL
7717879Sbde	MCOUNT_DECL(s)
781573Srgrimes#endif
791573Srgrimes
801573Srgrimes	p = &_gmonparam;
8113107Sbde#ifndef GUPROF			/* XXX */
821573Srgrimes	/*
831573Srgrimes	 * check that we are profiling
841573Srgrimes	 * and that we aren't recursively invoked.
851573Srgrimes	 */
861573Srgrimes	if (p->state != GMON_PROF_ON)
871573Srgrimes		return;
8813107Sbde#endif
8955206Speter#ifdef _KERNEL
9017879Sbde	MCOUNT_ENTER(s);
911573Srgrimes#else
92145959Sdavidxu	if (!atomic_cmpset_acq_int(&p->state, GMON_PROF_ON, GMON_PROF_BUSY))
93145959Sdavidxu		return;
941573Srgrimes#endif
9513107Sbde	frompci = frompc - p->lowpc;
9613107Sbde
9755206Speter#ifdef _KERNEL
981573Srgrimes	/*
9913107Sbde	 * When we are called from an exception handler, frompci may be
10013107Sbde	 * for a user address.  Convert such frompci's to the index of
10113107Sbde	 * user() to merge all user counts.
10213107Sbde	 */
10313107Sbde	if (frompci >= p->textsize) {
10413107Sbde		if (frompci + p->lowpc
10537629Sbde		    >= (uintfptr_t)(VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE))
10613107Sbde			goto done;
10737629Sbde		frompci = (uintfptr_t)user - p->lowpc;
10813107Sbde		if (frompci >= p->textsize)
10913107Sbde		    goto done;
11013107Sbde	}
11155206Speter#endif
11213107Sbde
11313107Sbde#ifdef GUPROF
11413107Sbde	if (p->state != GMON_PROF_HIRES)
11513107Sbde		goto skip_guprof_stuff;
11613107Sbde	/*
11713107Sbde	 * Look at the clock and add the count of clock cycles since the
11813107Sbde	 * clock was last looked at to a counter for frompc.  This
11913107Sbde	 * solidifies the count for the function containing frompc and
12013107Sbde	 * effectively starts another clock for the current function.
12113107Sbde	 * The count for the new clock will be solidified when another
12213107Sbde	 * function call is made or the function returns.
12313107Sbde	 *
12413107Sbde	 * We use the usual sampling counters since they can be located
12513107Sbde	 * efficiently.  4-byte counters are usually necessary.
12613107Sbde	 *
12713107Sbde	 * There are many complications for subtracting the profiling
12813107Sbde	 * overheads from the counts for normal functions and adding
12913107Sbde	 * them to the counts for mcount(), mexitcount() and cputime().
13013107Sbde	 * We attempt to handle fractional cycles, but the overheads
13113107Sbde	 * are usually underestimated because they are calibrated for
13213107Sbde	 * a simpler than usual setup.
13313107Sbde	 */
13413107Sbde	delta = cputime() - p->mcount_overhead;
13513107Sbde	p->cputime_overhead_resid += p->cputime_overhead_frac;
13613107Sbde	p->mcount_overhead_resid += p->mcount_overhead_frac;
13713107Sbde	if ((int)delta < 0)
13813107Sbde		*p->mcount_count += delta + p->mcount_overhead
13913107Sbde				    - p->cputime_overhead;
14013107Sbde	else if (delta != 0) {
14113107Sbde		if (p->cputime_overhead_resid >= CALIB_SCALE) {
14213107Sbde			p->cputime_overhead_resid -= CALIB_SCALE;
14313107Sbde			++*p->cputime_count;
14413107Sbde			--delta;
14513107Sbde		}
14613107Sbde		if (delta != 0) {
14713107Sbde			if (p->mcount_overhead_resid >= CALIB_SCALE) {
14813107Sbde				p->mcount_overhead_resid -= CALIB_SCALE;
14913107Sbde				++*p->mcount_count;
15013107Sbde				--delta;
15113107Sbde			}
15213107Sbde			KCOUNT(p, frompci) += delta;
15313107Sbde		}
15413107Sbde		*p->mcount_count += p->mcount_overhead_sub;
15513107Sbde	}
15613107Sbde	*p->cputime_count += p->cputime_overhead;
15713107Sbdeskip_guprof_stuff:
15813107Sbde#endif /* GUPROF */
15913107Sbde
16055206Speter#ifdef _KERNEL
16113107Sbde	/*
16213107Sbde	 * When we are called from an exception handler, frompc is faked
16313107Sbde	 * to be for where the exception occurred.  We've just solidified
16413107Sbde	 * the count for there.  Now convert frompci to the index of btrap()
16513107Sbde	 * for trap handlers and bintr() for interrupt handlers to make
16613107Sbde	 * exceptions appear in the call graph as calls from btrap() and
16713107Sbde	 * bintr() instead of calls from all over.
16813107Sbde	 */
16937629Sbde	if ((uintfptr_t)selfpc >= (uintfptr_t)btrap
17037629Sbde	    && (uintfptr_t)selfpc < (uintfptr_t)eintr) {
17137629Sbde		if ((uintfptr_t)selfpc >= (uintfptr_t)bintr)
17237629Sbde			frompci = (uintfptr_t)bintr - p->lowpc;
17313107Sbde		else
17437629Sbde			frompci = (uintfptr_t)btrap - p->lowpc;
17513107Sbde	}
17655206Speter#endif
17713107Sbde
17813107Sbde	/*
17913107Sbde	 * check that frompc is a reasonable pc value.
1801573Srgrimes	 * for example:	signal catchers get called from the stack,
1811573Srgrimes	 *		not from text space.  too bad.
1821573Srgrimes	 */
18313107Sbde	if (frompci >= p->textsize)
1841573Srgrimes		goto done;
1851573Srgrimes
18613107Sbde	frompcindex = &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))];
1871573Srgrimes	toindex = *frompcindex;
1881573Srgrimes	if (toindex == 0) {
1891573Srgrimes		/*
1901573Srgrimes		 *	first time traversing this arc
1911573Srgrimes		 */
1921573Srgrimes		toindex = ++p->tos[0].link;
1931573Srgrimes		if (toindex >= p->tolimit)
1941573Srgrimes			/* halt further profiling */
1951573Srgrimes			goto overflow;
1961573Srgrimes
1971573Srgrimes		*frompcindex = toindex;
1981573Srgrimes		top = &p->tos[toindex];
1991573Srgrimes		top->selfpc = selfpc;
2001573Srgrimes		top->count = 1;
2011573Srgrimes		top->link = 0;
2021573Srgrimes		goto done;
2031573Srgrimes	}
2041573Srgrimes	top = &p->tos[toindex];
2051573Srgrimes	if (top->selfpc == selfpc) {
2061573Srgrimes		/*
2071573Srgrimes		 * arc at front of chain; usual case.
2081573Srgrimes		 */
2091573Srgrimes		top->count++;
2101573Srgrimes		goto done;
2111573Srgrimes	}
2121573Srgrimes	/*
2131573Srgrimes	 * have to go looking down chain for it.
2141573Srgrimes	 * top points to what we are looking at,
2151573Srgrimes	 * prevtop points to previous top.
2161573Srgrimes	 * we know it is not at the head of the chain.
2171573Srgrimes	 */
2181573Srgrimes	for (; /* goto done */; ) {
2191573Srgrimes		if (top->link == 0) {
2201573Srgrimes			/*
2211573Srgrimes			 * top is end of the chain and none of the chain
2221573Srgrimes			 * had top->selfpc == selfpc.
2231573Srgrimes			 * so we allocate a new tostruct
2241573Srgrimes			 * and link it to the head of the chain.
2251573Srgrimes			 */
2261573Srgrimes			toindex = ++p->tos[0].link;
2271573Srgrimes			if (toindex >= p->tolimit)
2281573Srgrimes				goto overflow;
2291573Srgrimes
2301573Srgrimes			top = &p->tos[toindex];
2311573Srgrimes			top->selfpc = selfpc;
2321573Srgrimes			top->count = 1;
2331573Srgrimes			top->link = *frompcindex;
2341573Srgrimes			*frompcindex = toindex;
2351573Srgrimes			goto done;
2361573Srgrimes		}
2371573Srgrimes		/*
2381573Srgrimes		 * otherwise, check the next arc on the chain.
2391573Srgrimes		 */
2401573Srgrimes		prevtop = top;
2411573Srgrimes		top = &p->tos[top->link];
2421573Srgrimes		if (top->selfpc == selfpc) {
2431573Srgrimes			/*
2441573Srgrimes			 * there it is.
2451573Srgrimes			 * increment its count
2461573Srgrimes			 * move it to the head of the chain.
2471573Srgrimes			 */
2481573Srgrimes			top->count++;
2491573Srgrimes			toindex = prevtop->link;
2501573Srgrimes			prevtop->link = top->link;
2511573Srgrimes			top->link = *frompcindex;
2521573Srgrimes			*frompcindex = toindex;
2531573Srgrimes			goto done;
2541573Srgrimes		}
2558870Srgrimes
2561573Srgrimes	}
2571573Srgrimesdone:
25855206Speter#ifdef _KERNEL
25917879Sbde	MCOUNT_EXIT(s);
2601573Srgrimes#else
261145959Sdavidxu	atomic_store_rel_int(&p->state, GMON_PROF_ON);
2621573Srgrimes#endif
2631573Srgrimes	return;
2641573Srgrimesoverflow:
265145959Sdavidxu	atomic_store_rel_int(&p->state, GMON_PROF_ERROR);
26655206Speter#ifdef _KERNEL
26717879Sbde	MCOUNT_EXIT(s);
2681573Srgrimes#endif
2691573Srgrimes	return;
2701573Srgrimes}
2711573Srgrimes
2721573Srgrimes/*
2731573Srgrimes * Actual definition of mcount function.  Defined in <machine/profile.h>,
2741573Srgrimes * which is included by <sys/gmon.h>.
2751573Srgrimes */
2761573SrgrimesMCOUNT
27713107Sbde
27813107Sbde#ifdef GUPROF
27913107Sbdevoid
28013107Sbdemexitcount(selfpc)
28137629Sbde	uintfptr_t selfpc;
28213107Sbde{
28313107Sbde	struct gmonparam *p;
28437629Sbde	uintfptr_t selfpcdiff;
28513107Sbde
28613107Sbde	p = &_gmonparam;
28737629Sbde	selfpcdiff = selfpc - (uintfptr_t)p->lowpc;
28813107Sbde	if (selfpcdiff < p->textsize) {
28913107Sbde		u_int delta;
29013107Sbde
29113107Sbde		/*
29213107Sbde		 * Solidify the count for the current function.
29313107Sbde		 */
29413107Sbde		delta = cputime() - p->mexitcount_overhead;
29513107Sbde		p->cputime_overhead_resid += p->cputime_overhead_frac;
29613107Sbde		p->mexitcount_overhead_resid += p->mexitcount_overhead_frac;
29713107Sbde		if ((int)delta < 0)
29813107Sbde			*p->mexitcount_count += delta + p->mexitcount_overhead
29913107Sbde						- p->cputime_overhead;
30013107Sbde		else if (delta != 0) {
30113107Sbde			if (p->cputime_overhead_resid >= CALIB_SCALE) {
30213107Sbde				p->cputime_overhead_resid -= CALIB_SCALE;
30313107Sbde				++*p->cputime_count;
30413107Sbde				--delta;
30513107Sbde			}
30613107Sbde			if (delta != 0) {
30713107Sbde				if (p->mexitcount_overhead_resid
30813107Sbde				    >= CALIB_SCALE) {
30913107Sbde					p->mexitcount_overhead_resid
31013107Sbde					    -= CALIB_SCALE;
31113107Sbde					++*p->mexitcount_count;
31213107Sbde					--delta;
31313107Sbde				}
31413107Sbde				KCOUNT(p, selfpcdiff) += delta;
31513107Sbde			}
31613107Sbde			*p->mexitcount_count += p->mexitcount_overhead_sub;
31713107Sbde		}
31813107Sbde		*p->cputime_count += p->cputime_overhead;
31913107Sbde	}
32013107Sbde}
32113107Sbde#endif /* GUPROF */
322