perfmon.c revision 92651
180708Sjake/*
280708Sjake * Copyright 1996 Massachusetts Institute of Technology
385586Sjake *
485586Sjake * Permission to use, copy, modify, and distribute this software and
580708Sjake * its documentation for any purpose and without fee is hereby
680708Sjake * granted, provided that both the above copyright notice and this
785586Sjake * permission notice appear in all copies, that both the above
885586Sjake * copyright notice and this permission notice appear in all
985586Sjake * supporting documentation, and that the name of M.I.T. not be used
1080708Sjake * in advertising or publicity pertaining to distribution of the
1180708Sjake * software without specific, written prior permission.  M.I.T. makes
1280708Sjake * no representations about the suitability of this software for any
1380708Sjake * purpose.  It is provided "as is" without express or implied
1480708Sjake * warranty.
1580708Sjake *
1680708Sjake * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
1780708Sjake * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
1885586Sjake * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
1985586Sjake * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
2085586Sjake * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
2185586Sjake * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2285586Sjake * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
2385586Sjake * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
2485586Sjake * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
2580708Sjake * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
2685586Sjake * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2785586Sjake * SUCH DAMAGE.
2885586Sjake *
2985586Sjake * $FreeBSD: head/sys/i386/i386/perfmon.c 92651 2002-03-19 06:45:25Z alc $
3085586Sjake */
3185586Sjake
3285586Sjake#include <sys/param.h>
3385586Sjake#include <sys/systm.h>
3485586Sjake#include <sys/conf.h>
3585586Sjake#include <sys/fcntl.h>
3685586Sjake#include <sys/kernel.h>
3780708Sjake
3885586Sjake#ifndef SMP
3980708Sjake#include <machine/cputypes.h>
4080708Sjake#endif
4180708Sjake#include <machine/clock.h>
4280708Sjake#include <machine/perfmon.h>
43100384Speter
4480708Sjakestatic int perfmon_inuse;
45102808Sjakestatic int perfmon_cpuok;
46102808Sjake#ifndef SMP
4780708Sjakestatic int msr_ctl[NPMC];
48100384Speter#endif
49100384Speterstatic int msr_pmc[NPMC];
50100384Speterstatic unsigned int ctl_shadow[NPMC];
51100384Speterstatic quad_t pmc_shadow[NPMC];	/* used when ctr is stopped on P5 */
52100384Speterstatic int (*writectl)(int);
53102808Sjake#ifndef SMP
54102808Sjakestatic int writectl5(int);
55102808Sjakestatic int writectl6(int);
56102808Sjake#endif
5780708Sjake
5880708Sjakestatic d_close_t perfmon_close;
5985586Sjakestatic d_open_t	perfmon_open;
6085586Sjakestatic d_ioctl_t perfmon_ioctl;
61100384Speter
/*
 * XXX perfmon_init_dev(void *) was split out of the perfmon_init() function.
 * This solves a problem for DEVFS users: the "perfmon" driver is loaded only
 * after the DEVFS subsystem has been kicked into action.  SI_ORDER_ANY makes
 * this the lowest-priority task, which guarantees the above.
 */
static void perfmon_init_dev(void *);
SYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL);
71100384Speter
72102555Sjake#define CDEV_MAJOR 2	/* We're really a minor of mem.c */
73102555Sjakestatic struct cdevsw perfmon_cdevsw = {
74102808Sjake	/* open */      perfmon_open,
75100384Speter	/* close */     perfmon_close,
76100384Speter	/* read */      noread,
77100384Speter	/* write */     nowrite,
78102808Sjake	/* ioctl */     perfmon_ioctl,
79102808Sjake	/* poll */      nopoll,
80102808Sjake	/* mmap */      nommap,
81102808Sjake	/* strategy */  nostrategy,
82102808Sjake	/* name */      "perfmon",
83102808Sjake	/* maj */       CDEV_MAJOR,
84102808Sjake	/* dump */      nodump,
85102808Sjake	/* psize */     nopsize,
86120422Speter	/* flags */     0,
87120422Speter};
88100384Speter
89100384Speter/*
90100384Speter * Must be called after cpu_class is set up.
91100384Speter */
92100384Spetervoid
93100384Speterperfmon_init(void)
94123742Speter{
95119015Sgordon#ifndef SMP
96123742Speter	switch(cpu_class) {
97123742Speter	case CPUCLASS_586:
98100384Speter		perfmon_cpuok = 1;
99100384Speter		msr_ctl[0] = 0x11;
100100384Speter		msr_ctl[1] = 0x11;
101100384Speter		msr_pmc[0] = 0x12;
102100384Speter		msr_pmc[1] = 0x13;
103100384Speter		writectl = writectl5;
104123742Speter		break;
105123742Speter	case CPUCLASS_686:
106123742Speter		perfmon_cpuok = 1;
107123742Speter		msr_ctl[0] = 0x186;
108123742Speter		msr_ctl[1] = 0x187;
109123742Speter		msr_pmc[0] = 0xc1;
110123742Speter		msr_pmc[1] = 0xc2;
111123742Speter		writectl = writectl6;
112123742Speter		break;
113123742Speter
114123742Speter	default:
115123742Speter		perfmon_cpuok = 0;
116123742Speter		break;
117123742Speter	}
118133464Smarcel#endif /* SMP */
119133464Smarcel}
120133464Smarcel
121133464Smarcelstatic void
122133464Smarcelperfmon_init_dev(dummy)
123133464Smarcel	void *dummy;
124133464Smarcel{
125133464Smarcel	make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon");
12685586Sjake}
12785586Sjake
12885586Sjakeint
12985586Sjakeperfmon_avail(void)
13085586Sjake{
13185586Sjake	return perfmon_cpuok;
13285586Sjake}
13385586Sjake
13485586Sjakeint
13585586Sjakeperfmon_setup(int pmc, unsigned int control)
13685586Sjake{
13785586Sjake	critical_t	savecrit;
13885586Sjake
13985586Sjake	if (pmc < 0 || pmc >= NPMC)
14085586Sjake		return EINVAL;
14185586Sjake
14285586Sjake	perfmon_inuse |= (1 << pmc);
14385586Sjake	control &= ~(PMCF_SYS_FLAGS << 16);
14485586Sjake	savecrit = cpu_critical_enter();
14585586Sjake	ctl_shadow[pmc] = control;
14685586Sjake	writectl(pmc);
14785586Sjake	wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
14885586Sjake	cpu_critical_exit(savecrit);
14985586Sjake	return 0;
15085586Sjake}
15185586Sjake
15285586Sjakeint
15385586Sjakeperfmon_get(int pmc, unsigned int *control)
15485586Sjake{
15585586Sjake	if (pmc < 0 || pmc >= NPMC)
15685586Sjake		return EINVAL;
15785586Sjake
158104072Sjake	if (perfmon_inuse & (1 << pmc)) {
15985586Sjake		*control = ctl_shadow[pmc];
16085586Sjake		return 0;
161104072Sjake	}
16285586Sjake	return EBUSY;		/* XXX reversed sense */
16385586Sjake}
16485586Sjake
16585586Sjakeint
16685586Sjakeperfmon_fini(int pmc)
16785586Sjake{
16885586Sjake	if (pmc < 0 || pmc >= NPMC)
16985586Sjake		return EINVAL;
17085586Sjake
171104072Sjake	if (perfmon_inuse & (1 << pmc)) {
172104072Sjake		perfmon_stop(pmc);
17385586Sjake		ctl_shadow[pmc] = 0;
17485586Sjake		perfmon_inuse &= ~(1 << pmc);
17585586Sjake		return 0;
17685586Sjake	}
17785586Sjake	return EBUSY;		/* XXX reversed sense */
17885586Sjake}
17985586Sjake
18085586Sjakeint
18185586Sjakeperfmon_start(int pmc)
18285586Sjake{
18385586Sjake	critical_t	savecrit;
18485586Sjake
18585586Sjake	if (pmc < 0 || pmc >= NPMC)
18685586Sjake		return EINVAL;
18785586Sjake
18885586Sjake	if (perfmon_inuse & (1 << pmc)) {
18985586Sjake		savecrit = cpu_critical_enter();
19085586Sjake		ctl_shadow[pmc] |= (PMCF_EN << 16);
19185586Sjake		wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
19285586Sjake		writectl(pmc);
19385586Sjake		cpu_critical_exit(savecrit);
19485586Sjake		return 0;
19585586Sjake	}
19685586Sjake	return EBUSY;
19785586Sjake}
19885586Sjake
19985586Sjakeint
20085586Sjakeperfmon_stop(int pmc)
20185586Sjake{
20285586Sjake	critical_t	savecrit;
20385586Sjake
20485586Sjake	if (pmc < 0 || pmc >= NPMC)
20585586Sjake		return EINVAL;
20685586Sjake
20785586Sjake	if (perfmon_inuse & (1 << pmc)) {
208104072Sjake		savecrit = cpu_critical_enter();
209104072Sjake		pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
210104072Sjake		ctl_shadow[pmc] &= ~(PMCF_EN << 16);
211104072Sjake		writectl(pmc);
212104072Sjake		cpu_critical_exit(savecrit);
213104072Sjake		return 0;
214104072Sjake	}
215104072Sjake	return EBUSY;
216104072Sjake}
217104072Sjake
218104072Sjakeint
219104072Sjakeperfmon_read(int pmc, quad_t *val)
220104072Sjake{
221104072Sjake	if (pmc < 0 || pmc >= NPMC)
222104072Sjake		return EINVAL;
223104072Sjake
22485586Sjake	if (perfmon_inuse & (1 << pmc)) {
22585586Sjake		if (ctl_shadow[pmc] & (PMCF_EN << 16))
22685586Sjake			*val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
22785586Sjake		else
22885586Sjake			*val = pmc_shadow[pmc];
22985586Sjake		return 0;
23085586Sjake	}
23185586Sjake
23285586Sjake	return EBUSY;
23385586Sjake}
23485586Sjake
23585586Sjakeint
23685586Sjakeperfmon_reset(int pmc)
23785586Sjake{
23885586Sjake	if (pmc < 0 || pmc >= NPMC)
23985586Sjake		return EINVAL;
24085586Sjake
24185586Sjake	if (perfmon_inuse & (1 << pmc)) {
24285586Sjake		wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
24385586Sjake		return 0;
24485586Sjake	}
24585586Sjake	return EBUSY;
24685586Sjake}
24785586Sjake
24885586Sjake#ifndef SMP
24985586Sjake/*
25085586Sjake * Unfortunately, the performance-monitoring registers are laid out
25185586Sjake * differently in the P5 and P6.  We keep everything in P6 format
25285586Sjake * internally (except for the event code), and convert to P5
25385586Sjake * format as needed on those CPUs.  The writectl function pointer
25485586Sjake * is set up to point to one of these functions by perfmon_init().
25585586Sjake */
25685586Sjakeint
25785586Sjakewritectl6(int pmc)
25885586Sjake{
25985586Sjake	if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) {
26085586Sjake		wrmsr(msr_ctl[pmc], 0);
26185586Sjake	} else {
262109605Sjake		wrmsr(msr_ctl[pmc], ctl_shadow[pmc]);
263129282Speter	}
264129282Speter	return 0;
265109605Sjake}
266109605Sjake
267109605Sjake#define	P5FLAG_P	0x200
268109605Sjake#define	P5FLAG_E	0x100
269109605Sjake#define	P5FLAG_USR	0x80
270109605Sjake#define	P5FLAG_OS	0x40
271109605Sjake
272109605Sjakeint
273109605Sjakewritectl5(int pmc)
274109605Sjake{
275109605Sjake	quad_t newval = 0;
276109605Sjake
277109605Sjake	if (ctl_shadow[1] & (PMCF_EN << 16)) {
278109605Sjake		if (ctl_shadow[1] & (PMCF_USR << 16))
279109605Sjake			newval |= P5FLAG_USR << 16;
280109605Sjake		if (ctl_shadow[1] & (PMCF_OS << 16))
281109605Sjake			newval |= P5FLAG_OS << 16;
282109605Sjake		if (!(ctl_shadow[1] & (PMCF_E << 16)))
283109605Sjake			newval |= P5FLAG_E << 16;
284109605Sjake		newval |= (ctl_shadow[1] & 0x3f) << 16;
28585586Sjake	}
28680708Sjake	if (ctl_shadow[0] & (PMCF_EN << 16)) {
287129282Speter		if (ctl_shadow[0] & (PMCF_USR << 16))
288129282Speter			newval |= P5FLAG_USR;
28980708Sjake		if (ctl_shadow[0] & (PMCF_OS << 16))
29085586Sjake			newval |= P5FLAG_OS;
291153504Smarcel		if (!(ctl_shadow[0] & (PMCF_E << 16)))
29285586Sjake			newval |= P5FLAG_E;
293153504Smarcel		newval |= ctl_shadow[0] & 0x3f;
29485586Sjake	}
29585586Sjake
29698635Smux	wrmsr(msr_ctl[0], newval);
29785586Sjake	return 0;		/* XXX should check for unimplemented bits */
29885586Sjake}
29985586Sjake#endif /* !SMP */
30085586Sjake
30185586Sjake/*
30285586Sjake * Now the user-mode interface, called from a subdevice of mem.c.
303153504Smarcel */
30485586Sjakestatic int writer;
30595410Smarcelstatic int writerpmc;
30685586Sjake
307109605Sjakestatic int
30885586Sjakeperfmon_open(dev_t dev, int flags, int fmt, struct thread *td)
30985586Sjake{
31085586Sjake	if (!perfmon_cpuok)
31185586Sjake		return ENXIO;
31285586Sjake
31385586Sjake	if (flags & FWRITE) {
31485586Sjake		if (writer) {
31585586Sjake			return EBUSY;
31685586Sjake		} else {
31785586Sjake			writer = 1;
31885586Sjake			writerpmc = 0;
31985586Sjake		}
320129282Speter	}
321107517Stmm	return 0;
322107517Stmm}
323107517Stmm
32485586Sjakestatic int
32585586Sjakeperfmon_close(dev_t dev, int flags, int fmt, struct thread *td)
32685586Sjake{
32785586Sjake	if (flags & FWRITE) {
32885586Sjake		int i;
32985586Sjake
33098635Smux		for (i = 0; i < NPMC; i++) {
33185586Sjake			if (writerpmc & (1 << i))
33285586Sjake				perfmon_fini(i);
33385586Sjake		}
33485586Sjake		writer = 0;
33585586Sjake	}
33685586Sjake	return 0;
33785586Sjake}
33885586Sjake
33985586Sjakestatic int
34085586Sjakeperfmon_ioctl(dev_t dev, u_long cmd, caddr_t param, int flags, struct thread *td)
34185586Sjake{
34285586Sjake	struct pmc *pmc;
34385586Sjake	struct pmc_data *pmcd;
34480708Sjake	struct pmc_tstamp *pmct;
34580708Sjake	int *ip;
346105469Smarcel	int rv;
347105469Smarcel
348105469Smarcel	switch(cmd) {
349105469Smarcel	case PMIOSETUP:
350105469Smarcel		if (!(flags & FWRITE))
351105469Smarcel			return EPERM;
352105469Smarcel		pmc = (struct pmc *)param;
353105469Smarcel
354105469Smarcel		rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val);
355105469Smarcel		if (!rv) {
356105469Smarcel			writerpmc |= (1 << pmc->pmc_num);
357105469Smarcel		}
358105469Smarcel		break;
359105469Smarcel
360	case PMIOGET:
361		pmc = (struct pmc *)param;
362		rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val);
363		break;
364
365	case PMIOSTART:
366		if (!(flags & FWRITE))
367			return EPERM;
368
369		ip = (int *)param;
370		rv = perfmon_start(*ip);
371		break;
372
373	case PMIOSTOP:
374		if (!(flags & FWRITE))
375			return EPERM;
376
377		ip = (int *)param;
378		rv = perfmon_stop(*ip);
379		break;
380
381	case PMIORESET:
382		if (!(flags & FWRITE))
383			return EPERM;
384
385		ip = (int *)param;
386		rv = perfmon_reset(*ip);
387		break;
388
389	case PMIOREAD:
390		pmcd = (struct pmc_data *)param;
391		rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value);
392		break;
393
394	case PMIOTSTAMP:
395		if (!tsc_freq) {
396			rv = ENOTTY;
397			break;
398		}
399		pmct = (struct pmc_tstamp *)param;
400		/* XXX interface loses precision. */
401		pmct->pmct_rate = tsc_freq / 1000000;
402		pmct->pmct_value = rdtsc();
403		rv = 0;
404		break;
405	default:
406		rv = ENOTTY;
407	}
408
409	return rv;
410}
411