perfmon.c revision 65557
1/*
2 * Copyright 1996 Massachusetts Institute of Technology
3 *
4 * Permission to use, copy, modify, and distribute this software and
5 * its documentation for any purpose and without fee is hereby
6 * granted, provided that both the above copyright notice and this
7 * permission notice appear in all copies, that both the above
8 * copyright notice and this permission notice appear in all
9 * supporting documentation, and that the name of M.I.T. not be used
10 * in advertising or publicity pertaining to distribution of the
11 * software without specific, written prior permission.  M.I.T. makes
12 * no representations about the suitability of this software for any
13 * purpose.  It is provided "as is" without express or implied
14 * warranty.
15 *
16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/i386/i386/perfmon.c 65557 2000-09-07 01:33:02Z jasone $
30 */
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/fcntl.h>
36
37#ifndef SMP
38#include <machine/cputypes.h>
39#endif
40#include <machine/clock.h>
41#include <machine/perfmon.h>
42
43static int perfmon_inuse;
44static int perfmon_cpuok;
45#ifndef SMP
46static int msr_ctl[NPMC];
47#endif
48static int msr_pmc[NPMC];
49static unsigned int ctl_shadow[NPMC];
50static quad_t pmc_shadow[NPMC];	/* used when ctr is stopped on P5 */
51static int (*writectl)(int);
52#ifndef SMP
53static int writectl5(int);
54static int writectl6(int);
55#endif
56
57static d_close_t perfmon_close;
58static d_open_t	perfmon_open;
59static d_ioctl_t perfmon_ioctl;
60
61#define CDEV_MAJOR 2	/* We're really a minor of mem.c */
62static struct cdevsw perfmon_cdevsw = {
63	/* open */      perfmon_open,
64	/* close */     perfmon_close,
65	/* read */      noread,
66	/* write */     nowrite,
67	/* ioctl */     perfmon_ioctl,
68	/* poll */      nopoll,
69	/* mmap */      nommap,
70	/* strategy */  nostrategy,
71	/* name */      "perfmon",
72	/* maj */       CDEV_MAJOR,
73	/* dump */      nodump,
74	/* psize */     nopsize,
75	/* flags */     0,
76	/* bmaj */      -1
77};
78
79/*
80 * Must be called after cpu_class is set up.
81 */
82void
83perfmon_init(void)
84{
85#ifndef SMP
86	switch(cpu_class) {
87	case CPUCLASS_586:
88		perfmon_cpuok = 1;
89		msr_ctl[0] = 0x11;
90		msr_ctl[1] = 0x11;
91		msr_pmc[0] = 0x12;
92		msr_pmc[1] = 0x13;
93		writectl = writectl5;
94		break;
95	case CPUCLASS_686:
96		perfmon_cpuok = 1;
97		msr_ctl[0] = 0x186;
98		msr_ctl[1] = 0x187;
99		msr_pmc[0] = 0xc1;
100		msr_pmc[1] = 0xc2;
101		writectl = writectl6;
102		break;
103
104	default:
105		perfmon_cpuok = 0;
106		break;
107	}
108#endif /* SMP */
109	make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon");
110}
111
112int
113perfmon_avail(void)
114{
115	return perfmon_cpuok;
116}
117
118int
119perfmon_setup(int pmc, unsigned int control)
120{
121	int	intrstate;
122
123	if (pmc < 0 || pmc >= NPMC)
124		return EINVAL;
125
126	perfmon_inuse |= (1 << pmc);
127	control &= ~(PMCF_SYS_FLAGS << 16);
128	intrstate = save_intr();
129	disable_intr();
130	ctl_shadow[pmc] = control;
131	writectl(pmc);
132	wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
133	restore_intr(intrstate);
134	return 0;
135}
136
137int
138perfmon_get(int pmc, unsigned int *control)
139{
140	if (pmc < 0 || pmc >= NPMC)
141		return EINVAL;
142
143	if (perfmon_inuse & (1 << pmc)) {
144		*control = ctl_shadow[pmc];
145		return 0;
146	}
147	return EBUSY;		/* XXX reversed sense */
148}
149
150int
151perfmon_fini(int pmc)
152{
153	if (pmc < 0 || pmc >= NPMC)
154		return EINVAL;
155
156	if (perfmon_inuse & (1 << pmc)) {
157		perfmon_stop(pmc);
158		ctl_shadow[pmc] = 0;
159		perfmon_inuse &= ~(1 << pmc);
160		return 0;
161	}
162	return EBUSY;		/* XXX reversed sense */
163}
164
165int
166perfmon_start(int pmc)
167{
168	int	intrstate;
169
170	if (pmc < 0 || pmc >= NPMC)
171		return EINVAL;
172
173	if (perfmon_inuse & (1 << pmc)) {
174		intrstate = save_intr();
175		disable_intr();
176		ctl_shadow[pmc] |= (PMCF_EN << 16);
177		wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
178		writectl(pmc);
179		restore_intr(intrstate);
180		return 0;
181	}
182	return EBUSY;
183}
184
185int
186perfmon_stop(int pmc)
187{
188	int	intrstate;
189
190	if (pmc < 0 || pmc >= NPMC)
191		return EINVAL;
192
193	if (perfmon_inuse & (1 << pmc)) {
194		intrstate = save_intr();
195		disable_intr();
196		pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
197		ctl_shadow[pmc] &= ~(PMCF_EN << 16);
198		writectl(pmc);
199		restore_intr(intrstate);
200		return 0;
201	}
202	return EBUSY;
203}
204
205int
206perfmon_read(int pmc, quad_t *val)
207{
208	if (pmc < 0 || pmc >= NPMC)
209		return EINVAL;
210
211	if (perfmon_inuse & (1 << pmc)) {
212		if (ctl_shadow[pmc] & (PMCF_EN << 16))
213			*val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
214		else
215			*val = pmc_shadow[pmc];
216		return 0;
217	}
218
219	return EBUSY;
220}
221
222int
223perfmon_reset(int pmc)
224{
225	if (pmc < 0 || pmc >= NPMC)
226		return EINVAL;
227
228	if (perfmon_inuse & (1 << pmc)) {
229		wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
230		return 0;
231	}
232	return EBUSY;
233}
234
235#ifndef SMP
236/*
237 * Unfortunately, the performance-monitoring registers are laid out
238 * differently in the P5 and P6.  We keep everything in P6 format
239 * internally (except for the event code), and convert to P5
240 * format as needed on those CPUs.  The writectl function pointer
241 * is set up to point to one of these functions by perfmon_init().
242 */
243int
244writectl6(int pmc)
245{
246	if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) {
247		wrmsr(msr_ctl[pmc], 0);
248	} else {
249		wrmsr(msr_ctl[pmc], ctl_shadow[pmc]);
250	}
251	return 0;
252}
253
254#define	P5FLAG_P	0x200
255#define	P5FLAG_E	0x100
256#define	P5FLAG_USR	0x80
257#define	P5FLAG_OS	0x40
258
259int
260writectl5(int pmc)
261{
262	quad_t newval = 0;
263
264	if (ctl_shadow[1] & (PMCF_EN << 16)) {
265		if (ctl_shadow[1] & (PMCF_USR << 16))
266			newval |= P5FLAG_USR << 16;
267		if (ctl_shadow[1] & (PMCF_OS << 16))
268			newval |= P5FLAG_OS << 16;
269		if (!(ctl_shadow[1] & (PMCF_E << 16)))
270			newval |= P5FLAG_E << 16;
271		newval |= (ctl_shadow[1] & 0x3f) << 16;
272	}
273	if (ctl_shadow[0] & (PMCF_EN << 16)) {
274		if (ctl_shadow[0] & (PMCF_USR << 16))
275			newval |= P5FLAG_USR;
276		if (ctl_shadow[0] & (PMCF_OS << 16))
277			newval |= P5FLAG_OS;
278		if (!(ctl_shadow[0] & (PMCF_E << 16)))
279			newval |= P5FLAG_E;
280		newval |= ctl_shadow[0] & 0x3f;
281	}
282
283	wrmsr(msr_ctl[0], newval);
284	return 0;		/* XXX should check for unimplemented bits */
285}
286#endif /* !SMP */
287
288/*
289 * Now the user-mode interface, called from a subdevice of mem.c.
290 */
291static int writer;
292static int writerpmc;
293
294static int
295perfmon_open(dev_t dev, int flags, int fmt, struct proc *p)
296{
297	if (!perfmon_cpuok)
298		return ENXIO;
299
300	if (flags & FWRITE) {
301		if (writer) {
302			return EBUSY;
303		} else {
304			writer = 1;
305			writerpmc = 0;
306		}
307	}
308	return 0;
309}
310
311static int
312perfmon_close(dev_t dev, int flags, int fmt, struct proc *p)
313{
314	if (flags & FWRITE) {
315		int i;
316
317		for (i = 0; i < NPMC; i++) {
318			if (writerpmc & (1 << i))
319				perfmon_fini(i);
320		}
321		writer = 0;
322	}
323	return 0;
324}
325
326static int
327perfmon_ioctl(dev_t dev, u_long cmd, caddr_t param, int flags, struct proc *p)
328{
329	struct pmc *pmc;
330	struct pmc_data *pmcd;
331	struct pmc_tstamp *pmct;
332	int *ip;
333	int rv;
334
335	switch(cmd) {
336	case PMIOSETUP:
337		if (!(flags & FWRITE))
338			return EPERM;
339		pmc = (struct pmc *)param;
340
341		rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val);
342		if (!rv) {
343			writerpmc |= (1 << pmc->pmc_num);
344		}
345		break;
346
347	case PMIOGET:
348		pmc = (struct pmc *)param;
349		rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val);
350		break;
351
352	case PMIOSTART:
353		if (!(flags & FWRITE))
354			return EPERM;
355
356		ip = (int *)param;
357		rv = perfmon_start(*ip);
358		break;
359
360	case PMIOSTOP:
361		if (!(flags & FWRITE))
362			return EPERM;
363
364		ip = (int *)param;
365		rv = perfmon_stop(*ip);
366		break;
367
368	case PMIORESET:
369		if (!(flags & FWRITE))
370			return EPERM;
371
372		ip = (int *)param;
373		rv = perfmon_reset(*ip);
374		break;
375
376	case PMIOREAD:
377		pmcd = (struct pmc_data *)param;
378		rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value);
379		break;
380
381	case PMIOTSTAMP:
382		if (!tsc_freq) {
383			rv = ENOTTY;
384			break;
385		}
386		pmct = (struct pmc_tstamp *)param;
387		/* XXX interface loses precision. */
388		pmct->pmct_rate = tsc_freq / 1000000;
389		pmct->pmct_value = rdtsc();
390		rv = 0;
391		break;
392	default:
393		rv = ENOTTY;
394	}
395
396	return rv;
397}
398