perfmon.c revision 74810
1/*
2 * Copyright 1996 Massachusetts Institute of Technology
3 *
4 * Permission to use, copy, modify, and distribute this software and
5 * its documentation for any purpose and without fee is hereby
6 * granted, provided that both the above copyright notice and this
7 * permission notice appear in all copies, that both the above
8 * copyright notice and this permission notice appear in all
9 * supporting documentation, and that the name of M.I.T. not be used
10 * in advertising or publicity pertaining to distribution of the
11 * software without specific, written prior permission.  M.I.T. makes
12 * no representations about the suitability of this software for any
13 * purpose.  It is provided "as is" without express or implied
14 * warranty.
15 *
16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/i386/i386/perfmon.c 74810 2001-03-26 12:41:29Z phk $
30 */
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/fcntl.h>
36
37#ifndef SMP
38#include <machine/cputypes.h>
39#endif
40#include <machine/clock.h>
41#include <machine/perfmon.h>
42
43static int perfmon_inuse;
44static int perfmon_cpuok;
45#ifndef SMP
46static int msr_ctl[NPMC];
47#endif
48static int msr_pmc[NPMC];
49static unsigned int ctl_shadow[NPMC];
50static quad_t pmc_shadow[NPMC];	/* used when ctr is stopped on P5 */
51static int (*writectl)(int);
52#ifndef SMP
53static int writectl5(int);
54static int writectl6(int);
55#endif
56
57static d_close_t perfmon_close;
58static d_open_t	perfmon_open;
59static d_ioctl_t perfmon_ioctl;
60
61#define CDEV_MAJOR 2	/* We're really a minor of mem.c */
62static struct cdevsw perfmon_cdevsw = {
63	/* open */      perfmon_open,
64	/* close */     perfmon_close,
65	/* read */      noread,
66	/* write */     nowrite,
67	/* ioctl */     perfmon_ioctl,
68	/* poll */      nopoll,
69	/* mmap */      nommap,
70	/* strategy */  nostrategy,
71	/* name */      "perfmon",
72	/* maj */       CDEV_MAJOR,
73	/* dump */      nodump,
74	/* psize */     nopsize,
75	/* flags */     0,
76};
77
78/*
79 * Must be called after cpu_class is set up.
80 */
81void
82perfmon_init(void)
83{
84#ifndef SMP
85	switch(cpu_class) {
86	case CPUCLASS_586:
87		perfmon_cpuok = 1;
88		msr_ctl[0] = 0x11;
89		msr_ctl[1] = 0x11;
90		msr_pmc[0] = 0x12;
91		msr_pmc[1] = 0x13;
92		writectl = writectl5;
93		break;
94	case CPUCLASS_686:
95		perfmon_cpuok = 1;
96		msr_ctl[0] = 0x186;
97		msr_ctl[1] = 0x187;
98		msr_pmc[0] = 0xc1;
99		msr_pmc[1] = 0xc2;
100		writectl = writectl6;
101		break;
102
103	default:
104		perfmon_cpuok = 0;
105		break;
106	}
107#endif /* SMP */
108	make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon");
109}
110
111int
112perfmon_avail(void)
113{
114	return perfmon_cpuok;
115}
116
117int
118perfmon_setup(int pmc, unsigned int control)
119{
120	int	intrstate;
121
122	if (pmc < 0 || pmc >= NPMC)
123		return EINVAL;
124
125	perfmon_inuse |= (1 << pmc);
126	control &= ~(PMCF_SYS_FLAGS << 16);
127	intrstate = save_intr();
128	disable_intr();
129	ctl_shadow[pmc] = control;
130	writectl(pmc);
131	wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
132	restore_intr(intrstate);
133	return 0;
134}
135
136int
137perfmon_get(int pmc, unsigned int *control)
138{
139	if (pmc < 0 || pmc >= NPMC)
140		return EINVAL;
141
142	if (perfmon_inuse & (1 << pmc)) {
143		*control = ctl_shadow[pmc];
144		return 0;
145	}
146	return EBUSY;		/* XXX reversed sense */
147}
148
149int
150perfmon_fini(int pmc)
151{
152	if (pmc < 0 || pmc >= NPMC)
153		return EINVAL;
154
155	if (perfmon_inuse & (1 << pmc)) {
156		perfmon_stop(pmc);
157		ctl_shadow[pmc] = 0;
158		perfmon_inuse &= ~(1 << pmc);
159		return 0;
160	}
161	return EBUSY;		/* XXX reversed sense */
162}
163
164int
165perfmon_start(int pmc)
166{
167	int	intrstate;
168
169	if (pmc < 0 || pmc >= NPMC)
170		return EINVAL;
171
172	if (perfmon_inuse & (1 << pmc)) {
173		intrstate = save_intr();
174		disable_intr();
175		ctl_shadow[pmc] |= (PMCF_EN << 16);
176		wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
177		writectl(pmc);
178		restore_intr(intrstate);
179		return 0;
180	}
181	return EBUSY;
182}
183
184int
185perfmon_stop(int pmc)
186{
187	int	intrstate;
188
189	if (pmc < 0 || pmc >= NPMC)
190		return EINVAL;
191
192	if (perfmon_inuse & (1 << pmc)) {
193		intrstate = save_intr();
194		disable_intr();
195		pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
196		ctl_shadow[pmc] &= ~(PMCF_EN << 16);
197		writectl(pmc);
198		restore_intr(intrstate);
199		return 0;
200	}
201	return EBUSY;
202}
203
204int
205perfmon_read(int pmc, quad_t *val)
206{
207	if (pmc < 0 || pmc >= NPMC)
208		return EINVAL;
209
210	if (perfmon_inuse & (1 << pmc)) {
211		if (ctl_shadow[pmc] & (PMCF_EN << 16))
212			*val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
213		else
214			*val = pmc_shadow[pmc];
215		return 0;
216	}
217
218	return EBUSY;
219}
220
221int
222perfmon_reset(int pmc)
223{
224	if (pmc < 0 || pmc >= NPMC)
225		return EINVAL;
226
227	if (perfmon_inuse & (1 << pmc)) {
228		wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
229		return 0;
230	}
231	return EBUSY;
232}
233
234#ifndef SMP
235/*
236 * Unfortunately, the performance-monitoring registers are laid out
237 * differently in the P5 and P6.  We keep everything in P6 format
238 * internally (except for the event code), and convert to P5
239 * format as needed on those CPUs.  The writectl function pointer
240 * is set up to point to one of these functions by perfmon_init().
241 */
242int
243writectl6(int pmc)
244{
245	if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) {
246		wrmsr(msr_ctl[pmc], 0);
247	} else {
248		wrmsr(msr_ctl[pmc], ctl_shadow[pmc]);
249	}
250	return 0;
251}
252
253#define	P5FLAG_P	0x200
254#define	P5FLAG_E	0x100
255#define	P5FLAG_USR	0x80
256#define	P5FLAG_OS	0x40
257
258int
259writectl5(int pmc)
260{
261	quad_t newval = 0;
262
263	if (ctl_shadow[1] & (PMCF_EN << 16)) {
264		if (ctl_shadow[1] & (PMCF_USR << 16))
265			newval |= P5FLAG_USR << 16;
266		if (ctl_shadow[1] & (PMCF_OS << 16))
267			newval |= P5FLAG_OS << 16;
268		if (!(ctl_shadow[1] & (PMCF_E << 16)))
269			newval |= P5FLAG_E << 16;
270		newval |= (ctl_shadow[1] & 0x3f) << 16;
271	}
272	if (ctl_shadow[0] & (PMCF_EN << 16)) {
273		if (ctl_shadow[0] & (PMCF_USR << 16))
274			newval |= P5FLAG_USR;
275		if (ctl_shadow[0] & (PMCF_OS << 16))
276			newval |= P5FLAG_OS;
277		if (!(ctl_shadow[0] & (PMCF_E << 16)))
278			newval |= P5FLAG_E;
279		newval |= ctl_shadow[0] & 0x3f;
280	}
281
282	wrmsr(msr_ctl[0], newval);
283	return 0;		/* XXX should check for unimplemented bits */
284}
285#endif /* !SMP */
286
287/*
288 * Now the user-mode interface, called from a subdevice of mem.c.
289 */
290static int writer;
291static int writerpmc;
292
293static int
294perfmon_open(dev_t dev, int flags, int fmt, struct proc *p)
295{
296	if (!perfmon_cpuok)
297		return ENXIO;
298
299	if (flags & FWRITE) {
300		if (writer) {
301			return EBUSY;
302		} else {
303			writer = 1;
304			writerpmc = 0;
305		}
306	}
307	return 0;
308}
309
310static int
311perfmon_close(dev_t dev, int flags, int fmt, struct proc *p)
312{
313	if (flags & FWRITE) {
314		int i;
315
316		for (i = 0; i < NPMC; i++) {
317			if (writerpmc & (1 << i))
318				perfmon_fini(i);
319		}
320		writer = 0;
321	}
322	return 0;
323}
324
325static int
326perfmon_ioctl(dev_t dev, u_long cmd, caddr_t param, int flags, struct proc *p)
327{
328	struct pmc *pmc;
329	struct pmc_data *pmcd;
330	struct pmc_tstamp *pmct;
331	int *ip;
332	int rv;
333
334	switch(cmd) {
335	case PMIOSETUP:
336		if (!(flags & FWRITE))
337			return EPERM;
338		pmc = (struct pmc *)param;
339
340		rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val);
341		if (!rv) {
342			writerpmc |= (1 << pmc->pmc_num);
343		}
344		break;
345
346	case PMIOGET:
347		pmc = (struct pmc *)param;
348		rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val);
349		break;
350
351	case PMIOSTART:
352		if (!(flags & FWRITE))
353			return EPERM;
354
355		ip = (int *)param;
356		rv = perfmon_start(*ip);
357		break;
358
359	case PMIOSTOP:
360		if (!(flags & FWRITE))
361			return EPERM;
362
363		ip = (int *)param;
364		rv = perfmon_stop(*ip);
365		break;
366
367	case PMIORESET:
368		if (!(flags & FWRITE))
369			return EPERM;
370
371		ip = (int *)param;
372		rv = perfmon_reset(*ip);
373		break;
374
375	case PMIOREAD:
376		pmcd = (struct pmc_data *)param;
377		rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value);
378		break;
379
380	case PMIOTSTAMP:
381		if (!tsc_freq) {
382			rv = ENOTTY;
383			break;
384		}
385		pmct = (struct pmc_tstamp *)param;
386		/* XXX interface loses precision. */
387		pmct->pmct_rate = tsc_freq / 1000000;
388		pmct->pmct_value = rdtsc();
389		rv = 0;
390		break;
391	default:
392		rv = ENOTTY;
393	}
394
395	return rv;
396}
397