perfmon.c revision 92770
1/*
2 * Copyright 1996 Massachusetts Institute of Technology
3 *
4 * Permission to use, copy, modify, and distribute this software and
5 * its documentation for any purpose and without fee is hereby
6 * granted, provided that both the above copyright notice and this
7 * permission notice appear in all copies, that both the above
8 * copyright notice and this permission notice appear in all
9 * supporting documentation, and that the name of M.I.T. not be used
10 * in advertising or publicity pertaining to distribution of the
11 * software without specific, written prior permission.  M.I.T. makes
12 * no representations about the suitability of this software for any
13 * purpose.  It is provided "as is" without express or implied
14 * warranty.
15 *
16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/sys/i386/i386/perfmon.c 92770 2002-03-20 08:56:31Z alfred $
30 */
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/fcntl.h>
36#include <sys/kernel.h>
37
38#ifndef SMP
39#include <machine/cputypes.h>
40#endif
41#include <machine/clock.h>
42#include <machine/perfmon.h>
43
44static int perfmon_inuse;
45static int perfmon_cpuok;
46#ifndef SMP
47static int msr_ctl[NPMC];
48#endif
49static int msr_pmc[NPMC];
50static unsigned int ctl_shadow[NPMC];
51static quad_t pmc_shadow[NPMC];	/* used when ctr is stopped on P5 */
52static int (*writectl)(int);
53#ifndef SMP
54static int writectl5(int);
55static int writectl6(int);
56#endif
57
58static d_close_t perfmon_close;
59static d_open_t	perfmon_open;
60static d_ioctl_t perfmon_ioctl;
61
/*
 * XXX perfmon_init_dev(void *) is split off from the perfmon_init() function.
 * This solves a problem for DEVFS users.  It loads the "perfmon" driver after
 * the DEVFS subsystem has been kicked into action.  SI_ORDER_ANY is used to
 * ensure that it is the lowest-priority task, which guarantees the
 * above.
 */
69static void perfmon_init_dev(void *);
70SYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL);
71
72#define CDEV_MAJOR 2	/* We're really a minor of mem.c */
73static struct cdevsw perfmon_cdevsw = {
74	/* open */      perfmon_open,
75	/* close */     perfmon_close,
76	/* read */      noread,
77	/* write */     nowrite,
78	/* ioctl */     perfmon_ioctl,
79	/* poll */      nopoll,
80	/* mmap */      nommap,
81	/* strategy */  nostrategy,
82	/* name */      "perfmon",
83	/* maj */       CDEV_MAJOR,
84	/* dump */      nodump,
85	/* psize */     nopsize,
86	/* flags */     0,
87};
88
89/*
90 * Must be called after cpu_class is set up.
91 */
92void
93perfmon_init(void)
94{
95#ifndef SMP
96	switch(cpu_class) {
97	case CPUCLASS_586:
98		perfmon_cpuok = 1;
99		msr_ctl[0] = 0x11;
100		msr_ctl[1] = 0x11;
101		msr_pmc[0] = 0x12;
102		msr_pmc[1] = 0x13;
103		writectl = writectl5;
104		break;
105	case CPUCLASS_686:
106		perfmon_cpuok = 1;
107		msr_ctl[0] = 0x186;
108		msr_ctl[1] = 0x187;
109		msr_pmc[0] = 0xc1;
110		msr_pmc[1] = 0xc2;
111		writectl = writectl6;
112		break;
113
114	default:
115		perfmon_cpuok = 0;
116		break;
117	}
118#endif /* SMP */
119}
120
121static void
122perfmon_init_dev(dummy)
123	void *dummy;
124{
125	make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon");
126}
127
128int
129perfmon_avail(void)
130{
131	return perfmon_cpuok;
132}
133
134int
135perfmon_setup(int pmc, unsigned int control)
136{
137	critical_t	savecrit;
138
139	if (pmc < 0 || pmc >= NPMC)
140		return EINVAL;
141
142	perfmon_inuse |= (1 << pmc);
143	control &= ~(PMCF_SYS_FLAGS << 16);
144	savecrit = cpu_critical_enter();
145	ctl_shadow[pmc] = control;
146	writectl(pmc);
147	wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
148	cpu_critical_exit(savecrit);
149	return 0;
150}
151
152int
153perfmon_get(int pmc, unsigned int *control)
154{
155	if (pmc < 0 || pmc >= NPMC)
156		return EINVAL;
157
158	if (perfmon_inuse & (1 << pmc)) {
159		*control = ctl_shadow[pmc];
160		return 0;
161	}
162	return EBUSY;		/* XXX reversed sense */
163}
164
165int
166perfmon_fini(int pmc)
167{
168	if (pmc < 0 || pmc >= NPMC)
169		return EINVAL;
170
171	if (perfmon_inuse & (1 << pmc)) {
172		perfmon_stop(pmc);
173		ctl_shadow[pmc] = 0;
174		perfmon_inuse &= ~(1 << pmc);
175		return 0;
176	}
177	return EBUSY;		/* XXX reversed sense */
178}
179
180int
181perfmon_start(int pmc)
182{
183	critical_t	savecrit;
184
185	if (pmc < 0 || pmc >= NPMC)
186		return EINVAL;
187
188	if (perfmon_inuse & (1 << pmc)) {
189		savecrit = cpu_critical_enter();
190		ctl_shadow[pmc] |= (PMCF_EN << 16);
191		wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
192		writectl(pmc);
193		cpu_critical_exit(savecrit);
194		return 0;
195	}
196	return EBUSY;
197}
198
199int
200perfmon_stop(int pmc)
201{
202	critical_t	savecrit;
203
204	if (pmc < 0 || pmc >= NPMC)
205		return EINVAL;
206
207	if (perfmon_inuse & (1 << pmc)) {
208		savecrit = cpu_critical_enter();
209		pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
210		ctl_shadow[pmc] &= ~(PMCF_EN << 16);
211		writectl(pmc);
212		cpu_critical_exit(savecrit);
213		return 0;
214	}
215	return EBUSY;
216}
217
218int
219perfmon_read(int pmc, quad_t *val)
220{
221	if (pmc < 0 || pmc >= NPMC)
222		return EINVAL;
223
224	if (perfmon_inuse & (1 << pmc)) {
225		if (ctl_shadow[pmc] & (PMCF_EN << 16))
226			*val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
227		else
228			*val = pmc_shadow[pmc];
229		return 0;
230	}
231
232	return EBUSY;
233}
234
235int
236perfmon_reset(int pmc)
237{
238	if (pmc < 0 || pmc >= NPMC)
239		return EINVAL;
240
241	if (perfmon_inuse & (1 << pmc)) {
242		wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
243		return 0;
244	}
245	return EBUSY;
246}
247
248#ifndef SMP
249/*
250 * Unfortunately, the performance-monitoring registers are laid out
251 * differently in the P5 and P6.  We keep everything in P6 format
252 * internally (except for the event code), and convert to P5
253 * format as needed on those CPUs.  The writectl function pointer
254 * is set up to point to one of these functions by perfmon_init().
255 */
256int
257writectl6(int pmc)
258{
259	if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) {
260		wrmsr(msr_ctl[pmc], 0);
261	} else {
262		wrmsr(msr_ctl[pmc], ctl_shadow[pmc]);
263	}
264	return 0;
265}
266
267#define	P5FLAG_P	0x200
268#define	P5FLAG_E	0x100
269#define	P5FLAG_USR	0x80
270#define	P5FLAG_OS	0x40
271
272int
273writectl5(int pmc)
274{
275	quad_t newval = 0;
276
277	if (ctl_shadow[1] & (PMCF_EN << 16)) {
278		if (ctl_shadow[1] & (PMCF_USR << 16))
279			newval |= P5FLAG_USR << 16;
280		if (ctl_shadow[1] & (PMCF_OS << 16))
281			newval |= P5FLAG_OS << 16;
282		if (!(ctl_shadow[1] & (PMCF_E << 16)))
283			newval |= P5FLAG_E << 16;
284		newval |= (ctl_shadow[1] & 0x3f) << 16;
285	}
286	if (ctl_shadow[0] & (PMCF_EN << 16)) {
287		if (ctl_shadow[0] & (PMCF_USR << 16))
288			newval |= P5FLAG_USR;
289		if (ctl_shadow[0] & (PMCF_OS << 16))
290			newval |= P5FLAG_OS;
291		if (!(ctl_shadow[0] & (PMCF_E << 16)))
292			newval |= P5FLAG_E;
293		newval |= ctl_shadow[0] & 0x3f;
294	}
295
296	wrmsr(msr_ctl[0], newval);
297	return 0;		/* XXX should check for unimplemented bits */
298}
299#endif /* !SMP */
300
301/*
302 * Now the user-mode interface, called from a subdevice of mem.c.
303 */
304static int writer;
305static int writerpmc;
306
307static int
308perfmon_open(dev_t dev, int flags, int fmt, struct thread *td)
309{
310	if (!perfmon_cpuok)
311		return ENXIO;
312
313	if (flags & FWRITE) {
314		if (writer) {
315			return EBUSY;
316		} else {
317			writer = 1;
318			writerpmc = 0;
319		}
320	}
321	return 0;
322}
323
324static int
325perfmon_close(dev_t dev, int flags, int fmt, struct thread *td)
326{
327	if (flags & FWRITE) {
328		int i;
329
330		for (i = 0; i < NPMC; i++) {
331			if (writerpmc & (1 << i))
332				perfmon_fini(i);
333		}
334		writer = 0;
335	}
336	return 0;
337}
338
339static int
340perfmon_ioctl(dev_t dev, u_long cmd, caddr_t param, int flags, struct thread *td)
341{
342	struct pmc *pmc;
343	struct pmc_data *pmcd;
344	struct pmc_tstamp *pmct;
345	int *ip;
346	int rv;
347
348	switch(cmd) {
349	case PMIOSETUP:
350		if (!(flags & FWRITE))
351			return EPERM;
352		pmc = (struct pmc *)param;
353
354		rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val);
355		if (!rv) {
356			writerpmc |= (1 << pmc->pmc_num);
357		}
358		break;
359
360	case PMIOGET:
361		pmc = (struct pmc *)param;
362		rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val);
363		break;
364
365	case PMIOSTART:
366		if (!(flags & FWRITE))
367			return EPERM;
368
369		ip = (int *)param;
370		rv = perfmon_start(*ip);
371		break;
372
373	case PMIOSTOP:
374		if (!(flags & FWRITE))
375			return EPERM;
376
377		ip = (int *)param;
378		rv = perfmon_stop(*ip);
379		break;
380
381	case PMIORESET:
382		if (!(flags & FWRITE))
383			return EPERM;
384
385		ip = (int *)param;
386		rv = perfmon_reset(*ip);
387		break;
388
389	case PMIOREAD:
390		pmcd = (struct pmc_data *)param;
391		rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value);
392		break;
393
394	case PMIOTSTAMP:
395		if (!tsc_freq) {
396			rv = ENOTTY;
397			break;
398		}
399		pmct = (struct pmc_tstamp *)param;
400		/* XXX interface loses precision. */
401		pmct->pmct_rate = tsc_freq / 1000000;
402		pmct->pmct_value = rdtsc();
403		rv = 0;
404		break;
405	default:
406		rv = ENOTTY;
407	}
408
409	return rv;
410}
411