1/*-
2 * Copyright 1996 Massachusetts Institute of Technology
3 *
4 * Permission to use, copy, modify, and distribute this software and
5 * its documentation for any purpose and without fee is hereby
6 * granted, provided that both the above copyright notice and this
7 * permission notice appear in all copies, that both the above
8 * copyright notice and this permission notice appear in all
9 * supporting documentation, and that the name of M.I.T. not be used
10 * in advertising or publicity pertaining to distribution of the
11 * software without specific, written prior permission.  M.I.T. makes
12 * no representations about the suitability of this software for any
13 * purpose.  It is provided "as is" without express or implied
14 * warranty.
15 *
16 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
17 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
20 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
23 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/conf.h>
36#include <sys/fcntl.h>
37#include <sys/kernel.h>
38
39#ifndef SMP
40#include <machine/cputypes.h>
41#endif
42#include <machine/clock.h>
43#include <machine/perfmon.h>
44#include <machine/specialreg.h>
45
/*
 * Bitmask of counters currently set up: bit n is set by perfmon_setup(n)
 * and cleared again by perfmon_fini(n).
 */
static int perfmon_inuse;
/*
 * Nonzero once perfmon_init() has recognized the CPU class; this is the
 * value reported by perfmon_avail() and checked by perfmon_open().
 */
static int perfmon_cpuok;
#ifndef SMP
/* MSR number of the control register for each counter (set by perfmon_init()). */
static int msr_ctl[NPMC];
#endif
/* MSR number of each counter register (set by perfmon_init()). */
static int msr_pmc[NPMC];
/* Software copy of each counter's control word; writectl() pushes it to hardware. */
static unsigned int ctl_shadow[NPMC];
/*
 * Counter value saved by perfmon_stop(), returned by perfmon_read() while
 * the counter is disabled, and written back by perfmon_start().
 */
static quad_t pmc_shadow[NPMC];	/* used when ctr is stopped on P5 */
/* CPU-specific control-register writer, selected by perfmon_init(). */
static int (*writectl)(int);
#ifndef SMP
static int writectl5(int);
static int writectl6(int);
#endif

/* Character-device entry points referenced from perfmon_cdevsw. */
static d_close_t perfmon_close;
static d_open_t	perfmon_open;
static d_ioctl_t perfmon_ioctl;
63
/*
 * XXX perfmon_init_dev(void *) was split out of the perfmon_init() function.
 * This solves a problem for DEVFS users: the "perfmon" device node is
 * created only after the DEVFS subsystem has been kicked into action.
 * SI_ORDER_ANY makes this the lowest-priority task within SI_SUB_DRIVERS,
 * which guarantees the above.
 */
static void perfmon_init_dev(void *);
SYSINIT(cpu, SI_SUB_DRIVERS, SI_ORDER_ANY, perfmon_init_dev, NULL);
73
/*
 * Character-device switch for the "perfmon" device.  Only open, close and
 * ioctl are provided; there is no read/write entry point.
 * NOTE(review): D_NEEDGIANT presumably makes the device layer hold Giant
 * around these entry points, which is the only serialization visible for
 * the file-scope state below -- confirm against the cdevsw documentation.
 */
static struct cdevsw perfmon_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_NEEDGIANT,
	.d_open =	perfmon_open,
	.d_close =	perfmon_close,
	.d_ioctl =	perfmon_ioctl,
	.d_name =	"perfmon",
};
82
83/*
84 * Must be called after cpu_class is set up.
85 */
86void
87perfmon_init(void)
88{
89#ifndef SMP
90	switch(cpu_class) {
91	case CPUCLASS_586:
92		perfmon_cpuok = 1;
93		msr_ctl[0] = MSR_P5_CESR;
94		msr_ctl[1] = MSR_P5_CESR;
95		msr_pmc[0] = MSR_P5_CTR0;
96		msr_pmc[1] = MSR_P5_CTR1;
97		writectl = writectl5;
98		break;
99	case CPUCLASS_686:
100		perfmon_cpuok = 1;
101		msr_ctl[0] = MSR_EVNTSEL0;
102		msr_ctl[1] = MSR_EVNTSEL1;
103		msr_pmc[0] = MSR_PERFCTR0;
104		msr_pmc[1] = MSR_PERFCTR1;
105		writectl = writectl6;
106		break;
107
108	default:
109		perfmon_cpuok = 0;
110		break;
111	}
112#endif /* SMP */
113}
114
115static void
116perfmon_init_dev(dummy)
117	void *dummy;
118{
119	make_dev(&perfmon_cdevsw, 32, UID_ROOT, GID_KMEM, 0640, "perfmon");
120}
121
122int
123perfmon_avail(void)
124{
125	return perfmon_cpuok;
126}
127
128int
129perfmon_setup(int pmc, unsigned int control)
130{
131	register_t	saveintr;
132
133	if (pmc < 0 || pmc >= NPMC)
134		return EINVAL;
135
136	perfmon_inuse |= (1 << pmc);
137	control &= ~(PMCF_SYS_FLAGS << 16);
138	saveintr = intr_disable();
139	ctl_shadow[pmc] = control;
140	writectl(pmc);
141	wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
142	intr_restore(saveintr);
143	return 0;
144}
145
146int
147perfmon_get(int pmc, unsigned int *control)
148{
149	if (pmc < 0 || pmc >= NPMC)
150		return EINVAL;
151
152	if (perfmon_inuse & (1 << pmc)) {
153		*control = ctl_shadow[pmc];
154		return 0;
155	}
156	return EBUSY;		/* XXX reversed sense */
157}
158
159int
160perfmon_fini(int pmc)
161{
162	if (pmc < 0 || pmc >= NPMC)
163		return EINVAL;
164
165	if (perfmon_inuse & (1 << pmc)) {
166		perfmon_stop(pmc);
167		ctl_shadow[pmc] = 0;
168		perfmon_inuse &= ~(1 << pmc);
169		return 0;
170	}
171	return EBUSY;		/* XXX reversed sense */
172}
173
174int
175perfmon_start(int pmc)
176{
177	register_t	saveintr;
178
179	if (pmc < 0 || pmc >= NPMC)
180		return EINVAL;
181
182	if (perfmon_inuse & (1 << pmc)) {
183		saveintr = intr_disable();
184		ctl_shadow[pmc] |= (PMCF_EN << 16);
185		wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
186		writectl(pmc);
187		intr_restore(saveintr);
188		return 0;
189	}
190	return EBUSY;
191}
192
193int
194perfmon_stop(int pmc)
195{
196	register_t	saveintr;
197
198	if (pmc < 0 || pmc >= NPMC)
199		return EINVAL;
200
201	if (perfmon_inuse & (1 << pmc)) {
202		saveintr = intr_disable();
203		pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
204		ctl_shadow[pmc] &= ~(PMCF_EN << 16);
205		writectl(pmc);
206		intr_restore(saveintr);
207		return 0;
208	}
209	return EBUSY;
210}
211
212int
213perfmon_read(int pmc, quad_t *val)
214{
215	if (pmc < 0 || pmc >= NPMC)
216		return EINVAL;
217
218	if (perfmon_inuse & (1 << pmc)) {
219		if (ctl_shadow[pmc] & (PMCF_EN << 16))
220			*val = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
221		else
222			*val = pmc_shadow[pmc];
223		return 0;
224	}
225
226	return EBUSY;
227}
228
229int
230perfmon_reset(int pmc)
231{
232	if (pmc < 0 || pmc >= NPMC)
233		return EINVAL;
234
235	if (perfmon_inuse & (1 << pmc)) {
236		wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
237		return 0;
238	}
239	return EBUSY;
240}
241
242#ifndef SMP
243/*
244 * Unfortunately, the performance-monitoring registers are laid out
245 * differently in the P5 and P6.  We keep everything in P6 format
246 * internally (except for the event code), and convert to P5
247 * format as needed on those CPUs.  The writectl function pointer
248 * is set up to point to one of these functions by perfmon_init().
249 */
250int
251writectl6(int pmc)
252{
253	if (pmc > 0 && !(ctl_shadow[pmc] & (PMCF_EN << 16))) {
254		wrmsr(msr_ctl[pmc], 0);
255	} else {
256		wrmsr(msr_ctl[pmc], ctl_shadow[pmc]);
257	}
258	return 0;
259}
260
261#define	P5FLAG_P	0x200
262#define	P5FLAG_E	0x100
263#define	P5FLAG_USR	0x80
264#define	P5FLAG_OS	0x40
265
266int
267writectl5(int pmc)
268{
269	quad_t newval = 0;
270
271	if (ctl_shadow[1] & (PMCF_EN << 16)) {
272		if (ctl_shadow[1] & (PMCF_USR << 16))
273			newval |= P5FLAG_USR << 16;
274		if (ctl_shadow[1] & (PMCF_OS << 16))
275			newval |= P5FLAG_OS << 16;
276		if (!(ctl_shadow[1] & (PMCF_E << 16)))
277			newval |= P5FLAG_E << 16;
278		newval |= (ctl_shadow[1] & 0x3f) << 16;
279	}
280	if (ctl_shadow[0] & (PMCF_EN << 16)) {
281		if (ctl_shadow[0] & (PMCF_USR << 16))
282			newval |= P5FLAG_USR;
283		if (ctl_shadow[0] & (PMCF_OS << 16))
284			newval |= P5FLAG_OS;
285		if (!(ctl_shadow[0] & (PMCF_E << 16)))
286			newval |= P5FLAG_E;
287		newval |= ctl_shadow[0] & 0x3f;
288	}
289
290	wrmsr(msr_ctl[0], newval);
291	return 0;		/* XXX should check for unimplemented bits */
292}
293#endif /* !SMP */
294
/*
 * Now the user-mode interface, called from a subdevice of mem.c.
 * NOTE(review): the visible code registers its own cdevsw and device node
 * via make_dev(), so the mem.c reference may be out of date -- confirm.
 */
/* Nonzero while the device is open for writing; perfmon_open() admits one writer. */
static int writer;
/*
 * Bitmask of counters set up through PMIOSETUP since the last write-open;
 * perfmon_close() releases each of them.
 */
static int writerpmc;
300
301static int
302perfmon_open(struct cdev *dev, int flags, int fmt, struct thread *td)
303{
304	if (!perfmon_cpuok)
305		return ENXIO;
306
307	if (flags & FWRITE) {
308		if (writer) {
309			return EBUSY;
310		} else {
311			writer = 1;
312			writerpmc = 0;
313		}
314	}
315	return 0;
316}
317
318static int
319perfmon_close(struct cdev *dev, int flags, int fmt, struct thread *td)
320{
321	if (flags & FWRITE) {
322		int i;
323
324		for (i = 0; i < NPMC; i++) {
325			if (writerpmc & (1 << i))
326				perfmon_fini(i);
327		}
328		writer = 0;
329	}
330	return 0;
331}
332
333static int
334perfmon_ioctl(struct cdev *dev, u_long cmd, caddr_t param, int flags, struct thread *td)
335{
336	struct pmc *pmc;
337	struct pmc_data *pmcd;
338	struct pmc_tstamp *pmct;
339	uint64_t freq;
340	int *ip;
341	int rv;
342
343	switch(cmd) {
344	case PMIOSETUP:
345		if (!(flags & FWRITE))
346			return EPERM;
347		pmc = (struct pmc *)param;
348
349		rv = perfmon_setup(pmc->pmc_num, pmc->pmc_val);
350		if (!rv) {
351			writerpmc |= (1 << pmc->pmc_num);
352		}
353		break;
354
355	case PMIOGET:
356		pmc = (struct pmc *)param;
357		rv = perfmon_get(pmc->pmc_num, &pmc->pmc_val);
358		break;
359
360	case PMIOSTART:
361		if (!(flags & FWRITE))
362			return EPERM;
363
364		ip = (int *)param;
365		rv = perfmon_start(*ip);
366		break;
367
368	case PMIOSTOP:
369		if (!(flags & FWRITE))
370			return EPERM;
371
372		ip = (int *)param;
373		rv = perfmon_stop(*ip);
374		break;
375
376	case PMIORESET:
377		if (!(flags & FWRITE))
378			return EPERM;
379
380		ip = (int *)param;
381		rv = perfmon_reset(*ip);
382		break;
383
384	case PMIOREAD:
385		pmcd = (struct pmc_data *)param;
386		rv = perfmon_read(pmcd->pmcd_num, &pmcd->pmcd_value);
387		break;
388
389	case PMIOTSTAMP:
390		freq = atomic_load_acq_64(&tsc_freq);
391		if (freq == 0) {
392			rv = ENOTTY;
393			break;
394		}
395		pmct = (struct pmc_tstamp *)param;
396		/* XXX interface loses precision. */
397		pmct->pmct_rate = freq / 1000000;
398		pmct->pmct_value = rdtsc();
399		rv = 0;
400		break;
401	default:
402		rv = ENOTTY;
403	}
404
405	return rv;
406}
407