core_pcbe.c revision 7258:489b851ce606
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28/*
29 * Performance Counter Back-End for Intel Family 6 Models 15 and 23
30 */
31
32#include <sys/cpuvar.h>
33#include <sys/param.h>
34#include <sys/cpc_impl.h>
35#include <sys/cpc_pcbe.h>
36#include <sys/modctl.h>
37#include <sys/inttypes.h>
38#include <sys/systm.h>
39#include <sys/cmn_err.h>
40#include <sys/x86_archext.h>
41#include <sys/sdt.h>
42#include <sys/archsystm.h>
43#include <sys/privregs.h>
44#include <sys/ddi.h>
45#include <sys/sunddi.h>
46#include <sys/cred.h>
47#include <sys/policy.h>
48
49static int core_pcbe_init(void);
50static uint_t core_pcbe_ncounters(void);
51static const char *core_pcbe_impl_name(void);
52static const char *core_pcbe_cpuref(void);
53static char *core_pcbe_list_events(uint_t picnum);
54static char *core_pcbe_list_attrs(void);
55static uint64_t core_pcbe_event_coverage(char *event);
56static uint64_t core_pcbe_overflow_bitmap(void);
57static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
58    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
59    void *token);
60static void core_pcbe_program(void *token);
61static void core_pcbe_allstop(void);
62static void core_pcbe_sample(void *token);
63static void core_pcbe_free(void *config);
64
65#define	FALSE	0
66#define	TRUE	1
67
68/* Architectural Performance Counter versioning */
69#define	APC_V1	1
70#define	APC_V2	2
71
72/* Counter Type */
73#define	CORE_GPC	0	/* General-Purpose Counter (GPC) */
74#define	CORE_FFC	1	/* Fixed-Function Counter (FFC) */
75
76/* MSR Addresses */
77#define	GPC_BASE_PMC		0x00c1	/* First GPC */
78#define	GPC_BASE_PES		0x0186	/* First GPC Event Select register */
79#define	FFC_BASE_PMC		0x0309	/* First FFC */
80#define	PERF_FIXED_CTR_CTRL	0x038d	/* Used to enable/disable FFCs */
81#define	PERF_GLOBAL_STATUS	0x038e	/* Overflow status register */
82#define	PERF_GLOBAL_CTRL	0x038f	/* Used to enable/disable counting */
83#define	PERF_GLOBAL_OVF_CTRL	0x0390	/* Used to clear overflow status */
84
85/*
86 * Processor Event Select register fields
87 */
88#define	CORE_USR	(1ULL << 16)	/* Count while not in ring 0 */
89#define	CORE_OS		(1ULL << 17)	/* Count while in ring 0 */
90#define	CORE_EDGE	(1ULL << 18)	/* Enable edge detection */
91#define	CORE_PC		(1ULL << 19)	/* Enable pin control */
92#define	CORE_INT	(1ULL << 20)	/* Enable interrupt on overflow */
93#define	CORE_EN		(1ULL << 22)	/* Enable counting */
94#define	CORE_INV	(1ULL << 23)	/* Invert the CMASK */
95
96#define	CORE_UMASK_SHIFT	8
97#define	CORE_UMASK_MASK		0xffu
98#define	CORE_CMASK_SHIFT	24
99#define	CORE_CMASK_MASK		0xffu
100
101/*
102 * Fixed-function counter attributes
103 */
#define	CORE_FFC_OS_EN	(1ULL << 0)	/* Count while in ring 0 */
#define	CORE_FFC_USR_EN	(1ULL << 1)	/* Count while not in ring 0 */
#define	CORE_FFC_PMI	(1ULL << 3)	/* Enable interrupt on overflow */
107
108/*
109 * Number of bits for specifying each FFC's attributes in the control register
110 */
111#define	CORE_FFC_ATTR_SIZE	4
112
113/*
114 * CondChgd and OvfBuffer fields of global status and overflow control registers
115 */
116#define	CONDCHGD	(1ULL << 63)
117#define	OVFBUFFER	(1ULL << 62)
118#define	MASK_CONDCHGD_OVFBUFFER	(CONDCHGD | OVFBUFFER)
119
120#define	ALL_STOPPED	0ULL
121
122#define	BITMASK_XBITS(x)	((1ull << (x)) - 1ull)
123
124/*
125 * Only the lower 32-bits can be written to in the general-purpose
126 * counters.  The higher bits are extended from bit 31; all ones if
127 * bit 31 is one and all zeros otherwise.
128 *
129 * The fixed-function counters do not have this restriction.
130 */
131#define	BITS_EXTENDED_FROM_31	(BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
132
/*
 * MSR accessors that also fire an SDT probe recording the register address
 * and the value written or read.
 *
 * Each macro expands to a single statement (do { } while (0)) so that it is
 * safe to use as the unbraced body of an if/else.  Invoke them with a
 * trailing semicolon, like a function call.  RDMSR() stores the value read
 * into its second argument, which must be an lvalue.
 */
#define	WRMSR(msr, value)						\
	do {								\
		wrmsr((msr), (value));					\
		DTRACE_PROBE2(wrmsr, uint64_t, (msr),			\
		    uint64_t, (value));					\
	} while (0)

#define	RDMSR(msr, value)						\
	do {								\
		(value) = rdmsr((msr));					\
		DTRACE_PROBE2(rdmsr, uint64_t, (msr),			\
		    uint64_t, (value));					\
	} while (0)
140
/*
 * Per-counter configuration.  One is allocated by configure_gpc() or
 * configure_ffc(), consumed by core_pcbe_program() and core_pcbe_sample(),
 * and released by core_pcbe_free().
 */
typedef struct core_pcbe_config {
	/* Counter preset / last sampled value, masked to the counter width */
	uint64_t	core_rawpic;
	uint64_t	core_ctl;	/* Event Select bits */
	uint64_t	core_pmc;	/* Counter register address */
	uint64_t	core_pes;	/* Event Select register address */
	uint_t		core_picno;	/* Counter number (picnum) */
	uint8_t		core_pictype;	/* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
149
/*
 * Operations vector for this back-end.  It is referenced by core_modlpcbe,
 * which _init() hands to mod_install() through core_modl.
 */
pcbe_ops_t core_pcbe_ops = {
	PCBE_VER_1,			/* pcbe_ver */
	CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,	/* pcbe_caps */
	core_pcbe_ncounters,		/* pcbe_ncounters */
	core_pcbe_impl_name,		/* pcbe_impl_name */
	core_pcbe_cpuref,		/* pcbe_cpuref */
	core_pcbe_list_events,		/* pcbe_list_events */
	core_pcbe_list_attrs,		/* pcbe_list_attrs */
	core_pcbe_event_coverage,	/* pcbe_event_coverage */
	core_pcbe_overflow_bitmap,	/* pcbe_overflow_bitmap */
	core_pcbe_configure,		/* pcbe_configure */
	core_pcbe_program,		/* pcbe_program */
	core_pcbe_allstop,		/* pcbe_allstop */
	core_pcbe_sample,		/* pcbe_sample */
	core_pcbe_free			/* pcbe_free */
};
166
/*
 * One entry of an event table.  Each table ends with an entry whose
 * event_num is NT_END.
 */
struct nametable {
	const char	*name;		/* Event name as given by the user */
	/* Event Select bits that may only be set with cpc_cpu privilege */
	uint64_t	restricted_bits;
	uint8_t		event_num;	/* Event Select code for this event */
};
172
173#define	NT_END	0xFF
174
175/*
176 * Counting an event for all cores or all bus agents requires cpc_cpu privileges
177 */
178#define	ALL_CORES	(1ULL << 15)
179#define	ALL_AGENTS	(1ULL << 13)
180
181static const struct nametable common_gpc_events[] = {
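	/*
	 * Entries must stay sorted by event name in strcmp() order:
	 * find_gpcevent() stops scanning at the first entry that compares
	 * greater than or equal to the name being looked up.
	 */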
183
184	{ "baclears",			0x0,	0xe6 },
185	{ "bogus_br",			0x0,	0xe4 },
186	{ "br_bac_missp_exec",		0x0,	0x8a },
187
188	{ "br_call_exec",		0x0,	0x92 },
189	{ "br_call_missp_exec",		0x0,	0x93 },
190	{ "br_cnd_exec",		0x0,	0x8b },
191
192	{ "br_cnd_missp_exec",		0x0,	0x8c },
193	{ "br_ind_call_exec",		0x0,	0x94 },
194	{ "br_ind_exec",		0x0,	0x8d },
195
196	{ "br_ind_missp_exec",		0x0,	0x8e },
197	{ "br_inst_decoded",		0x0,	0xe0 },
198	{ "br_inst_exec",		0x0,	0x88 },
199
200	{ "br_inst_retired",		0x0,	0xc4 },
201	{ "br_inst_retired_mispred",	0x0,	0xc5 },
202	{ "br_missp_exec",		0x0,	0x89 },
203
204	{ "br_ret_bac_missp_exec",	0x0,	0x91 },
205	{ "br_ret_exec",		0x0,	0x8f },
206	{ "br_ret_missp_exec",		0x0,	0x90 },
207
208	{ "br_tkn_bubble_1",		0x0,	0x97 },
209	{ "br_tkn_bubble_2",		0x0,	0x98 },
210	{ "bus_bnr_drv",		ALL_AGENTS,	0x61 },
211
212	{ "bus_data_rcv",		ALL_CORES,	0x64 },
213	{ "bus_drdy_clocks",		ALL_AGENTS,	0x62 },
214	{ "bus_hit_drv",		ALL_AGENTS,	0x7a },
215
216	{ "bus_hitm_drv",		ALL_AGENTS,	0x7b },
217	{ "bus_io_wait",		ALL_CORES,	0x7f },
218	{ "bus_lock_clocks",		ALL_CORES | ALL_AGENTS,	0x63 },
219
220	{ "bus_request_outstanding",	ALL_CORES | ALL_AGENTS,	0x60 },
221	{ "bus_trans_any",		ALL_CORES | ALL_AGENTS,	0x70 },
222	{ "bus_trans_brd",		ALL_CORES | ALL_AGENTS,	0x65 },
223
224	{ "bus_trans_burst",		ALL_CORES | ALL_AGENTS,	0x6e },
225	{ "bus_trans_def",		ALL_CORES | ALL_AGENTS,	0x6d },
226	{ "bus_trans_ifetch",		ALL_CORES | ALL_AGENTS,	0x68 },
227
228	{ "bus_trans_inval",		ALL_CORES | ALL_AGENTS,	0x69 },
229	{ "bus_trans_io",		ALL_CORES | ALL_AGENTS,	0x6c },
230	{ "bus_trans_mem",		ALL_CORES | ALL_AGENTS,	0x6f },
231
232	{ "bus_trans_p",		ALL_CORES | ALL_AGENTS,	0x6b },
233	{ "bus_trans_pwr",		ALL_CORES | ALL_AGENTS,	0x6a },
234	{ "bus_trans_rfo",		ALL_CORES | ALL_AGENTS,	0x66 },
235
236	{ "bus_trans_wb",		ALL_CORES | ALL_AGENTS,	0x67 },
237	{ "busq_empty",			ALL_CORES,	0x7d },
238	{ "cmp_snoop",			ALL_CORES,	0x78 },
239
240	{ "cpu_clk_unhalted",		0x0,	0x3c },
241	{ "cycles_int",			0x0,	0xc6 },
242	{ "cycles_l1i_mem_stalled",	0x0,	0x86 },
243
244	{ "dtlb_misses",		0x0,	0x08 },
245	{ "eist_trans",			0x0,	0x3a },
246	{ "esp",			0x0,	0xab },
247
248	{ "ext_snoop",			ALL_AGENTS,	0x77 },
249	{ "fp_mmx_trans",		0x0,	0xcc },
250	{ "hw_int_rcv",			0x0,	0xc8 },
251
252	{ "ild_stall",			0x0,	0x87 },
253	{ "inst_queue",			0x0,	0x83 },
254	{ "inst_retired",		0x0,	0xc0 },
255
256	{ "itlb",			0x0,	0x82 },
257	{ "itlb_miss_retired",		0x0,	0xc9 },
258	{ "l1d_all_ref",		0x0,	0x43 },
259
260	{ "l1d_cache_ld",		0x0,	0x40 },
261	{ "l1d_cache_lock",		0x0,	0x42 },
262	{ "l1d_cache_st",		0x0,	0x41 },
263
264	{ "l1d_m_evict",		0x0,	0x47 },
265	{ "l1d_m_repl",			0x0,	0x46 },
266	{ "l1d_pend_miss",		0x0,	0x48 },
267
268	{ "l1d_prefetch",		0x0,	0x4e },
269	{ "l1d_repl",			0x0,	0x45 },
270	{ "l1d_split",			0x0,	0x49 },
271
272	{ "l1i_misses",			0x0,	0x81 },
273	{ "l1i_reads",			0x0,	0x80 },
274	{ "l2_ads",			ALL_CORES,	0x21 },
275
276	{ "l2_dbus_busy_rd",		ALL_CORES,	0x23 },
277	{ "l2_ifetch",			ALL_CORES,	0x28 },
278	{ "l2_ld",			ALL_CORES,	0x29 },
279
280	{ "l2_lines_in",		ALL_CORES,	0x24 },
281	{ "l2_lines_out",		ALL_CORES,	0x26 },
282	{ "l2_lock",			ALL_CORES,	0x2b },
283
284	{ "l2_m_lines_in",		ALL_CORES,	0x25 },
285	{ "l2_m_lines_out",		ALL_CORES,	0x27 },
286	{ "l2_no_req",			ALL_CORES,	0x32 },
287
288	{ "l2_reject_busq",		ALL_CORES,	0x30 },
289	{ "l2_rqsts",			ALL_CORES,	0x2e },
290	{ "l2_st",			ALL_CORES,	0x2a },
291
292	{ "load_block",			0x0,	0x03 },
293	{ "load_hit_pre",		0x0,	0x4c },
294	{ "machine_nukes",		0x0,	0xc3 },
295
296	{ "macro_insts",		0x0,	0xaa },
297	{ "memory_disambiguation",	0x0,	0x09 },
298	{ "page_walks",			0x0,	0x0c },
299
300	{ "pref_rqsts_dn",		0x0,	0xf8 },
301	{ "pref_rqsts_up",		0x0,	0xf0 },
302	{ "rat_stalls",			0x0,	0xd2 },
303
304	{ "resource_stalls",		0x0,	0xdc },
305	{ "rs_uops_dispatched",		0x0,	0xa0 },
306	{ "seg_reg_renames",		0x0,	0xd5 },
307
308	{ "seg_rename_stalls",		0x0,	0xd4 },
309	{ "segment_reg_loads",		0x0,	0x06 },
310	{ "simd_assist",		0x0,	0xcd },
311
312	{ "simd_comp_inst_retired",	0x0,	0xca },
313	{ "simd_inst_retired",		0x0,	0xc7 },
314	{ "simd_instr_retired",		0x0,	0xce },
315
316	{ "simd_sat_instr_retired",	0x0,	0xcf },
317	{ "simd_sat_uop_exec",		0x0,	0xb1 },
318	{ "simd_uop_type_exec",		0x0,	0xb3 },
319
320	{ "simd_uops_exec",		0x0,	0xb0 },
321	{ "snoop_stall_drv",		ALL_CORES | ALL_AGENTS,	0x7e },
322	{ "sse_pre_exec",		0x0,	0x07 },
323
324	{ "sse_pre_miss",		0x0,	0x4b },
325	{ "store_block",		0x0,	0x04 },
326	{ "thermal_trip",		0x0,	0x3b },
327
328	{ "uops_retired",		0x0,	0xc2 },
329	{ "x87_ops_retired",		0x0,	0xc1 },
330	{ "",				0x0,	NT_END }
331};
332
333/*
334 * If any of the pic specific events require privileges, make sure to add a
335 * check in configure_gpc() to find whether an event hard-coded as a number by
336 * the user has any privilege requirements
337 */
338static const struct nametable pic0_events[] = {
339	/* Alphabetical order of event name */
340
341	{ "cycles_div_busy",		0x0,	0x14 },
342	{ "fp_comp_ops_exe",		0x0,	0x10 },
343	{ "idle_during_div",		0x0,	0x18 },
344
345	{ "mem_load_retired",		0x0,	0xcb },
346	{ "rs_uops_dispatched_port",	0x0,	0xa1 },
347	{ "",				0x0,	NT_END }
348};
349
350static const struct nametable pic1_events[] = {
351	/* Alphabetical order of event name */
352
353	{ "delayed_bypass",	0x0,	0x19 },
354	{ "div",		0x0,	0x13 },
355	{ "fp_assist",		0x0,	0x11 },
356
357	{ "mul",		0x0,	0x12 },
358	{ "",			0x0,	NT_END }
359};
360
/*
 * Per-GPC comma-separated event name lists, built once by core_pcbe_init()
 * and returned by core_pcbe_list_events().  NULL when there are no GPCs.
 */
static char **gpc_names;
362
/*
 * Event name of each fixed-function counter, indexed by FFC number
 * (picnum - num_gpc).  The trailing NULL is not an event; core_pcbe_init()
 * clamps num_ffc to the number of real entries in this array.
 */
char *ffc_names[] = {
	"instr_retired.any",
	"cpu_clk_unhalted.core",
	"cpu_clk_unhalted.ref",
	NULL
};
369
/* Counter geometry, filled in once by core_pcbe_init() */
static uint64_t	num_gpc;	/* Number of general-purpose counters */
static uint64_t	width_gpc;	/* Bit width of each GPC */
static uint64_t	mask_gpc;	/* BITMASK_XBITS(width_gpc) */
static uint64_t	num_ffc;	/* Number of fixed-function counters */
static uint64_t	width_ffc;	/* Bit width of each FFC */
static uint64_t	mask_ffc;	/* BITMASK_XBITS(width_ffc) */
static uint_t	total_pmc;	/* num_gpc + num_ffc */
static uint64_t	control_ffc;	/* BITMASK_XBITS(num_ffc) */
static uint64_t	control_gpc;	/* BITMASK_XBITS(num_gpc) */
static uint64_t	control_mask;	/* (control_ffc << 32) | control_gpc */
380
/* Implementation name returned by core_pcbe_impl_name() */
static const char *core_impl_name = "Core Microarchitecture";
382
/*
 * Reference documentation string returned by core_pcbe_cpuref().  Adjacent
 * string literals are concatenated by the compiler, so no line-continuation
 * backslashes are needed.
 */
static const char *core_cpuref =
	"See Appendix A of the \"Intel 64 and IA-32 Architectures Software"
	" Developer's Manual Volume 3B: System Programming Guide, Part 2\""
	" Order Number: 253669-026US, February 2008";
387
388static int
389core_pcbe_init(void)
390{
391	struct cpuid_regs	cp;
392	uint32_t		versionid;
393	const struct nametable	*n;
394	size_t			size;
395	size_t			common_size;
396	uint64_t		i;
397	const struct nametable	*picspecific_events;
398
399	if ((cpuid_getvendor(CPU) != X86_VENDOR_Intel) ||
400	    (cpuid_getfamily(CPU) != 6) ||
401	    (cpuid_getmodel(CPU) != 15 && cpuid_getmodel(CPU) != 23))
402		return (-1);
403
404	/* Obtain the Architectural Performance Monitoring Leaf */
405	cp.cp_eax = 0xa;
406	(void) __cpuid_insn(&cp);
407
408	versionid = cp.cp_eax & 0xFF;
409
410	/*
411	 * All Family 6 Model 15 and Model 23 processors have fixed-function
412	 * counters.  These counters were made Architectural with
413	 * Family 6 Model 9 Stepping 9.
414	 */
415	switch (versionid) {
416
417		case 0:
418			return (-1);
419
420		case APC_V2:
421			num_ffc = cp.cp_edx & 0x1F;
422			width_ffc = (cp.cp_edx >> 5) & 0xFF;
423
424			if (num_ffc == 0) {
425				/*
426				 * Some processors have an errata (AW34) where
427				 * versionid is reported as 2 when actually 1.
428				 * In this case, fixed-function counters are
429				 * model-specific as in Version 1.
430				 */
431				num_ffc = 3;
432				width_ffc = 40;
433				versionid = APC_V1;
434			}
435			break;
436
437		default:
438			/*
439			 * For higher versions currently unsupported,
440			 * default to Version 1
441			 */
442			num_ffc = 3;
443			width_ffc = 40;
444			break;
445	}
446
447	if (num_ffc >= 64)
448		return (-1);
449
450	if (num_ffc >= sizeof (ffc_names) / sizeof (char *)) {
451		/*
452		 * The system seems to have more fixed-function counters than
453		 * what this PCBE is able to handle correctly.  Default to the
454		 * maximum number of fixed-function counters that this driver
455		 * is aware of.
456		 */
457		num_ffc = sizeof (ffc_names) / sizeof (char *) - 1;
458	}
459
460	mask_ffc = BITMASK_XBITS(width_ffc);
461
462	num_gpc = (cp.cp_eax >> 8) & 0xFF;
463	width_gpc = (cp.cp_eax >> 16) & 0xFF;
464
465	if (num_gpc >= 64)
466		return (-1);
467
468	mask_gpc = BITMASK_XBITS(width_gpc);
469
470	total_pmc = num_gpc + num_ffc;
471
472	control_gpc = BITMASK_XBITS(num_gpc);
473	control_ffc = BITMASK_XBITS(num_ffc);
474
475	control_mask = (control_ffc << 32) | control_gpc;
476
477	if (total_pmc > 64) {
478		/* Too wide for the overflow bitmap */
479		return (-1);
480	}
481
482	/* General-purpose Counters (GPC) */
483	gpc_names = NULL;
484
485	if (num_gpc > 0) {
486		gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
487
488		/* Calculate space needed to save all the common event names */
489		common_size = 0;
490		for (n = common_gpc_events; n->event_num != NT_END; n++) {
491			common_size += strlen(n->name) + 1;
492		}
493
494		for (i = 0; i < num_gpc; i++) {
495			size = 0;
496			switch (i) {
497				case 0:
498					picspecific_events = pic0_events;
499					break;
500				case 1:
501					picspecific_events = pic1_events;
502					break;
503				default:
504					picspecific_events = NULL;
505					break;
506			}
507			if (picspecific_events != NULL) {
508				for (n = picspecific_events;
509				    n->event_num != NT_END;
510				    n++) {
511					size += strlen(n->name) + 1;
512				}
513			}
514
515			gpc_names[i] =
516			    kmem_alloc(size + common_size + 1, KM_SLEEP);
517
518			gpc_names[i][0] = '\0';
519			if (picspecific_events != NULL) {
520				for (n = picspecific_events;
521				    n->event_num != NT_END;
522				    n++) {
523					(void) strcat(gpc_names[i], n->name);
524					(void) strcat(gpc_names[i], ",");
525				}
526			}
527			for (n = common_gpc_events; n->event_num != NT_END;
528			    n++) {
529				(void) strcat(gpc_names[i], n->name);
530				(void) strcat(gpc_names[i], ",");
531			}
532			/*
533			 * Remove trailing comma.
534			 */
535			gpc_names[i][common_size + size - 1] = '\0';
536		}
537	}
538
539	/*
540	 * Fixed-function Counters (FFC) are already listed individually in
541	 * ffc_names[]
542	 */
543	return (0);
544}
545
546static uint_t core_pcbe_ncounters()
547{
548	return (total_pmc);
549}
550
551static const char *core_pcbe_impl_name(void)
552{
553	return (core_impl_name);
554}
555
556static const char *core_pcbe_cpuref(void)
557{
558	return (core_cpuref);
559}
560
561static char *core_pcbe_list_events(uint_t picnum)
562{
563	ASSERT(picnum < cpc_ncounters);
564
565	if (picnum < num_gpc) {
566		return (gpc_names[picnum]);
567	} else {
568		return (ffc_names[picnum - num_gpc]);
569	}
570}
571
/*
 * Return the comma-separated list of attribute names that configure_gpc()
 * accepts for general-purpose counters.
 */
static char *
core_pcbe_list_attrs(void)
{
	static char attr_list[] = "edge,pc,inv,umask,cmask";

	return (attr_list);
}
576
577static const struct nametable *
578find_gpcevent(char *name, const struct nametable *nametable)
579{
580	const struct nametable *n;
581	int compare_result;
582
583	compare_result = -1;
584	for (n = nametable; n->event_num != NT_END; n++) {
585		compare_result = strcmp(name, n->name);
586		if (compare_result <= 0) {
587			break;
588		}
589	}
590
591	if (compare_result == 0) {
592		return (n);
593	}
594
595	return (NULL);
596}
597
598static uint64_t
599core_pcbe_event_coverage(char *event)
600{
601	uint64_t bitmap;
602	uint64_t bitmask;
603	int i;
604
605	bitmap = 0;
606
607	/* Is it an event that a GPC can track? */
608	if (find_gpcevent(event, common_gpc_events) != NULL) {
609		bitmap |= BITMASK_XBITS(num_gpc);
610	} else if (find_gpcevent(event, pic0_events) != NULL) {
611		bitmap |= 1ULL;
612	} else if (find_gpcevent(event, pic1_events) != NULL) {
613		bitmap |= 1ULL << 1;
614	}
615
616	/* Check if the event can be counted in the fixed-function counters */
617	if (num_ffc > 0) {
618		bitmask = 1ULL << num_gpc;
619		for (i = 0; i < num_ffc; i++) {
620			if (strcmp(event, ffc_names[i]) == 0) {
621				bitmap |= bitmask;
622			}
623			bitmask = bitmask << 1;
624		}
625	}
626
627	return (bitmap);
628}
629
630static uint64_t
631core_pcbe_overflow_bitmap(void)
632{
633	uint64_t interrupt_status;
634	uint64_t intrbits_ffc;
635	uint64_t intrbits_gpc;
636	extern int kcpc_hw_overflow_intr_installed;
637	uint64_t overflow_bitmap;
638
639	RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
640	WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
641
642	interrupt_status = interrupt_status & control_mask;
643	intrbits_ffc = (interrupt_status >> 32) & control_ffc;
644	intrbits_gpc = interrupt_status & control_gpc;
645	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
646
647	ASSERT(kcpc_hw_overflow_intr_installed);
648	(*kcpc_hw_enable_cpc_intr)();
649
650	return (overflow_bitmap);
651}
652
653static int
654check_cpc_securitypolicy(core_pcbe_config_t *conf, const struct nametable *n)
655{
656	if (conf->core_ctl & n->restricted_bits) {
657		if (secpolicy_cpc_cpu(crgetcred()) != 0) {
658			return (CPC_ATTR_REQUIRES_PRIVILEGE);
659		}
660	}
661	return (0);
662}
663
664static int
665configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
666    uint_t nattrs, kcpc_attr_t *attrs, void **data)
667{
668	core_pcbe_config_t	conf;
669	const struct nametable	*n;
670	const struct nametable	*m;
671	const struct nametable	*picspecific_events;
672	struct nametable	nt_raw = { "", 0x0, 0x0 };
673	uint_t			i;
674	long			event_num;
675
676	if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
677	    ((preset & BITS_EXTENDED_FROM_31) !=
678	    BITS_EXTENDED_FROM_31)) {
679
680		/*
681		 * Bits beyond bit-31 in the general-purpose counters can only
682		 * be written to by extension of bit 31.  We cannot preset
683		 * these bits to any value other than all 1s or all 0s.
684		 */
685		return (CPC_ATTRIBUTE_OUT_OF_RANGE);
686	}
687
688	n = find_gpcevent(event, common_gpc_events);
689	if (n == NULL) {
690		switch (picnum) {
691			case 0:
692				picspecific_events = pic0_events;
693				break;
694			case 1:
695				picspecific_events = pic1_events;
696				break;
697			default:
698				picspecific_events = NULL;
699				break;
700		}
701		if (picspecific_events != NULL) {
702			n = find_gpcevent(event, picspecific_events);
703			if (n == NULL) {
704				/*
705				 * Check if this is a case where the event was
706				 * specified directly by its event number
707				 * instead of its name string.
708				 */
709				if (ddi_strtol(event, NULL, 0, &event_num) !=
710				    0) {
711					return (CPC_INVALID_EVENT);
712				}
713
714				event_num = event_num & 0xFF;
715
716				/*
717				 * Search the event table to find out if the
718				 * event specified has an privilege
719				 * requirements.  Currently none of the
720				 * pic-specific counters have any privilege
721				 * requirements.  Hence only the
722				 * common_gpc_events table is searched.
723				 */
724				for (m = common_gpc_events;
725				    m->event_num != NT_END;
726				    m++) {
727					if (event_num == m->event_num) {
728						break;
729					}
730				}
731				if (m->event_num == NT_END) {
732					nt_raw.event_num = (uint8_t)event_num;
733					n = &nt_raw;
734				} else {
735					n = m;
736				}
737			}
738		}
739	}
740
741	conf.core_picno = picnum;
742	conf.core_pictype = CORE_GPC;
743	conf.core_rawpic = preset & mask_gpc;
744
745	conf.core_pes = GPC_BASE_PES + picnum;
746	conf.core_pmc = GPC_BASE_PMC + picnum;
747
748	conf.core_ctl = n->event_num; /* Event Select */
749	for (i = 0; i < nattrs; i++) {
750		if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
751			if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
752			    CORE_UMASK_MASK) {
753				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
754			}
755			conf.core_ctl |= attrs[i].ka_val <<
756			    CORE_UMASK_SHIFT;
757		} else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
758			if (attrs[i].ka_val != 0)
759				conf.core_ctl |= CORE_EDGE;
760		} else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
761			if (attrs[i].ka_val != 0)
762				conf.core_ctl |= CORE_PC;
763		} else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
764			if (attrs[i].ka_val != 0)
765				conf.core_ctl |= CORE_INV;
766		} else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
767			if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
768			    CORE_CMASK_MASK) {
769				return (CPC_ATTRIBUTE_OUT_OF_RANGE);
770			}
771			conf.core_ctl |= attrs[i].ka_val << CORE_CMASK_SHIFT;
772		} else {
773			return (CPC_INVALID_ATTRIBUTE);
774		}
775	}
776
777	if (flags & CPC_COUNT_USER)
778		conf.core_ctl |= CORE_USR;
779	if (flags & CPC_COUNT_SYSTEM)
780		conf.core_ctl |= CORE_OS;
781	if (flags & CPC_OVF_NOTIFY_EMT)
782		conf.core_ctl |= CORE_INT;
783	conf.core_ctl |= CORE_EN;
784
785	if (check_cpc_securitypolicy(&conf, n) != 0) {
786		return (CPC_ATTR_REQUIRES_PRIVILEGE);
787	}
788
789	*data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
790	*((core_pcbe_config_t *)*data) = conf;
791
792	return (0);
793}
794
795static int
796configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
797    uint_t nattrs, void **data)
798{
799	core_pcbe_config_t	*conf;
800
801	if (picnum - num_gpc >= num_ffc) {
802		return (CPC_INVALID_PICNUM);
803	}
804	if (strcmp(ffc_names[picnum-num_gpc], event) != 0) {
805		return (CPC_INVALID_EVENT);
806	}
807
808	if (nattrs != 0) {
809		return (CPC_INVALID_ATTRIBUTE);
810	}
811
812	conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
813
814	conf->core_picno = picnum;
815	conf->core_pictype = CORE_FFC;
816	conf->core_rawpic = preset & mask_ffc;
817	conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
818
819	/* All fixed-function counters have the same control register */
820	conf->core_pes = PERF_FIXED_CTR_CTRL;
821
822	conf->core_ctl = 0;
823	if (flags & CPC_COUNT_USER)
824		conf->core_ctl |= CORE_FFC_USR_EN;
825	if (flags & CPC_COUNT_SYSTEM)
826		conf->core_ctl |= CORE_FFC_OS_EN;
827	if (flags & CPC_OVF_NOTIFY_EMT)
828		conf->core_ctl |= CORE_FFC_PMI;
829
830	*data = conf;
831	return (0);
832}
833
834/*ARGSUSED*/
835static int
836core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
837    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
838    void *token)
839{
840	int			ret;
841	core_pcbe_config_t	*conf;
842
843	/*
844	 * If we've been handed an existing configuration, we need only preset
845	 * the counter value.
846	 */
847	if (*data != NULL) {
848		conf = *data;
849		ASSERT(conf->core_pictype == CORE_GPC ||
850		    conf->core_pictype == CORE_FFC);
851		if (conf->core_pictype == CORE_GPC)
852			conf->core_rawpic = preset & mask_gpc;
853		else /* CORE_FFC */
854			conf->core_rawpic = preset & mask_ffc;
855		return (0);
856	}
857
858	if (picnum >= total_pmc) {
859		return (CPC_INVALID_PICNUM);
860	}
861
862	if (picnum < num_gpc) {
863		ret = configure_gpc(picnum, event, preset, flags,
864		    nattrs, attrs, data);
865	} else {
866		ret = configure_ffc(picnum, event, preset, flags,
867		    nattrs, data);
868	}
869	return (ret);
870}
871
872static void
873core_pcbe_program(void *token)
874{
875	core_pcbe_config_t	*cfg;
876	uint64_t		perf_global_ctrl;
877	uint64_t		perf_fixed_ctr_ctrl;
878	uint64_t		curcr4;
879
880	core_pcbe_allstop();
881
882	curcr4 = getcr4();
883	if (kcpc_allow_nonpriv(token))
884		/* Allow RDPMC at any ring level */
885		setcr4(curcr4 | CR4_PCE);
886	else
887		/* Allow RDPMC only at ring 0 */
888		setcr4(curcr4 & ~CR4_PCE);
889
890	/* Clear any overflow indicators before programming the counters */
891	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
892
893	cfg = NULL;
894	perf_global_ctrl = 0;
895	perf_fixed_ctr_ctrl = 0;
896	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
897	while (cfg != NULL) {
898		ASSERT(cfg->core_pictype == CORE_GPC ||
899		    cfg->core_pictype == CORE_FFC);
900
901		if (cfg->core_pictype == CORE_GPC) {
902			/*
903			 * General-purpose counter registers have write
904			 * restrictions where only the lower 32-bits can be
905			 * written to.  The rest of the relevant bits are
906			 * written to by extension from bit 31 (all ZEROS if
907			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
908			 * makes it possible to write to the counter register
909			 * only values that have all ONEs or all ZEROs in the
910			 * higher bits.
911			 */
912			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
913			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
914			    BITS_EXTENDED_FROM_31)) {
915				/*
916				 * Straighforward case where the higher bits
917				 * are all ZEROs or all ONEs.
918				 */
919				WRMSR(cfg->core_pmc,
920				    (cfg->core_rawpic & mask_gpc));
921			} else {
922				/*
923				 * The high order bits are not all the same.
924				 * We save what is currently in the registers
925				 * and do not write to it.  When we want to do
926				 * a read from this register later (in
927				 * core_pcbe_sample()), we subtract the value
928				 * we save here to get the actual event count.
929				 *
930				 * NOTE: As a result, we will not get overflow
931				 * interrupts as expected.
932				 */
933				RDMSR(cfg->core_pmc, cfg->core_rawpic);
934				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
935			}
936			WRMSR(cfg->core_pes, cfg->core_ctl);
937			perf_global_ctrl |= 1ull << cfg->core_picno;
938		} else {
939			/*
940			 * Unlike the general-purpose counters, all relevant
941			 * bits of fixed-function counters can be written to.
942			 */
943			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
944
945			/*
946			 * Collect the control bits for all the
947			 * fixed-function counters and write it at one shot
948			 * later in this function
949			 */
950			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
951			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
952			perf_global_ctrl |=
953			    1ull << (cfg->core_picno - num_gpc + 32);
954		}
955
956		cfg = (core_pcbe_config_t *)
957		    kcpc_next_config(token, cfg, NULL);
958	}
959
960	/* Enable all the counters */
961	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
962	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
963}
964
965static void
966core_pcbe_allstop(void)
967{
968	/* Disable all the counters together */
969	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
970
971	setcr4(getcr4() & ~CR4_PCE);
972}
973
974static void
975core_pcbe_sample(void *token)
976{
977	uint64_t		*daddr;
978	uint64_t		curpic;
979	core_pcbe_config_t	*cfg;
980	uint64_t			counter_mask;
981
982	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
983	while (cfg != NULL) {
984		ASSERT(cfg->core_pictype == CORE_GPC ||
985		    cfg->core_pictype == CORE_FFC);
986
987		curpic = rdmsr(cfg->core_pmc);
988
989		DTRACE_PROBE4(core__pcbe__sample,
990		    uint64_t, cfg->core_pmc,
991		    uint64_t, curpic,
992		    uint64_t, cfg->core_rawpic,
993		    uint64_t, *daddr);
994
995		if (cfg->core_pictype == CORE_GPC) {
996			counter_mask = mask_gpc;
997		} else {
998			counter_mask = mask_ffc;
999		}
1000		curpic = curpic & counter_mask;
1001		if (curpic >= cfg->core_rawpic) {
1002			*daddr += curpic - cfg->core_rawpic;
1003		} else {
1004			/* Counter overflowed since our last sample */
1005			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
1006			    1;
1007		}
1008		cfg->core_rawpic = *daddr & counter_mask;
1009
1010		cfg =
1011		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
1012	}
1013}
1014
1015static void
1016core_pcbe_free(void *config)
1017{
1018	kmem_free(config, sizeof (core_pcbe_config_t));
1019}
1020
/*
 * PCBE module linkage: ties the module description string to this
 * back-end's operations vector.
 */
static struct modlpcbe core_modlpcbe = {
	&mod_pcbeops,
	"Core Performance Counters",
	&core_pcbe_ops
};
1026
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info() respectively.
 */
static struct modlinkage core_modl = {
	MODREV_1,
	&core_modlpcbe,
};
1031
1032int
1033_init(void)
1034{
1035	if (core_pcbe_init() != 0) {
1036		return (ENOTSUP);
1037	}
1038	return (mod_install(&core_modl));
1039}
1040
1041int
1042_fini(void)
1043{
1044	return (mod_remove(&core_modl));
1045}
1046
1047int
1048_info(struct modinfo *mi)
1049{
1050	return (mod_info(&core_modl, mi));
1051}
1052