hwpmc_piv.c revision 145256
1/*-
2 * Copyright (c) 2003-2005 Joseph Koshy
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_piv.c 145256 2005-04-19 04:01:25Z jkoshy $");
29
30#include <sys/param.h>
31#include <sys/lock.h>
32#include <sys/mutex.h>
33#include <sys/pmckern.h>
34#include <sys/smp.h>
35#include <sys/systm.h>
36
37#include <machine/cputypes.h>
38#include <machine/md_var.h>
39#include <machine/pmc_mdep.h>
40#include <machine/specialreg.h>
41
42/*
43 * PENTIUM 4 SUPPORT
44 *
45 * The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs
46 * respectively.  Each PMC comprises two model-specific registers:
47 * a counter configuration control register (CCCR) and a counter
48 * register that holds the actual event counts.
49 *
50 * Configuring an event requires the use of one of 45 event selection
51 * control registers (ESCR).  Events are associated with specific
52 * ESCRs.  Each PMC group has a set of ESCRs it can use.
53 *
54 * - The BPU counter group (4 PMCs) can use the 16 ESCRs:
55 *   BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
56 *   PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
57 *
58 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
59 *   TC_ESCR{0,1}, TBPU_ESCR{0,1}.
60 *
61 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
62 *   FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
63 *   DAC_ESCR{0,1}.
64 *
65 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
66 *   ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
67 *
68 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
69 * present) of a counter group.  Odd-numbered ESCRs can be used with
70 * counters 2, 3 and 5 (if present) of a counter group.  The
71 * 'p4_escrs[]' table describes these restrictions in a form that
72 * function 'p4_allocate()' uses for making allocation decisions.
73 *
74 * SYSTEM-MODE AND THREAD-MODE ALLOCATION
75 *
76 * In addition to remembering the state of PMC rows
77 * ('FREE','STANDALONE', or 'THREAD'), we similarly need to track the
78 * state of ESCR rows.  If an ESCR is allocated to a system-mode PMC
79 * on a CPU we cannot allocate this to a thread-mode PMC.  On a
80 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each
81 * CPU is tracked by the pc_escrs[] array.
82 *
83 * Each system-mode PMC that is using an ESCR records its row-index in
84 * the appropriate entry and system-mode allocation attempts check
85 * that an ESCR is available using this array.  Process-mode PMCs do
86 * not use the pc_escrs[] array, since ESCR row itself would have been
87 * marked as in 'THREAD' mode.
88 *
89 * HYPERTHREADING SUPPORT
90 *
91 * When HTT is enabled, the FreeBSD kernel treats the two 'logical'
92 * cpus as independent CPUs and can schedule kernel threads on them
93 * independently.  However, the two logical CPUs share the same set of
94 * PMC resources.  We need to ensure that:
95 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
96 *   and,
97 * - Threads of multi-threaded processes that get scheduled on the same
98 *   physical CPU are handled correctly.
99 *
100 * Not all HTT capable systems will have HTT enabled since users may
101 * have turned HTT support off using the appropriate sysctls
102 * (machdep.hlt_logical_cpus and machdep.logical_cpus_mask).  We
103 * detect the presence of HTT by remembering if an initialization was
104 * done for a logical CPU.
105 *
106 */
107
/*
 * X-macro list of the P4 counters.  Each P4_PMC(name) entry names one
 * counter; the entries are laid out two per line, one counter group
 * (BPU, MS, FLAME, IQ) after another.  The closing NONE entry is a
 * sentinel that follows the real counters.  A user defines P4_PMC()
 * and then expands P4_PMCS().
 */
#define	P4_PMCS()							\
	P4_PMC(BPU_COUNTER0)	P4_PMC(BPU_COUNTER1)			\
	P4_PMC(BPU_COUNTER2)	P4_PMC(BPU_COUNTER3)			\
	P4_PMC(MS_COUNTER0)	P4_PMC(MS_COUNTER1)			\
	P4_PMC(MS_COUNTER2)	P4_PMC(MS_COUNTER3)			\
	P4_PMC(FLAME_COUNTER0)	P4_PMC(FLAME_COUNTER1)			\
	P4_PMC(FLAME_COUNTER2)	P4_PMC(FLAME_COUNTER3)			\
	P4_PMC(IQ_COUNTER0)	P4_PMC(IQ_COUNTER1)			\
	P4_PMC(IQ_COUNTER2)	P4_PMC(IQ_COUNTER3)			\
	P4_PMC(IQ_COUNTER4)	P4_PMC(IQ_COUNTER5)			\
	P4_PMC(NONE)

/*
 * Counter identifiers: one P4_PMC_<name> enumerator per P4_PMCS()
 * entry, numbered from zero in list order.
 */
#undef	P4_PMC
#define	P4_PMC(NAME)	P4_PMC_##NAME ,
enum pmc_p4pmc {
	P4_PMCS()
};
134
/*
 * P4 ESCR descriptors
 *
 * X-macro list of the event selection control registers.  Each entry is
 *
 *	P4_ESCR(name, MSR address, pmc, pmc, pmc)
 *
 * where the last three columns name the counters the ESCR can be
 * paired with (NONE pads unused columns).  Even-numbered ESCRs pair
 * with counters 0, 1 and 4 of their group, odd-numbered ESCRs with
 * counters 2, 3 and 5.  The closing NONE entry is a sentinel.
 */

#define	P4_ESCRS()							\
    P4_ESCR(BSU_ESCR0,	0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BSU_ESCR1,	0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FSB_ESCR0,	0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(FSB_ESCR1,	0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FIRM_ESCR0,	0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FIRM_ESCR1,	0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(DAC_ESCR0,	0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(DAC_ESCR1,	0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(MOB_ESCR0,	0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(MOB_ESCR1,	0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(PMH_ESCR0,	0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(PMH_ESCR1,	0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(SAAT_ESCR0,	0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(SAAT_ESCR1,	0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(U2L_ESCR0,	0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(U2L_ESCR1,	0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(BPU_ESCR0,	0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BPU_ESCR1,	0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(IS_ESCR0,	0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IS_ESCR1,	0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ITLB_ESCR0,	0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(ITLB_ESCR1,	0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(CRU_ESCR0,	0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR1,	0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(IQ_ESCR0,	0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(IQ_ESCR1,	0x3BB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(RAT_ESCR0,	0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(RAT_ESCR1,	0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(SSU_ESCR0,	0x3BE, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(MS_ESCR0,	0x3C0, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(MS_ESCR1,	0x3C1, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TBPU_ESCR0,	0x3C2, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TBPU_ESCR1,	0x3C3, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TC_ESCR0,	0x3C4, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TC_ESCR1,	0x3C5, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(IX_ESCR0,	0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IX_ESCR1,	0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ALF_ESCR0,	0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(ALF_ESCR1,	0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR2,	0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR3,	0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR4,	0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR5,	0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(NONE,		~0,    NONE, NONE, NONE)
186
187enum pmc_p4escr {
188#define	P4_ESCR(N, MSR, P1, P2, P3)	P4_ESCR_##N ,
189	P4_ESCRS()
190#undef	P4_ESCR
191};
192
193struct pmc_p4escr_descr {
194	const char	pm_escrname[PMC_NAME_MAX];
195	u_short		pm_escr_msr;
196	const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
197};
198
199static struct pmc_p4escr_descr p4_escrs[] =
200{
201#define	P4_ESCR(N, MSR, P1, P2, P3)		\
202	{					\
203		.pm_escrname = #N,		\
204		.pm_escr_msr = (MSR),		\
205		.pm_pmcs =			\
206		{				\
207			P4_PMC_##P1,		\
208			P4_PMC_##P2,		\
209			P4_PMC_##P3		\
210		}				\
211	} ,
212
213	P4_ESCRS()
214
215#undef	P4_ESCR
216};
217
218/*
219 * P4 Event descriptor
220 */
221
222struct p4_event_descr {
223	const enum pmc_event pm_event;
224	const uint32_t	pm_escr_eventselect;
225	const uint32_t	pm_cccr_select;
226	const char	pm_is_ti_event;
227	enum pmc_p4escr	pm_escrs[P4_MAX_ESCR_PER_EVENT];
228};
229
230static struct p4_event_descr p4_events[] = {
231
232#define	P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1)	\
233	{								\
234		.pm_event            = PMC_EV_P4_##NAME,		\
235		.pm_escr_eventselect = (ESCREVENTSEL),			\
236		.pm_cccr_select      = (CCCRSEL),			\
237		.pm_is_ti_event	     = (TI_EVENT),			\
238		.pm_escrs            =					\
239		{							\
240			P4_ESCR_##ESCR0,				\
241			P4_ESCR_##ESCR1					\
242		}							\
243	}
244
245P4_EVDESCR(TC_DELIVER_MODE,	0x01, 0x01, TRUE,  TC_ESCR0,	TC_ESCR1),
246P4_EVDESCR(BPU_FETCH_REQUEST,	0x03, 0x00, FALSE, BPU_ESCR0,	BPU_ESCR1),
247P4_EVDESCR(ITLB_REFERENCE,	0x18, 0x03, FALSE, ITLB_ESCR0,	ITLB_ESCR1),
248P4_EVDESCR(MEMORY_CANCEL,	0x02, 0x05, FALSE, DAC_ESCR0,	DAC_ESCR1),
249P4_EVDESCR(MEMORY_COMPLETE,	0x08, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
250P4_EVDESCR(LOAD_PORT_REPLAY,	0x04, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
251P4_EVDESCR(STORE_PORT_REPLAY,	0x05, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
252P4_EVDESCR(MOB_LOAD_REPLAY,	0x03, 0x02, FALSE, MOB_ESCR0,	MOB_ESCR1),
253P4_EVDESCR(PAGE_WALK_TYPE,	0x01, 0x04, TRUE,  PMH_ESCR0,	PMH_ESCR1),
254P4_EVDESCR(BSQ_CACHE_REFERENCE,	0x0C, 0x07, FALSE, BSU_ESCR0,	BSU_ESCR1),
255P4_EVDESCR(IOQ_ALLOCATION,	0x03, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
256P4_EVDESCR(IOQ_ACTIVE_ENTRIES,	0x1A, 0x06, FALSE, FSB_ESCR1,	NONE),
257P4_EVDESCR(FSB_DATA_ACTIVITY,	0x17, 0x06, TRUE,  FSB_ESCR0,	FSB_ESCR1),
258P4_EVDESCR(BSQ_ALLOCATION,	0x05, 0x07, FALSE, BSU_ESCR0,	NONE),
259P4_EVDESCR(BSQ_ACTIVE_ENTRIES,	0x06, 0x07, FALSE, BSU_ESCR1,	NONE),
260	/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
261P4_EVDESCR(SSE_INPUT_ASSIST,	0x34, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
262P4_EVDESCR(PACKED_SP_UOP,	0x08, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
263P4_EVDESCR(PACKED_DP_UOP,	0x0C, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
264P4_EVDESCR(SCALAR_SP_UOP,	0x0A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
265P4_EVDESCR(SCALAR_DP_UOP,	0x0E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
266P4_EVDESCR(64BIT_MMX_UOP,	0x02, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
267P4_EVDESCR(128BIT_MMX_UOP,	0x1A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
268P4_EVDESCR(X87_FP_UOP,		0x04, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
269P4_EVDESCR(X87_SIMD_MOVES_UOP,	0x2E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
270P4_EVDESCR(GLOBAL_POWER_EVENTS,	0x13, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
271P4_EVDESCR(TC_MS_XFER,		0x05, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
272P4_EVDESCR(UOP_QUEUE_WRITES,	0x09, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
273P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
274    				0x05, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
275P4_EVDESCR(RETIRED_BRANCH_TYPE,	0x04, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
276P4_EVDESCR(RESOURCE_STALL,	0x01, 0x01, FALSE, ALF_ESCR0,	ALF_ESCR1),
277P4_EVDESCR(WC_BUFFER,		0x05, 0x05, TRUE,  DAC_ESCR0,	DAC_ESCR1),
278P4_EVDESCR(B2B_CYCLES,		0x16, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
279P4_EVDESCR(BNR,			0x08, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
280P4_EVDESCR(SNOOP,		0x06, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
281P4_EVDESCR(RESPONSE,		0x04, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
282P4_EVDESCR(FRONT_END_EVENT,	0x08, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
283P4_EVDESCR(EXECUTION_EVENT,	0x0C, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
284P4_EVDESCR(REPLAY_EVENT, 	0x09, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
285P4_EVDESCR(INSTR_RETIRED,	0x02, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
286P4_EVDESCR(UOPS_RETIRED,	0x01, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
287P4_EVDESCR(UOP_TYPE,		0x02, 0x02, FALSE, RAT_ESCR0,	RAT_ESCR1),
288P4_EVDESCR(BRANCH_RETIRED,	0x06, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
289P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
290P4_EVDESCR(X87_ASSIST,		0x03, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
291P4_EVDESCR(MACHINE_CLEAR,	0x02, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3)
292
293#undef	P4_EVDESCR
294};
295
/* True when event descriptor 'EV' is flagged as thread independent. */
#define	P4_EVENT_IS_TI(EV)	((EV)->pm_is_ti_event == TRUE)

/* Number of event codes in the range PMC_EV_P4_FIRST..PMC_EV_P4_LAST. */
#define	P4_NEVENTS	(1 + PMC_EV_P4_LAST - PMC_EV_P4_FIRST)
299
300/*
301 * P4 PMC descriptors
302 */
303
304struct p4pmc_descr {
305	struct pmc_descr pm_descr; 	/* common information */
306	enum pmc_p4pmc	pm_pmcnum;	/* PMC number */
307	uint32_t	pm_pmc_msr; 	/* PERFCTR MSR address */
308	uint32_t	pm_cccr_msr;  	/* CCCR MSR address */
309};
310
311static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
312
313	/*
314	 * TSC descriptor
315	 */
316
317	{
318		.pm_descr =
319		{
320			.pd_name  = "TSC",
321			.pd_class = PMC_CLASS_TSC,
322			.pd_caps  = PMC_CAP_READ | PMC_CAP_WRITE,
323			.pd_width = 64
324		},
325		.pm_pmcnum   = ~0,
326		.pm_cccr_msr = ~0,
327		.pm_pmc_msr  = 0x10,
328	},
329
330	/*
331	 * P4 PMCS
332	 */
333
334#define	P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM |  \
335	PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
336	PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE |            \
337	PMC_CAP_TAGGING | PMC_CAP_CASCADE)
338
339#define	P4_PMCDESCR(N, PMC, CCCR)			\
340	{						\
341		.pm_descr =				\
342		{					\
343			.pd_name = #N,			\
344			.pd_class = PMC_CLASS_P4,	\
345			.pd_caps = P4_PMC_CAPS,		\
346			.pd_width = 40			\
347		},					\
348		.pm_pmcnum      = P4_PMC_##N,		\
349		.pm_cccr_msr 	= (CCCR),		\
350		.pm_pmc_msr	= (PMC)			\
351	}
352
353	P4_PMCDESCR(BPU_COUNTER0,	0x300,	0x360),
354	P4_PMCDESCR(BPU_COUNTER1,	0x301,	0x361),
355	P4_PMCDESCR(BPU_COUNTER2,	0x302,	0x362),
356	P4_PMCDESCR(BPU_COUNTER3,	0x303,	0x363),
357	P4_PMCDESCR(MS_COUNTER0,	0x304,	0x364),
358	P4_PMCDESCR(MS_COUNTER1,	0x305,	0x365),
359	P4_PMCDESCR(MS_COUNTER2,	0x306,	0x366),
360	P4_PMCDESCR(MS_COUNTER3,	0x307,	0x367),
361	P4_PMCDESCR(FLAME_COUNTER0,	0x308,	0x368),
362	P4_PMCDESCR(FLAME_COUNTER1,	0x309,	0x369),
363	P4_PMCDESCR(FLAME_COUNTER2,	0x30A,	0x36A),
364	P4_PMCDESCR(FLAME_COUNTER3,	0x30B,	0x36B),
365	P4_PMCDESCR(IQ_COUNTER0,	0x30C,	0x36C),
366	P4_PMCDESCR(IQ_COUNTER1,	0x30D,	0x36D),
367	P4_PMCDESCR(IQ_COUNTER2,	0x30E,	0x36E),
368	P4_PMCDESCR(IQ_COUNTER3,	0x30F,	0x36F),
369	P4_PMCDESCR(IQ_COUNTER4,	0x310,	0x370),
370	P4_PMCDESCR(IQ_COUNTER5,	0x311,	0x371),
371
372#undef	P4_PMCDESCR
373};
374
/*
 * HTT support
 */
#define	P4_NHTT			2	/* logical processors/chip */
#define	P4_HTT_CPU_INDEX_0	0
#define	P4_HTT_CPU_INDEX_1	1

/* Set to 1 by p4_init() once a logical CPU has been initialized. */
static int p4_system_has_htt;
381
382/*
383 * Per-CPU data structure for P4 class CPUs
384 *
385 * [common stuff]
386 * [19 struct pmc_hw pointers]
387 * [19 struct pmc_hw structures]
388 * [45 ESCRs status bytes]
389 * [per-cpu spin mutex]
390 * [19 flags for holding the config count and runcount]
391 * [19*2 saved value fields] (Thread mode PMC support)
392 * [19*2 pmc value fields]   (-do-)
393 */
394
395struct p4_cpu {
396	struct pmc_cpu	pc_common;
397	struct pmc_hw	*pc_hwpmcs[P4_NPMCS];
398	struct pmc_hw	pc_p4pmcs[P4_NPMCS];
399	char		pc_escrs[P4_NESCR];
400	struct mtx	pc_mtx;	/* spin lock */
401	unsigned char	pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
402	pmc_value_t	pc_saved[P4_NPMCS * P4_NHTT];
403	pmc_value_t	pc_pmc_values[P4_NPMCS * P4_NHTT];
404};
405
/*
 * Accessors for the per-(row, logical cpu) value slots of a
 * 'struct p4_cpu'.  pc_saved[] and pc_pmc_values[] each hold
 * P4_NPMCS * P4_NHTT entries: every row-index owns P4_NHTT adjacent
 * slots and the low bit of the cpu number selects the slot within the
 * row.  Both macros expand to lvalues.
 *
 * The index is (RI) * P4_NHTT + ((CPU) & 1).  The previous form,
 * (RI) * ((CPU) & 1), mapped every row of an even-numbered cpu onto
 * slot 0.
 */
#define	P4_PCPU_SAVED_VALUE(PC,RI,CPU)					\
	((PC)->pc_saved[(RI) * P4_NHTT + ((CPU) & 1)])
#define	P4_PCPU_PMC_VALUE(P,R,C)					\
	((P)->pc_pmc_values[(R) * P4_NHTT + ((C) & 1)])
408
/*
 * Accessors for the pc_flags[] byte of a PMC row.  The byte packs two
 * 4-bit counts: the run count in the low nibble (mask 0x0F) and the
 * config count in the high nibble (mask 0xF0).
 *
 * P4_PCPU_SET_FLAGS() replaces only the bits selected by MASK.  Its
 * temporary is 'unsigned char' to match pc_flags[], so values with the
 * top bit set do not pass through a (possibly signed) plain 'char'.
 */
#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL)	do {	\
	unsigned char _p4flags;				\
	_p4flags  = (PC)->pc_flags[(RI)];		\
	_p4flags &= ~(MASK);				\
	_p4flags |= (VAL) & (MASK);			\
	(PC)->pc_flags[(RI)] = _p4flags;		\
} while (0)

/* Run count: low nibble. */
#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

/* Config count: high nibble. */
#define	P4_PCPU_GET_CFGCOUNT(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGCOUNT(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
423
424/* ESCR row disposition */
425static int p4_escrdisp[P4_NESCR];
426
427#define	P4_ESCR_ROW_DISP_IS_THREAD(E)		(p4_escrdisp[(E)] > 0)
428#define	P4_ESCR_ROW_DISP_IS_STANDALONE(E)	(p4_escrdisp[(E)] < 0)
429#define	P4_ESCR_ROW_DISP_IS_FREE(E)		(p4_escrdisp[(E)] == 0)
430
431#define	P4_ESCR_MARK_ROW_STANDALONE(E) do {				\
432	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
433		    __LINE__));						\
434	atomic_add_int(&p4_escrdisp[(E)], -1);				\
435	KASSERT(p4_escrdisp[(E)] >= (-mp_ncpus), ("[p4,%d] row "	\
436		"disposition error", __LINE__));			\
437} while (0)
438
439#define	P4_ESCR_UNMARK_ROW_STANDALONE(E) do {				\
440	atomic_add_int(&p4_escrdisp[(E)], 1);				\
441	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
442		    __LINE__));						\
443} while (0)
444
445#define	P4_ESCR_MARK_ROW_THREAD(E) do {					 \
446	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
447		    __LINE__));						 \
448	atomic_add_int(&p4_escrdisp[(E)], 1);				 \
449} while (0)
450
451#define	P4_ESCR_UNMARK_ROW_THREAD(E) do {				 \
452	atomic_add_int(&p4_escrdisp[(E)], -1);				 \
453	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\
454		    __LINE__));						 \
455} while (0)
456
/* A PMC is stopped when the ENABLE bit of its CCCR is clear. */
#define	P4_PMC_IS_STOPPED(cccr)	((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)

/*
 * Map a cpu number to the cpu that owns the shared per-cpu state: a
 * logical cpu maps to the cpu number with the low bit cleared, any
 * other cpu maps to itself.
 */
#define	P4_TO_PHYSICAL_CPU(cpu)						\
    (pmc_cpu_is_logical(cpu) ? ((cpu) & ~1) : (cpu))

/* CCCR bits other than the PMI T0/T1, ENABLE and OVF bits. */
#define	P4_CCCR_Tx_MASK							\
    (~(P4_CCCR_OVF_PMI_T0 | P4_CCCR_OVF_PMI_T1 |			\
       P4_CCCR_ENABLE | P4_CCCR_OVF))

/* ESCR bits other than the T0/T1 OS and USR bits. */
#define	P4_ESCR_Tx_MASK							\
    (~(P4_ESCR_T0_OS | P4_ESCR_T0_USR |					\
       P4_ESCR_T1_OS | P4_ESCR_T1_USR))
466
467/*
468 * support routines
469 */
470
471static struct p4_event_descr *
472p4_find_event(enum pmc_event ev)
473{
474	int n;
475
476	for (n = 0; n < P4_NEVENTS; n++)
477		if (p4_events[n].pm_event == ev)
478			break;
479	if (n == P4_NEVENTS)
480		return NULL;
481	return &p4_events[n];
482}
483
484/*
485 * Initialize per-cpu state
486 */
487
488static int
489p4_init(int cpu)
490{
491	int n, phycpu;
492	char *pescr;
493	struct p4_cpu *pcs;
494	struct pmc_hw *phw;
495
496	KASSERT(cpu >= 0 && cpu < mp_ncpus,
497	    ("[p4,%d] insane cpu number %d", __LINE__, cpu));
498
499	PMCDBG(MDP,INI,0, "p4-init cpu=%d logical=%d", cpu,
500	    pmc_cpu_is_logical(cpu) != 0);
501
502	/*
503	 * A 'logical' CPU shares its per-cpu state with its physical
504	 * CPU.  The physical CPU would have been initialized prior to
505	 * the initialization for this cpu.
506	 */
507
508	if (pmc_cpu_is_logical(cpu)) {
509		phycpu = P4_TO_PHYSICAL_CPU(cpu);
510		pcs = (struct p4_cpu *) pmc_pcpu[phycpu];
511		PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pcs=%p",
512		    cpu, phycpu, pcs);
513		KASSERT(pcs,
514		    ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", __LINE__,
515			cpu, phycpu));
516		if (pcs == NULL) /* decline to init */
517			return ENXIO;
518		p4_system_has_htt = 1;
519		pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
520		return 0;
521	}
522
523	MALLOC(pcs, struct p4_cpu *, sizeof(struct p4_cpu), M_PMC,
524	    M_WAITOK|M_ZERO);
525
526	if (pcs == NULL)
527		return ENOMEM;
528	phw = pcs->pc_p4pmcs;
529
530	for (n = 0; n < P4_NPMCS; n++, phw++) {
531		phw->phw_state   = PMC_PHW_FLAG_IS_ENABLED |
532		    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
533		phw->phw_pmc     = NULL;
534		pcs->pc_hwpmcs[n] = phw;
535	}
536
537	/* Mark the TSC as shareable */
538	pcs->pc_hwpmcs[0]->phw_state |= PMC_PHW_FLAG_IS_SHAREABLE;
539
540	pescr = pcs->pc_escrs;
541	for (n = 0; n < P4_NESCR; n++)
542		*pescr++ = P4_INVALID_PMC_INDEX;
543	pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
544
545	mtx_init(&pcs->pc_mtx, "p4-pcpu", "pmc", MTX_SPIN);
546
547	return 0;
548}
549
550/*
551 * Destroy per-cpu state.
552 */
553
554static int
555p4_cleanup(int cpu)
556{
557	struct p4_cpu *pcs;
558
559	PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
560
561	/*
562	 * Free up the per-cpu structure for the given cpu if
563	 * allocated, and if this is a physical CPU.
564	 */
565
566	if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) != NULL &&
567	    !pmc_cpu_is_logical(cpu)) {
568		mtx_destroy(&pcs->pc_mtx);
569		FREE(pcs, M_PMC);
570	}
571
572	pmc_pcpu[cpu] = NULL;
573
574	return 0;
575}
576
577/*
578 * Context switch in.
579 */
580
581static int
582p4_switch_in(struct pmc_cpu *pc)
583{
584	(void) pc;
585	/* enable the RDPMC instruction */
586	load_cr4(rcr4() | CR4_PCE);
587	return 0;
588}
589
590/*
591 * Context switch out.
592 */
593
594static int
595p4_switch_out(struct pmc_cpu *pc)
596{
597	(void) pc;
598	/* disallow RDPMC instruction */
599	load_cr4(rcr4() & ~CR4_PCE);
600	return 0;
601}
602
603/*
604 * Read a PMC
605 */
606
607static int
608p4_read_pmc(int cpu, int ri, pmc_value_t *v)
609{
610	enum pmc_mode mode;
611	struct p4pmc_descr *pd;
612	struct pmc *pm;
613	struct p4_cpu *pc;
614	struct pmc_hw *phw;
615	pmc_value_t tmp;
616
617	KASSERT(cpu >= 0 && cpu < mp_ncpus,
618	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
619	KASSERT(ri >= 0 && ri < P4_NPMCS,
620	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
621
622	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
623	phw = pc->pc_hwpmcs[ri];
624	pd  = &p4_pmcdesc[ri];
625	pm  = phw->phw_pmc;
626
627	KASSERT(pm != NULL,
628	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
629		cpu, ri));
630
631	mode = pm->pm_mode;
632
633	PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
634
635	if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
636		KASSERT(PMC_IS_COUNTING_MODE(mode),
637		    ("[p4,%d] TSC counter in non-counting mode", __LINE__));
638		*v = rdtsc();
639		PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
640		return 0;
641	}
642
643	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
644	    ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
645
646	if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
647		tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
648	else
649		tmp = P4_PCPU_PMC_VALUE(pc,ri,cpu);
650
651	if (PMC_IS_SAMPLING_MODE(mode))
652		*v = -(tmp + 1); /* undo transformation */
653	else
654		*v = tmp;
655
656	PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
657	return 0;
658}
659
660/*
661 * Write a PMC
662 */
663
664static int
665p4_write_pmc(int cpu, int ri, pmc_value_t v)
666{
667	struct pmc *pm;
668	struct p4_cpu *pc;
669	const struct pmc_hw *phw;
670	const struct p4pmc_descr *pd;
671
672	KASSERT(cpu >= 0 && cpu < mp_ncpus,
673	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
674	KASSERT(ri >= 0 && ri < P4_NPMCS,
675	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
676
677	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
678	phw = pc->pc_hwpmcs[ri];
679	pm  = phw->phw_pmc;
680	pd  = &p4_pmcdesc[ri];
681
682	KASSERT(pm != NULL,
683	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
684		cpu, ri));
685
686	PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
687	    pm->pm_mode, v);
688
689	/*
690	 * The P4's TSC register is writeable, but we don't allow a
691	 * write as changing the TSC's value could interfere with
692	 * other parts of the system.
693	 */
694	if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
695		return 0;
696
697	/*
698	 * write the PMC value to the register/saved value: for
699	 * sampling mode PMCs, the value to be programmed into the PMC
700	 * counter is -(C+1) where 'C' is the requested sample rate.
701	 */
702	if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
703		v = -(v + 1);
704
705	if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
706		wrmsr(pd->pm_pmc_msr, v);
707	else
708		P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
709
710	return 0;
711}
712
713/*
714 * Configure a PMC 'pm' on the given CPU and row-index.
715 *
716 * 'pm' may be NULL to indicate de-configuration.
717 *
718 * On HTT systems, a PMC may get configured twice, once for each
719 * "logical" CPU.
720 */
721
722static int
723p4_config_pmc(int cpu, int ri, struct pmc *pm)
724{
725	struct pmc_hw *phw;
726	struct p4_cpu *pc;
727	int cfgcount;
728
729	KASSERT(cpu >= 0 && cpu < mp_ncpus,
730	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
731	KASSERT(ri >= 0 && ri < P4_NPMCS,
732	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
733
734	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
735	phw = pc->pc_hwpmcs[ri];
736
737	KASSERT(pm == NULL || phw->phw_pmc == NULL ||
738	    (p4_system_has_htt && phw->phw_pmc == pm),
739	    ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
740
741	mtx_lock_spin(&pc->pc_mtx);
742	cfgcount = P4_PCPU_GET_CFGCOUNT(pc,ri);
743
744	KASSERT(cfgcount >= 0 || cfgcount <= 2,
745	    ("[p4,%d] illegal cfgcount cfg=%d on cpu=%d ri=%d", __LINE__,
746		cfgcount, cpu, ri));
747
748	KASSERT(cfgcount == 0 || phw->phw_pmc,
749	    ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
750		__LINE__, cpu, ri));
751
752	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgcount,
753	    pm);
754
755	if (pm) {		/* config */
756		if (cfgcount == 0)
757			phw->phw_pmc = pm;
758
759		KASSERT(phw->phw_pmc == pm,
760		    ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
761			__LINE__, cpu, ri, pm, phw->phw_pmc));
762
763		cfgcount++;
764	} else {		/* unconfig */
765		--cfgcount;
766		if (cfgcount == 0)
767			phw->phw_pmc = NULL;
768	}
769
770	KASSERT(cfgcount >= 0 || cfgcount <= 2,
771	    ("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__,
772		cfgcount, cpu, ri));
773
774	P4_PCPU_SET_CFGCOUNT(pc,ri,cfgcount);
775
776	mtx_unlock_spin(&pc->pc_mtx);
777
778	return 0;
779}
780
781/*
782 * Allocate a PMC.
783 *
784 * The allocation strategy differs between HTT and non-HTT systems.
785 *
786 * The non-HTT case:
787 *   - Given the desired event and the PMC row-index, lookup the
788 *   list of valid ESCRs for the event.
789 *   - For each valid ESCR:
790 *     - Check if the ESCR is free and the ESCR row is in a compatible
791 *       mode (i.e., system or process))
792 *     - Check if the ESCR is usable with a P4 PMC at the desired row-index.
793 *   If everything matches, we determine the appropriate bit values for the
794 *   ESCR and CCCR registers.
795 *
796 * The HTT case:
797 *
798 * - Process mode PMCs require special care.  The FreeBSD scheduler could
799 *   schedule any two processes on the same physical CPU.  We need to ensure
800 *   that a given PMC row-index is never allocated to two different
801 *   PMCs owned by different user-processes.
802 *   This is ensured by always allocating a PMC from a 'FREE' PMC row
803 *   if the system has HTT active.
804 * - A similar check needs to be done for ESCRs; we do not want two PMCs
805 *   using the same ESCR to be scheduled at the same time.  Thus ESCR
806 *   allocation is also restricted to FREE rows if the system has HTT
807 *   enabled.
808 * - Thirdly, some events are 'thread-independent', i.e.,
809 *   the PMC hardware cannot distinguish between events caused by
810 *   different logical CPUs.  This makes it impossible to assign events
811 *   to a given thread of execution.  If the system has HTT enabled,
812 *   these events are not allowed for process-mode PMCs.
813 */
814
815static int
816p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
817    const struct pmc_op_pmcallocate *a)
818{
819	int found, n, m;
820	uint32_t caps, cccrvalue, escrvalue, tflags;
821	enum pmc_p4escr escr;
822	struct p4_cpu *pc;
823	struct p4_event_descr *pevent;
824	const struct p4pmc_descr *pd;
825
826	KASSERT(cpu >= 0 && cpu < mp_ncpus,
827	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
828	KASSERT(ri >= 0 && ri < P4_NPMCS,
829	    ("[p4,%d] illegal row-index value %d", __LINE__, ri));
830
831	pd = &p4_pmcdesc[ri];
832
833	PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
834	    "reqcaps=0x%x\n", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
835	    pm->pm_caps);
836
837	/* check class */
838	if (pd->pm_descr.pd_class != pm->pm_class)
839		return EINVAL;
840
841	/* check requested capabilities */
842	caps = a->pm_caps;
843	if ((pd->pm_descr.pd_caps & caps) != caps)
844		return EPERM;
845
846	if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
847		/* TSC's are always allocated in system-wide counting mode */
848		if (a->pm_ev != PMC_EV_TSC_TSC ||
849		    a->pm_mode != PMC_MODE_SC)
850			return EINVAL;
851		return 0;
852	}
853
854	/*
855	 * If the system has HTT enabled, and the desired allocation
856	 * mode is process-private, and the PMC row disposition is not
857	 * FREE (0), decline the allocation.
858	 */
859
860	if (p4_system_has_htt &&
861	    PMC_IS_VIRTUAL_MODE(pm->pm_mode) &&
862	    pmc_getrowdisp(ri) != 0)
863		return EBUSY;
864
865	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
866	    ("[p4,%d] unknown PMC class %d", __LINE__,
867		pd->pm_descr.pd_class));
868
869	if (pm->pm_event < PMC_EV_P4_FIRST ||
870	    pm->pm_event > PMC_EV_P4_LAST)
871		return EINVAL;
872
873	if ((pevent = p4_find_event(pm->pm_event)) == NULL)
874		return ESRCH;
875
876	PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
877	    pevent->pm_event, pevent->pm_escr_eventselect,
878	    pevent->pm_cccr_select, pevent->pm_is_ti_event);
879
880	/*
881	 * Some PMC events are 'thread independent'and therefore
882	 * cannot be used for process-private modes if HTT is being
883	 * used.
884	 */
885
886	if (P4_EVENT_IS_TI(pevent) &&
887	    PMC_IS_VIRTUAL_MODE(pm->pm_mode) && p4_system_has_htt)
888		return EINVAL;
889
890	pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
891
892	found   = 0;
893
894	/* look for a suitable ESCR for this event */
895	for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
896		if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
897			break;	/* out of ESCRs */
898		/*
899		 * Check ESCR row disposition.
900		 *
901		 * If the request is for a system-mode PMC, then the
902		 * ESCR row should not be in process-virtual mode, and
903		 * should also be free on the current CPU.
904		 */
905
906		if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
907		    if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
908			pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
909			    continue;
910		}
911
912		/*
913		 * If the request is for a process-virtual PMC, and if
914		 * HTT is not enabled, we can use an ESCR row that is
915		 * either FREE or already in process mode.
916		 *
917		 * If HTT is enabled, then we need to ensure that a
918		 * given ESCR is never allocated to two PMCS that
919		 * could run simultaneously on the two logical CPUs of
920		 * a CPU package.  We ensure this be only allocating
921		 * ESCRs from rows marked as 'FREE'.
922		 */
923
924		if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
925			if (p4_system_has_htt) {
926				if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
927					continue;
928			} else
929				if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
930					continue;
931		}
932
933		/*
934		 * We found a suitable ESCR for this event.  Now check if
935		 * this escr can work with the PMC at row-index 'ri'.
936		 */
937
938		for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
939			if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
940				found = 1;
941				break;
942			}
943	}
944
945	if (found == 0)
946		return ESRCH;
947
948	KASSERT((int) escr >= 0 && escr < P4_NESCR,
949	    ("[p4,%d] illegal ESCR value %d", __LINE__, escr));
950
951	/* mark ESCR row mode */
952	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
953		pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
954		P4_ESCR_MARK_ROW_STANDALONE(escr);
955	} else {
956		KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
957		    ("[p4,%d] escr[%d] already in use", __LINE__, escr));
958		P4_ESCR_MARK_ROW_THREAD(escr);
959	}
960
961	pm->pm_md.pm_p4.pm_p4_escrmsr   = p4_escrs[escr].pm_escr_msr;
962	pm->pm_md.pm_p4.pm_p4_escr      = escr;
963
964	cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
965	escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);
966
967	/* CCCR fields */
968	if (caps & PMC_CAP_THRESHOLD)
969		cccrvalue |= (a->pm_p4_cccrconfig & P4_CCCR_THRESHOLD_MASK) |
970		    P4_CCCR_COMPARE;
971
972	if (caps & PMC_CAP_EDGE)
973		cccrvalue |= P4_CCCR_EDGE;
974
975	if (caps & PMC_CAP_INVERT)
976		cccrvalue |= P4_CCCR_COMPLEMENT;
977
978	if (p4_system_has_htt)
979		cccrvalue |= a->pm_p4_cccrconfig & P4_CCCR_ACTIVE_THREAD_MASK;
980	else			/* no HTT; thread field should be '11b' */
981		cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
982
983	if (caps & PMC_CAP_CASCADE)
984		cccrvalue |= P4_CCCR_CASCADE;
985
986	/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
987	if (caps & PMC_CAP_INTERRUPT)
988		cccrvalue |= P4_CCCR_OVF_PMI_T0;
989
990	/* ESCR fields */
991	if (caps & PMC_CAP_QUALIFIER)
992		escrvalue |= a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK;
993	if (caps & PMC_CAP_TAGGING)
994		escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_TAG_VALUE_MASK) |
995		    P4_ESCR_TAG_ENABLE;
996	if (caps & PMC_CAP_QUALIFIER)
997		escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK);
998
999	/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
1000	tflags = 0;
1001	if (caps & PMC_CAP_SYSTEM)
1002		tflags |= P4_ESCR_T0_OS;
1003	if (caps & PMC_CAP_USER)
1004		tflags |= P4_ESCR_T0_USR;
1005	if (tflags == 0)
1006		tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1007	escrvalue |= tflags;
1008
1009	pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
1010	pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
1011
1012	PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
1013	    "escr=%d escrmsr=0x%x escrval=0x%x\n", pevent->pm_cccr_select,
1014	    cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
1015
1016	return 0;
1017}
1018
1019/*
1020 * release a PMC.
1021 */
1022
1023static int
1024p4_release_pmc(int cpu, int ri, struct pmc *pm)
1025{
1026	enum pmc_p4escr escr;
1027	struct pmc_hw *phw;
1028	struct p4_cpu *pc;
1029
1030	if (p4_pmcdesc[ri].pm_descr.pd_class == PMC_CLASS_TSC)
1031		return 0;
1032
1033	escr = pm->pm_md.pm_p4.pm_p4_escr;
1034
1035	PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
1036
1037	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
1038		pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
1039		phw = pc->pc_hwpmcs[ri];
1040
1041		KASSERT(phw->phw_pmc == NULL,
1042		    ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));
1043
1044		P4_ESCR_UNMARK_ROW_STANDALONE(escr);
1045		KASSERT(pc->pc_escrs[escr] == ri,
1046		    ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
1047			escr, ri));
1048	        pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
1049	} else
1050		P4_ESCR_UNMARK_ROW_THREAD(escr);
1051
1052	return 0;
1053}
1054
1055/*
1056 * Start a PMC
1057 */
1058
1059static int
1060p4_start_pmc(int cpu, int ri)
1061{
1062	int rc;
1063	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1064	struct pmc *pm;
1065	struct p4_cpu *pc;
1066	struct pmc_hw *phw;
1067	struct p4pmc_descr *pd;
1068#if	DEBUG
1069	pmc_value_t tmp;
1070#endif
1071
1072	KASSERT(cpu >= 0 && cpu < mp_ncpus,
1073	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1074	KASSERT(ri >= 0 && ri < P4_NPMCS,
1075	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
1076
1077	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
1078	phw = pc->pc_hwpmcs[ri];
1079	pm  = phw->phw_pmc;
1080	pd  = &p4_pmcdesc[ri];
1081
1082	KASSERT(pm != NULL,
1083	    ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__,
1084		cpu, ri));
1085
1086	PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);
1087
1088	if (pd->pm_descr.pd_class == PMC_CLASS_TSC) /* TSC are always on */
1089		return 0;
1090
1091	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
1092	    ("[p4,%d] wrong PMC class %d", __LINE__,
1093		pd->pm_descr.pd_class));
1094
1095	/* retrieve the desired CCCR/ESCR values from the PMC */
1096	cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
1097	escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
1098	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
1099
1100	/* extract and zero the logical processor selection bits */
1101	cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
1102	escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1103	cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
1104	escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1105
1106	if (pmc_cpu_is_logical(cpu)) { /* shift T0 bits to T1 position */
1107		cccrtbits <<= 1;
1108		escrtbits >>= 2;
1109	}
1110
1111	/* start system mode PMCs directly */
1112	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
1113		wrmsr(escrmsr, escrvalue | escrtbits);
1114		wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
1115		return 0;
1116	}
1117
1118	/*
1119	 * Thread mode PMCs
1120	 *
1121	 * On HTT machines, the same PMC could be scheduled on the
1122	 * same physical CPU twice (once for each logical CPU), for
1123	 * example, if two threads of a multi-threaded process get
1124	 * scheduled on the same CPU.
1125	 *
1126	 */
1127
1128	mtx_lock_spin(&pc->pc_mtx);
1129
1130	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1131	KASSERT(rc == 0 || rc == 1,
1132	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1133		rc));
1134
1135	if (rc == 0) {		/* 1st CPU and the non-HTT case */
1136		/*
1137		 * Enable the correct bits for this CPU.
1138		 */
1139		escrvalue |= escrtbits;
1140		cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
1141
1142		KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
1143		    ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
1144			cpu, ri, pd->pm_cccr_msr));
1145
1146		/* write out the low 40 bits of the saved value to hardware */
1147		wrmsr(pd->pm_pmc_msr,
1148		    P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
1149		P4_PCPU_SAVED_VALUE(pc,ri,cpu) = P4_PCPU_PMC_VALUE(pc,ri,cpu) &
1150		    P4_PERFCTR_MASK;
1151
1152		/* Program the ESCR and CCCR and start the PMC */
1153		wrmsr(escrmsr, escrvalue);
1154		wrmsr(pd->pm_cccr_msr, cccrvalue);
1155
1156		PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
1157		    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x\n", cpu, rc,
1158		    ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
1159		    cccrvalue);
1160
1161	} else if (rc == 1) {		/* 2nd CPU */
1162
1163		/*
1164		 * Retrieve the CCCR and ESCR values from their MSRs,
1165		 * and turn on the addition T[0/1] bits for the 2nd
1166		 * CPU.  Remember the difference between the saved
1167		 * value from the previous 'write()' operation to this
1168		 * (PMC,CPU) pair and the current PMC reading; this is
1169		 * used at PMCSTOP time to derive the correct
1170		 * increment.
1171		 */
1172
1173		cccrvalue = rdmsr(pd->pm_cccr_msr);
1174
1175		KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
1176		    (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
1177		    ("[p4,%d] cpu=%d rc=%d ri=%d CCCR bits 0x%x PMC 0x%x",
1178			__LINE__, cpu, rc, ri, cccrvalue & P4_CCCR_Tx_MASK,
1179			pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
1180		KASSERT(cccrvalue & P4_CCCR_ENABLE,
1181		    ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
1182			__LINE__, rc, cpu, ri));
1183		KASSERT((cccrvalue & cccrtbits) == 0,
1184		    ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d"
1185		     "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
1186			cccrvalue, cccrtbits));
1187
1188		/* stop PMC */
1189		wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1190
1191		escrvalue = rdmsr(escrmsr);
1192
1193		KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
1194		    (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
1195		    ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
1196			"escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
1197			escrvalue & P4_ESCR_Tx_MASK,
1198			pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
1199
1200		KASSERT((escrvalue & escrtbits) == 0,
1201		    ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
1202		     "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
1203			rc, cpu, ri, escrmsr, escrvalue, escrtbits));
1204
1205		/* read current value and save it */
1206		P4_PCPU_SAVED_VALUE(pc,ri,cpu) =
1207		    rdmsr(pd->pm_pmc_msr) & P4_PERFCTR_MASK;
1208
1209		/*
1210		 * program the new bits into the ESCR and CCCR,
1211		 * starting the PMC in the process.
1212		 */
1213
1214		escrvalue |= escrtbits;
1215		cccrvalue |= cccrvalue;
1216
1217		wrmsr(escrmsr, escrvalue);
1218		wrmsr(pd->pm_cccr_msr, cccrvalue);
1219
1220		PMCDBG(MDP,STA,2,"p4-start/2 cpu=%d rc=%d ri=%d escr=%d"
1221		    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x pmc=0x%jx",
1222		    cpu, rc, ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr,
1223		    escrvalue, cccrvalue, tmp);
1224
1225	} else
1226		panic("invalid runcount %d\n", rc);
1227
1228	++rc;
1229	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1230
1231	mtx_unlock_spin(&pc->pc_mtx);
1232
1233	return 0;
1234}
1235
1236/*
1237 * Stop a PMC.
1238 */
1239
1240static int
1241p4_stop_pmc(int cpu, int ri)
1242{
1243	int rc;
1244	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1245	struct pmc *pm;
1246	struct p4_cpu *pc;
1247	struct pmc_hw *phw;
1248	struct p4pmc_descr *pd;
1249	pmc_value_t tmp;
1250
1251	KASSERT(cpu >= 0 && cpu < mp_ncpus,
1252	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1253	KASSERT(ri >= 0 && ri < P4_NPMCS,
1254	    ("[p4,%d] illegal row index %d", __LINE__, ri));
1255
1256	pd  = &p4_pmcdesc[ri];
1257
1258	if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
1259		return 0;
1260
1261	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
1262	phw = pc->pc_hwpmcs[ri];
1263
1264	KASSERT(phw != NULL,
1265	    ("[p4,%d] null phw for cpu%d, ri%d", __LINE__, cpu, ri));
1266
1267	pm  = phw->phw_pmc;
1268
1269	KASSERT(pm != NULL,
1270	    ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));
1271
1272	PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
1273
1274	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
1275		wrmsr(pd->pm_cccr_msr,
1276		    pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
1277		return 0;
1278	}
1279
1280	/*
1281	 * Thread mode PMCs.
1282	 *
1283	 * On HTT machines, this PMC may be in use by two threads
1284	 * running on two logical CPUS.  Thus we look at the
1285	 * 'pm_runcount' field and only turn off the appropriate TO/T1
1286	 * bits (and keep the PMC running).
1287	 *
1288	 * The 'pc_saved' field has the 'diff' between the value in
1289	 * the hardware register at PMCSTART time and the nominal
1290	 * start value for the PMC.  This diff is added to the current
1291	 * PMC reading to derived the correct (absolute) return value.
1292	 */
1293
1294	/* bits to mask */
1295	cccrtbits = P4_CCCR_OVF_PMI_T0;
1296	escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
1297	if (pmc_cpu_is_logical(cpu)) {
1298		cccrtbits <<= 1;
1299		escrtbits >>= 2;
1300	}
1301
1302	mtx_lock_spin(&pc->pc_mtx);
1303
1304	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1305
1306	KASSERT(rc == 2 || rc == 1,
1307	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1308		rc));
1309
1310	--rc;
1311
1312	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1313
1314	/* Stop this PMC */
1315	cccrvalue = rdmsr(pd->pm_cccr_msr);
1316	wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1317
1318	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
1319	escrvalue = rdmsr(escrmsr);
1320
1321	/* get the current PMC reading */
1322	tmp = rdmsr(pd->pm_pmc_msr) & P4_PERFCTR_MASK;
1323
1324	if (rc == 1) {		/* need to keep the PMC running */
1325
1326		KASSERT(escrvalue & escrtbits,
1327		    ("[p4,%d] ESCR T0/T1 mismatch cpu=%d ri=%d escrmsr=0x%x "
1328		     "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, ri, escrmsr,
1329			escrvalue, escrtbits));
1330
1331		KASSERT(PMC_IS_COUNTING_MODE(pm->pm_mode) ||
1332		    (cccrvalue & cccrtbits),
1333		    ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
1334		     "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
1335
1336		escrvalue &= ~escrtbits;
1337		cccrvalue &= ~cccrtbits;
1338
1339		wrmsr(escrmsr, escrvalue);
1340		wrmsr(pd->pm_cccr_msr, cccrvalue);
1341
1342	}
1343
1344	PMCDBG(MDP,STO,2, "p4-stop/2 cpu=%d rc=%d ri=%d escrmsr=0x%x escrval=0x%x "
1345	    "cccrval=0x%x", cpu, rc, ri, escrmsr, escrvalue, cccrvalue);
1346
1347	/* get the incremental count from this context switch */
1348	tmp -= P4_PCPU_SAVED_VALUE(pc,ri,cpu);
1349	if ((int64_t) tmp < 0)		/* counter wrap-around */
1350		tmp = -tmp + 1;
1351
1352	P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
1353
1354	mtx_unlock_spin(&pc->pc_mtx);
1355	return 0;
1356}
1357
/*
 * Interrupt handler entry point.
 *
 * Neither the CPU number nor the instruction pointer is examined;
 * the function unconditionally returns 0.
 */

static int
p4_intr(int cpu, uintptr_t eip)
{
	/* both arguments are deliberately unused */
	(void) eip;
	(void) cpu;

	return (0);
}
1370
1371/*
1372 * Describe a CPU's PMC state.
1373 */
1374
1375static int
1376p4_describe(int cpu, int ri, struct pmc_info *pi,
1377    struct pmc **ppmc)
1378{
1379	int error;
1380	size_t copied;
1381	struct pmc_hw *phw;
1382	const struct p4pmc_descr *pd;
1383
1384	KASSERT(cpu >= 0 && cpu < mp_ncpus,
1385	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
1386	KASSERT(ri >= 0 && ri < P4_NPMCS,
1387	    ("[p4,%d] row-index %d out of range", __LINE__, ri));
1388
1389	PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);
1390
1391	if (pmc_cpu_is_logical(cpu))
1392		return EINVAL;
1393
1394	phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
1395	pd  = &p4_pmcdesc[ri];
1396
1397	if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
1398		 PMC_NAME_MAX, &copied)) != 0)
1399		return error;
1400
1401	pi->pm_class = pd->pm_descr.pd_class;
1402	pi->pm_caps  = pd->pm_descr.pd_caps;
1403	pi->pm_width = pd->pm_descr.pd_width;
1404
1405	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
1406		pi->pm_enabled = TRUE;
1407		*ppmc          = phw->phw_pmc;
1408	} else {
1409		pi->pm_enabled = FALSE;
1410		*ppmc          = NULL;
1411	}
1412
1413	return 0;
1414}
1415
1416/*
1417 * Get MSR# for use with RDPMC.
1418 */
1419
1420static int
1421p4_get_msr(int ri, uint32_t *msr)
1422{
1423	KASSERT(ri >= 0 && ri < P4_NPMCS,
1424	    ("[p4,%d] ri %d out of range", __LINE__, ri));
1425
1426	*msr = p4_pmcdesc[ri].pm_pmc_msr;
1427	return 0;
1428}
1429
1430
1431int
1432pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
1433{
1434	struct p4_event_descr *pe;
1435
1436	KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0,
1437	    ("[p4,%d] Initializing non-intel processor", __LINE__));
1438
1439	PMCDBG(MDP,INI,1, "%s", "p4-initialize");
1440
1441	switch (pmc_mdep->pmd_cputype) {
1442	case PMC_CPU_INTEL_PIV:
1443
1444		pmc_mdep->pmd_npmc	    = P4_NPMCS;
1445		pmc_mdep->pmd_classes[1]    = PMC_CLASS_P4;
1446		pmc_mdep->pmd_nclasspmcs[1] = 18;
1447
1448		pmc_mdep->pmd_init    	    = p4_init;
1449		pmc_mdep->pmd_cleanup 	    = p4_cleanup;
1450		pmc_mdep->pmd_switch_in     = p4_switch_in;
1451		pmc_mdep->pmd_switch_out    = p4_switch_out;
1452		pmc_mdep->pmd_read_pmc 	    = p4_read_pmc;
1453		pmc_mdep->pmd_write_pmc     = p4_write_pmc;
1454		pmc_mdep->pmd_config_pmc    = p4_config_pmc;
1455		pmc_mdep->pmd_allocate_pmc  = p4_allocate_pmc;
1456		pmc_mdep->pmd_release_pmc   = p4_release_pmc;
1457		pmc_mdep->pmd_start_pmc     = p4_start_pmc;
1458		pmc_mdep->pmd_stop_pmc      = p4_stop_pmc;
1459		pmc_mdep->pmd_intr	    = p4_intr;
1460		pmc_mdep->pmd_describe      = p4_describe;
1461		pmc_mdep->pmd_get_msr  	    = p4_get_msr; /* i386 */
1462
1463		/* model specific munging */
1464		if ((cpu_id & 0xFFF) < 0xF27) {
1465
1466			/*
1467			 * On P4 and Xeon with CPUID < (Family 15,
1468			 * Model 2, Stepping 7), only one ESCR is
1469			 * available for the IOQ_ALLOCATION event.
1470			 */
1471
1472			pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
1473			pe->pm_escrs[1] = P4_ESCR_NONE;
1474		}
1475
1476		break;
1477
1478	default:
1479		KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__));
1480		return ENOSYS;
1481	}
1482
1483	return 0;
1484}
1485