hwpmc_piv.c revision 145774
/*-
 * Copyright (c) 2003-2005 Joseph Koshy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_piv.c 145774 2005-05-01 14:11:49Z jkoshy $");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/systm.h>

#include <machine/apicreg.h>
#include <machine/md_var.h>

/*
 * PENTIUM 4 SUPPORT
 *
 * The P4 has 18 PMCs, divided into 4 groups with 4, 4, 4 and 6 PMCs
 * respectively.  Each PMC comprises two model specific registers:
 * a counter configuration control register (CCCR) and a counter
 * register that holds the actual event counts.
 *
 * Configuring an event requires the use of one of 45 event selection
 * control registers (ESCR).  Events are associated with specific
 * ESCRs.  Each PMC group has a set of ESCRs it can use.
 *
 * - The BPU counter group (4 PMCs) can use the 16 ESCRs:
 *   BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
 *   PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
 *
 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
 *   TC_ESCR{0,1}, TBPU_ESCR{0,1}.
 *
 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
 *   FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
 *   DAC_ESCR{0,1}.
 *
 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
 *   ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
 *
 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
 * present) of a counter group.  Odd-numbered ESCRs can be used with
 * counters 2, 3 and 5 (if present) of a counter group.  The
 * 'p4_escrs[]' table describes these restrictions in a form that
 * function 'p4_allocate()' uses for making allocation decisions.
 *
 * SYSTEM-MODE AND THREAD-MODE ALLOCATION
 *
 * In addition to remembering the state of PMC rows
 * ('FREE', 'STANDALONE', or 'THREAD'), we similarly need to track the
 * state of ESCR rows.  If an ESCR is allocated to a system-mode PMC
 * on a CPU, it cannot be allocated to a thread-mode PMC.  On a
 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each
 * CPU is tracked by the pc_escrs[] array.
 *
 * Each system-mode PMC that is using an ESCR records its row-index in
 * the appropriate entry and system-mode allocation attempts check
 * that an ESCR is available using this array.  Process-mode PMCs do
 * not use the pc_escrs[] array, since the ESCR row itself would have
 * been marked as being in 'THREAD' mode.
 *
 * HYPERTHREADING SUPPORT
 *
 * When HTT is enabled, the FreeBSD kernel treats the two 'logical'
 * cpus as independent CPUs and can schedule kernel threads on them
 * independently.  However, the two logical CPUs share the same set of
 * PMC resources.  We need to ensure that:
 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
 *   and,
 * - Threads of multi-threaded processes that get scheduled on the same
 *   physical CPU are handled correctly.
 *
 * HTT Detection
 *
 * Not all HTT capable systems will have HTT enabled since users may
 * have turned HTT support off using the appropriate sysctls
 * (machdep.hlt_logical_cpus or machdep.logical_cpus_mask).  We detect
 * the presence of HTT by remembering if 'p4_init()' was called for a
 * logical CPU.  Note that hwpmc(4) cannot deal with a change in HTT
 * status once it is loaded.
 *
 * Handling HTT READ / WRITE / START / STOP
 *
 * PMC resources are shared across multiple logical CPUs.  In each
 * physical CPU's state we keep track of a 'runcount' which reflects
 * the number of PMC-using processes that have been scheduled on the
 * logical CPUs of this physical CPU.  Process-mode PMC operations
 * will actually 'start' or 'stop' hardware only if these are the
 * first or last processes respectively to use the hardware.  PMC
 * values written by a 'write' operation are saved and are transferred
 * to hardware at PMC 'start' time if the runcount is 0.  If the
 * runcount is greater than 0 at the time of a 'start' operation, we
 * keep track of the actual hardware value at the time of the 'start'
 * operation and use this to adjust the final readings at PMC 'stop'
 * or 'read' time.
 *
 * Execution sequences:
 *
 * Case 1:   CPUx   +...-		(no overlap)
 *	     CPUy         +...-
 *           RC   0 1   0 1   0
 *
 * Case 2:   CPUx   +........-		(partial overlap)
 * 	     CPUy       +........-
 *           RC   0 1   2    1   0
 *
 * Case 3:   CPUx   +..............-	(fully overlapped)
 *	     CPUy       +.....-
 *	     RC   0 1   2     1    0
 *
 * Here CPUx and CPUy are one of the two logical processors on a HTT CPU.
 *
 * Handling HTT CONFIG
 *
 * Different processes attached to the same PMC may get scheduled on
 * the two logical processors in the package.  We keep track of config
 * and de-config operations using the CFGFLAGS fields of the per-physical
 * cpu state.
 */
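
/*
 * To make the two-MSR programming model above concrete, here is an
 * illustrative sketch (using this driver's own helper macros; the
 * exact flag combination is an example, not the full logic) of what
 * it takes to count INSTR_RETIRED on IQ_COUNTER0: the ESCR CRU_ESCR0
 * (MSR 0x3B8) is loaded with the event select, and the counter's
 * CCCR (MSR 0x36C) is pointed at that ESCR and enabled:
 *
 *	wrmsr(0x3B8, P4_ESCR_TO_EVENT_SELECT(0x02) |
 *	    P4_ESCR_T0_OS | P4_ESCR_T0_USR);
 *	wrmsr(0x36C, P4_CCCR_TO_ESCR_SELECT(0x04) |
 *	    P4_CCCR_TO_ACTIVE_THREAD(0x3) | P4_CCCR_ENABLE);
 *
 * Event counts then accumulate in the counter MSR 0x30C.  The
 * functions 'p4_allocate_pmc()' and 'p4_start_pmc()' below implement
 * this sequence in full generality.
 */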

#define	P4_PMCS()				\
	P4_PMC(BPU_COUNTER0)			\
	P4_PMC(BPU_COUNTER1)			\
	P4_PMC(BPU_COUNTER2)			\
	P4_PMC(BPU_COUNTER3)			\
	P4_PMC(MS_COUNTER0)			\
	P4_PMC(MS_COUNTER1)			\
	P4_PMC(MS_COUNTER2)			\
	P4_PMC(MS_COUNTER3)			\
	P4_PMC(FLAME_COUNTER0)			\
	P4_PMC(FLAME_COUNTER1)			\
	P4_PMC(FLAME_COUNTER2)			\
	P4_PMC(FLAME_COUNTER3)			\
	P4_PMC(IQ_COUNTER0)			\
	P4_PMC(IQ_COUNTER1)			\
	P4_PMC(IQ_COUNTER2)			\
	P4_PMC(IQ_COUNTER3)			\
	P4_PMC(IQ_COUNTER4)			\
	P4_PMC(IQ_COUNTER5)			\
	P4_PMC(NONE)

enum pmc_p4pmc {
#undef	P4_PMC
#define	P4_PMC(N)	P4_PMC_##N ,
	P4_PMCS()
};

/*
 * P4 ESCR descriptors
 */

#define	P4_ESCRS()							\
    P4_ESCR(BSU_ESCR0,	0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BSU_ESCR1,	0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FSB_ESCR0,	0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(FSB_ESCR1,	0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FIRM_ESCR0,	0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FIRM_ESCR1,	0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(DAC_ESCR0,	0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(DAC_ESCR1,	0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(MOB_ESCR0,	0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(MOB_ESCR1,	0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(PMH_ESCR0,	0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(PMH_ESCR1,	0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(SAAT_ESCR0,	0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(SAAT_ESCR1,	0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(U2L_ESCR0,	0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(U2L_ESCR1,	0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(BPU_ESCR0,	0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BPU_ESCR1,	0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(IS_ESCR0,	0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IS_ESCR1,	0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ITLB_ESCR0,	0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(ITLB_ESCR1,	0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(CRU_ESCR0,	0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR1,	0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(IQ_ESCR0,	0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(IQ_ESCR1,	0x3BB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(RAT_ESCR0,	0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(RAT_ESCR1,	0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(SSU_ESCR0,	0x3BE, IQ_COUNTER0, IQ_COUNTER2, IQ_COUNTER4)	\
    P4_ESCR(MS_ESCR0,	0x3C0, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(MS_ESCR1,	0x3C1, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TBPU_ESCR0,	0x3C2, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TBPU_ESCR1,	0x3C3, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TC_ESCR0,	0x3C4, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TC_ESCR1,	0x3C5, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(IX_ESCR0,	0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IX_ESCR1,	0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ALF_ESCR0,	0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(ALF_ESCR1,	0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR2,	0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR3,	0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR4,	0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR5,	0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(NONE,		~0,    NONE, NONE, NONE)

enum pmc_p4escr {
#define	P4_ESCR(N, MSR, P1, P2, P3)	P4_ESCR_##N ,
	P4_ESCRS()
#undef	P4_ESCR
};

struct pmc_p4escr_descr {
	const char	pm_escrname[PMC_NAME_MAX];
	u_short		pm_escr_msr;
	const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
};

static struct pmc_p4escr_descr p4_escrs[] =
{
#define	P4_ESCR(N, MSR, P1, P2, P3)		\
	{					\
		.pm_escrname = #N,		\
		.pm_escr_msr = (MSR),		\
		.pm_pmcs =			\
		{				\
			P4_PMC_##P1,		\
			P4_PMC_##P2,		\
			P4_PMC_##P3		\
		}				\
	} ,

	P4_ESCRS()

#undef	P4_ESCR
};
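
/*
 * For reference, each P4_ESCR() entry above expands to one element of
 * the table; e.g. the BSU_ESCR0 line becomes (approximately):
 *
 *	{
 *		.pm_escrname = "BSU_ESCR0",
 *		.pm_escr_msr = 0x3A0,
 *		.pm_pmcs = { P4_PMC_BPU_COUNTER0, P4_PMC_BPU_COUNTER1,
 *		    P4_PMC_NONE }
 *	},
 *
 * recording that this ESCR may only drive the first two BPU counters.
 */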

/*
 * P4 Event descriptor
 */

struct p4_event_descr {
	const enum pmc_event pm_event;
	const uint32_t	pm_escr_eventselect;
	const uint32_t	pm_cccr_select;
	const char	pm_is_ti_event;
	enum pmc_p4escr	pm_escrs[P4_MAX_ESCR_PER_EVENT];
};

static struct p4_event_descr p4_events[] = {

#define	P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1)	\
	{								\
		.pm_event            = PMC_EV_P4_##NAME,		\
		.pm_escr_eventselect = (ESCREVENTSEL),			\
		.pm_cccr_select      = (CCCRSEL),			\
		.pm_is_ti_event	     = (TI_EVENT),			\
		.pm_escrs            =					\
		{							\
			P4_ESCR_##ESCR0,				\
			P4_ESCR_##ESCR1					\
		}							\
	}

P4_EVDESCR(TC_DELIVER_MODE,	0x01, 0x01, TRUE,  TC_ESCR0,	TC_ESCR1),
P4_EVDESCR(BPU_FETCH_REQUEST,	0x03, 0x00, FALSE, BPU_ESCR0,	BPU_ESCR1),
P4_EVDESCR(ITLB_REFERENCE,	0x18, 0x03, FALSE, ITLB_ESCR0,	ITLB_ESCR1),
P4_EVDESCR(MEMORY_CANCEL,	0x02, 0x05, FALSE, DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(MEMORY_COMPLETE,	0x08, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(LOAD_PORT_REPLAY,	0x04, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(STORE_PORT_REPLAY,	0x05, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(MOB_LOAD_REPLAY,	0x03, 0x02, FALSE, MOB_ESCR0,	MOB_ESCR1),
P4_EVDESCR(PAGE_WALK_TYPE,	0x01, 0x04, TRUE,  PMH_ESCR0,	PMH_ESCR1),
P4_EVDESCR(BSQ_CACHE_REFERENCE,	0x0C, 0x07, FALSE, BSU_ESCR0,	BSU_ESCR1),
P4_EVDESCR(IOQ_ALLOCATION,	0x03, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(IOQ_ACTIVE_ENTRIES,	0x1A, 0x06, FALSE, FSB_ESCR1,	NONE),
P4_EVDESCR(FSB_DATA_ACTIVITY,	0x17, 0x06, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BSQ_ALLOCATION,	0x05, 0x07, FALSE, BSU_ESCR0,	NONE),
P4_EVDESCR(BSQ_ACTIVE_ENTRIES,	0x06, 0x07, FALSE, BSU_ESCR1,	NONE),
	/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
P4_EVDESCR(SSE_INPUT_ASSIST,	0x34, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_SP_UOP,	0x08, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_DP_UOP,	0x0C, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_SP_UOP,	0x0A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_DP_UOP,	0x0E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(64BIT_MMX_UOP,	0x02, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(128BIT_MMX_UOP,	0x1A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_FP_UOP,		0x04, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_SIMD_MOVES_UOP,	0x2E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(GLOBAL_POWER_EVENTS,	0x13, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(TC_MS_XFER,		0x05, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(UOP_QUEUE_WRITES,	0x09, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
    				0x05, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RETIRED_BRANCH_TYPE,	0x04, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RESOURCE_STALL,	0x01, 0x01, FALSE, ALF_ESCR0,	ALF_ESCR1),
P4_EVDESCR(WC_BUFFER,		0x05, 0x05, TRUE,  DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(B2B_CYCLES,		0x16, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BNR,			0x08, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(SNOOP,		0x06, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(RESPONSE,		0x04, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(FRONT_END_EVENT,	0x08, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(EXECUTION_EVENT,	0x0C, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(REPLAY_EVENT, 	0x09, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(INSTR_RETIRED,	0x02, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOPS_RETIRED,	0x01, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOP_TYPE,		0x02, 0x02, FALSE, RAT_ESCR0,	RAT_ESCR1),
P4_EVDESCR(BRANCH_RETIRED,	0x06, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
P4_EVDESCR(X87_ASSIST,		0x03, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MACHINE_CLEAR,	0x02, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3)

#undef	P4_EVDESCR
};

#define	P4_EVENT_IS_TI(E) ((E)->pm_is_ti_event == TRUE)

#define	P4_NEVENTS	(PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1)

/*
 * P4 PMC descriptors
 */

struct p4pmc_descr {
	struct pmc_descr pm_descr; 	/* common information */
	enum pmc_p4pmc	pm_pmcnum;	/* PMC number */
	uint32_t	pm_pmc_msr; 	/* PERFCTR MSR address */
	uint32_t	pm_cccr_msr;  	/* CCCR MSR address */
};

static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {

	/*
	 * TSC descriptor
	 */

	{
		.pm_descr =
		{
			.pd_name  = "TSC",
			.pd_class = PMC_CLASS_TSC,
			.pd_caps  = PMC_CAP_READ | PMC_CAP_WRITE,
			.pd_width = 64
		},
		.pm_pmcnum   = ~0,
		.pm_cccr_msr = ~0,
		.pm_pmc_msr  = 0x10,
	},

	/*
	 * P4 PMCs
	 */

#define	P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM |  \
	PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
	PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE |            \
	PMC_CAP_TAGGING | PMC_CAP_CASCADE)

#define	P4_PMCDESCR(N, PMC, CCCR)			\
	{						\
		.pm_descr =				\
		{					\
			.pd_name = #N,			\
			.pd_class = PMC_CLASS_P4,	\
			.pd_caps = P4_PMC_CAPS,		\
			.pd_width = 40			\
		},					\
		.pm_pmcnum      = P4_PMC_##N,		\
		.pm_cccr_msr 	= (CCCR),		\
		.pm_pmc_msr	= (PMC)			\
	}

	P4_PMCDESCR(BPU_COUNTER0,	0x300,	0x360),
	P4_PMCDESCR(BPU_COUNTER1,	0x301,	0x361),
	P4_PMCDESCR(BPU_COUNTER2,	0x302,	0x362),
	P4_PMCDESCR(BPU_COUNTER3,	0x303,	0x363),
	P4_PMCDESCR(MS_COUNTER0,	0x304,	0x364),
	P4_PMCDESCR(MS_COUNTER1,	0x305,	0x365),
	P4_PMCDESCR(MS_COUNTER2,	0x306,	0x366),
	P4_PMCDESCR(MS_COUNTER3,	0x307,	0x367),
	P4_PMCDESCR(FLAME_COUNTER0,	0x308,	0x368),
	P4_PMCDESCR(FLAME_COUNTER1,	0x309,	0x369),
	P4_PMCDESCR(FLAME_COUNTER2,	0x30A,	0x36A),
	P4_PMCDESCR(FLAME_COUNTER3,	0x30B,	0x36B),
	P4_PMCDESCR(IQ_COUNTER0,	0x30C,	0x36C),
	P4_PMCDESCR(IQ_COUNTER1,	0x30D,	0x36D),
	P4_PMCDESCR(IQ_COUNTER2,	0x30E,	0x36E),
	P4_PMCDESCR(IQ_COUNTER3,	0x30F,	0x36F),
	P4_PMCDESCR(IQ_COUNTER4,	0x310,	0x370),
	P4_PMCDESCR(IQ_COUNTER5,	0x311,	0x371),

#undef	P4_PMCDESCR
};

/* HTT support */
#define	P4_NHTT					2 /* logical processors/chip */
#define	P4_HTT_CPU_INDEX_0			0
#define	P4_HTT_CPU_INDEX_1			1

static int p4_system_has_htt;

/*
 * Per-CPU data structure for P4 class CPUs
 *
 * [common stuff]
 * [19 struct pmc_hw pointers]
 * [19 struct pmc_hw structures]
 * [45 ESCR status bytes]
 * [per-cpu spin mutex]
 * [19 flag fields for holding config flags and a runcount]
 * [19*2 hw value fields]	(Thread mode PMC support)
 *    or
 * [19*2 EIP values]		(Sampling mode PMCs)
 * [19*2 pmc value fields]	(Thread mode PMC support)
 */

struct p4_cpu {
	struct pmc_cpu	pc_common;
	struct pmc_hw	*pc_hwpmcs[P4_NPMCS];
	struct pmc_hw	pc_p4pmcs[P4_NPMCS];
	char		pc_escrs[P4_NESCR];
	struct mtx	pc_mtx;	/* spin lock */
	unsigned char	pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
	union {
		pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
		uintptr_t   pc_ip[P4_NPMCS * P4_NHTT];
	}		pc_si;
	pmc_value_t	pc_pmc_values[P4_NPMCS * P4_NHTT];
};

#define	P4_PCPU_PMC_VALUE(PC,RI,CPU) 	(PC)->pc_pmc_values[(RI) + ((CPU) & 1) * P4_NPMCS]
#define	P4_PCPU_HW_VALUE(PC,RI,CPU)	(PC)->pc_si.pc_hw[(RI) + ((CPU) & 1) * P4_NPMCS]
#define	P4_PCPU_SAVED_IP(PC,RI,CPU)	(PC)->pc_si.pc_ip[(RI) + ((CPU) & 1) * P4_NPMCS]
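
/*
 * A sketch of the intended indexing: the arrays above hold one bank
 * of P4_NPMCS slots per logical CPU of a package.  With P4_NPMCS ==
 * 19, the saved value for row-index 2 lives at pc_pmc_values[2] when
 * the PMC runs on logical CPU 0, and at pc_pmc_values[21] when it
 * runs on logical CPU 1, so the two logical CPUs never collide.
 */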

#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL)	do {	\
	char _tmp;					\
	_tmp = (PC)->pc_flags[(RI)];			\
	_tmp &= ~(MASK);				\
	_tmp |= (VAL) & (MASK);				\
	(PC)->pc_flags[(RI)] = _tmp;			\
} while (0)

#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) << 4))
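
/*
 * For illustration: each pc_flags[] byte packs the per-logical-CPU
 * configuration bits in its high nibble and the runcount in its low
 * nibble.  A PMC configured on both logical CPUs of a package, with
 * one of them currently running it, would be encoded as
 *
 *	pc_flags[ri] == 0x31	(cfgflags == 0x3, runcount == 1)
 */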

#define	P4_CPU_TO_FLAG(C)		(pmc_cpu_is_logical(C) ? 0x2 : 0x1)

/* ESCR row disposition */
static int p4_escrdisp[P4_NESCR];

#define	P4_ESCR_ROW_DISP_IS_THREAD(E)		(p4_escrdisp[(E)] > 0)
#define	P4_ESCR_ROW_DISP_IS_STANDALONE(E)	(p4_escrdisp[(E)] < 0)
#define	P4_ESCR_ROW_DISP_IS_FREE(E)		(p4_escrdisp[(E)] == 0)

#define	P4_ESCR_MARK_ROW_STANDALONE(E) do {				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= (-mp_ncpus), ("[p4,%d] row "	\
		"disposition error", __LINE__));			\
} while (0)

#define	P4_ESCR_UNMARK_ROW_STANDALONE(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						\
} while (0)

#define	P4_ESCR_MARK_ROW_THREAD(E) do {					 \
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
		    __LINE__));						 \
	atomic_add_int(&p4_escrdisp[(E)], 1);				 \
} while (0)

#define	P4_ESCR_UNMARK_ROW_THREAD(E) do {				 \
	atomic_add_int(&p4_escrdisp[(E)], -1);				 \
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						 \
} while (0)
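
/*
 * To make the counting scheme above concrete: p4_escrdisp[e] == 0
 * means ESCR row 'e' is FREE; a positive value counts its thread-mode
 * (process-virtual) users; a negative value counts its system-mode
 * (standalone) users, at most one per CPU.
 */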

#define	P4_PMC_IS_STOPPED(cccr)	((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)

#define	P4_TO_PHYSICAL_CPU(cpu) (pmc_cpu_is_logical(cpu) ?		\
    ((cpu) & ~1) : (cpu))

#define	P4_CCCR_Tx_MASK	(~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1|	\
			     P4_CCCR_ENABLE|P4_CCCR_OVF))
#define	P4_ESCR_Tx_MASK	(~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS|	\
			     P4_ESCR_T1_USR))

/*
 * support routines
 */

static struct p4_event_descr *
p4_find_event(enum pmc_event ev)
{
	int n;

	for (n = 0; n < P4_NEVENTS; n++)
		if (p4_events[n].pm_event == ev)
			break;
	if (n == P4_NEVENTS)
		return NULL;
	return &p4_events[n];
}

/*
 * Initialize per-cpu state
 */

static int
p4_init(int cpu)
{
	int n, phycpu;
	char *pescr;
	struct p4_cpu *pcs;
	struct pmc_hw *phw;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG(MDP,INI,0, "p4-init cpu=%d logical=%d", cpu,
	    pmc_cpu_is_logical(cpu) != 0);

	/*
	 * A 'logical' CPU shares its per-cpu state with its physical
	 * CPU.  The physical CPU would have been initialized prior to
	 * the initialization for this cpu.
	 */

	if (pmc_cpu_is_logical(cpu)) {
		phycpu = P4_TO_PHYSICAL_CPU(cpu);
		pcs = (struct p4_cpu *) pmc_pcpu[phycpu];
		PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pcs=%p",
		    cpu, phycpu, pcs);
		KASSERT(pcs,
		    ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", __LINE__,
			cpu, phycpu));
		if (pcs == NULL) /* decline to init */
			return ENXIO;
		p4_system_has_htt = 1;
		pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
		return 0;
	}

	MALLOC(pcs, struct p4_cpu *, sizeof(struct p4_cpu), M_PMC,
	    M_WAITOK|M_ZERO);

	if (pcs == NULL)
		return ENOMEM;
	phw = pcs->pc_p4pmcs;

	for (n = 0; n < P4_NPMCS; n++, phw++) {
		phw->phw_state   = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
		phw->phw_pmc     = NULL;
		pcs->pc_hwpmcs[n] = phw;
	}

	/* Mark the TSC as shareable */
	pcs->pc_hwpmcs[0]->phw_state |= PMC_PHW_FLAG_IS_SHAREABLE;

	pescr = pcs->pc_escrs;
	for (n = 0; n < P4_NESCR; n++)
		*pescr++ = P4_INVALID_PMC_INDEX;
	pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;

	mtx_init(&pcs->pc_mtx, "p4-pcpu", "pmc", MTX_SPIN);

	return 0;
}

/*
 * Destroy per-cpu state.
 */

static int
p4_cleanup(int cpu)
{
	struct p4_cpu *pcs;

	PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);

	/*
	 * Free up the per-cpu structure for the given cpu if
	 * allocated, and if this is a physical CPU.
	 */

	if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) != NULL &&
	    !pmc_cpu_is_logical(cpu)) {
		mtx_destroy(&pcs->pc_mtx);
		FREE(pcs, M_PMC);
	}

	pmc_pcpu[cpu] = NULL;

	return 0;
}

/*
 * Context switch in.
 */

static int
p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
{
	(void) pc;

	PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
	    (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);

	/* enable the RDPMC instruction */
	if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
		load_cr4(rcr4() | CR4_PCE);

	PMCDBG(MDP,SWI,2, "cr4=0x%x", rcr4());

	return 0;
}

/*
 * Context switch out.
 */

static int
p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
{
	(void) pc;
	(void) pp;		/* can be null */

	PMCDBG(MDP,SWO,1, "pc=%p pp=%p", pc, pp);

	/* always disallow the RDPMC instruction */
	load_cr4(rcr4() & ~CR4_PCE);

	PMCDBG(MDP,SWO,2, "cr4=0x%x", rcr4());

	return 0;
}

/*
 * Read a PMC
 */

static int
p4_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	enum pmc_mode mode;
	struct p4pmc_descr *pd;
	struct pmc *pm;
	struct p4_cpu *pc;
	struct pmc_hw *phw;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
	phw = pc->pc_hwpmcs[ri];
	pd  = &p4_pmcdesc[ri];
	pm  = phw->phw_pmc;

	KASSERT(pm != NULL,
	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
		cpu, ri));

	KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
	    ("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
		pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));

	mode = PMC_TO_MODE(pm);

	PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);

	if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC) {
		KASSERT(PMC_IS_COUNTING_MODE(mode),
		    ("[p4,%d] TSC counter in non-counting mode", __LINE__));
		*v = rdtsc();
		PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
		return 0;
	}

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));

	tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);

	if (PMC_IS_VIRTUAL_MODE(mode)) {
		if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
			tmp += (P4_PERFCTR_MASK + 1) -
			    P4_PCPU_HW_VALUE(pc,ri,cpu);
		else
			tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
		tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
	}

	if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
		*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
	else
		*v = tmp;

	PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
	return 0;
}

/*
 * Write a PMC
 */

static int
p4_write_pmc(int cpu, int ri, pmc_value_t v)
{
	enum pmc_mode mode;
	struct pmc *pm;
	struct p4_cpu *pc;
	const struct pmc_hw *phw;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
	phw = pc->pc_hwpmcs[ri];
	pm  = phw->phw_pmc;
	pd  = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
		cpu, ri));

	mode = PMC_TO_MODE(pm);

	PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
	    mode, v);

	/*
	 * The P4's TSC register is writeable, but we don't allow a
	 * write as changing the TSC's value could interfere with
	 * timekeeping and other system functions.
	 */
	if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC)
		return 0;

	/*
	 * Write the PMC value to the register/saved value: for
	 * sampling mode PMCs, the value to be programmed into the PMC
	 * counter is -(C+1) where 'C' is the requested sample rate.
	 */
	if (PMC_IS_SAMPLING_MODE(mode))
		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);

	if (PMC_IS_SYSTEM_MODE(mode))
		wrmsr(pd->pm_pmc_msr, v);
	else
		P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;

	return 0;
}

/*
 * Configure a PMC 'pm' on the given CPU and row-index.
 *
 * 'pm' may be NULL to indicate de-configuration.
 *
 * On HTT systems, a PMC may get configured twice, once for each
 * "logical" CPU.  We track this using the CFGFLAGS field of the
 * per-cpu state; this field is a bit mask with one bit each for
 * logical CPUs 0 & 1.
 */
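
/*
 * For example (an illustrative sequence): configuring the same PMC at
 * row-index 'ri' first on logical CPU 0 and then on logical CPU 1 of
 * a package takes CFGFLAGS from 0 to 0x1 to 0x3; unconfiguring it on
 * CPU 0 leaves 0x2, and the hardware slot is only released once the
 * mask returns to 0.
 */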

static int
p4_config_pmc(int cpu, int ri, struct pmc *pm)
{
	struct pmc_hw *phw;
	struct p4_cpu *pc;
	int cfgflags, cpuflag;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
	phw = pc->pc_hwpmcs[ri];

	KASSERT(pm == NULL || phw->phw_pmc == NULL ||
	    (p4_system_has_htt && phw->phw_pmc == pm),
	    ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));

	mtx_lock_spin(&pc->pc_mtx);
	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);

	KASSERT(cfgflags >= 0 && cfgflags <= 3,
	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
		cfgflags, cpu, ri));

	KASSERT(cfgflags == 0 || phw->phw_pmc,
	    ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
		__LINE__, cpu, ri));

	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgflags,
	    pm);

	cpuflag = P4_CPU_TO_FLAG(cpu);

	if (pm) {		/* config */
		if (cfgflags == 0)
			phw->phw_pmc = pm;

		KASSERT(phw->phw_pmc == pm,
		    ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
			__LINE__, cpu, ri, pm, phw->phw_pmc));

		cfgflags |= cpuflag;
	} else {		/* unconfig */
		cfgflags &= ~cpuflag;

		if (cfgflags == 0)
			phw->phw_pmc = NULL;
	}

	KASSERT(cfgflags >= 0 && cfgflags <= 3,
	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
		cfgflags, cpu, ri));

	P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);

	mtx_unlock_spin(&pc->pc_mtx);

	return 0;
}

/*
 * Retrieve a configured PMC pointer from hardware state.
 */

static int
p4_get_config(int cpu, int ri, struct pmc **ppm)
{
	struct p4_cpu *pc;
	struct pmc_hw *phw;
	int cfgflags;

	pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
	phw = pc->pc_hwpmcs[ri];

	mtx_lock_spin(&pc->pc_mtx);
	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
	mtx_unlock_spin(&pc->pc_mtx);

	if (cfgflags & P4_CPU_TO_FLAG(cpu))
		*ppm = phw->phw_pmc; /* PMC config'ed on this CPU */
	else
		*ppm = NULL;

	return 0;
}

/*
 * Allocate a PMC.
 *
 * The allocation strategy differs between HTT and non-HTT systems.
 *
 * The non-HTT case:
 *   - Given the desired event and the PMC row-index, lookup the
 *     list of valid ESCRs for the event.
 *   - For each valid ESCR:
 *     - Check if the ESCR is free and the ESCR row is in a compatible
 *       mode (i.e., system or process).
 *     - Check if the ESCR is usable with a P4 PMC at the desired row-index.
 *   If everything matches, we determine the appropriate bit values for the
 *   ESCR and CCCR registers.
 *
 * The HTT case:
 *
 * - Process mode PMCs require special care.  The FreeBSD scheduler could
 *   schedule any two processes on the same physical CPU.  We need to ensure
 *   that a given PMC row-index is never allocated to two different
 *   PMCs owned by different user-processes.  This is ensured by always
 *   allocating a PMC from a 'FREE' PMC row if the system has HTT active.
 * - A similar check needs to be done for ESCRs; we do not want two PMCs
 *   using the same ESCR to be scheduled at the same time.  Thus ESCR
 *   allocation is also restricted to FREE rows if the system has HTT
 *   enabled.
 * - Thirdly, some events are 'thread independent' in Intel's
 *   terminology, i.e., the PMC hardware cannot distinguish between
 *   events caused by different logical CPUs.  This makes it impossible
 *   to assign events to a given thread of execution.  If the system
 *   has HTT enabled, these events are not allowed for process-mode
 *   PMCs.
 */
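
/*
 * A worked example of the non-HTT lookup above: a request for
 * PMC_EV_P4_BPU_FETCH_REQUEST at row-index 1 (BPU_COUNTER0) consults
 * the event's ESCR list { BPU_ESCR0, BPU_ESCR1 }.  BPU_ESCR1 only
 * serves BPU_COUNTER{2,3}, so the request can only succeed if
 * BPU_ESCR0 is available and its row disposition is compatible with
 * the requested mode.
 */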

static int
p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	int found, n, m;
	uint32_t caps, cccrvalue, escrvalue, tflags;
	enum pmc_p4escr escr;
	struct p4_cpu *pc;
	struct p4_event_descr *pevent;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index value %d", __LINE__, ri));

	pd = &p4_pmcdesc[ri];

	PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
	    "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
	    pm->pm_caps);

	/* check class */
	if (pd->pm_descr.pd_class != a->pm_class)
		return EINVAL;

	/* check requested capabilities */
	caps = a->pm_caps;
	if ((pd->pm_descr.pd_caps & caps) != caps)
		return EPERM;

	if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
		/* TSCs are always allocated in system-wide counting mode */
		if (a->pm_ev != PMC_EV_TSC_TSC ||
		    a->pm_mode != PMC_MODE_SC)
			return EINVAL;
		return 0;
	}

	/*
	 * If the system has HTT enabled, and the desired allocation
	 * mode is process-private, and the PMC row disposition is not
	 * FREE (0), decline the allocation.
	 */

	if (p4_system_has_htt &&
	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
	    pmc_getrowdisp(ri) != 0)
		return EBUSY;

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] unknown PMC class %d", __LINE__,
		pd->pm_descr.pd_class));

	if (pm->pm_event < PMC_EV_P4_FIRST ||
	    pm->pm_event > PMC_EV_P4_LAST)
		return EINVAL;

	if ((pevent = p4_find_event(pm->pm_event)) == NULL)
		return ESRCH;

	PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
	    pevent->pm_event, pevent->pm_escr_eventselect,
	    pevent->pm_cccr_select, pevent->pm_is_ti_event);

	/*
	 * Some PMC events are 'thread independent' and therefore
	 * cannot be used for process-private modes if HTT is being
	 * used.
	 */

	if (P4_EVENT_IS_TI(pevent) &&
	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
	    p4_system_has_htt)
		return EINVAL;

	pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];

	found   = 0;

	/* look for a suitable ESCR for this event */
	for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
		if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
			break;	/* out of ESCRs */
		/*
		 * Check ESCR row disposition.
		 *
		 * If the request is for a system-mode PMC, then the
		 * ESCR row should not be in process-virtual mode, and
		 * should also be free on the current CPU.
		 */

		if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		    if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
			pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
			    continue;
		}

		/*
		 * If the request is for a process-virtual PMC, and if
		 * HTT is not enabled, we can use an ESCR row that is
		 * either FREE or already in process mode.
		 *
		 * If HTT is enabled, then we need to ensure that a
		 * given ESCR is never allocated to two PMCs that
		 * could run simultaneously on the two logical CPUs of
		 * a CPU package.  We ensure this by only allocating
		 * ESCRs from rows marked as 'FREE'.
		 */

		if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
			if (p4_system_has_htt) {
				if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
					continue;
			} else
				if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
					continue;
		}

		/*
		 * We found a suitable ESCR for this event.  Now check if
		 * this ESCR can work with the PMC at row-index 'ri'.
		 */

		for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
			if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
				found = 1;
				break;
			}
	}

	if (found == 0)
		return ESRCH;

	KASSERT((int) escr >= 0 && escr < P4_NESCR,
	    ("[p4,%d] illegal ESCR value %d", __LINE__, escr));

	/* mark ESCR row mode */
	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
		P4_ESCR_MARK_ROW_STANDALONE(escr);
	} else {
		KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
		    ("[p4,%d] escr[%d] already in use", __LINE__, escr));
		P4_ESCR_MARK_ROW_THREAD(escr);
	}

	pm->pm_md.pm_p4.pm_p4_escrmsr   = p4_escrs[escr].pm_escr_msr;
	pm->pm_md.pm_p4.pm_p4_escr      = escr;

	cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
	escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);

	/* CCCR fields */
	if (caps & PMC_CAP_THRESHOLD)
		cccrvalue |= (a->pm_p4_cccrconfig & P4_CCCR_THRESHOLD_MASK) |
		    P4_CCCR_COMPARE;

	if (caps & PMC_CAP_EDGE)
		cccrvalue |= P4_CCCR_EDGE;

	if (caps & PMC_CAP_INVERT)
		cccrvalue |= P4_CCCR_COMPLEMENT;

	if (p4_system_has_htt)
		cccrvalue |= a->pm_p4_cccrconfig & P4_CCCR_ACTIVE_THREAD_MASK;
	else			/* no HTT; thread field should be '11b' */
		cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);

	if (caps & PMC_CAP_CASCADE)
		cccrvalue |= P4_CCCR_CASCADE;

	/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
	if (caps & PMC_CAP_INTERRUPT)
		cccrvalue |= P4_CCCR_OVF_PMI_T0;

	/* ESCR fields */
	if (caps & PMC_CAP_QUALIFIER)
		escrvalue |= a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK;
	if (caps & PMC_CAP_TAGGING)
		escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_TAG_VALUE_MASK) |
		    P4_ESCR_TAG_ENABLE;

	/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
	tflags = 0;
	if (caps & PMC_CAP_SYSTEM)
		tflags |= P4_ESCR_T0_OS;
	if (caps & PMC_CAP_USER)
		tflags |= P4_ESCR_T0_USR;
	if (tflags == 0)
		tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
	escrvalue |= tflags;

	pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
	pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;

	PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
	    "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
	    cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);

	return 0;
}

/*
 * Release a PMC.
 */

static int
p4_release_pmc(int cpu, int ri, struct pmc *pm)
{
	enum pmc_p4escr escr;
	struct pmc_hw *phw;
	struct p4_cpu *pc;

	if (p4_pmcdesc[ri].pm_descr.pd_class == PMC_CLASS_TSC)
		return 0;

	escr = pm->pm_md.pm_p4.pm_p4_escr;

	PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);

	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
		phw = pc->pc_hwpmcs[ri];

		KASSERT(phw->phw_pmc == NULL,
		    ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));

		P4_ESCR_UNMARK_ROW_STANDALONE(escr);
		KASSERT(pc->pc_escrs[escr] == ri,
		    ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
			escr, ri));
		pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
	} else
		P4_ESCR_UNMARK_ROW_THREAD(escr);

	return 0;
}

/*
 * Start a PMC
 */

static int
p4_start_pmc(int cpu, int ri)
{
	int rc;
	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
	struct pmc *pm;
	struct p4_cpu *pc;
	struct pmc_hw *phw;
	struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
	phw = pc->pc_hwpmcs[ri];
	pm  = phw->phw_pmc;
	pd  = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__,
		cpu, ri));

	PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);

	if (pd->pm_descr.pd_class == PMC_CLASS_TSC) /* TSCs are always on */
		return 0;

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] wrong PMC class %d", __LINE__,
		pd->pm_descr.pd_class));

	/* retrieve the desired CCCR/ESCR values from the PMC */
	cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
	escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;

	/* extract and zero the logical processor selection bits */
	cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
	escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
	cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
	escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);

	if (pmc_cpu_is_logical(cpu)) { /* shift T0 bits to T1 position */
		cccrtbits <<= 1;
		escrtbits >>= 2;
	}
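
	/*
	 * (The shifts above rely on the MSR layouts: in the CCCR the
	 * OVF_PMI_T1 flag sits one bit above OVF_PMI_T0, while in the
	 * ESCR the T1_{OS,USR} enable bits sit two bits below their
	 * T0 counterparts, so "<< 1" and ">> 2" retarget the saved
	 * bits at the second logical CPU.)
	 */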

	/* start system mode PMCs directly */
	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(escrmsr, escrvalue | escrtbits);
		wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
		return 0;
	}

	/*
	 * Thread mode PMCs
	 *
	 * On HTT machines, the same PMC could be scheduled on the
	 * same physical CPU twice (once for each logical CPU), for
	 * example, if two threads of a multi-threaded process get
	 * scheduled on the same CPU.
	 */

	mtx_lock_spin(&pc->pc_mtx);

	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
	KASSERT(rc == 0 || rc == 1,
	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
		rc));

	if (rc == 0) {		/* 1st CPU and the non-HTT case */

		KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
		    ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
			cpu, ri, pd->pm_cccr_msr));

		/* write out the low 40 bits of the saved value to hardware */
		wrmsr(pd->pm_pmc_msr,
		    P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);

	} else if (rc == 1) {		/* 2nd CPU */

		/*
		 * Stop the PMC and retrieve the CCCR and ESCR values
		 * from their MSRs, and turn on the additional T[0/1]
		 * bits for the 2nd CPU.
		 */

		cccrvalue = rdmsr(pd->pm_cccr_msr);
		wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);

		/* check that the configuration bits read back match the PMC */
		KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
		    (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
		    ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
			"cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
			cccrvalue & P4_CCCR_Tx_MASK,
			pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
		KASSERT(cccrvalue & P4_CCCR_ENABLE,
		    ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
			__LINE__, rc, cpu, ri));
		KASSERT((cccrvalue & cccrtbits) == 0,
		    ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
		     "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
			cccrvalue, cccrtbits));

		escrvalue = rdmsr(escrmsr);

		KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
		    (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
		    ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
			"escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
			escrvalue & P4_ESCR_Tx_MASK,
			pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
		KASSERT((escrvalue & escrtbits) == 0,
		    ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
		     "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
			rc, cpu, ri, escrmsr, escrvalue, escrtbits));
	}

	/* Enable the correct bits for this CPU. */
	escrvalue |= escrtbits;
	cccrvalue |= cccrtbits | P4_CCCR_ENABLE;

	/* Save HW value at the time of starting hardware */
	P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);

	/* Program the ESCR and CCCR and start the PMC */
	wrmsr(escrmsr, escrvalue);
	wrmsr(pd->pm_cccr_msr, cccrvalue);

	++rc;
	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);

	mtx_unlock_spin(&pc->pc_mtx);

	PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
	    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
	    ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
	    cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));

	return 0;
}

/*
 * Stop a PMC.
 */

static int
p4_stop_pmc(int cpu, int ri)
{
	int rc;
	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
	struct pmc *pm;
	struct p4_cpu *pc;
	struct pmc_hw *phw;
	struct p4pmc_descr *pd;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row index %d", __LINE__, ri));

	pd  = &p4_pmcdesc[ri];

	if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
		return 0;

	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
	phw = pc->pc_hwpmcs[ri];

	KASSERT(phw != NULL,
	    ("[p4,%d] null phw for cpu%d, ri%d", __LINE__, cpu, ri));

	pm  = phw->phw_pmc;

	KASSERT(pm != NULL,
	    ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));

	PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);

	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(pd->pm_cccr_msr,
		    pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
		return 0;
	}

	/*
	 * Thread mode PMCs.
	 *
	 * On HTT machines, this PMC may be in use by two threads
	 * running on two logical CPUs.  Thus we look at the runcount
	 * and only turn off the appropriate T0/T1 bits (and keep the
	 * PMC running) if two logical CPUs were using the PMC.
	 */

	/* bits to mask */
	cccrtbits = P4_CCCR_OVF_PMI_T0;
	escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
	if (pmc_cpu_is_logical(cpu)) {
		cccrtbits <<= 1;
		escrtbits >>= 2;
	}

	mtx_lock_spin(&pc->pc_mtx);

	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);

	KASSERT(rc == 2 || rc == 1,
	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
		rc));

	--rc;

	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);

	/* Stop this PMC */
	cccrvalue = rdmsr(pd->pm_cccr_msr);
	wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);

	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
	escrvalue = rdmsr(escrmsr);

	/* The current CPU should be running on this PMC */
	KASSERT(escrvalue & escrtbits,
	    ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
		"escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
		escrvalue, escrtbits));
	KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
	    (cccrvalue & cccrtbits),
	    ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
		"tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));

	/* get the current hardware reading */
	tmp = rdmsr(pd->pm_pmc_msr);

	if (rc == 1) {		/* need to keep the PMC running */
		escrvalue &= ~escrtbits;
		cccrvalue &= ~cccrtbits;
		wrmsr(escrmsr, escrvalue);
		wrmsr(pd->pm_cccr_msr, cccrvalue);
	}

	mtx_unlock_spin(&pc->pc_mtx);

	PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
	    "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
	    escrvalue, cccrvalue, tmp);

	if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
		tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
	else
		tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);

	P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;

	return 0;
}

/*
 * Handle an interrupt.
 *
 * The hardware sets the CCCR_OVF flag whenever a counter overflows,
 * so the handler examines all 18 CCCR registers and processes the
 * counters that have overflowed.
 *
 * On HTT machines, multiple logical CPUs may try to enter the NMI
 * service routine at the same time.
 */

extern volatile lapic_t *lapic;

static void
p4_lapic_enable_pmc_interrupt(void)
{
	uint32_t value;

	value = lapic->lvt_pcint;
	value &= ~APIC_LVT_M;
	lapic->lvt_pcint = value;
}

static int
p4_intr(int cpu, uintptr_t eip)
{
	int i, pmc_interrupted;
	uint32_t cccrval, pmi_ovf_mask;
	struct p4_cpu *pc;
	struct pmc_hw *phw;
	struct pmc *pm;
	pmc_value_t v;

	(void) eip;
	PMCDBG(MDP,INT, 1, "cpu=%d eip=%x pcint=0x%x", cpu, eip,
	    lapic->lvt_pcint);

	pmc_interrupted = 0;
	pc = (struct p4_cpu *) pmc_pcpu[cpu];

	pmi_ovf_mask = pmc_cpu_is_logical(cpu) ?
	    P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
	pmi_ovf_mask |= P4_CCCR_OVF;

	/*
	 * Loop through all CCCRs, looking for ones that have the
	 * OVF_PMI bit set for our logical CPU.
	 */

	for (i = 1; i < P4_NPMCS; i++) {
		cccrval = rdmsr(P4_CCCR_MSR_FIRST + i - 1);

		if ((cccrval & pmi_ovf_mask) != pmi_ovf_mask)
			continue;

		v = rdmsr(P4_PERFCTR_MSR_FIRST + i - 1);

		pmc_interrupted = 1;

		PMCDBG(MDP,INT, 2, "ri=%d v=%jx", i, v);

		/* Stop the counter, and turn off the overflow bit */
		cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
		wrmsr(P4_CCCR_MSR_FIRST + i - 1, cccrval);

		phw = pc->pc_hwpmcs[i];
		pm  = phw->phw_pmc;

		/*
		 * Ignore de-configured or stopped PMCs.  Also ignore
		 * counting mode PMCs that may have overflowed their
		 * counters.
		 */
		if (pm == NULL ||
		    pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
			continue;

		/*
		 * If the previous sample hasn't been read yet, the
		 * sampling interrupt is coming in too fast for the
		 * rest of the system to cope.  Do not re-enable the
		 * counter.
		 */

		if (P4_PCPU_SAVED_IP(pc,i,cpu)) {
			atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
			continue;
		}

		/*
		 * Write the reload count and restart the hardware.
		 */

		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
			pm->pm_sc.pm_reloadcount);
		wrmsr(P4_PERFCTR_MSR_FIRST + i - 1, v);
		wrmsr(P4_CCCR_MSR_FIRST + i - 1,
		    cccrval | P4_CCCR_ENABLE);
	}

	if (pmc_interrupted) {

		/*
		 * On Intel CPUs, the PMC 'pcint' entry in the LAPIC
		 * gets masked when a PMC interrupts the CPU.  We need
		 * to unmask this.
		 */
		p4_lapic_enable_pmc_interrupt();

		/* XXX: Invoke helper (non-NMI) interrupt here */
	}

	return pmc_interrupted;
}

/*
 * Describe a CPU's PMC state.
 */

static int
p4_describe(int cpu, int ri, struct pmc_info *pi,
    struct pmc **ppmc)
{
	int error;
	size_t copied;
	struct pmc_hw *phw;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < mp_ncpus,
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] row-index %d out of range", __LINE__, ri));

	PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);

	if (pmc_cpu_is_logical(cpu))
		return EINVAL;

	phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
	pd  = &p4_pmcdesc[ri];

	if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
		 PMC_NAME_MAX, &copied)) != 0)
		return error;

	pi->pm_class = pd->pm_descr.pd_class;

	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = phw->phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return 0;
}

/*
 * Get MSR# for use with RDPMC.
 */

static int
p4_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] ri %d out of range", __LINE__, ri));

	*msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;

	PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);

	return 0;
}
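
/*
 * (On the P4, RDPMC indexes the performance counters from 0, so the
 * counter's MSR number relative to P4_PERFCTR_MSR_FIRST is the value
 * user code loads into %ecx; e.g. BPU_COUNTER0 at MSR 0x300 would be
 * RDPMC counter 0.)
 */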

int
pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
{
	struct p4_event_descr *pe;

	KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0,
	    ("[p4,%d] Initializing non-Intel processor", __LINE__));

	PMCDBG(MDP,INI,1, "%s", "p4-initialize");

	switch (pmc_mdep->pmd_cputype) {
	case PMC_CPU_INTEL_PIV:

		pmc_mdep->pmd_npmc	    = P4_NPMCS;
		pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4;
		pmc_mdep->pmd_classes[1].pm_caps  = P4_PMC_CAPS;
		pmc_mdep->pmd_classes[1].pm_width = 40;
		pmc_mdep->pmd_nclasspmcs[1] = 18;

		pmc_mdep->pmd_init    	    = p4_init;
		pmc_mdep->pmd_cleanup 	    = p4_cleanup;
		pmc_mdep->pmd_switch_in     = p4_switch_in;
		pmc_mdep->pmd_switch_out    = p4_switch_out;
		pmc_mdep->pmd_read_pmc 	    = p4_read_pmc;
		pmc_mdep->pmd_write_pmc     = p4_write_pmc;
		pmc_mdep->pmd_config_pmc    = p4_config_pmc;
		pmc_mdep->pmd_get_config    = p4_get_config;
		pmc_mdep->pmd_allocate_pmc  = p4_allocate_pmc;
		pmc_mdep->pmd_release_pmc   = p4_release_pmc;
		pmc_mdep->pmd_start_pmc     = p4_start_pmc;
		pmc_mdep->pmd_stop_pmc      = p4_stop_pmc;
		pmc_mdep->pmd_intr	    = p4_intr;
		pmc_mdep->pmd_describe      = p4_describe;
		pmc_mdep->pmd_get_msr  	    = p4_get_msr; /* i386 */

		/* model specific munging */
		if ((cpu_id & 0xFFF) < 0xF27) {

			/*
			 * On P4 and Xeon with CPUID < (Family 15,
			 * Model 2, Stepping 7), only one ESCR is
			 * available for the IOQ_ALLOCATION event.
			 */

			pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
			pe->pm_escrs[1] = P4_ESCR_NONE;
		}

		break;

	default:
		KASSERT(0, ("[p4,%d] Unknown CPU type", __LINE__));
		return ENOSYS;
	}

	return 0;
}