1139826Simp/*-
2191672Sbms * Copyright (c) 2003-2007 Joseph Koshy
353541Sshin * Copyright (c) 2007 The FreeBSD Foundation
453541Sshin * All rights reserved.
553541Sshin *
653541Sshin * Portions of this software were developed by A. Joseph Koshy under
753541Sshin * sponsorship from the FreeBSD Foundation and Google, Inc.
853541Sshin *
953541Sshin * Redistribution and use in source and binary forms, with or without
1053541Sshin * modification, are permitted provided that the following conditions
1153541Sshin * are met:
12191672Sbms * 1. Redistributions of source code must retain the above copyright
13191672Sbms *    notice, this list of conditions and the following disclaimer.
14191672Sbms * 2. Redistributions in binary form must reproduce the above copyright
1553541Sshin *    notice, this list of conditions and the following disclaimer in the
16191672Sbms *    documentation and/or other materials provided with the distribution.
1753541Sshin *
1853541Sshin * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19191672Sbms * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2053541Sshin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2153541Sshin * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2253541Sshin * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2353541Sshin * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2453541Sshin * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2553541Sshin * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2653541Sshin * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27174510Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28174510Sobrien * SUCH DAMAGE.
2953541Sshin */
3053541Sshin
31139826Simp#include <sys/cdefs.h>
3253541Sshin__FBSDID("$FreeBSD: releng/10.3/sys/dev/hwpmc/hwpmc_piv.c 283884 2015-06-01 17:57:05Z jhb $");
3353541Sshin
3453541Sshin#include <sys/param.h>
3553541Sshin#include <sys/bus.h>
3653541Sshin#include <sys/lock.h>
3753541Sshin#include <sys/mutex.h>
3853541Sshin#include <sys/pmc.h>
3953541Sshin#include <sys/pmckern.h>
4053541Sshin#include <sys/smp.h>
4153541Sshin#include <sys/systm.h>
4253541Sshin#include <machine/intr_machdep.h>
4353541Sshin#if (__FreeBSD_version >= 1100000)
4453541Sshin#include <x86/apicvar.h>
4553541Sshin#else
4653541Sshin#include <machine/apicvar.h>
4753541Sshin#endif
4853541Sshin#include <machine/cpu.h>
4953541Sshin#include <machine/cpufunc.h>
5053541Sshin#include <machine/cputypes.h>
5153541Sshin#include <machine/md_var.h>
5253541Sshin#include <machine/specialreg.h>
5353541Sshin
5453541Sshin/*
5553541Sshin * PENTIUM 4 SUPPORT
5653541Sshin *
5753541Sshin * The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs
 * respectively.  Each PMC consists of two model-specific registers:
5953541Sshin * a counter configuration control register (CCCR) and a counter
6053541Sshin * register that holds the actual event counts.
6153541Sshin *
6253541Sshin * Configuring an event requires the use of one of 45 event selection
6353541Sshin * control registers (ESCR).  Events are associated with specific
6453541Sshin * ESCRs.  Each PMC group has a set of ESCRs it can use.
6553541Sshin *
66174510Sobrien * - The BPU counter group (4 PMCs) can use the 16 ESCRs:
67174510Sobrien *   BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
 *   PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
6962587Sitojun *
7062587Sitojun * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
7155009Sshin *   TC_ESCR{0,1}, TBPU_ESCR{0,1}.
7253541Sshin *
7353541Sshin * - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
7453541Sshin *   FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
7553541Sshin *   DAC_ESCR{0,1}.
7653541Sshin *
77191672Sbms * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
78151539Ssuz *   ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
79151539Ssuz *
80126603Sume * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
 * present) of a counter group.  Odd-numbered ESCRs can be used with
82200572Sbms * counters 2, 3 and 5 (if present) of a counter group.  The
8353541Sshin * 'p4_escrs[]' table describes these restrictions in a form that
8453541Sshin * function 'p4_allocate()' uses for making allocation decisions.
85191672Sbms *
86191672Sbms * SYSTEM-MODE AND THREAD-MODE ALLOCATION
8753541Sshin *
8853541Sshin * In addition to remembering the state of PMC rows
 * ('FREE','STANDALONE', or 'THREAD'), we similarly need to track the
90151539Ssuz * state of ESCR rows.  If an ESCR is allocated to a system-mode PMC
9162587Sitojun * on a CPU we cannot allocate this to a thread-mode PMC.  On a
9253541Sshin * multi-cpu (multiple physical CPUs) system, ESCR allocation on each
93148385Sume * CPU is tracked by the pc_escrs[] array.
9462587Sitojun *
95191672Sbms * Each system-mode PMC that is using an ESCR records its row-index in
9653541Sshin * the appropriate entry and system-mode allocation attempts check
9753541Sshin * that an ESCR is available using this array.  Process-mode PMCs do
98191672Sbms * not use the pc_escrs[] array, since ESCR row itself would have been
99191672Sbms * marked as in 'THREAD' mode.
100191672Sbms *
101191672Sbms * HYPERTHREADING SUPPORT
102191672Sbms *
103191672Sbms * When HTT is enabled, the FreeBSD kernel treats the two 'logical'
104191672Sbms * cpus as independent CPUs and can schedule kernel threads on them
105191672Sbms * independently.  However, the two logical CPUs share the same set of
106191672Sbms * PMC resources.  We need to ensure that:
107191672Sbms * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
108191672Sbms *   and,
109191672Sbms * - Threads of multi-threaded processes that get scheduled on the same
110191672Sbms *   physical CPU are handled correctly.
111191672Sbms *
112191672Sbms * HTT Detection
113191672Sbms *
114191672Sbms * Not all HTT capable systems will have HTT enabled.  We detect the
115191672Sbms * presence of HTT by detecting if 'p4_init()' was called for a secondary
116191672Sbms * CPU in a HTT pair.
117191672Sbms *
118191672Sbms * Note that hwpmc(4) cannot currently deal with a change in HTT status once
119191672Sbms * loaded.
120191672Sbms *
121192923Sbms * Handling HTT READ / WRITE / START / STOP
122191672Sbms *
123192923Sbms * PMC resources are shared across the CPUs in an HTT pair.  We
124230076Sjhb * designate the lower numbered CPU in a HTT pair as the 'primary'
125230076Sjhb * CPU.  In each primary CPU's state we keep track of a 'runcount'
126191672Sbms * which reflects the number of PMC-using processes that have been
127191672Sbms * scheduled on its secondary CPU.  Process-mode PMC operations will
128191672Sbms * actually 'start' or 'stop' hardware only if these are the first or
129191672Sbms * last processes respectively to use the hardware.  PMC values
130191672Sbms * written by a 'write' operation are saved and are transferred to
131191672Sbms * hardware at PMC 'start' time if the runcount is 0.  If the runcount
132191672Sbms * is greater than 0 at the time of a 'start' operation, we keep track
133191672Sbms * of the actual hardware value at the time of the 'start' operation
134191672Sbms * and use this to adjust the final readings at PMC 'stop' or 'read'
135191672Sbms * time.
136200871Sbms *
137200871Sbms * Execution sequences:
138191672Sbms *
139191672Sbms * Case 1:   CPUx   +...-		(no overlap)
140191672Sbms *	     CPUy         +...-
141191672Sbms *           RC   0 1   0 1   0
142191672Sbms *
143191672Sbms * Case 2:   CPUx   +........-		(partial overlap)
144191672Sbms * 	     CPUy       +........-
145191672Sbms *           RC   0 1   2    1   0
146191672Sbms *
147191672Sbms * Case 3:   CPUx   +..............-	(fully overlapped)
148191672Sbms *	     CPUy       +.....-
149191672Sbms *	     RC   0 1   2     1    0
15053541Sshin *
151191672Sbms *     Key:
152191672Sbms *     'CPU[xy]' : one of the two logical processors on a HTT CPU.
153191672Sbms *     'RC'      : run count (#threads per physical core).
154191672Sbms *     '+'       : point in time when a thread is put on a CPU.
155191672Sbms *     '-'       : point in time where a thread is taken off a CPU.
156191672Sbms *
157191672Sbms * Handling HTT CONFIG
158191672Sbms *
159191672Sbms * Different processes attached to the same PMC may get scheduled on
160191672Sbms * the two logical processors in the package.  We keep track of config
161191672Sbms * and de-config operations using the CFGFLAGS fields of the per-physical
162191672Sbms * cpu state.
163191672Sbms */
164191672Sbms
/*
 * X-macro list of the P4 counters, in row-index order, followed by a
 * 'NONE' placeholder entry.  An expansion site defines P4_PMC(name)
 * to whatever it needs (e.g. an enumerator) before invoking P4_PMCS().
 */
#define	P4_PMCS()				\
	P4_PMC(BPU_COUNTER0)			\
	P4_PMC(BPU_COUNTER1)			\
	P4_PMC(BPU_COUNTER2)			\
	P4_PMC(BPU_COUNTER3)			\
	P4_PMC(MS_COUNTER0)			\
	P4_PMC(MS_COUNTER1)			\
	P4_PMC(MS_COUNTER2)			\
	P4_PMC(MS_COUNTER3)			\
	P4_PMC(FLAME_COUNTER0)			\
	P4_PMC(FLAME_COUNTER1)			\
	P4_PMC(FLAME_COUNTER2)			\
	P4_PMC(FLAME_COUNTER3)			\
	P4_PMC(IQ_COUNTER0)			\
	P4_PMC(IQ_COUNTER1)			\
	P4_PMC(IQ_COUNTER2)			\
	P4_PMC(IQ_COUNTER3)			\
	P4_PMC(IQ_COUNTER4)			\
	P4_PMC(IQ_COUNTER5)			\
	P4_PMC(NONE)
185191672Sbms
/*
 * Counter identifiers: one P4_PMC_<name> enumerator per entry of
 * P4_PMCS(), ending with P4_PMC_NONE.  Note that P4_PMC() is left
 * defined after this enum.
 */
enum pmc_p4pmc {
#undef	P4_PMC
#define	P4_PMC(N)	P4_PMC_##N ,
	P4_PMCS()
};
191191672Sbms
192191672Sbms/*
193191672Sbms * P4 ESCR descriptors
194191672Sbms */
19553541Sshin
/*
 * X-macro list of the P4 ESCRs.  Each entry is
 *
 *	P4_ESCR(name, MSR address, counter, counter, counter-or-NONE)
 *
 * and names the counters that may be paired with that ESCR.  As
 * described at the top of this file, even-numbered ESCRs pair with
 * counters 0, 1 (and 4 where present) of their group and odd-numbered
 * ESCRs pair with counters 2, 3 (and 5 where present).  The list ends
 * with a 'NONE' placeholder entry.
 */
#define	P4_ESCRS()							\
    P4_ESCR(BSU_ESCR0,	0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BSU_ESCR1,	0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FSB_ESCR0,	0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(FSB_ESCR1,	0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FIRM_ESCR0,	0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FIRM_ESCR1,	0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(DAC_ESCR0,	0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(DAC_ESCR1,	0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(MOB_ESCR0,	0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(MOB_ESCR1,	0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(PMH_ESCR0,	0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(PMH_ESCR1,	0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(SAAT_ESCR0,	0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(SAAT_ESCR1,	0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(U2L_ESCR0,	0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(U2L_ESCR1,	0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(BPU_ESCR0,	0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BPU_ESCR1,	0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(IS_ESCR0,	0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IS_ESCR1,	0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ITLB_ESCR0,	0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(ITLB_ESCR1,	0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(CRU_ESCR0,	0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR1,	0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(IQ_ESCR0,	0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(IQ_ESCR1,	0x3BB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(RAT_ESCR0,	0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(RAT_ESCR1,	0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(SSU_ESCR0,	0x3BE, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(MS_ESCR0,	0x3C0, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(MS_ESCR1,	0x3C1, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TBPU_ESCR0,	0x3C2, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TBPU_ESCR1,	0x3C3, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TC_ESCR0,	0x3C4, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TC_ESCR1,	0x3C5, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(IX_ESCR0,	0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IX_ESCR1,	0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ALF_ESCR0,	0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(ALF_ESCR1,	0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR2,	0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR3,	0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR4,	0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR5,	0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(NONE,		~0,    NONE, NONE, NONE)
243192923Sbms
/*
 * ESCR identifiers: one P4_ESCR_<name> enumerator per entry of
 * P4_ESCRS(), in list order, ending with P4_ESCR_NONE.
 */
enum pmc_p4escr {
#define	P4_ESCR(N, MSR, P1, P2, P3)	P4_ESCR_##N ,
	P4_ESCRS()
#undef	P4_ESCR
};
249191672Sbms
/* Static description of one ESCR. */
struct pmc_p4escr_descr {
	const char	pm_escrname[PMC_NAME_MAX];	/* ESCR name string */
	u_short		pm_escr_msr;			/* MSR address of the ESCR */
	/* counters that may be paired with this ESCR */
	const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
};
255191672Sbms
/*
 * ESCR descriptor table, generated from P4_ESCRS(); entries appear in
 * the same order as the enumerators of 'enum pmc_p4escr'.
 */
static struct pmc_p4escr_descr p4_escrs[] =
{
#define	P4_ESCR(N, MSR, P1, P2, P3)		\
	{					\
		.pm_escrname = #N,		\
		.pm_escr_msr = (MSR),		\
		.pm_pmcs =			\
		{				\
			P4_PMC_##P1,		\
			P4_PMC_##P2,		\
			P4_PMC_##P3		\
		}				\
	} ,

	P4_ESCRS()

#undef	P4_ESCR
};
274191672Sbms
27553541Sshin/*
276191672Sbms * P4 Event descriptor
277191672Sbms */
278191672Sbms
/* Static description of one P4 event. */
struct p4_event_descr {
	const enum pmc_event pm_event;		/* PMC_EV_P4_* event code */
	const uint32_t	pm_escr_eventselect;	/* ESCR event select value */
	const uint32_t	pm_cccr_select;		/* CCCR select value */
	const char	pm_is_ti_event;		/* TRUE/FALSE, see P4_EVENT_IS_TI() */
	/* ESCRs usable for this event; unused slots hold P4_ESCR_NONE */
	enum pmc_p4escr	pm_escrs[P4_MAX_ESCR_PER_EVENT];
};
286191672Sbms
/*
 * Event descriptor table, searched linearly by p4_find_event().
 *
 * Each row is P4_EVDESCR(event name, ESCR event select, CCCR select,
 * TI-event flag, first ESCR, second ESCR or NONE).
 */
static struct p4_event_descr p4_events[] = {

#define	P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1)	\
	{								\
		.pm_event            = PMC_EV_P4_##NAME,		\
		.pm_escr_eventselect = (ESCREVENTSEL),			\
		.pm_cccr_select      = (CCCRSEL),			\
		.pm_is_ti_event	     = (TI_EVENT),			\
		.pm_escrs            =					\
		{							\
			P4_ESCR_##ESCR0,				\
			P4_ESCR_##ESCR1					\
		}							\
	}

P4_EVDESCR(TC_DELIVER_MODE,	0x01, 0x01, TRUE,  TC_ESCR0,	TC_ESCR1),
P4_EVDESCR(BPU_FETCH_REQUEST,	0x03, 0x00, FALSE, BPU_ESCR0,	BPU_ESCR1),
P4_EVDESCR(ITLB_REFERENCE,	0x18, 0x03, FALSE, ITLB_ESCR0,	ITLB_ESCR1),
P4_EVDESCR(MEMORY_CANCEL,	0x02, 0x05, FALSE, DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(MEMORY_COMPLETE,	0x08, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(LOAD_PORT_REPLAY,	0x04, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(STORE_PORT_REPLAY,	0x05, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(MOB_LOAD_REPLAY,	0x03, 0x02, FALSE, MOB_ESCR0,	MOB_ESCR1),
P4_EVDESCR(PAGE_WALK_TYPE,	0x01, 0x04, TRUE,  PMH_ESCR0,	PMH_ESCR1),
P4_EVDESCR(BSQ_CACHE_REFERENCE,	0x0C, 0x07, FALSE, BSU_ESCR0,	BSU_ESCR1),
P4_EVDESCR(IOQ_ALLOCATION,	0x03, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(IOQ_ACTIVE_ENTRIES,	0x1A, 0x06, FALSE, FSB_ESCR1,	NONE),
P4_EVDESCR(FSB_DATA_ACTIVITY,	0x17, 0x06, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BSQ_ALLOCATION,	0x05, 0x07, FALSE, BSU_ESCR0,	NONE),
P4_EVDESCR(BSQ_ACTIVE_ENTRIES,	0x06, 0x07, FALSE, BSU_ESCR1,	NONE),
	/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
P4_EVDESCR(SSE_INPUT_ASSIST,	0x34, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_SP_UOP,	0x08, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_DP_UOP,	0x0C, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_SP_UOP,	0x0A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_DP_UOP,	0x0E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(64BIT_MMX_UOP,	0x02, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(128BIT_MMX_UOP,	0x1A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_FP_UOP,		0x04, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_SIMD_MOVES_UOP,	0x2E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(GLOBAL_POWER_EVENTS,	0x13, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(TC_MS_XFER,		0x05, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(UOP_QUEUE_WRITES,	0x09, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
    				0x05, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RETIRED_BRANCH_TYPE,	0x04, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RESOURCE_STALL,	0x01, 0x01, FALSE, ALF_ESCR0,	ALF_ESCR1),
P4_EVDESCR(WC_BUFFER,		0x05, 0x05, TRUE,  DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(B2B_CYCLES,		0x16, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BNR,			0x08, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(SNOOP,		0x06, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(RESPONSE,		0x04, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(FRONT_END_EVENT,	0x08, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(EXECUTION_EVENT,	0x0C, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(REPLAY_EVENT, 	0x09, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(INSTR_RETIRED,	0x02, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOPS_RETIRED,	0x01, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOP_TYPE,		0x02, 0x02, FALSE, RAT_ESCR0,	RAT_ESCR1),
P4_EVDESCR(BRANCH_RETIRED,	0x06, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
P4_EVDESCR(X87_ASSIST,		0x03, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MACHINE_CLEAR,	0x02, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3)

#undef	P4_EVDESCR
};
352191672Sbms
/* True if event descriptor 'E' has its pm_is_ti_event flag set to TRUE. */
#define	P4_EVENT_IS_TI(E) ((E)->pm_is_ti_event == TRUE)

/*
 * Size of the PMC_EV_P4_* event range; used as the bound when
 * searching p4_events[].
 */
#define	P4_NEVENTS	(PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1)
356191672Sbms
357151539Ssuz/*
358191672Sbms * P4 PMC descriptors
359191672Sbms */
360191672Sbms
/* Static description of one P4 counter row. */
struct p4pmc_descr {
	struct pmc_descr pm_descr; 	/* common information */
	enum pmc_p4pmc	pm_pmcnum;	/* PMC number */
	uint32_t	pm_pmc_msr; 	/* PERFCTR MSR address */
	uint32_t	pm_cccr_msr;  	/* CCCR MSR address */
};
367191672Sbms
/*
 * Counter descriptor table, indexed by row index.
 *
 * Each row is P4_PMCDESCR(counter name, counter MSR, CCCR MSR).  All
 * counters share the capability set P4_PMC_CAPS and are described as
 * 40 bits wide.
 */
static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
#define	P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM |  \
	PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
	PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE |            \
	PMC_CAP_TAGGING | PMC_CAP_CASCADE)

#define	P4_PMCDESCR(N, PMC, CCCR)			\
	{						\
		.pm_descr =				\
		{					\
			.pd_name = #N,			\
			.pd_class = PMC_CLASS_P4,	\
			.pd_caps = P4_PMC_CAPS,		\
			.pd_width = 40			\
		},					\
		.pm_pmcnum      = P4_PMC_##N,		\
		.pm_cccr_msr 	= (CCCR),		\
		.pm_pmc_msr	= (PMC)			\
	}

	P4_PMCDESCR(BPU_COUNTER0,	0x300,	0x360),
	P4_PMCDESCR(BPU_COUNTER1,	0x301,	0x361),
	P4_PMCDESCR(BPU_COUNTER2,	0x302,	0x362),
	P4_PMCDESCR(BPU_COUNTER3,	0x303,	0x363),
	P4_PMCDESCR(MS_COUNTER0,	0x304,	0x364),
	P4_PMCDESCR(MS_COUNTER1,	0x305,	0x365),
	P4_PMCDESCR(MS_COUNTER2,	0x306,	0x366),
	P4_PMCDESCR(MS_COUNTER3,	0x307,	0x367),
	P4_PMCDESCR(FLAME_COUNTER0,	0x308,	0x368),
	P4_PMCDESCR(FLAME_COUNTER1,	0x309,	0x369),
	P4_PMCDESCR(FLAME_COUNTER2,	0x30A,	0x36A),
	P4_PMCDESCR(FLAME_COUNTER3,	0x30B,	0x36B),
	P4_PMCDESCR(IQ_COUNTER0,	0x30C,	0x36C),
	P4_PMCDESCR(IQ_COUNTER1,	0x30D,	0x36D),
	P4_PMCDESCR(IQ_COUNTER2,	0x30E,	0x36E),
	P4_PMCDESCR(IQ_COUNTER3,	0x30F,	0x36F),
	P4_PMCDESCR(IQ_COUNTER4,	0x310,	0x370),
	P4_PMCDESCR(IQ_COUNTER5,	0x311,	0x371),

#undef	P4_PMCDESCR
};
409151539Ssuz
/* HTT support */
#define	P4_NHTT					2 /* logical processors/chip */

/*
 * Non-zero once p4_pcpu_init() has been called for the secondary
 * logical CPU of an HTT pair; consulted by P4_CPU_IS_HTT_SECONDARY()
 * and P4_TO_HTT_PRIMARY().
 */
static int p4_system_has_htt;
414191672Sbms
415191672Sbms/*
416191672Sbms * Per-CPU data structure for P4 class CPUs
417191672Sbms *
418191672Sbms * [19 struct pmc_hw structures]
419191672Sbms * [45 ESCRs status bytes]
420191672Sbms * [per-cpu spin mutex]
421191672Sbms * [19 flag fields for holding config flags and a runcount]
422191672Sbms * [19*2 hw value fields]	(Thread mode PMC support)
423151539Ssuz *    or
424151539Ssuz * [19*2 EIP values]		(Sampling mode PMCs)
 * [19*2 pmc value fields]	(Thread mode PMC support)
426191672Sbms */
427191672Sbms
struct p4_cpu {
	struct pmc_hw	pc_p4pmcs[P4_NPMCS];	/* per-row hardware state */
	char		pc_escrs[P4_NESCR];	/* per-ESCR owner, initially
						   P4_INVALID_PMC_INDEX */
	struct mtx	pc_mtx;		/* spin lock */
	uint32_t	pc_intrflag;	/* NMI handler flags */
	unsigned int	pc_intrlock;	/* NMI handler spin lock */
	unsigned char	pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
	union {
		pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];	/* saved hw values */
		uintptr_t   pc_ip[P4_NPMCS * P4_NHTT];	/* saved IPs */
	}		pc_si;
	pmc_value_t	pc_pmc_values[P4_NPMCS * P4_NHTT]; /* saved PMC values */
};

/*
 * Per-CPU state pointers, indexed by CPU number.  Entries are filled
 * in by p4_pcpu_init() and cleared by p4_pcpu_fini(); the secondary
 * logical CPU of an HTT pair gets no entry of its own.
 */
static struct p4_cpu **p4_pcpu;
443191672Sbms
/*
 * Accessors (usable as lvalues) for the per-row, per-logical-CPU save
 * areas in 'struct p4_cpu'.
 *
 * Each array holds P4_NPMCS * P4_NHTT entries, i.e. one slot per
 * (row index, logical CPU) pair.  Row RI owns the P4_NHTT consecutive
 * slots starting at RI * P4_NHTT and the low bit of the CPU number
 * selects the slot within that group.  (Multiplying RI by the CPU's
 * low bit, as was done previously, collapsed every row of an
 * even-numbered CPU onto slot 0.)
 */
#define	P4_PCPU_PMC_VALUE(PC,RI,CPU) 					\
	((PC)->pc_pmc_values[(RI) * P4_NHTT + ((CPU) & 1)])
#define	P4_PCPU_HW_VALUE(PC,RI,CPU)					\
	((PC)->pc_si.pc_hw[(RI) * P4_NHTT + ((CPU) & 1)])
#define	P4_PCPU_SAVED_IP(PC,RI,CPU)					\
	((PC)->pc_si.pc_ip[(RI) * P4_NHTT + ((CPU) & 1)])
447191672Sbms
/*
 * Accessors for the per-row flag byte pc_flags[RI].
 *
 * The low nibble holds the run count and the high nibble holds the
 * config flags.  GET_FLAGS returns the bits selected by MASK without
 * shifting them; SET_FLAGS replaces only the bits selected by MASK.
 * The temporary is 'unsigned char' to match pc_flags[], so that masks
 * such as 0xF0 do not depend on the signedness of plain 'char'.
 */
#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL)	do {	\
	unsigned char _tmp;				\
	_tmp = (PC)->pc_flags[(RI)];			\
	_tmp &= ~(MASK);				\
	_tmp |= (VAL) & (MASK);				\
	(PC)->pc_flags[(RI)] = _tmp;			\
} while (0)

/* Run count: low nibble of the flag byte. */
#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

/* Config flags: high nibble of the flag byte, presented shifted down. */
#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
462191672Sbms
/*
 * Config-flag bit for logical CPU 'C': 0x2 for the secondary CPU of an
 * HTT pair, 0x1 otherwise.  The argument itself is evaluated rather
 * than whatever variable named 'cpu' happens to be in scope at the
 * point of use.
 */
#define	P4_CPU_TO_FLAG(C)		(P4_CPU_IS_HTT_SECONDARY(C) ? 0x2 : 0x1)
464191672Sbms
/*
 * Per-row bits in pc_intrflag.  GET yields a non-zero value if bit I
 * is set; SET sets bit I when V is non-zero and clears it otherwise.
 * The shifts are done on an unsigned 32-bit one so that every bit of
 * the 32-bit flag word can be addressed without signed overflow.
 */
#define	P4_PCPU_GET_INTRFLAG(PC,I)	((PC)->pc_intrflag & ((uint32_t)1 << (I)))
#define	P4_PCPU_SET_INTRFLAG(PC,I,V)	do {		\
		uint32_t _mask;				\
		_mask = (uint32_t)1 << (I);		\
		if ((V))				\
			(PC)->pc_intrflag |= _mask;	\
		else					\
			(PC)->pc_intrflag &= ~_mask;	\
	} while (0)
474200871Sbms
/*
 * A minimal spin lock implementation for use inside the NMI handler.
 *
 * We don't want to use a regular spin lock here, because curthread
 * may not be consistent at the time the handler is invoked.
 *
 * ACQ spins, pausing between attempts, until it atomically changes
 * (PC)->pc_intrlock from 0 to 1; REL stores 0 with release semantics.
 * Both operate on the structure passed as 'PC' rather than on a
 * variable that happens to be called 'pc' at the call site.
 *
 * NOTE(review): P4_PCPU_REL_INTR_SPINLOCK() still expands with its own
 * trailing semicolon; it is kept because the callers are not visible
 * here.
 */
#define	P4_PCPU_ACQ_INTR_SPINLOCK(PC) do {				\
		while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1)) \
			ia32_pause();					\
	} while (0)
#define	P4_PCPU_REL_INTR_SPINLOCK(PC) 					\
	atomic_store_rel_int(&(PC)->pc_intrlock, 0);
487191672Sbms
/*
 * ESCR row disposition
 *
 * One signed counter per ESCR: a positive value means the ESCR row is
 * in 'THREAD' use, a negative value means it is in 'STANDALONE' use,
 * and zero means it is free.  The MARK/UNMARK macros adjust the count
 * atomically and assert that a row never changes sign.
 */
static int p4_escrdisp[P4_NESCR];

#define	P4_ESCR_ROW_DISP_IS_THREAD(E)		(p4_escrdisp[(E)] > 0)
#define	P4_ESCR_ROW_DISP_IS_STANDALONE(E)	(p4_escrdisp[(E)] < 0)
#define	P4_ESCR_ROW_DISP_IS_FREE(E)		(p4_escrdisp[(E)] == 0)

/* Count one more standalone user; at most one per active CPU. */
#define	P4_ESCR_MARK_ROW_STANDALONE(E) do {				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_max_active()), 		\
		("[p4,%d] row disposition error", __LINE__));		\
} while (0)

/* Drop one standalone user. */
#define	P4_ESCR_UNMARK_ROW_STANDALONE(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						\
} while (0)

/* Count one more thread-mode user. */
#define	P4_ESCR_MARK_ROW_THREAD(E) do {					 \
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
		    __LINE__));						 \
	atomic_add_int(&p4_escrdisp[(E)], 1);				 \
} while (0)

/* Drop one thread-mode user. */
#define	P4_ESCR_UNMARK_ROW_THREAD(E) do {				 \
	atomic_add_int(&p4_escrdisp[(E)], -1);				 \
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
		    __LINE__));						 \
} while (0)
520191672Sbms
/* True if the enable bit is clear in the CCCR read from MSR 'cccr'. */
#define	P4_PMC_IS_STOPPED(cccr)	((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)

/*
 * HTT helpers.  When HTT has been detected, the odd-numbered CPU of a
 * pair is the secondary and the primary is found by clearing the low
 * bit of the CPU number; without HTT every CPU is its own primary.
 */
#define	P4_CPU_IS_HTT_SECONDARY(cpu)					\
	(p4_system_has_htt ? ((cpu) & 1) : 0)
#define	P4_TO_HTT_PRIMARY(cpu) 						\
	(p4_system_has_htt ? ((cpu) & ~1) : (cpu))

/*
 * Masks that clear the per-logical-CPU (T0/T1) bits, plus the enable
 * and overflow bits in the CCCR case, from a CCCR or ESCR value.
 */
#define	P4_CCCR_Tx_MASK	(~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1|	\
			     P4_CCCR_ENABLE|P4_CCCR_OVF))
#define	P4_ESCR_Tx_MASK	(~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS|	\
			     P4_ESCR_T1_USR))
532191672Sbms
533191672Sbms/*
534191672Sbms * support routines
535191672Sbms */
536191672Sbms
537191672Sbmsstatic struct p4_event_descr *
538191672Sbmsp4_find_event(enum pmc_event ev)
539191672Sbms{
540191672Sbms	int n;
541191672Sbms
542191672Sbms	for (n = 0; n < P4_NEVENTS; n++)
543191672Sbms		if (p4_events[n].pm_event == ev)
544191672Sbms			break;
545191672Sbms	if (n == P4_NEVENTS)
546233200Sjhb		return (NULL);
547191672Sbms	return (&p4_events[n]);
548191672Sbms}
549191672Sbms
550191672Sbms/*
551191672Sbms * Initialize per-cpu state
552191672Sbms */
553191672Sbms
554191672Sbmsstatic int
555191672Sbmsp4_pcpu_init(struct pmc_mdep *md, int cpu)
556191672Sbms{
557191672Sbms	char *pescr;
558233200Sjhb	int n, first_ri, phycpu;
559191672Sbms	struct pmc_hw *phw;
560191672Sbms	struct p4_cpu *p4c;
561191672Sbms	struct pmc_cpu *pc, *plc;
562191672Sbms
563191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
564151539Ssuz	    ("[p4,%d] insane cpu number %d", __LINE__, cpu));
565151539Ssuz
566191672Sbms	PMCDBG2(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu,
567151539Ssuz	    pmc_cpu_is_primary(cpu) != 0);
568151539Ssuz
569191672Sbms	first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;
570191672Sbms
571191672Sbms	/*
572191672Sbms	 * The two CPUs in an HT pair share their per-cpu state.
573191672Sbms	 *
574191672Sbms	 * For HT capable CPUs, we assume that the two logical
57553541Sshin	 * processors in the HT pair get two consecutive CPU ids
576191672Sbms	 * starting with an even id #.
57753541Sshin	 *
57853541Sshin	 * The primary CPU (the even numbered CPU of the pair) would
579191672Sbms	 * have been initialized prior to the initialization for the
580191672Sbms	 * secondary.
581191672Sbms	 */
582191672Sbms
583191672Sbms	if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) {
584191672Sbms
585191672Sbms		p4_system_has_htt = 1;
586191672Sbms
587191672Sbms		phycpu = P4_TO_HTT_PRIMARY(cpu);
588191672Sbms		pc = pmc_pcpu[phycpu];
589191672Sbms		plc = pmc_pcpu[cpu];
590191672Sbms
591191672Sbms		KASSERT(plc != pc, ("[p4,%d] per-cpu config error", __LINE__));
592191672Sbms
593191672Sbms		PMCDBG3(MDP,INI,1, "p4-init cpu=%d phycpu=%d pc=%p", cpu,
594191672Sbms		    phycpu, pc);
595191672Sbms		KASSERT(pc, ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d",
596191672Sbms		    __LINE__, cpu, phycpu));
597191672Sbms
598191672Sbms		/* PMCs are shared with the physical CPU. */
599191672Sbms		for (n = 0; n < P4_NPMCS; n++)
600191672Sbms			plc->pc_hwpmcs[n + first_ri] =
601191672Sbms			    pc->pc_hwpmcs[n + first_ri];
602191672Sbms
603191672Sbms		return (0);
604191672Sbms	}
605191672Sbms
606191672Sbms	p4c = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO);
607191672Sbms
608191672Sbms	if (p4c == NULL)
609191672Sbms		return (ENOMEM);
610191672Sbms
611191672Sbms	pc = pmc_pcpu[cpu];
612191672Sbms
613191672Sbms	KASSERT(pc != NULL, ("[p4,%d] cpu %d null per-cpu", __LINE__, cpu));
614191672Sbms
615191672Sbms	p4_pcpu[cpu] = p4c;
616191672Sbms	phw = p4c->pc_p4pmcs;
617191672Sbms
618191672Sbms	for (n = 0; n < P4_NPMCS; n++, phw++) {
619191672Sbms		phw->phw_state   = PMC_PHW_FLAG_IS_ENABLED |
620191672Sbms		    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
621191672Sbms		phw->phw_pmc     = NULL;
622192923Sbms		pc->pc_hwpmcs[n + first_ri] = phw;
623192923Sbms	}
624192923Sbms
625191672Sbms	pescr = p4c->pc_escrs;
626191672Sbms	for (n = 0; n < P4_NESCR; n++)
627191672Sbms		*pescr++ = P4_INVALID_PMC_INDEX;
628192923Sbms
629191672Sbms	mtx_init(&p4c->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN);
630191672Sbms
631191672Sbms	return (0);
632191672Sbms}
633192923Sbms
634191672Sbms/*
635191672Sbms * Destroy per-cpu state.
636191672Sbms */
637191672Sbms
638191672Sbmsstatic int
639192923Sbmsp4_pcpu_fini(struct pmc_mdep *md, int cpu)
640192923Sbms{
641192923Sbms	int first_ri, i;
642192923Sbms	struct p4_cpu *p4c;
643192923Sbms	struct pmc_cpu *pc;
644192923Sbms
645192923Sbms	PMCDBG1(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
646192923Sbms
647192923Sbms	pc = pmc_pcpu[cpu];
648192923Sbms	first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;
649192923Sbms
650192923Sbms	for (i = 0; i < P4_NPMCS; i++)
651192923Sbms		pc->pc_hwpmcs[i + first_ri] = NULL;
652192923Sbms
653192923Sbms	if (!pmc_cpu_is_primary(cpu) && (cpu & 1))
654192923Sbms		return (0);
655192923Sbms
656192923Sbms	p4c = p4_pcpu[cpu];
657192923Sbms
658192923Sbms	KASSERT(p4c != NULL, ("[p4,%d] NULL pcpu", __LINE__));
659192923Sbms
660192923Sbms	/* Turn off all PMCs on this CPU */
661192923Sbms	for (i = 0; i < P4_NPMCS - 1; i++)
662192923Sbms		wrmsr(P4_CCCR_MSR_FIRST + i,
663192923Sbms		    rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE);
664192923Sbms
665192923Sbms	mtx_destroy(&p4c->pc_mtx);
666192923Sbms
667192923Sbms	free(p4c, M_PMC);
668192923Sbms
669192923Sbms	p4_pcpu[cpu] = NULL;
670192923Sbms
671192923Sbms	return (0);
672192923Sbms}
673192923Sbms
674192923Sbms/*
675192923Sbms * Read a PMC
676192923Sbms */
677192923Sbms
678192923Sbmsstatic int
679192923Sbmsp4_read_pmc(int cpu, int ri, pmc_value_t *v)
680192923Sbms{
681192923Sbms	struct pmc *pm;
682192923Sbms	pmc_value_t tmp;
683191672Sbms	struct p4_cpu *pc;
684192547Sbms	enum pmc_mode mode;
685191672Sbms	struct p4pmc_descr *pd;
68653541Sshin
687191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
68853541Sshin	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
689192923Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
690192923Sbms	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
691191672Sbms
692191672Sbms	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
693192923Sbms	pm = pc->pc_p4pmcs[ri].phw_pmc;
694191672Sbms	pd = &p4_pmcdesc[ri];
695191672Sbms
696191672Sbms	KASSERT(pm != NULL,
697233200Sjhb	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, cpu, ri));
698192923Sbms
699191672Sbms	KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
700192923Sbms	    ("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
701192923Sbms	    pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));
702192923Sbms
703192923Sbms	mode = PMC_TO_MODE(pm);
704192923Sbms
705192923Sbms	PMCDBG3(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
706192923Sbms
707192923Sbms	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
708192923Sbms	    ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
709192923Sbms
710192923Sbms	tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
711192923Sbms
712192923Sbms	if (PMC_IS_VIRTUAL_MODE(mode)) {
713192923Sbms		if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
714191672Sbms			tmp += (P4_PERFCTR_MASK + 1) -
715191672Sbms			    P4_PCPU_HW_VALUE(pc,ri,cpu);
716191672Sbms		else
717191672Sbms			tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
718191672Sbms		tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
719191672Sbms	}
720191672Sbms
721191672Sbms	if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
722191672Sbms		*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
723191672Sbms	else
724191672Sbms		*v = tmp;
725192923Sbms
726192923Sbms	PMCDBG1(MDP,REA,2, "p4-read -> %jx", *v);
72753541Sshin
728191672Sbms	return (0);
729233200Sjhb}
730191672Sbms
731191672Sbms/*
732191672Sbms * Write a PMC
733191672Sbms */
73453541Sshin
73553541Sshinstatic int
736191672Sbmsp4_write_pmc(int cpu, int ri, pmc_value_t v)
737191672Sbms{
738191672Sbms	enum pmc_mode mode;
739191672Sbms	struct pmc *pm;
740191672Sbms	struct p4_cpu *pc;
741191672Sbms	const struct pmc_hw *phw;
742191672Sbms	const struct p4pmc_descr *pd;
743191672Sbms
744191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
745191672Sbms	    ("[amd,%d] illegal CPU value %d", __LINE__, cpu));
746191672Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
747191672Sbms	    ("[amd,%d] illegal row-index %d", __LINE__, ri));
748191672Sbms
749191672Sbms	pc  = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
750191672Sbms	phw = &pc->pc_p4pmcs[ri];
751191672Sbms	pm  = phw->phw_pmc;
752191672Sbms	pd  = &p4_pmcdesc[ri];
75353541Sshin
754191672Sbms	KASSERT(pm != NULL,
755191672Sbms	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
756191672Sbms		cpu, ri));
75753541Sshin
758191672Sbms	mode = PMC_TO_MODE(pm);
759191672Sbms
760191672Sbms	PMCDBG4(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
761191672Sbms	    mode, v);
762191672Sbms
763191672Sbms	/*
764191672Sbms	 * write the PMC value to the register/saved value: for
765191672Sbms	 * sampling mode PMCs, the value to be programmed into the PMC
766191672Sbms	 * counter is -(C+1) where 'C' is the requested sample rate.
767191672Sbms	 */
768191672Sbms	if (PMC_IS_SAMPLING_MODE(mode))
769191672Sbms		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
770191672Sbms
771191672Sbms	if (PMC_IS_SYSTEM_MODE(mode))
772191672Sbms		wrmsr(pd->pm_pmc_msr, v);
773191672Sbms	else
774191672Sbms		P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
775191672Sbms
776191672Sbms	return (0);
777191672Sbms}
778191672Sbms
779191672Sbms/*
780191672Sbms * Configure a PMC 'pm' on the given CPU and row-index.
781191672Sbms *
782191672Sbms * 'pm' may be NULL to indicate de-configuration.
783191672Sbms *
784191672Sbms * On HTT systems, a PMC may get configured twice, once for each
785191672Sbms * "logical" CPU.  We track this using the CFGFLAGS field of the
786191672Sbms * per-cpu state; this field is a bit mask with one bit each for
787191672Sbms * logical CPUs 0 & 1.
788191672Sbms */
789191672Sbms
790191672Sbmsstatic int
791191672Sbmsp4_config_pmc(int cpu, int ri, struct pmc *pm)
792148385Sume{
79353541Sshin	struct pmc_hw *phw;
79453541Sshin	struct p4_cpu *pc;
795191672Sbms	int cfgflags, cpuflag;
796191672Sbms
797191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
798191672Sbms	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
799191672Sbms
800191672Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
801191672Sbms	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
802191672Sbms
803191672Sbms	PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
804191672Sbms
805191672Sbms	pc  = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
80653541Sshin	phw = &pc->pc_p4pmcs[ri];
807191672Sbms
808191672Sbms	KASSERT(pm == NULL || phw->phw_pmc == NULL ||
809191672Sbms	    (p4_system_has_htt && phw->phw_pmc == pm),
810191672Sbms	    ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
811192923Sbms
812191672Sbms	mtx_lock_spin(&pc->pc_mtx);
813191672Sbms	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
814192923Sbms
815192923Sbms	KASSERT(cfgflags >= 0 || cfgflags <= 3,
816192923Sbms	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
81753541Sshin		cfgflags, cpu, ri));
818192923Sbms
819191672Sbms	KASSERT(cfgflags == 0 || phw->phw_pmc,
820192923Sbms	    ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
821192923Sbms		__LINE__, cpu, ri));
822192923Sbms
823192923Sbms	cpuflag = P4_CPU_TO_FLAG(cpu);
824192923Sbms
825192923Sbms	if (pm) {		/* config */
826192923Sbms		if (cfgflags == 0)
827192923Sbms			phw->phw_pmc = pm;
828192923Sbms
829192923Sbms		KASSERT(phw->phw_pmc == pm,
830192923Sbms		    ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
831192923Sbms			__LINE__, cpu, ri, pm, phw->phw_pmc));
832192923Sbms
833191672Sbms		cfgflags |= cpuflag;
834191672Sbms	} else {		/* unconfig */
835191672Sbms		cfgflags &= ~cpuflag;
836237992Sbms
837192923Sbms		if (cfgflags == 0)
838192923Sbms			phw->phw_pmc = NULL;
83978064Sume	}
840192923Sbms
841192923Sbms	KASSERT(cfgflags >= 0 || cfgflags <= 3,
842192923Sbms	    ("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__,
84378064Sume		cfgflags, cpu, ri));
844191672Sbms
845191672Sbms	P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
846191672Sbms
847191672Sbms	mtx_unlock_spin(&pc->pc_mtx);
848191672Sbms
849191672Sbms	return (0);
850191672Sbms}
851191672Sbms
852191672Sbms/*
853191672Sbms * Retrieve a configured PMC pointer from hardware state.
854191672Sbms */
855191672Sbms
856191672Sbmsstatic int
857191672Sbmsp4_get_config(int cpu, int ri, struct pmc **ppm)
858191672Sbms{
859191672Sbms	int cfgflags;
860191672Sbms	struct p4_cpu *pc;
861191672Sbms
862191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
863191672Sbms	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
864192923Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
865192923Sbms	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
866192923Sbms
867192923Sbms	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
868192923Sbms
869192923Sbms	mtx_lock_spin(&pc->pc_mtx);
870192923Sbms	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
871192923Sbms	mtx_unlock_spin(&pc->pc_mtx);
872192923Sbms
873237990Sbms	if (cfgflags & P4_CPU_TO_FLAG(cpu))
874192923Sbms		*ppm = pc->pc_p4pmcs[ri].phw_pmc; /* PMC config'ed on this CPU */
875192923Sbms	else
876192923Sbms		*ppm = NULL;
877192923Sbms
878192923Sbms	return 0;
879192923Sbms}
880192923Sbms
881192923Sbms/*
882192923Sbms * Allocate a PMC.
883192923Sbms *
884192923Sbms * The allocation strategy differs between HTT and non-HTT systems.
885191672Sbms *
886192547Sbms * The non-HTT case:
887191672Sbms *   - Given the desired event and the PMC row-index, lookup the
888191672Sbms *   list of valid ESCRs for the event.
889191672Sbms *   - For each valid ESCR:
890191672Sbms *     - Check if the ESCR is free and the ESCR row is in a compatible
891192923Sbms *       mode (i.e., system or process))
892192923Sbms *     - Check if the ESCR is usable with a P4 PMC at the desired row-index.
893192923Sbms *   If everything matches, we determine the appropriate bit values for the
894192923Sbms *   ESCR and CCCR registers.
895192923Sbms *
896192923Sbms * The HTT case:
897192923Sbms *
898192923Sbms * - Process mode PMCs require special care.  The FreeBSD scheduler could
899191672Sbms *   schedule any two processes on the same physical CPU.  We need to ensure
900191672Sbms *   that a given PMC row-index is never allocated to two different
901191672Sbms *   PMCs owned by different user-processes.
902191672Sbms *   This is ensured by always allocating a PMC from a 'FREE' PMC row
903191672Sbms *   if the system has HTT active.
904192547Sbms * - A similar check needs to be done for ESCRs; we do not want two PMCs
905191672Sbms *   using the same ESCR to be scheduled at the same time.  Thus ESCR
906191672Sbms *   allocation is also restricted to FREE rows if the system has HTT
907192923Sbms *   enabled.
90853541Sshin * - Thirdly, some events are 'thread-independent' terminology, i.e.,
909191672Sbms *   the PMC hardware cannot distinguish between events caused by
910192547Sbms *   different logical CPUs.  This makes it impossible to assign events
911191672Sbms *   to a given thread of execution.  If the system has HTT enabled,
912191672Sbms *   these events are not allowed for process-mode PMCs.
913192547Sbms */
914191672Sbms
915191672Sbmsstatic int
916191672Sbmsp4_allocate_pmc(int cpu, int ri, struct pmc *pm,
917191672Sbms    const struct pmc_op_pmcallocate *a)
918191672Sbms{
919192923Sbms	int found, n, m;
920192923Sbms	uint32_t caps, cccrvalue, escrvalue, tflags;
921192562Sbms	enum pmc_p4escr escr;
922192562Sbms	struct p4_cpu *pc;
923191672Sbms	struct p4_event_descr *pevent;
924191672Sbms	const struct p4pmc_descr *pd;
925191672Sbms
926191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
927191672Sbms	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
928191672Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
929191672Sbms	    ("[p4,%d] illegal row-index value %d", __LINE__, ri));
930191672Sbms
931191672Sbms	pd = &p4_pmcdesc[ri];
932191672Sbms
933191672Sbms	PMCDBG4(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
934233200Sjhb	    "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
935191672Sbms	    pm->pm_caps);
936225096Spluknet
937233200Sjhb	/* check class */
938191672Sbms	if (pd->pm_descr.pd_class != a->pm_class)
939225096Spluknet		return (EINVAL);
940191672Sbms
941191672Sbms	/* check requested capabilities */
942191672Sbms	caps = a->pm_caps;
943191672Sbms	if ((pd->pm_descr.pd_caps & caps) != caps)
944191672Sbms		return (EPERM);
945233200Sjhb
946191672Sbms	/*
947191672Sbms	 * If the system has HTT enabled, and the desired allocation
948191672Sbms	 * mode is process-private, and the PMC row disposition is not
949191672Sbms	 * FREE (0), decline the allocation.
950191672Sbms	 */
951191672Sbms
952191672Sbms	if (p4_system_has_htt &&
953191672Sbms	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
954191672Sbms	    pmc_getrowdisp(ri) != 0)
955191672Sbms		return (EBUSY);
956191672Sbms
957191672Sbms	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
958192562Sbms	    ("[p4,%d] unknown PMC class %d", __LINE__,
959191672Sbms		pd->pm_descr.pd_class));
960192923Sbms
961192923Sbms	if (pm->pm_event < PMC_EV_P4_FIRST ||
962192923Sbms	    pm->pm_event > PMC_EV_P4_LAST)
963233200Sjhb		return (EINVAL);
96453541Sshin
96553541Sshin	if ((pevent = p4_find_event(pm->pm_event)) == NULL)
966191672Sbms		return (ESRCH);
967191672Sbms
968191672Sbms	PMCDBG4(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
969191672Sbms	    pevent->pm_event, pevent->pm_escr_eventselect,
970191672Sbms	    pevent->pm_cccr_select, pevent->pm_is_ti_event);
971191672Sbms
972191672Sbms	/*
973191672Sbms	 * Some PMC events are 'thread independent'and therefore
974191672Sbms	 * cannot be used for process-private modes if HTT is being
975191672Sbms	 * used.
976191672Sbms	 */
977191672Sbms
978191672Sbms	if (P4_EVENT_IS_TI(pevent) &&
979191672Sbms	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
980191672Sbms	    p4_system_has_htt)
981191672Sbms		return (EINVAL);
982191672Sbms
983191672Sbms	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
984191672Sbms
985191672Sbms	found   = 0;
986191672Sbms
987191672Sbms	/* look for a suitable ESCR for this event */
988191672Sbms	for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
989191672Sbms		if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
990191672Sbms			break;	/* out of ESCRs */
991191672Sbms		/*
992191672Sbms		 * Check ESCR row disposition.
993191672Sbms		 *
994191672Sbms		 * If the request is for a system-mode PMC, then the
995191672Sbms		 * ESCR row should not be in process-virtual mode, and
996191672Sbms		 * should also be free on the current CPU.
997191672Sbms		 */
998191672Sbms
999191672Sbms		if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1000191672Sbms		    if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
1001191672Sbms			pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
1002191672Sbms			    continue;
1003191672Sbms		}
1004191672Sbms
1005191672Sbms		/*
1006191672Sbms		 * If the request is for a process-virtual PMC, and if
1007191672Sbms		 * HTT is not enabled, we can use an ESCR row that is
1008191672Sbms		 * either FREE or already in process mode.
1009191672Sbms		 *
101053541Sshin		 * If HTT is enabled, then we need to ensure that a
1011191672Sbms		 * given ESCR is never allocated to two PMCS that
1012191672Sbms		 * could run simultaneously on the two logical CPUs of
1013191672Sbms		 * a CPU package.  We ensure this be only allocating
1014191672Sbms		 * ESCRs from rows marked as 'FREE'.
1015148385Sume		 */
1016191672Sbms
1017191672Sbms		if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
1018191672Sbms			if (p4_system_has_htt) {
1019191672Sbms				if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
1020191672Sbms					continue;
1021191672Sbms			} else
1022191672Sbms				if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
1023191672Sbms					continue;
1024191672Sbms		}
1025191672Sbms
1026148385Sume		/*
1027148385Sume		 * We found a suitable ESCR for this event.  Now check if
1028148385Sume		 * this escr can work with the PMC at row-index 'ri'.
1029191672Sbms		 */
1030191672Sbms
103153541Sshin		for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
1032191672Sbms			if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
1033191672Sbms				found = 1;
1034191672Sbms				break;
1035191672Sbms			}
1036191672Sbms	}
1037191672Sbms
103853541Sshin	if (found == 0)
1039191672Sbms		return (ESRCH);
1040191672Sbms
1041191672Sbms	KASSERT((int) escr >= 0 && escr < P4_NESCR,
1042191672Sbms	    ("[p4,%d] illegal ESCR value %d", __LINE__, escr));
1043191672Sbms
1044191672Sbms	/* mark ESCR row mode */
1045191672Sbms	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1046191672Sbms		pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
1047191672Sbms		P4_ESCR_MARK_ROW_STANDALONE(escr);
1048191672Sbms	} else {
1049191672Sbms		KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
1050191672Sbms		    ("[p4,%d] escr[%d] already in use", __LINE__, escr));
1051191672Sbms		P4_ESCR_MARK_ROW_THREAD(escr);
1052191672Sbms	}
1053191672Sbms
1054191672Sbms	pm->pm_md.pm_p4.pm_p4_escrmsr   = p4_escrs[escr].pm_escr_msr;
105553541Sshin	pm->pm_md.pm_p4.pm_p4_escr      = escr;
1056191672Sbms
1057191672Sbms	cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
1058191672Sbms	escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);
1059191672Sbms
1060191672Sbms	/* CCCR fields */
1061191672Sbms	if (caps & PMC_CAP_THRESHOLD)
1062191672Sbms		cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
1063191672Sbms		    P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
1064191672Sbms
1065191672Sbms	if (caps & PMC_CAP_EDGE)
1066191672Sbms		cccrvalue |= P4_CCCR_EDGE;
1067191672Sbms
1068191672Sbms	if (caps & PMC_CAP_INVERT)
1069191672Sbms		cccrvalue |= P4_CCCR_COMPLEMENT;
1070191672Sbms
1071191672Sbms	if (p4_system_has_htt)
1072191672Sbms		cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
1073148385Sume		    P4_CCCR_ACTIVE_THREAD_MASK;
1074191672Sbms	else			/* no HTT; thread field should be '11b' */
1075191672Sbms		cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
1076191672Sbms
1077191672Sbms	if (caps & PMC_CAP_CASCADE)
1078191672Sbms		cccrvalue |= P4_CCCR_CASCADE;
1079191672Sbms
1080191672Sbms	/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
1081191672Sbms	if (caps & PMC_CAP_INTERRUPT)
1082148385Sume		cccrvalue |= P4_CCCR_OVF_PMI_T0;
1083191672Sbms
1084191672Sbms	/* ESCR fields */
1085191672Sbms	if (caps & PMC_CAP_QUALIFIER)
1086191672Sbms		escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
1087191672Sbms		    P4_ESCR_EVENT_MASK_MASK;
1088191672Sbms	if (caps & PMC_CAP_TAGGING)
1089192923Sbms		escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
1090192923Sbms		    P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
1091192923Sbms	if (caps & PMC_CAP_QUALIFIER)
1092191672Sbms		escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
1093191672Sbms		    P4_ESCR_EVENT_MASK_MASK);
1094191672Sbms
1095192923Sbms	/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
1096191672Sbms	tflags = 0;
1097192923Sbms	if (caps & PMC_CAP_SYSTEM)
1098191672Sbms		tflags |= P4_ESCR_T0_OS;
1099191672Sbms	if (caps & PMC_CAP_USER)
1100191672Sbms		tflags |= P4_ESCR_T0_USR;
1101191672Sbms	if (tflags == 0)
1102191672Sbms		tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1103191672Sbms	escrvalue |= tflags;
1104192923Sbms
1105192923Sbms	pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
1106192923Sbms	pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
1107192923Sbms
1108192923Sbms	PMCDBG5(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
1109192923Sbms	    "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
1110192923Sbms	    cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
1111191672Sbms
1112191672Sbms	return (0);
1113192923Sbms}
1114192923Sbms
1115192923Sbms/*
1116192923Sbms * release a PMC.
1117192923Sbms */
1118192923Sbms
1119192923Sbmsstatic int
1120192923Sbmsp4_release_pmc(int cpu, int ri, struct pmc *pm)
1121192923Sbms{
1122192923Sbms	enum pmc_p4escr escr;
1123192923Sbms	struct p4_cpu *pc;
1124192547Sbms
1125192923Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
1126191672Sbms	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
1127192923Sbms
1128192923Sbms	escr = pm->pm_md.pm_p4.pm_p4_escr;
1129192923Sbms
1130192923Sbms	PMCDBG3(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
1131192547Sbms
1132192547Sbms	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1133192923Sbms		pc  = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1134192923Sbms
1135192923Sbms		KASSERT(pc->pc_p4pmcs[ri].phw_pmc == NULL,
1136192923Sbms		    ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));
1137192923Sbms
1138191672Sbms		P4_ESCR_UNMARK_ROW_STANDALONE(escr);
1139192923Sbms		KASSERT(pc->pc_escrs[escr] == ri,
1140191672Sbms		    ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
1141191672Sbms			escr, ri));
1142191672Sbms	        pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
1143191672Sbms	} else
1144191672Sbms		P4_ESCR_UNMARK_ROW_THREAD(escr);
1145191672Sbms
1146192547Sbms	return (0);
1147192547Sbms}
1148192547Sbms
1149192547Sbms/*
1150191672Sbms * Start a PMC
1151191672Sbms */
1152191672Sbms
1153194760Srwatsonstatic int
1154194760Srwatsonp4_start_pmc(int cpu, int ri)
1155194760Srwatson{
1156191672Sbms	int rc;
1157194760Srwatson	struct pmc *pm;
1158194760Srwatson	struct p4_cpu *pc;
1159194760Srwatson	struct p4pmc_descr *pd;
1160191672Sbms	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1161191672Sbms
1162191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1163191672Sbms	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1164192923Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
1165192923Sbms	    ("[p4,%d] illegal row-index %d", __LINE__, ri));
1166192923Sbms
1167192923Sbms	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1168192923Sbms	pm = pc->pc_p4pmcs[ri].phw_pmc;
1169192923Sbms	pd = &p4_pmcdesc[ri];
1170192923Sbms
1171191672Sbms	KASSERT(pm != NULL,
1172192547Sbms	    ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, cpu, ri));
1173233200Sjhb
1174191672Sbms	PMCDBG2(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);
1175191672Sbms
1176191672Sbms	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
1177191672Sbms	    ("[p4,%d] wrong PMC class %d", __LINE__,
1178191672Sbms		pd->pm_descr.pd_class));
1179191672Sbms
1180191672Sbms	/* retrieve the desired CCCR/ESCR values from the PMC */
1181191672Sbms	cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
1182191672Sbms	escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
1183191672Sbms	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
1184191672Sbms
1185191672Sbms	/* extract and zero the logical processor selection bits */
1186191672Sbms	cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
1187191672Sbms	escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1188191672Sbms	cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
118953541Sshin	escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1190191672Sbms
1191191672Sbms	if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */
119262587Sitojun		cccrtbits <<= 1;
1193191672Sbms		escrtbits >>= 2;
1194191672Sbms	}
1195151539Ssuz
1196191672Sbms	/* start system mode PMCs directly */
1197191672Sbms	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1198191672Sbms		wrmsr(escrmsr, escrvalue | escrtbits);
1199191672Sbms		wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
1200191672Sbms		return 0;
1201191672Sbms	}
120253541Sshin
1203191672Sbms	/*
1204191672Sbms	 * Thread mode PMCs
1205191672Sbms	 *
1206191672Sbms	 * On HTT machines, the same PMC could be scheduled on the
1207191672Sbms	 * same physical CPU twice (once for each logical CPU), for
1208191672Sbms	 * example, if two threads of a multi-threaded process get
1209191672Sbms	 * scheduled on the same CPU.
1210191672Sbms	 *
1211191672Sbms	 */
1212191672Sbms
1213191672Sbms	mtx_lock_spin(&pc->pc_mtx);
1214191672Sbms
1215191672Sbms	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1216191672Sbms	KASSERT(rc == 0 || rc == 1,
1217191341Srwatson	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1218191672Sbms		rc));
121953541Sshin
1220191672Sbms	if (rc == 0) {		/* 1st CPU and the non-HTT case */
1221233200Sjhb
1222192547Sbms		KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
1223191672Sbms		    ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
1224120941Sume			cpu, ri, pd->pm_cccr_msr));
1225192923Sbms
1226192923Sbms		/* write out the low 40 bits of the saved value to hardware */
1227192923Sbms		wrmsr(pd->pm_pmc_msr,
1228191672Sbms		    P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
1229191672Sbms
1230120941Sume	} else if (rc == 1) {		/* 2nd CPU */
1231191672Sbms
1232191672Sbms		/*
1233191672Sbms		 * Stop the PMC and retrieve the CCCR and ESCR values
1234191672Sbms		 * from their MSRs, and turn on the additional T[0/1]
1235191672Sbms		 * bits for the 2nd CPU.
1236191672Sbms		 */
1237191672Sbms
1238191672Sbms		cccrvalue = rdmsr(pd->pm_cccr_msr);
1239191672Sbms		wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1240191672Sbms
1241191672Sbms		/* check that the configuration bits read back match the PMC */
1242191672Sbms		KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
1243191672Sbms		    (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
1244191672Sbms		    ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
1245191672Sbms			"cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
1246191672Sbms			cccrvalue & P4_CCCR_Tx_MASK,
1247191672Sbms			pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
1248191672Sbms		KASSERT(cccrvalue & P4_CCCR_ENABLE,
1249191672Sbms		    ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
1250191672Sbms			__LINE__, rc, cpu, ri));
1251191672Sbms		KASSERT((cccrvalue & cccrtbits) == 0,
1252191672Sbms		    ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d"
1253191672Sbms		     "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
1254191672Sbms			cccrvalue, cccrtbits));
1255191672Sbms
1256191672Sbms		escrvalue = rdmsr(escrmsr);
1257191672Sbms
1258191672Sbms		KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
1259191672Sbms		    (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
1260191672Sbms		    ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
1261191672Sbms			"escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
1262191672Sbms			escrvalue & P4_ESCR_Tx_MASK,
1263191672Sbms			pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
1264191672Sbms		KASSERT((escrvalue & escrtbits) == 0,
1265191672Sbms		    ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
1266191672Sbms		     "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
1267191672Sbms			rc, cpu, ri, escrmsr, escrvalue, escrtbits));
1268191672Sbms	}
1269191672Sbms
1270192923Sbms	/* Enable the correct bits for this CPU. */
1271192923Sbms	escrvalue |= escrtbits;
1272192923Sbms	cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
1273192923Sbms
1274191672Sbms	/* Save HW value at the time of starting hardware */
1275191672Sbms	P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
1276191672Sbms
1277191672Sbms	/* Program the ESCR and CCCR and start the PMC */
1278191672Sbms	wrmsr(escrmsr, escrvalue);
1279191672Sbms	wrmsr(pd->pm_cccr_msr, cccrvalue);
1280191672Sbms
1281191672Sbms	++rc;
1282191672Sbms	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1283191672Sbms
1284191672Sbms	mtx_unlock_spin(&pc->pc_mtx);
1285191672Sbms
1286191672Sbms	PMCDBG6(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
1287191672Sbms	    "escrmsr=0x%x escrvalue=0x%x", cpu, rc,
1288191672Sbms	    ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue);
1289192923Sbms	PMCDBG2(MDP,STA,2,"cccr_config=0x%x v=%jx",
1290191672Sbms	    cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
1291191672Sbms
1292191672Sbms	return (0);
1293191672Sbms}
1294191672Sbms
1295191672Sbms/*
1296191672Sbms * Stop a PMC.
1297191672Sbms */
1298191672Sbms
1299191672Sbmsstatic int
1300191672Sbmsp4_stop_pmc(int cpu, int ri)
1301191672Sbms{
1302191672Sbms	int rc;
1303191672Sbms	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1304191672Sbms	struct pmc *pm;
1305191672Sbms	struct p4_cpu *pc;
1306191672Sbms	struct p4pmc_descr *pd;
1307191672Sbms	pmc_value_t tmp;
1308191672Sbms
1309191672Sbms	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1310191672Sbms	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1311191672Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
1312191672Sbms	    ("[p4,%d] illegal row index %d", __LINE__, ri));
1313195760Srwatson
1314191672Sbms	pd = &p4_pmcdesc[ri];
1315191672Sbms	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1316191672Sbms	pm = pc->pc_p4pmcs[ri].phw_pmc;
1317191672Sbms
1318191672Sbms	KASSERT(pm != NULL,
1319195760Srwatson	    ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));
1320191672Sbms
1321191672Sbms	PMCDBG2(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
1322191672Sbms
1323191672Sbms	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1324191672Sbms		wrmsr(pd->pm_cccr_msr,
1325191672Sbms		    pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
1326191672Sbms		return (0);
1327191672Sbms	}
1328191672Sbms
1329191672Sbms	/*
1330191672Sbms	 * Thread mode PMCs.
1331191672Sbms	 *
1332191672Sbms	 * On HTT machines, this PMC may be in use by two threads
1333191672Sbms	 * running on two logical CPUS.  Thus we look at the
1334230076Sjhb	 * 'runcount' field and only turn off the appropriate TO/T1
1335230076Sjhb	 * bits (and keep the PMC running) if two logical CPUs were
1336191672Sbms	 * using the PMC.
1337191672Sbms	 *
1338191672Sbms	 */
1339191672Sbms
1340191672Sbms	/* bits to mask */
1341191672Sbms	cccrtbits = P4_CCCR_OVF_PMI_T0;
1342191672Sbms	escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
1343191672Sbms	if (P4_CPU_IS_HTT_SECONDARY(cpu)) {
1344191672Sbms		cccrtbits <<= 1;
1345191672Sbms		escrtbits >>= 2;
1346191672Sbms	}
1347191672Sbms
1348191672Sbms	mtx_lock_spin(&pc->pc_mtx);
1349191672Sbms
1350191672Sbms	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1351191672Sbms
1352191672Sbms	KASSERT(rc == 2 || rc == 1,
1353191672Sbms	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1354191672Sbms		rc));
1355191672Sbms
1356191672Sbms	--rc;
1357191672Sbms
1358191672Sbms	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1359191672Sbms
1360191672Sbms	/* Stop this PMC */
1361191672Sbms	cccrvalue = rdmsr(pd->pm_cccr_msr);
1362191672Sbms	wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1363191672Sbms
1364191672Sbms	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
1365191672Sbms	escrvalue = rdmsr(escrmsr);
1366191672Sbms
1367191672Sbms	/* The current CPU should be running on this PMC */
1368191672Sbms	KASSERT(escrvalue & escrtbits,
1369191672Sbms	    ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
1370191672Sbms		"escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
1371191672Sbms		escrvalue, escrtbits));
1372191672Sbms	KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
1373191672Sbms	    (cccrvalue & cccrtbits),
1374191672Sbms	    ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
1375191672Sbms		"tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
1376191672Sbms
1377191672Sbms	/* get the current hardware reading */
1378191672Sbms	tmp = rdmsr(pd->pm_pmc_msr);
1379191672Sbms
1380191672Sbms	if (rc == 1) {		/* need to keep the PMC running */
1381191672Sbms		escrvalue &= ~escrtbits;
1382191672Sbms		cccrvalue &= ~cccrtbits;
1383191672Sbms		wrmsr(escrmsr, escrvalue);
1384191672Sbms		wrmsr(pd->pm_cccr_msr, cccrvalue);
1385191672Sbms	}
1386191672Sbms
1387191672Sbms	mtx_unlock_spin(&pc->pc_mtx);
1388191672Sbms
1389191672Sbms	PMCDBG5(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
1390191672Sbms	    "escrval=0x%x", cpu, rc, ri, escrmsr, escrvalue);
1391191672Sbms	PMCDBG2(MDP,STO,2, "cccrval=0x%x v=%jx", cccrvalue, tmp);
1392191672Sbms
1393191672Sbms	if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
1394191672Sbms		tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
1395191672Sbms	else
1396191672Sbms		tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
1397191672Sbms
1398233200Sjhb	P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
1399230076Sjhb
1400191672Sbms	return 0;
1401191672Sbms}
140253541Sshin
1403191672Sbms/*
1404191672Sbms * Handle an interrupt.
1405191672Sbms *
1406230076Sjhb * The hardware sets the CCCR_OVF whenever a counter overflow occurs,
1407191672Sbms * so the handler examines all the 18 CCCR registers, processing the
1408191672Sbms * counters that have overflowed.
1409191672Sbms *
1410191672Sbms * On HTT machines, the CCCR register is shared and will interrupt
1411191672Sbms * both logical processors if so configured.  Thus multiple logical
1412191672Sbms * CPUs could enter the NMI service routine at the same time.  These
1413191672Sbms * will get serialized using a per-cpu spinlock dedicated for use in
1414233200Sjhb * the NMI handler.
141553541Sshin */
1416230076Sjhb
1417230076Sjhbstatic int
1418230076Sjhbp4_intr(int cpu, struct trapframe *tf)
1419230076Sjhb{
1420230076Sjhb	uint32_t cccrval, ovf_mask, ovf_partner;
1421230076Sjhb	int did_interrupt, error, ri;
1422230076Sjhb	struct p4_cpu *pc;
1423230076Sjhb	struct pmc *pm;
1424230076Sjhb	pmc_value_t v;
1425230076Sjhb
1426230076Sjhb	PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
1427230076Sjhb	    TRAPF_USERMODE(tf));
1428230076Sjhb
1429230076Sjhb	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1430230076Sjhb
1431230076Sjhb	ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
1432230076Sjhb	    P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
1433230076Sjhb	ovf_mask |= P4_CCCR_OVF;
1434230076Sjhb	if (p4_system_has_htt)
1435191672Sbms		ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
1436191672Sbms		    P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
1437191672Sbms	else
1438191672Sbms		ovf_partner = 0;
1439191672Sbms	did_interrupt = 0;
1440191672Sbms
1441191672Sbms	if (p4_system_has_htt)
1442191672Sbms		P4_PCPU_ACQ_INTR_SPINLOCK(pc);
1443191672Sbms
1444191672Sbms	/*
1445191672Sbms	 * Loop through all CCCRs, looking for ones that have
1446191672Sbms	 * interrupted this CPU.
144753541Sshin	 */
1448230076Sjhb	for (ri = 0; ri < P4_NPMCS; ri++) {
144953541Sshin
1450191672Sbms		/*
1451191672Sbms		 * Check if our partner logical CPU has already marked
1452191672Sbms		 * this PMC has having interrupted it.  If so, reset
1453191672Sbms		 * the flag and process the interrupt, but leave the
1454191672Sbms		 * hardware alone.
1455191672Sbms		 */
1456191672Sbms		if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
1457191672Sbms			P4_PCPU_SET_INTRFLAG(pc,ri,0);
1458191672Sbms			did_interrupt = 1;
1459191672Sbms
1460191672Sbms			/*
1461191672Sbms			 * Ignore de-configured or stopped PMCs.
1462230076Sjhb			 * Ignore PMCs not in sampling mode.
1463191672Sbms			 */
1464191672Sbms			pm = pc->pc_p4pmcs[ri].phw_pmc;
1465191672Sbms			if (pm == NULL ||
1466191672Sbms			    pm->pm_state != PMC_STATE_RUNNING ||
1467191672Sbms			    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
1468191672Sbms				continue;
1469191672Sbms			}
1470191672Sbms			(void) pmc_process_interrupt(cpu, PMC_HR, pm, tf,
1471191672Sbms			    TRAPF_USERMODE(tf));
1472191672Sbms			continue;
1473191672Sbms		}
1474191672Sbms
1475191672Sbms		/*
1476191672Sbms		 * Fresh interrupt.  Look for the CCCR_OVF bit
1477191672Sbms		 * and the OVF_Tx bit for this logical
1478191672Sbms		 * processor being set.
1479191672Sbms		 */
1480191672Sbms		cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri);
1481191672Sbms
1482191672Sbms		if ((cccrval & ovf_mask) != ovf_mask)
1483191672Sbms			continue;
1484191672Sbms
148553541Sshin		/*
1486191672Sbms		 * If the other logical CPU would also have been
1487191672Sbms		 * interrupted due to the PMC being shared, record
1488191672Sbms		 * this fact in the per-cpu saved interrupt flag
1489230076Sjhb		 * bitmask.
1490230076Sjhb		 */
1491191672Sbms		if (p4_system_has_htt && (cccrval & ovf_partner))
1492191672Sbms			P4_PCPU_SET_INTRFLAG(pc, ri, 1);
1493191672Sbms
1494191672Sbms		v = rdmsr(P4_PERFCTR_MSR_FIRST + ri);
1495191672Sbms
1496191672Sbms		PMCDBG2(MDP,INT, 2, "ri=%d v=%jx", ri, v);
1497191672Sbms
1498191672Sbms		/* Stop the counter, and reset the overflow  bit */
1499120941Sume		cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
1500191672Sbms		wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval);
1501191672Sbms
1502191672Sbms		did_interrupt = 1;
1503191672Sbms
1504191672Sbms		/*
1505191672Sbms		 * Ignore de-configured or stopped PMCs.  Ignore PMCs
1506191672Sbms		 * not in sampling mode.
1507191672Sbms		 */
1508191672Sbms		pm = pc->pc_p4pmcs[ri].phw_pmc;
1509191672Sbms
1510191672Sbms		if (pm == NULL ||
1511191672Sbms		    pm->pm_state != PMC_STATE_RUNNING ||
1512191672Sbms		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
1513191672Sbms			continue;
1514191672Sbms		}
1515191672Sbms
1516191672Sbms		/*
1517191672Sbms		 * Process the interrupt.  Re-enable the PMC if
1518191672Sbms		 * processing was successful.
1519191672Sbms		 */
1520191672Sbms		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
1521191672Sbms		    TRAPF_USERMODE(tf));
1522191672Sbms
1523191672Sbms		/*
1524191672Sbms		 * Only the first processor executing the NMI handler
1525191672Sbms		 * in a HTT pair will restart a PMC, and that too
1526191672Sbms		 * only if there were no errors.
1527191672Sbms		 */
1528191672Sbms		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
1529191672Sbms			pm->pm_sc.pm_reloadcount);
1530191672Sbms		wrmsr(P4_PERFCTR_MSR_FIRST + ri, v);
1531191672Sbms		if (error == 0)
1532191672Sbms			wrmsr(P4_CCCR_MSR_FIRST + ri,
1533191672Sbms			    cccrval | P4_CCCR_ENABLE);
1534191672Sbms	}
1535191672Sbms
1536191672Sbms	/* allow the other CPU to proceed */
1537191672Sbms	if (p4_system_has_htt)
1538191672Sbms		P4_PCPU_REL_INTR_SPINLOCK(pc);
1539191672Sbms
1540191672Sbms	/*
1541191672Sbms	 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
1542191672Sbms	 * masked when a PMC interrupts the CPU.  We need to unmask
1543191672Sbms	 * the interrupt source explicitly.
1544191672Sbms	 */
1545191672Sbms
1546191672Sbms	if (did_interrupt)
1547191672Sbms		lapic_reenable_pmc();
1548191672Sbms
1549191672Sbms	atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
1550191672Sbms	    &pmc_stats.pm_intr_ignored, 1);
1551191672Sbms
1552191672Sbms	return (did_interrupt);
1553191672Sbms}
1554191672Sbms
1555191672Sbms/*
1556191672Sbms * Describe a CPU's PMC state.
1557191672Sbms */
1558191672Sbms
155953541Sshinstatic int
1560191672Sbmsp4_describe(int cpu, int ri, struct pmc_info *pi,
1561191672Sbms    struct pmc **ppmc)
1562191672Sbms{
1563191672Sbms	int error;
156462587Sitojun	size_t copied;
1565191672Sbms	const struct p4pmc_descr *pd;
1566191672Sbms
156753541Sshin	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1568191672Sbms	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
1569200871Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
1570200871Sbms	    ("[p4,%d] row-index %d out of range", __LINE__, ri));
1571191672Sbms
1572191672Sbms	PMCDBG2(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);
1573191672Sbms
1574191672Sbms	if (P4_CPU_IS_HTT_SECONDARY(cpu))
1575191672Sbms		return (EINVAL);
1576191672Sbms
1577191672Sbms	pd  = &p4_pmcdesc[ri];
1578191672Sbms
1579191672Sbms	if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
1580191672Sbms	    PMC_NAME_MAX, &copied)) != 0)
1581191672Sbms		return (error);
1582191672Sbms
1583191672Sbms	pi->pm_class = pd->pm_descr.pd_class;
1584191672Sbms
1585191672Sbms	if (p4_pcpu[cpu]->pc_p4pmcs[ri].phw_state & PMC_PHW_FLAG_IS_ENABLED) {
1586191672Sbms		pi->pm_enabled = TRUE;
1587191672Sbms		*ppmc          = p4_pcpu[cpu]->pc_p4pmcs[ri].phw_pmc;
1588191672Sbms	} else {
1589191672Sbms		pi->pm_enabled = FALSE;
1590191672Sbms		*ppmc          = NULL;
1591191672Sbms	}
1592191672Sbms
1593191672Sbms	return (0);
1594191672Sbms}
1595191672Sbms
1596191672Sbms/*
1597191672Sbms * Get MSR# for use with RDPMC.
159853541Sshin */
1599191672Sbms
1600191672Sbmsstatic int
1601191672Sbmsp4_get_msr(int ri, uint32_t *msr)
1602191672Sbms{
1603191672Sbms	KASSERT(ri >= 0 && ri < P4_NPMCS,
1604191672Sbms	    ("[p4,%d] ri %d out of range", __LINE__, ri));
1605191672Sbms
1606191672Sbms	*msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;
1607191672Sbms
1608191672Sbms	PMCDBG2(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);
1609191672Sbms
1610191672Sbms	return 0;
1611191672Sbms}
1612191672Sbms
1613191672Sbms
1614191672Sbmsint
1615191672Sbmspmc_p4_initialize(struct pmc_mdep *md, int ncpus)
1616191672Sbms{
1617191672Sbms	struct pmc_classdep *pcd;
1618191672Sbms	struct p4_event_descr *pe;
1619191672Sbms
1620191672Sbms	KASSERT(md != NULL, ("[p4,%d] md is NULL", __LINE__));
1621191672Sbms	KASSERT(cpu_vendor_id == CPU_VENDOR_INTEL,
1622191672Sbms	    ("[p4,%d] Initializing non-intel processor", __LINE__));
1623191672Sbms
1624191672Sbms	PMCDBG0(MDP,INI,1, "p4-initialize");
1625191672Sbms
1626191672Sbms	/* Allocate space for pointers to per-cpu descriptors. */
1627191672Sbms	p4_pcpu = malloc(sizeof(*p4_pcpu) * ncpus, M_PMC, M_ZERO | M_WAITOK);
1628191672Sbms
1629192562Sbms	/* Fill in the class dependent descriptor. */
1630191672Sbms	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4];
1631191672Sbms
1632191672Sbms	switch (md->pmd_cputype) {
1633191672Sbms	case PMC_CPU_INTEL_PIV:
1634191672Sbms
1635191672Sbms		pcd->pcd_caps		= P4_PMC_CAPS;
1636191672Sbms		pcd->pcd_class		= PMC_CLASS_P4;
163753541Sshin		pcd->pcd_num		= P4_NPMCS;
1638191672Sbms		pcd->pcd_ri		= md->pmd_npmc;
1639191672Sbms		pcd->pcd_width		= 40;
164062587Sitojun
1641192923Sbms		pcd->pcd_allocate_pmc	= p4_allocate_pmc;
1642191672Sbms		pcd->pcd_config_pmc	= p4_config_pmc;
1643192562Sbms		pcd->pcd_describe	= p4_describe;
164453541Sshin		pcd->pcd_get_config	= p4_get_config;
164562587Sitojun		pcd->pcd_get_msr	= p4_get_msr;
1646192562Sbms		pcd->pcd_pcpu_fini 	= p4_pcpu_fini;
1647192562Sbms		pcd->pcd_pcpu_init    	= p4_pcpu_init;
1648192562Sbms		pcd->pcd_read_pmc	= p4_read_pmc;
1649191672Sbms		pcd->pcd_release_pmc	= p4_release_pmc;
165053541Sshin		pcd->pcd_start_pmc	= p4_start_pmc;
165153541Sshin		pcd->pcd_stop_pmc	= p4_stop_pmc;
1652191672Sbms		pcd->pcd_write_pmc	= p4_write_pmc;
1653191672Sbms
1654191672Sbms		md->pmd_pcpu_fini	= NULL;
1655191672Sbms		md->pmd_pcpu_init	= NULL;
165653541Sshin		md->pmd_intr	    	= p4_intr;
1657191672Sbms		md->pmd_npmc	       += P4_NPMCS;
165853541Sshin
1659191672Sbms		/* model specific configuration */
1660191672Sbms		if ((cpu_id & 0xFFF) < 0xF27) {
1661230076Sjhb
166253541Sshin			/*
1663191672Sbms			 * On P4 and Xeon with CPUID < (Family 15,
1664191672Sbms			 * Model 2, Stepping 7), only one ESCR is
1665191672Sbms			 * available for the IOQ_ALLOCATION event.
1666191672Sbms			 */
1667191672Sbms
1668191672Sbms			pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
166953541Sshin			pe->pm_escrs[1] = P4_ESCR_NONE;
1670191672Sbms		}
1671191672Sbms
167253541Sshin		break;
1673191672Sbms
1674191672Sbms	default:
1675191672Sbms		KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__));
167653541Sshin		return ENOSYS;
167753541Sshin	}
1678191672Sbms
1679191672Sbms	return (0);
1680191672Sbms}
1681191672Sbms
1682233200Sjhbvoid
1683191672Sbmspmc_p4_finalize(struct pmc_mdep *md)
1684191672Sbms{
1685191672Sbms#if	defined(INVARIANTS)
1686191672Sbms	int i, ncpus;
1687191672Sbms#endif
1688191672Sbms
1689191672Sbms	KASSERT(p4_pcpu != NULL,
1690191672Sbms	    ("[p4,%d] NULL p4_pcpu", __LINE__));
1691191672Sbms
1692191672Sbms#if	defined(INVARIANTS)
1693191672Sbms	ncpus = pmc_cpu_max();
1694191672Sbms	for (i = 0; i < ncpus; i++)
1695191672Sbms		KASSERT(p4_pcpu[i] == NULL, ("[p4,%d] non-null pcpu %d",
1696191672Sbms		    __LINE__, i));
1697191672Sbms#endif
1698191672Sbms
1699191672Sbms	free(p4_pcpu, M_PMC);
1700191672Sbms	p4_pcpu = NULL;
1701230076Sjhb}
1702230076Sjhb