hwpmc_piv.c: diff of r184214 against r184802
(unchanged and deleted lines carry r184214 line numbers; added lines carry r184802 line numbers)
1/*-
2 * Copyright (c) 2003-2007 Joseph Koshy
3 * Copyright (c) 2007 The FreeBSD Foundation
4 * All rights reserved.
5 *
6 * Portions of this software were developed by A. Joseph Koshy under
7 * sponsorship from the FreeBSD Foundation and Google, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_piv.c 184214 2008-10-23 20:26:15Z des $");
32__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_piv.c 184802 2008-11-09 17:37:54Z jkoshy $");
33
34#include <sys/param.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/pmc.h>
38#include <sys/pmckern.h>
39#include <sys/smp.h>
40#include <sys/systm.h>
41
42#include <machine/cpu.h>
43#include <machine/cpufunc.h>
44#include <machine/md_var.h>
45#include <machine/specialreg.h>
46
47/*
48 * PENTIUM 4 SUPPORT
49 *
50 * The P4 has 18 PMCs, divided into 4 groups with 4, 4, 4 and 6 PMCs
51 * respectively. Each PMC comprises two model-specific registers:
52 * a counter configuration control register (CCCR) and a counter
53 * register that holds the actual event counts.
54 *
55 * Configuring an event requires the use of one of 45 event selection
56 * control registers (ESCR). Events are associated with specific
57 * ESCRs. Each PMC group has a set of ESCRs it can use.
58 *
59 * - The BPU counter group (4 PMCs) can use the 16 ESCRs:
60 * BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
61 * PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
62 *
63 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
64 * TC_ESCR{0,1}, TBPU_ESCR{0,1}.
65 *
66 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
67 * FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
68 * DAC_ESCR{0,1}.
69 *
70 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
71 * ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
72 *
73 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
74 * present) of a counter group. Odd-numbered ESCRs can be used with
75 * counters 2, 3 and 5 (if present) of a counter group. The
76 * 'p4_escrs[]' table describes these restrictions in a form that
77 * function 'p4_allocate()' uses for making allocation decisions.
78 *
79 * SYSTEM-MODE AND THREAD-MODE ALLOCATION
80 *
81 * In addition to remembering the state of PMC rows
82 * ('FREE', 'STANDALONE', or 'THREAD'), we similarly need to track the
83 * state of ESCR rows. If an ESCR is allocated to a system-mode PMC
84 * on a CPU we cannot allocate this to a thread-mode PMC. On a
85 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each
86 * CPU is tracked by the pc_escrs[] array.
87 *
88 * Each system-mode PMC that is using an ESCR records its row-index in
89 * the appropriate entry and system-mode allocation attempts check
90 * that an ESCR is available using this array. Process-mode PMCs do
91 * not use the pc_escrs[] array, since the ESCR row itself would have been
92 * marked as in 'THREAD' mode.
93 *
94 * HYPERTHREADING SUPPORT
95 *
96 * When HTT is enabled, the FreeBSD kernel treats the two 'logical'
97 * cpus as independent CPUs and can schedule kernel threads on them
98 * independently. However, the two logical CPUs share the same set of
99 * PMC resources. We need to ensure that:
100 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
101 * and,
102 * - Threads of multi-threaded processes that get scheduled on the same
103 * physical CPU are handled correctly.
104 *
105 * HTT Detection
106 *
107 * Not all HTT capable systems will have HTT enabled. We detect the
108 * presence of HTT by detecting if 'p4_init()' was called for a secondary
109 * CPU in a HTT pair.
110 *
111 * Note that hwpmc(4) cannot currently deal with a change in HTT status once
112 * loaded.
113 *
114 * Handling HTT READ / WRITE / START / STOP
115 *
116 * PMC resources are shared across the CPUs in an HTT pair. We
117 * designate the lower numbered CPU in a HTT pair as the 'primary'
118 * CPU. In each primary CPU's state we keep track of a 'runcount'
119 * which reflects the number of PMC-using processes that have been
120 * scheduled on its secondary CPU. Process-mode PMC operations will
121 * actually 'start' or 'stop' hardware only if these are the first or
122 * last processes respectively to use the hardware. PMC values
123 * written by a 'write' operation are saved and are transferred to
124 * hardware at PMC 'start' time if the runcount is 0. If the runcount
125 * is greater than 0 at the time of a 'start' operation, we keep track
126 * of the actual hardware value at the time of the 'start' operation
127 * and use this to adjust the final readings at PMC 'stop' or 'read'
128 * time.
129 *
130 * Execution sequences:
131 *
132 * Case 1: CPUx +...- (no overlap)
133 * CPUy +...-
134 * RC 0 1 0 1 0
135 *
136 * Case 2: CPUx +........- (partial overlap)
137 * CPUy +........-
138 * RC 0 1 2 1 0
139 *
140 * Case 3: CPUx +..............- (fully overlapped)
141 * CPUy +.....-
142 * RC 0 1 2 1 0
143 *
144 * Key:
145 * 'CPU[xy]' : one of the two logical processors on a HTT CPU.
146 * 'RC' : run count (#threads per physical core).
147 * '+' : point in time when a thread is put on a CPU.
148 * '-' : point in time where a thread is taken off a CPU.
149 *
150 * Handling HTT CONFIG
151 *
152 * Different processes attached to the same PMC may get scheduled on
153 * the two logical processors in the package. We keep track of config
154 * and de-config operations using the CFGFLAGS fields of the per-physical
155 * cpu state.
156 *
157 * Handling TSCs
158 *
159 * TSCs are architectural state and each CPU in a HTT pair has its own
160 * TSC register.
161 */
162
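The runcount protocol described above can be modeled in a few lines of standalone C. This is an illustrative sketch only (hw_counter and hw_enabled stand in for the PERFCTR and CCCR MSRs, and the real driver also folds the saved PMC value into its readings); it shows why hardware is programmed only by the first thread to start and halted only by the last to stop:

#include <stdio.h>

static unsigned long hw_counter;	/* stands in for the PERFCTR MSR */
static int hw_enabled;			/* stands in for P4_CCCR_ENABLE */
static int runcount;			/* like the low nibble of pc_flags[ri] */
static unsigned long baseline[2];	/* like P4_PCPU_HW_VALUE(), per logical CPU */

static void
thread_start(int ltcpu, unsigned long written_value)
{
	if (runcount++ == 0) {		/* first user: program and start hw */
		hw_counter = written_value;
		hw_enabled = 1;
	}
	baseline[ltcpu] = hw_counter;	/* hw value at 'start' time */
}

static unsigned long
thread_stop(int ltcpu)
{
	unsigned long delta = hw_counter - baseline[ltcpu];

	if (--runcount == 0)		/* last user: stop hw */
		hw_enabled = 0;
	return (delta);
}

int
main(void)
{
	thread_start(0, 0);	/* RC 0 -> 1: hardware starts (Case 2 above) */
	hw_counter += 100;	/* events while only CPUx's thread runs */
	thread_start(1, 0);	/* RC 1 -> 2: hardware left running */
	hw_counter += 50;	/* events seen by both threads */
	printf("CPUy: %lu\n", thread_stop(1));	/* prints 50 */
	printf("CPUx: %lu\n", thread_stop(0));	/* prints 150 */
	return (0);
}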
163#define P4_PMCS() \
164 P4_PMC(BPU_COUNTER0) \
165 P4_PMC(BPU_COUNTER1) \
166 P4_PMC(BPU_COUNTER2) \
167 P4_PMC(BPU_COUNTER3) \
168 P4_PMC(MS_COUNTER0) \
169 P4_PMC(MS_COUNTER1) \
170 P4_PMC(MS_COUNTER2) \
171 P4_PMC(MS_COUNTER3) \
172 P4_PMC(FLAME_COUNTER0) \
173 P4_PMC(FLAME_COUNTER1) \
174 P4_PMC(FLAME_COUNTER2) \
175 P4_PMC(FLAME_COUNTER3) \
176 P4_PMC(IQ_COUNTER0) \
177 P4_PMC(IQ_COUNTER1) \
178 P4_PMC(IQ_COUNTER2) \
179 P4_PMC(IQ_COUNTER3) \
180 P4_PMC(IQ_COUNTER4) \
181 P4_PMC(IQ_COUNTER5) \
182 P4_PMC(NONE)
183
184enum pmc_p4pmc {
185#undef P4_PMC
186#define P4_PMC(N) P4_PMC_##N ,
187 P4_PMCS()
188};
189
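P4_PMCS() and P4_PMC() form an X-macro: a single counter list is expanded under different definitions of P4_PMC() to build parallel tables (this enum here; the descriptor tables below use the same trick via P4_ESCR() and P4_EVDESCR()). A minimal standalone sketch of the idiom, with made-up names:

#include <stdio.h>

#define COUNTERS()	X(BPU_COUNTER0) X(BPU_COUNTER1) X(NONE)

/* Expansion 1: an enum of counter ids; CTR_NONE == number of real counters. */
enum ctr {
#define X(N) CTR_##N,
	COUNTERS()
#undef X
};

/* Expansion 2, from the same list: a printable name table. */
static const char *ctr_name[] = {
#define X(N) #N,
	COUNTERS()
#undef X
};

int
main(void)
{
	printf("%d real counters; counter 0 is %s\n", CTR_NONE, ctr_name[0]);
	return (0);
}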
190/*
191 * P4 ESCR descriptors
192 */
193
194#define P4_ESCRS() \
195 P4_ESCR(BSU_ESCR0, 0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE) \
196 P4_ESCR(BSU_ESCR1, 0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE) \
197 P4_ESCR(FSB_ESCR0, 0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE) \
198 P4_ESCR(FSB_ESCR1, 0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE) \
199 P4_ESCR(FIRM_ESCR0, 0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
200 P4_ESCR(FIRM_ESCR1, 0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
201 P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
202 P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
203 P4_ESCR(DAC_ESCR0, 0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
204 P4_ESCR(DAC_ESCR1, 0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
205 P4_ESCR(MOB_ESCR0, 0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE) \
206 P4_ESCR(MOB_ESCR1, 0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE) \
207 P4_ESCR(PMH_ESCR0, 0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE) \
208 P4_ESCR(PMH_ESCR1, 0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE) \
209 P4_ESCR(SAAT_ESCR0, 0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
210 P4_ESCR(SAAT_ESCR1, 0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
211 P4_ESCR(U2L_ESCR0, 0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
212 P4_ESCR(U2L_ESCR1, 0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
213 P4_ESCR(BPU_ESCR0, 0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE) \
214 P4_ESCR(BPU_ESCR1, 0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE) \
215 P4_ESCR(IS_ESCR0, 0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE) \
216 P4_ESCR(IS_ESCR1, 0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE) \
217 P4_ESCR(ITLB_ESCR0, 0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE) \
218 P4_ESCR(ITLB_ESCR1, 0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE) \
219 P4_ESCR(CRU_ESCR0, 0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
220 P4_ESCR(CRU_ESCR1, 0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
221 P4_ESCR(IQ_ESCR0, 0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
222 P4_ESCR(IQ_ESCR1, 0x3BB, IQ_COUNTER1, IQ_COUNTER3, IQ_COUNTER5) \
223 P4_ESCR(RAT_ESCR0, 0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
224 P4_ESCR(RAT_ESCR1, 0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
225 P4_ESCR(SSU_ESCR0, 0x3BE, IQ_COUNTER0, IQ_COUNTER2, IQ_COUNTER4) \
226 P4_ESCR(MS_ESCR0, 0x3C0, MS_COUNTER0, MS_COUNTER1, NONE) \
227 P4_ESCR(MS_ESCR1, 0x3C1, MS_COUNTER2, MS_COUNTER3, NONE) \
228 P4_ESCR(TBPU_ESCR0, 0x3C2, MS_COUNTER0, MS_COUNTER1, NONE) \
229 P4_ESCR(TBPU_ESCR1, 0x3C3, MS_COUNTER2, MS_COUNTER3, NONE) \
230 P4_ESCR(TC_ESCR0, 0x3C4, MS_COUNTER0, MS_COUNTER1, NONE) \
231 P4_ESCR(TC_ESCR1, 0x3C5, MS_COUNTER2, MS_COUNTER3, NONE) \
232 P4_ESCR(IX_ESCR0, 0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE) \
233 P4_ESCR(IX_ESCR1, 0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE) \
234 P4_ESCR(ALF_ESCR0, 0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
235 P4_ESCR(ALF_ESCR1, 0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
236 P4_ESCR(CRU_ESCR2, 0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
237 P4_ESCR(CRU_ESCR3, 0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
238 P4_ESCR(CRU_ESCR4, 0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
239 P4_ESCR(CRU_ESCR5, 0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
240 P4_ESCR(NONE, ~0, NONE, NONE, NONE)
241
242enum pmc_p4escr {
243#define P4_ESCR(N, MSR, P1, P2, P3) P4_ESCR_##N ,
244 P4_ESCRS()
245#undef P4_ESCR
246};
247
248struct pmc_p4escr_descr {
249 const char pm_escrname[PMC_NAME_MAX];
250 u_short pm_escr_msr;
251 const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
252};
253
254static struct pmc_p4escr_descr p4_escrs[] =
255{
256#define P4_ESCR(N, MSR, P1, P2, P3) \
257 { \
258 .pm_escrname = #N, \
259 .pm_escr_msr = (MSR), \
260 .pm_pmcs = \
261 { \
262 P4_PMC_##P1, \
263 P4_PMC_##P2, \
264 P4_PMC_##P3 \
265 } \
266 } ,
267
268 P4_ESCRS()
269
270#undef P4_ESCR
271};
272
273/*
274 * P4 Event descriptor
275 */
276
277struct p4_event_descr {
278 const enum pmc_event pm_event;
279 const uint32_t pm_escr_eventselect;
280 const uint32_t pm_cccr_select;
281 const char pm_is_ti_event;
282 enum pmc_p4escr pm_escrs[P4_MAX_ESCR_PER_EVENT];
283};
284
285static struct p4_event_descr p4_events[] = {
286
287#define P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \
288 { \
289 .pm_event = PMC_EV_P4_##NAME, \
290 .pm_escr_eventselect = (ESCREVENTSEL), \
291 .pm_cccr_select = (CCCRSEL), \
292 .pm_is_ti_event = (TI_EVENT), \
293 .pm_escrs = \
294 { \
295 P4_ESCR_##ESCR0, \
296 P4_ESCR_##ESCR1 \
297 } \
298 }
299
300P4_EVDESCR(TC_DELIVER_MODE, 0x01, 0x01, TRUE, TC_ESCR0, TC_ESCR1),
301P4_EVDESCR(BPU_FETCH_REQUEST, 0x03, 0x00, FALSE, BPU_ESCR0, BPU_ESCR1),
302P4_EVDESCR(ITLB_REFERENCE, 0x18, 0x03, FALSE, ITLB_ESCR0, ITLB_ESCR1),
303P4_EVDESCR(MEMORY_CANCEL, 0x02, 0x05, FALSE, DAC_ESCR0, DAC_ESCR1),
304P4_EVDESCR(MEMORY_COMPLETE, 0x08, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
305P4_EVDESCR(LOAD_PORT_REPLAY, 0x04, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
306P4_EVDESCR(STORE_PORT_REPLAY, 0x05, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
307P4_EVDESCR(MOB_LOAD_REPLAY, 0x03, 0x02, FALSE, MOB_ESCR0, MOB_ESCR1),
308P4_EVDESCR(PAGE_WALK_TYPE, 0x01, 0x04, TRUE, PMH_ESCR0, PMH_ESCR1),
309P4_EVDESCR(BSQ_CACHE_REFERENCE, 0x0C, 0x07, FALSE, BSU_ESCR0, BSU_ESCR1),
310P4_EVDESCR(IOQ_ALLOCATION, 0x03, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
311P4_EVDESCR(IOQ_ACTIVE_ENTRIES, 0x1A, 0x06, FALSE, FSB_ESCR1, NONE),
312P4_EVDESCR(FSB_DATA_ACTIVITY, 0x17, 0x06, TRUE, FSB_ESCR0, FSB_ESCR1),
313P4_EVDESCR(BSQ_ALLOCATION, 0x05, 0x07, FALSE, BSU_ESCR0, NONE),
314P4_EVDESCR(BSQ_ACTIVE_ENTRIES, 0x06, 0x07, FALSE, BSU_ESCR1, NONE),
315 /* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
316P4_EVDESCR(SSE_INPUT_ASSIST, 0x34, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
317P4_EVDESCR(PACKED_SP_UOP, 0x08, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
318P4_EVDESCR(PACKED_DP_UOP, 0x0C, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
319P4_EVDESCR(SCALAR_SP_UOP, 0x0A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
320P4_EVDESCR(SCALAR_DP_UOP, 0x0E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
321P4_EVDESCR(64BIT_MMX_UOP, 0x02, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
322P4_EVDESCR(128BIT_MMX_UOP, 0x1A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
323P4_EVDESCR(X87_FP_UOP, 0x04, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
324P4_EVDESCR(X87_SIMD_MOVES_UOP, 0x2E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
325P4_EVDESCR(GLOBAL_POWER_EVENTS, 0x13, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
326P4_EVDESCR(TC_MS_XFER, 0x05, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
327P4_EVDESCR(UOP_QUEUE_WRITES, 0x09, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
328P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
329 0x05, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
330P4_EVDESCR(RETIRED_BRANCH_TYPE, 0x04, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
331P4_EVDESCR(RESOURCE_STALL, 0x01, 0x01, FALSE, ALF_ESCR0, ALF_ESCR1),
332P4_EVDESCR(WC_BUFFER, 0x05, 0x05, TRUE, DAC_ESCR0, DAC_ESCR1),
333P4_EVDESCR(B2B_CYCLES, 0x16, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
334P4_EVDESCR(BNR, 0x08, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
335P4_EVDESCR(SNOOP, 0x06, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
336P4_EVDESCR(RESPONSE, 0x04, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
337P4_EVDESCR(FRONT_END_EVENT, 0x08, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
338P4_EVDESCR(EXECUTION_EVENT, 0x0C, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
339P4_EVDESCR(REPLAY_EVENT, 0x09, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
340P4_EVDESCR(INSTR_RETIRED, 0x02, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
341P4_EVDESCR(UOPS_RETIRED, 0x01, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
342P4_EVDESCR(UOP_TYPE, 0x02, 0x02, FALSE, RAT_ESCR0, RAT_ESCR1),
343P4_EVDESCR(BRANCH_RETIRED, 0x06, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
344P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
345P4_EVDESCR(X87_ASSIST, 0x03, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
346P4_EVDESCR(MACHINE_CLEAR, 0x02, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3)
347
348#undef P4_EVDESCR
349};
350
351#define P4_EVENT_IS_TI(E) ((E)->pm_is_ti_event == TRUE)
352
353#define P4_NEVENTS (PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1)
354
355/*
356 * P4 PMC descriptors
357 */
358
359struct p4pmc_descr {
360 struct pmc_descr pm_descr; /* common information */
361 enum pmc_p4pmc pm_pmcnum; /* PMC number */
362 uint32_t pm_pmc_msr; /* PERFCTR MSR address */
363 uint32_t pm_cccr_msr; /* CCCR MSR address */
364};
365
366static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
367
368 /*
369 * TSC descriptor
370 */
371
372 {
373 .pm_descr =
374 {
375 .pd_name = "TSC",
376 .pd_class = PMC_CLASS_TSC,
377 .pd_caps = PMC_CAP_READ | PMC_CAP_WRITE,
378 .pd_width = 64
379 },
380 .pm_pmcnum = ~0,
381 .pm_cccr_msr = ~0,
382 .pm_pmc_msr = 0x10,
383 },
384
385 /*
386 * P4 PMCS
387 */
388
389#define P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
390 PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
391 PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE | \
392 PMC_CAP_TAGGING | PMC_CAP_CASCADE)
393
394#define P4_PMCDESCR(N, PMC, CCCR) \
395 { \
396 .pm_descr = \
397 { \
398 .pd_name = #N, \
399 .pd_class = PMC_CLASS_P4, \
400 .pd_caps = P4_PMC_CAPS, \
401 .pd_width = 40 \
402 }, \
403 .pm_pmcnum = P4_PMC_##N, \
404 .pm_cccr_msr = (CCCR), \
405 .pm_pmc_msr = (PMC) \
406 }
407
408 P4_PMCDESCR(BPU_COUNTER0, 0x300, 0x360),
409 P4_PMCDESCR(BPU_COUNTER1, 0x301, 0x361),
410 P4_PMCDESCR(BPU_COUNTER2, 0x302, 0x362),
411 P4_PMCDESCR(BPU_COUNTER3, 0x303, 0x363),
412 P4_PMCDESCR(MS_COUNTER0, 0x304, 0x364),
413 P4_PMCDESCR(MS_COUNTER1, 0x305, 0x365),
414 P4_PMCDESCR(MS_COUNTER2, 0x306, 0x366),
415 P4_PMCDESCR(MS_COUNTER3, 0x307, 0x367),
416 P4_PMCDESCR(FLAME_COUNTER0, 0x308, 0x368),
417 P4_PMCDESCR(FLAME_COUNTER1, 0x309, 0x369),
418 P4_PMCDESCR(FLAME_COUNTER2, 0x30A, 0x36A),
419 P4_PMCDESCR(FLAME_COUNTER3, 0x30B, 0x36B),
420 P4_PMCDESCR(IQ_COUNTER0, 0x30C, 0x36C),
421 P4_PMCDESCR(IQ_COUNTER1, 0x30D, 0x36D),
422 P4_PMCDESCR(IQ_COUNTER2, 0x30E, 0x36E),
423 P4_PMCDESCR(IQ_COUNTER3, 0x30F, 0x36F),
424 P4_PMCDESCR(IQ_COUNTER4, 0x310, 0x370),
425 P4_PMCDESCR(IQ_COUNTER5, 0x311, 0x371),
426
427#undef P4_PMCDESCR
428};
429
430/* HTT support */
431#define P4_NHTT 2 /* logical processors/chip */
432
433static int p4_system_has_htt;
434
435/*
436 * Per-CPU data structure for P4 class CPUs
437 *
438 * [common stuff]
439 * [19 struct pmc_hw pointers]
440 * [19 struct pmc_hw structures]
441 * [45 ESCRs status bytes]
442 * [per-cpu spin mutex]
443 * [19 flag fields for holding config flags and a runcount]
444 * [19*2 hw value fields] (Thread mode PMC support)
445 * or
446 * [19*2 EIP values] (Sampling mode PMCs)
447 * [19*2 pmc value fields] (Thread mode PMC support))
448 * [19*2 pmc value fields] (Thread mode PMC support)
449
450struct p4_cpu {
451 struct pmc_cpu pc_common;
452 struct pmc_hw *pc_hwpmcs[P4_NPMCS];
453 struct pmc_hw pc_p4pmcs[P4_NPMCS];
454 char pc_escrs[P4_NESCR];
455 struct mtx pc_mtx; /* spin lock */
456 uint32_t pc_intrflag; /* NMI handler flags */
457 unsigned int pc_intrlock; /* NMI handler spin lock */
458 unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
459 union {
460 pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
461 uintptr_t pc_ip[P4_NPMCS * P4_NHTT];
462 } pc_si;
463 pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT];
464};
465
466/*
467 * A 'logical' CPU shares PMC resources with its partner 'physical' CPU,
468 * except the TSC, which is architectural and hence separate. The
469 * 'logical' CPU descriptor thus has pointers to the physical CPUs
470 * descriptor state except for the TSC (rowindex 0) which is not
471 * shared.
472 */
435static struct p4_cpu **p4_pcpu;
473
474struct p4_logicalcpu {
475 struct pmc_cpu pc_common;
476 struct pmc_hw *pc_hwpmcs[P4_NPMCS];
477 struct pmc_hw pc_tsc;
478};
479
480#define P4_PCPU_PMC_VALUE(PC,RI,CPU) (PC)->pc_pmc_values[(RI)*((CPU) & 1)]
481#define P4_PCPU_HW_VALUE(PC,RI,CPU) (PC)->pc_si.pc_hw[(RI)*((CPU) & 1)]
482#define P4_PCPU_SAVED_IP(PC,RI,CPU) (PC)->pc_si.pc_ip[(RI)*((CPU) & 1)]
483
484#define P4_PCPU_GET_FLAGS(PC,RI,MASK) ((PC)->pc_flags[(RI)] & (MASK))
485#define P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do { \
486 char _tmp; \
487 _tmp = (PC)->pc_flags[(RI)]; \
488 _tmp &= ~(MASK); \
489 _tmp |= (VAL) & (MASK); \
490 (PC)->pc_flags[(RI)] = _tmp; \
491} while (0)
492
493#define P4_PCPU_GET_RUNCOUNT(PC,RI) P4_PCPU_GET_FLAGS(PC,RI,0x0F)
494#define P4_PCPU_SET_RUNCOUNT(PC,RI,V) P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)
495
496#define P4_PCPU_GET_CFGFLAGS(PC,RI) (P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
497#define P4_PCPU_SET_CFGFLAGS(PC,RI,C) P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
498
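Each pc_flags[] byte thus packs two 4-bit fields: the runcount in bits 0-3 and the per-logical-CPU config flags in bits 4-7. A standalone sketch of the same packing (0x32 is an assumed example value):

#include <stdio.h>

#define FLAGS_GET(F, M)	((F) & (M))
#define RUNCOUNT(F)	FLAGS_GET((F), 0x0F)
#define CFGFLAGS(F)	(FLAGS_GET((F), 0xF0) >> 4)

int
main(void)
{
	unsigned char flags = 0x32;	/* assumed example value */

	/* cfgflags 0x3: configured on both logical CPUs; runcount 2. */
	printf("cfg=0x%x run=%d\n", CFGFLAGS(flags), RUNCOUNT(flags));
	return (0);
}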
499#define P4_CPU_TO_FLAG(C) (P4_CPU_IS_HTT_SECONDARY(C) ? 0x2 : 0x1)
500
501#define P4_PCPU_GET_INTRFLAG(PC,I) ((PC)->pc_intrflag & (1 << (I)))
502#define P4_PCPU_SET_INTRFLAG(PC,I,V) do { \
503 uint32_t __mask; \
504 __mask = 1 << (I); \
505 if ((V)) \
506 (PC)->pc_intrflag |= __mask; \
507 else \
508 (PC)->pc_intrflag &= ~__mask; \
509 } while (0)
510
511/*
512 * A minimal spin lock implementation for use inside the NMI handler.
513 *
514 * We don't want to use a regular spin lock here, because curthread
515 * may not be consistent at the time the handler is invoked.
516 */
517#define P4_PCPU_ACQ_INTR_SPINLOCK(PC) do { \
518 while (!atomic_cmpset_acq_int(&pc->pc_intrlock, 0, 1)) \
519 ia32_pause(); \
520 } while (0)
521#define P4_PCPU_REL_INTR_SPINLOCK(PC) \
522 atomic_store_rel_int(&pc->pc_intrlock, 0);
523
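A usage sketch, assumed from the interrupt path, of how the raw lock brackets updates to the shared pc_intrflag word:

	P4_PCPU_ACQ_INTR_SPINLOCK(pc);
	P4_PCPU_SET_INTRFLAG(pc, ri, 1);	/* record a pending overflow */
	P4_PCPU_REL_INTR_SPINLOCK(pc);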
524/* ESCR row disposition */
525static int p4_escrdisp[P4_NESCR];
526
527#define P4_ESCR_ROW_DISP_IS_THREAD(E) (p4_escrdisp[(E)] > 0)
528#define P4_ESCR_ROW_DISP_IS_STANDALONE(E) (p4_escrdisp[(E)] < 0)
529#define P4_ESCR_ROW_DISP_IS_FREE(E) (p4_escrdisp[(E)] == 0)
530
531#define P4_ESCR_MARK_ROW_STANDALONE(E) do { \
532 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
533 __LINE__)); \
534 atomic_add_int(&p4_escrdisp[(E)], -1); \
535 KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_max_active()), \
536 ("[p4,%d] row disposition error", __LINE__)); \
537} while (0)
538
539#define P4_ESCR_UNMARK_ROW_STANDALONE(E) do { \
540 atomic_add_int(&p4_escrdisp[(E)], 1); \
541 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
542 __LINE__)); \
543} while (0)
544
545#define P4_ESCR_MARK_ROW_THREAD(E) do { \
546 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
547 __LINE__)); \
548 atomic_add_int(&p4_escrdisp[(E)], 1); \
549} while (0)
550
551#define P4_ESCR_UNMARK_ROW_THREAD(E) do { \
552 atomic_add_int(&p4_escrdisp[(E)], -1); \
553 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
554 __LINE__)); \
555} while (0)
556
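An illustrative marking sequence for a single row index 'e', showing how the signed count encodes the three dispositions:

	P4_ESCR_MARK_ROW_THREAD(e);	/* 0 -> +1: row is now THREAD */
	P4_ESCR_MARK_ROW_THREAD(e);	/* +1 -> +2: a second process-mode PMC */
	P4_ESCR_UNMARK_ROW_THREAD(e);	/* +2 -> +1 */
	P4_ESCR_UNMARK_ROW_THREAD(e);	/* +1 -> 0: row is FREE again */

A system-wide allocation would instead drive the count negative, via P4_ESCR_MARK_ROW_STANDALONE().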
557#define P4_PMC_IS_STOPPED(cccr) ((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)
558
559#define P4_CPU_IS_HTT_SECONDARY(cpu) \
560 (p4_system_has_htt ? ((cpu) & 1) : 0)
561#define P4_TO_HTT_PRIMARY(cpu) \
562 (p4_system_has_htt ? ((cpu) & ~1) : (cpu))
563
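A brief worked example of the pairing arithmetic, assuming HTT is enabled: CPUs 4 and 5 form one pair, so P4_CPU_IS_HTT_SECONDARY(5) == (5 & 1) == 1 and P4_TO_HTT_PRIMARY(5) == (5 & ~1) == 4; per-core state for either logical CPU is therefore reached through CPU 4's structures.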
564#define P4_CCCR_Tx_MASK (~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1| \
565 P4_CCCR_ENABLE|P4_CCCR_OVF))
566#define P4_ESCR_Tx_MASK (~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS| \
567 P4_ESCR_T1_USR))
568
569/*
570 * support routines
571 */
572
573static struct p4_event_descr *
574p4_find_event(enum pmc_event ev)
575{
576 int n;
577
578 for (n = 0; n < P4_NEVENTS; n++)
579 if (p4_events[n].pm_event == ev)
580 break;
581 if (n == P4_NEVENTS)
582 return NULL;
583 return &p4_events[n];
539 return (NULL);
540 return (&p4_events[n]);
584}
585
586/*
587 * Initialize per-cpu state
588 */
589
590static int
591p4_init(int cpu)
548p4_pcpu_init(struct pmc_mdep *md, int cpu)
592{
593 int n, phycpu;
594 char *pescr;
595 struct p4_cpu *pcs;
596 struct p4_logicalcpu *plcs;
551 int n, first_ri, phycpu;
597 struct pmc_hw *phw;
553 struct p4_cpu *p4c;
554 struct pmc_cpu *pc, *plc;
598
599 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
600 ("[p4,%d] insane cpu number %d", __LINE__, cpu));
601
602 PMCDBG(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu,
603 pmc_cpu_is_primary(cpu) != 0);
604
562 first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;
563
605 /*
606 * The two CPUs in an HT pair share their per-cpu state.
607 *
608 * For HT capable CPUs, we assume that the two logical
609 * processors in the HT pair get two consecutive CPU ids
610 * starting with an even id #.
611 *
612 * The primary CPU (the even numbered CPU of the pair) would
613 * have been initialized prior to the initialization for the
614 * secondary.
615 */
616
617 if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) {
618
619 p4_system_has_htt = 1;
620
621 phycpu = P4_TO_HTT_PRIMARY(cpu);
622 pcs = (struct p4_cpu *) pmc_pcpu[phycpu];
623 PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pcs=%p",
624 cpu, phycpu, pcs);
625 KASSERT(pcs,
626 ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", __LINE__,
627 cpu, phycpu));
628 if (pcs == NULL) /* decline to init */
629 return ENXIO;
581 pc = pmc_pcpu[phycpu];
582 plc = pmc_pcpu[cpu];
630
631 plcs = malloc(sizeof(struct p4_logicalcpu),
632 M_PMC, M_WAITOK|M_ZERO);
584 KASSERT(plc != pc, ("[p4,%d] per-cpu config error", __LINE__));
633
634 /* The TSC is architectural state and is not shared */
635 plcs->pc_hwpmcs[0] = &plcs->pc_tsc;
636 plcs->pc_tsc.phw_state = PMC_PHW_FLAG_IS_ENABLED |
637 PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
638 PMC_PHW_FLAG_IS_SHAREABLE;
586 PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pc=%p", cpu,
587 phycpu, pc);
588 KASSERT(pc, ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d",
589 __LINE__, cpu, phycpu));
639
640 /* Other PMCs are shared with the physical CPU */
641 for (n = 1; n < P4_NPMCS; n++)
642 plcs->pc_hwpmcs[n] = pcs->pc_hwpmcs[n];
591 /* PMCs are shared with the physical CPU. */
592 for (n = 0; n < P4_NPMCS; n++)
593 plc->pc_hwpmcs[n + first_ri] =
594 pc->pc_hwpmcs[n + first_ri];
643
644 pmc_pcpu[cpu] = (struct pmc_cpu *) plcs;
645 return 0;
596 return (0);
646 }
647
648 pcs = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO);
599 p4c = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO);
649
650 if (pcs == NULL)
651 return ENOMEM;
652 phw = pcs->pc_p4pmcs;
601 if (p4c == NULL)
602 return (ENOMEM);
653
604 pc = pmc_pcpu[cpu];
605
606 KASSERT(pc != NULL, ("[p4,%d] cpu %d null per-cpu", __LINE__, cpu));
607
608 p4_pcpu[cpu] = p4c;
609 phw = p4c->pc_p4pmcs;
610
654 for (n = 0; n < P4_NPMCS; n++, phw++) {
655 phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
656 PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
657 phw->phw_pmc = NULL;
658 pcs->pc_hwpmcs[n] = phw;
615 pc->pc_hwpmcs[n + first_ri] = phw;
659 }
660
661 /* Mark the TSC as shareable */
662 pcs->pc_hwpmcs[0]->phw_state |= PMC_PHW_FLAG_IS_SHAREABLE;
663
664 pescr = pcs->pc_escrs;
618 pescr = p4c->pc_escrs;
665 for (n = 0; n < P4_NESCR; n++)
666 *pescr++ = P4_INVALID_PMC_INDEX;
667 pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
668
669 mtx_init(&pcs->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN);
622 mtx_init(&p4c->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN);
670
671 return 0;
624 return (0);
672}
673
674/*
675 * Destroy per-cpu state.
676 */
677
678static int
679p4_cleanup(int cpu)
632p4_pcpu_fini(struct pmc_mdep *md, int cpu)
680{
681 int i;
682 struct p4_cpu *pcs;
634 int first_ri, i;
635 struct p4_cpu *p4c;
636 struct pmc_cpu *pc;
683
684 PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
685
686 if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) == NULL)
687 return 0;
640 pc = pmc_pcpu[cpu];
641 first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;
688
643 for (i = 0; i < P4_NPMCS; i++)
644 pc->pc_hwpmcs[i + first_ri] = NULL;
645
646 if (!pmc_cpu_is_primary(cpu) && (cpu & 1))
647 return (0);
648
649 p4c = p4_pcpu[cpu];
650
651 KASSERT(p4c != NULL, ("[p4,%d] NULL pcpu", __LINE__));
652
689 /* Turn off all PMCs on this CPU */
690 for (i = 0; i < P4_NPMCS - 1; i++)
691 wrmsr(P4_CCCR_MSR_FIRST + i,
692 rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE);
693
694 /*
695 * If the CPU is physical we need to teardown the
696 * full MD state.
697 */
698 if (!P4_CPU_IS_HTT_SECONDARY(cpu))
699 mtx_destroy(&pcs->pc_mtx);
658 mtx_destroy(&p4c->pc_mtx);
700
701 free(pcs, M_PMC);
660 free(p4c, M_PMC);
702
703 pmc_pcpu[cpu] = NULL;
662 p4_pcpu[cpu] = NULL;
704
705 return 0;
664 return (0);
706}
707
708/*
709 * Context switch in.
710 */
711
712static int
713p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
714{
715 (void) pc;
716
717 PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
718 (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);
719
720 /* enable the RDPMC instruction */
721 if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
722 load_cr4(rcr4() | CR4_PCE);
723
724 PMCDBG(MDP,SWI,2, "cr4=0x%x", (uint32_t) rcr4());
725
726 return 0;
727}
728
729/*
730 * Context switch out.
731 */
732
733static int
734p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
735{
736 (void) pc;
737 (void) pp; /* can be null */
738
739 PMCDBG(MDP,SWO,1, "pc=%p pp=%p", pc, pp);
740
741 /* always disallow the RDPMC instruction */
742 load_cr4(rcr4() & ~CR4_PCE);
743
744 PMCDBG(MDP,SWO,2, "cr4=0x%x", (uint32_t) rcr4());
745
746 return 0;
747}
748
749/*
750 * Read a PMC
751 */
752
753static int
754p4_read_pmc(int cpu, int ri, pmc_value_t *v)
755{
756 enum pmc_mode mode;
757 struct p4pmc_descr *pd;
758 struct pmc *pm;
674 struct pmc *pm;
759 struct p4_cpu *pc;
760 struct pmc_hw *phw;
761 pmc_value_t tmp;
675 pmc_value_t tmp;
676 struct p4_cpu *pc;
677 enum pmc_mode mode;
678 struct p4pmc_descr *pd;
762
763 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
764 ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
765 KASSERT(ri >= 0 && ri < P4_NPMCS,
766 ("[p4,%d] illegal row-index %d", __LINE__, ri));
767
685 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
686 pm = pc->pc_p4pmcs[ri].phw_pmc;
687 pd = &p4_pmcdesc[ri];
768
769 if (ri == 0) { /* TSC */
770#ifdef DEBUG
771 pc = (struct p4_cpu *) pmc_pcpu[cpu];
772 phw = pc->pc_hwpmcs[ri];
773 pm = phw->phw_pmc;
774
775 KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
776 cpu, ri));
777 KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
778 ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__, cpu, ri,
779 PMC_TO_CLASS(pm)));
780 KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)),
781 ("[p4,%d] TSC counter in non-counting mode", __LINE__));
782#endif
783 *v = rdtsc();
784 PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
785 return 0;
786 }
787
788 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
789 phw = pc->pc_hwpmcs[ri];
790 pd = &p4_pmcdesc[ri];
791 pm = phw->phw_pmc;
792
793 KASSERT(pm != NULL,
689 KASSERT(pm != NULL,
794 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
795 cpu, ri));
690 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, cpu, ri));
796
797 KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
798 ("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
799 pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));
800
801 mode = PMC_TO_MODE(pm);
802
803 PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
804
805 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
806 ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
807
808 tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
809
810 if (PMC_IS_VIRTUAL_MODE(mode)) {
811 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
812 tmp += (P4_PERFCTR_MASK + 1) -
813 P4_PCPU_HW_VALUE(pc,ri,cpu);
814 else
815 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
816 tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
817 }
818
819 if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
820 *v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
821 else
822 *v = tmp;
823
824 PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
825 return 0;
720
721 return (0);
826}
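A worked example of the 40-bit wraparound adjustment in the virtual-mode path above (numbers assumed): if the hardware value saved at 'start' was 0xFFFFFFFF00 and the counter now reads 0x40, the raw reading is below the baseline, so the code adds (P4_PERFCTR_MASK + 1) - 0xFFFFFFFF00 = 0x100, yielding 0x100 + 0x40 = 0x140 events before the saved PMC value is added on.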
827
828/*
829 * Write a PMC
830 */
831
832static int
833p4_write_pmc(int cpu, int ri, pmc_value_t v)
834{
835 enum pmc_mode mode;
836 struct pmc *pm;
837 struct p4_cpu *pc;
838 const struct pmc_hw *phw;
839 const struct p4pmc_descr *pd;
840
841 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
842 ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
843 KASSERT(ri >= 0 && ri < P4_NPMCS,
844 ("[p4,%d] illegal row-index %d", __LINE__, ri));
845
846
847 /*
848 * The P4's TSC register is writeable, but we don't allow a
849 * write as changing the TSC's value could interfere with
850 * timekeeping and other system functions.
851 */
852 if (ri == 0) {
853#ifdef DEBUG
854 pc = (struct p4_cpu *) pmc_pcpu[cpu];
855 phw = pc->pc_hwpmcs[ri];
856 pm = phw->phw_pmc;
857 KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
858 cpu, ri));
859 KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
860 ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__,
861 cpu, ri, PMC_TO_CLASS(pm)));
862#endif
863 return 0;
864 }
865
866 /* Shared PMCs */
867 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
868 phw = pc->pc_hwpmcs[ri];
742 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
743 phw = &pc->pc_p4pmcs[ri];
869 pm = phw->phw_pmc;
870 pd = &p4_pmcdesc[ri];
871
872 KASSERT(pm != NULL,
873 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
874 cpu, ri));
875
876 mode = PMC_TO_MODE(pm);
877
878 PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
879 mode, v);
880
881 /*
882 * write the PMC value to the register/saved value: for
883 * sampling mode PMCs, the value to be programmed into the PMC
884 * counter is -(C+1) where 'C' is the requested sample rate.
885 */
886 if (PMC_IS_SAMPLING_MODE(mode))
887 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
888
889 if (PMC_IS_SYSTEM_MODE(mode))
890 wrmsr(pd->pm_pmc_msr, v);
891 else
892 P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
893
744 pm = phw->phw_pmc;
745 pd = &p4_pmcdesc[ri];
746
747 KASSERT(pm != NULL,
748 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
749 cpu, ri));
750
751 mode = PMC_TO_MODE(pm);
752
753 PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
754 mode, v);
755
756 /*
757 * write the PMC value to the register/saved value: for
758 * sampling mode PMCs, the value to be programmed into the PMC
759 * counter is -(C+1) where 'C' is the requested sample rate.
760 */
761 if (PMC_IS_SAMPLING_MODE(mode))
762 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
763
764 if (PMC_IS_SYSTEM_MODE(mode))
765 wrmsr(pd->pm_pmc_msr, v);
766 else
767 P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
768
894 return 0;
769 return (0);
895}
896
897/*
898 * Configure a PMC 'pm' on the given CPU and row-index.
899 *
900 * 'pm' may be NULL to indicate de-configuration.
901 *
902 * On HTT systems, a PMC may get configured twice, once for each
903 * "logical" CPU. We track this using the CFGFLAGS field of the
904 * per-cpu state; this field is a bit mask with one bit each for
905 * logical CPUs 0 & 1.
906 */
907
908static int
909p4_config_pmc(int cpu, int ri, struct pmc *pm)
910{
911 struct pmc_hw *phw;
912 struct p4_cpu *pc;
913 int cfgflags, cpuflag;
914
915 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
916 ("[p4,%d] illegal CPU %d", __LINE__, cpu));
770}
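/*
 * A hedged sketch of the sampling-mode transform named in the comment
 * above: the counter is loaded with -(C+1) mod 2^40 so that it
 * overflows after C+1 increments.  The helpers mirror what the
 * P4_RELOAD_COUNT_TO_PERFCTR_VALUE()/P4_PERFCTR_VALUE_TO_RELOAD_COUNT()
 * pair is described as doing; they are illustrative, not the driver
 * macros.
 */
#include <assert.h>
#include <stdint.h>

#define	CTR_MOD	(1ULL << 40)	/* 40-bit P4 counter */

static uint64_t
reload_to_perfctr(uint64_t c)
{
	return ((CTR_MOD - c - 1) & (CTR_MOD - 1));	/* -(C+1) mod 2^40 */
}

static uint64_t
perfctr_to_reload(uint64_t v)
{
	return (CTR_MOD - v - 1);
}

int
main(void)
{
	uint64_t c = 100000, v = reload_to_perfctr(c);

	assert(perfctr_to_reload(v) == c);		/* round trip */
	assert(((v + c + 1) & (CTR_MOD - 1)) == 0);	/* wraps after C+1 */
	return (0);
}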
771
772/*
773 * Configure a PMC 'pm' on the given CPU and row-index.
774 *
775 * 'pm' may be NULL to indicate de-configuration.
776 *
777 * On HTT systems, a PMC may get configured twice, once for each
778 * "logical" CPU. We track this using the CFGFLAGS field of the
779 * per-cpu state; this field is a bit mask with one bit each for
780 * logical CPUs 0 & 1.
781 */
782
783static int
784p4_config_pmc(int cpu, int ri, struct pmc *pm)
785{
786 struct pmc_hw *phw;
787 struct p4_cpu *pc;
788 int cfgflags, cpuflag;
789
790 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
791 ("[p4,%d] illegal CPU %d", __LINE__, cpu));
792
917 KASSERT(ri >= 0 && ri < P4_NPMCS,
918 ("[p4,%d] illegal row-index %d", __LINE__, ri));
919
920 PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
921
793 KASSERT(ri >= 0 && ri < P4_NPMCS,
794 ("[p4,%d] illegal row-index %d", __LINE__, ri));
795
796 PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
797
922 if (ri == 0) { /* TSC */
923 pc = (struct p4_cpu *) pmc_pcpu[cpu];
924 phw = pc->pc_hwpmcs[ri];
798 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
799 phw = &pc->pc_p4pmcs[ri];
925
800
926 KASSERT(pm == NULL || phw->phw_pmc == NULL,
927 ("[p4,%d] hwpmc doubly config'ed", __LINE__));
928 phw->phw_pmc = pm;
929 return 0;
930 }
931
932 /* Shared PMCs */
933
934 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
935 phw = pc->pc_hwpmcs[ri];
936
937 KASSERT(pm == NULL || phw->phw_pmc == NULL ||
938 (p4_system_has_htt && phw->phw_pmc == pm),
939 ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
940
941 mtx_lock_spin(&pc->pc_mtx);
942 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
943
944 KASSERT(cfgflags >= 0 && cfgflags <= 3,
945 ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
946 cfgflags, cpu, ri));
947
948 KASSERT(cfgflags == 0 || phw->phw_pmc,
949 ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
950 __LINE__, cpu, ri));
951
952 cpuflag = P4_CPU_TO_FLAG(cpu);
953
954 if (pm) { /* config */
955 if (cfgflags == 0)
956 phw->phw_pmc = pm;
957
958 KASSERT(phw->phw_pmc == pm,
959 ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
960 __LINE__, cpu, ri, pm, phw->phw_pmc));
961
962 cfgflags |= cpuflag;
963 } else { /* unconfig */
964 cfgflags &= ~cpuflag;
965
966 if (cfgflags == 0)
967 phw->phw_pmc = NULL;
968 }
969
970 KASSERT(cfgflags >= 0 && cfgflags <= 3,
971 ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
972 cfgflags, cpu, ri));
973
974 P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
975
976 mtx_unlock_spin(&pc->pc_mtx);
977
801 KASSERT(pm == NULL || phw->phw_pmc == NULL ||
802 (p4_system_has_htt && phw->phw_pmc == pm),
803 ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
804
805 mtx_lock_spin(&pc->pc_mtx);
806 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
807
808 KASSERT(cfgflags >= 0 && cfgflags <= 3,
809 ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
810 cfgflags, cpu, ri));
811
812 KASSERT(cfgflags == 0 || phw->phw_pmc,
813 ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
814 __LINE__, cpu, ri));
815
816 cpuflag = P4_CPU_TO_FLAG(cpu);
817
818 if (pm) { /* config */
819 if (cfgflags == 0)
820 phw->phw_pmc = pm;
821
822 KASSERT(phw->phw_pmc == pm,
823 ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
824 __LINE__, cpu, ri, pm, phw->phw_pmc));
825
826 cfgflags |= cpuflag;
827 } else { /* unconfig */
828 cfgflags &= ~cpuflag;
829
830 if (cfgflags == 0)
831 phw->phw_pmc = NULL;
832 }
833
834 KASSERT(cfgflags >= 0 && cfgflags <= 3,
835 ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
836 cfgflags, cpu, ri));
837
838 P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
839
840 mtx_unlock_spin(&pc->pc_mtx);
841
978 return 0;
842 return (0);
979}
980
981/*
982 * Retrieve a configured PMC pointer from hardware state.
983 */
984
985static int
986p4_get_config(int cpu, int ri, struct pmc **ppm)
987{
843}
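/*
 * The CFGFLAGS bookkeeping above reduces to a two-bit mask, one bit per
 * logical CPU of an HTT pair.  A standalone sketch; CPU_TO_FLAG is an
 * assumed stand-in for the driver's P4_CPU_TO_FLAG().
 */
#include <assert.h>

#define	CPU_TO_FLAG(cpu)	(1 << ((cpu) & 1))	/* lcpu 0 -> 0x1, lcpu 1 -> 0x2 */

int
main(void)
{
	int cfgflags = 0;

	cfgflags |= CPU_TO_FLAG(0);	/* configured on logical CPU 0 */
	cfgflags |= CPU_TO_FLAG(1);	/* ... and on its HTT partner */
	assert(cfgflags == 3);
	cfgflags &= ~CPU_TO_FLAG(0);	/* unconfigure one side */
	assert(cfgflags == 2);		/* still owned by the partner */
	return (0);
}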
844
845/*
846 * Retrieve a configured PMC pointer from hardware state.
847 */
848
849static int
850p4_get_config(int cpu, int ri, struct pmc **ppm)
851{
988 struct p4_cpu *pc;
989 struct pmc_hw *phw;
990 int cfgflags;
852 int cfgflags;
853 struct p4_cpu *pc;
991
854
992 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
993 phw = pc->pc_hwpmcs[ri];
855 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
856 ("[p4,%d] illegal CPU %d", __LINE__, cpu));
857 KASSERT(ri >= 0 && ri < P4_NPMCS,
858 ("[p4,%d] illegal row-index %d", __LINE__, ri));
994
859
860 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
861
995 mtx_lock_spin(&pc->pc_mtx);
996 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
997 mtx_unlock_spin(&pc->pc_mtx);
998
999 if (cfgflags & P4_CPU_TO_FLAG(cpu))
862 mtx_lock_spin(&pc->pc_mtx);
863 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
864 mtx_unlock_spin(&pc->pc_mtx);
865
866 if (cfgflags & P4_CPU_TO_FLAG(cpu))
1000 *ppm = phw->phw_pmc; /* PMC config'ed on this CPU */
867 *ppm = pc->pc_p4pmcs[ri].phw_pmc; /* PMC config'ed on this CPU */
1001 else
1002 *ppm = NULL;
1003
1004 return 0;
1005}
1006
1007/*
1008 * Allocate a PMC.
1009 *
1010 * The allocation strategy differs between HTT and non-HTT systems.
1011 *
1012 * The non-HTT case:
1013 * - Given the desired event and the PMC row-index, look up the
1014 * list of valid ESCRs for the event.
1015 * - For each valid ESCR:
1016 * - Check if the ESCR is free and the ESCR row is in a compatible
1017 * mode (i.e., system or process).
1018 * - Check if the ESCR is usable with a P4 PMC at the desired row-index.
1019 * If everything matches, we determine the appropriate bit values for the
1020 * ESCR and CCCR registers.
1021 *
1022 * The HTT case:
1023 *
1024 * - Process mode PMCs require special care. The FreeBSD scheduler could
1025 * schedule any two processes on the same physical CPU. We need to ensure
1026 * that a given PMC row-index is never allocated to two different
1027 * PMCs owned by different user-processes.
1028 * This is ensured by always allocating a PMC from a 'FREE' PMC row
1029 * if the system has HTT active.
1030 * - A similar check needs to be done for ESCRs; we do not want two PMCs
1031 * using the same ESCR to be scheduled at the same time. Thus ESCR
1032 * allocation is also restricted to FREE rows if the system has HTT
1033 * enabled.
1034 * - Thirdly, some events are 'thread-independent' (Intel terminology), i.e.,
1035 * the PMC hardware cannot distinguish between events caused by
1036 * different logical CPUs. This makes it impossible to assign events
1037 * to a given thread of execution. If the system has HTT enabled,
1038 * these events are not allowed for process-mode PMCs.
1039 */
1040
1041static int
1042p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
1043 const struct pmc_op_pmcallocate *a)
1044{
1045 int found, n, m;
1046 uint32_t caps, cccrvalue, escrvalue, tflags;
1047 enum pmc_p4escr escr;
1048 struct p4_cpu *pc;
1049 struct p4_event_descr *pevent;
1050 const struct p4pmc_descr *pd;
1051
1052 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1053 ("[p4,%d] illegal CPU %d", __LINE__, cpu));
1054 KASSERT(ri >= 0 && ri < P4_NPMCS,
1055 ("[p4,%d] illegal row-index value %d", __LINE__, ri));
1056
1057 pd = &p4_pmcdesc[ri];
1058
1059 PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
1060 "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
1061 pm->pm_caps);
1062
1063 /* check class */
1064 if (pd->pm_descr.pd_class != a->pm_class)
868 else
869 *ppm = NULL;
870
871 return 0;
872}
873
874/*
875 * Allocate a PMC.
876 *
877 * The allocation strategy differs between HTT and non-HTT systems.
878 *
879 * The non-HTT case:
880 * - Given the desired event and the PMC row-index, look up the
881 * list of valid ESCRs for the event.
882 * - For each valid ESCR:
883 * - Check if the ESCR is free and the ESCR row is in a compatible
884 * mode (i.e., system or process).
885 * - Check if the ESCR is usable with a P4 PMC at the desired row-index.
886 * If everything matches, we determine the appropriate bit values for the
887 * ESCR and CCCR registers.
888 *
889 * The HTT case:
890 *
891 * - Process mode PMCs require special care. The FreeBSD scheduler could
892 * schedule any two processes on the same physical CPU. We need to ensure
893 * that a given PMC row-index is never allocated to two different
894 * PMCs owned by different user-processes.
895 * This is ensured by always allocating a PMC from a 'FREE' PMC row
896 * if the system has HTT active.
897 * - A similar check needs to be done for ESCRs; we do not want two PMCs
898 * using the same ESCR to be scheduled at the same time. Thus ESCR
899 * allocation is also restricted to FREE rows if the system has HTT
900 * enabled.
901 * - Thirdly, some events are 'thread-independent' (Intel terminology), i.e.,
902 * the PMC hardware cannot distinguish between events caused by
903 * different logical CPUs. This makes it impossible to assign events
904 * to a given thread of execution. If the system has HTT enabled,
905 * these events are not allowed for process-mode PMCs.
906 */
907
908static int
909p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
910 const struct pmc_op_pmcallocate *a)
911{
912 int found, n, m;
913 uint32_t caps, cccrvalue, escrvalue, tflags;
914 enum pmc_p4escr escr;
915 struct p4_cpu *pc;
916 struct p4_event_descr *pevent;
917 const struct p4pmc_descr *pd;
918
919 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
920 ("[p4,%d] illegal CPU %d", __LINE__, cpu));
921 KASSERT(ri >= 0 && ri < P4_NPMCS,
922 ("[p4,%d] illegal row-index value %d", __LINE__, ri));
923
924 pd = &p4_pmcdesc[ri];
925
926 PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
927 "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
928 pm->pm_caps);
929
930 /* check class */
931 if (pd->pm_descr.pd_class != a->pm_class)
1065 return EINVAL;
932 return (EINVAL);
1066
1067 /* check requested capabilities */
1068 caps = a->pm_caps;
1069 if ((pd->pm_descr.pd_caps & caps) != caps)
933
934 /* check requested capabilities */
935 caps = a->pm_caps;
936 if ((pd->pm_descr.pd_caps & caps) != caps)
1070 return EPERM;
937 return (EPERM);
1071
938
1072 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
1073 /* TSC's are always allocated in system-wide counting mode */
1074 if (a->pm_ev != PMC_EV_TSC_TSC ||
1075 a->pm_mode != PMC_MODE_SC)
1076 return EINVAL;
1077 return 0;
1078 }
1079
1080 /*
1081 * If the system has HTT enabled, and the desired allocation
1082 * mode is process-private, and the PMC row disposition is not
939 /*
940 * If the system has HTT enabled, and the desired allocation
941 * mode is process-private, and the PMC row disposition is not
1083 * free (0), decline the allocation.
942 * FREE (0), decline the allocation.
1084 */
1085
1086 if (p4_system_has_htt &&
1087 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
1088 pmc_getrowdisp(ri) != 0)
943 */
944
945 if (p4_system_has_htt &&
946 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
947 pmc_getrowdisp(ri) != 0)
1089 return EBUSY;
948 return (EBUSY);
1090
1091 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
1092 ("[p4,%d] unknown PMC class %d", __LINE__,
1093 pd->pm_descr.pd_class));
1094
1095 if (pm->pm_event < PMC_EV_P4_FIRST ||
1096 pm->pm_event > PMC_EV_P4_LAST)
949
950 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
951 ("[p4,%d] unknown PMC class %d", __LINE__,
952 pd->pm_descr.pd_class));
953
954 if (pm->pm_event < PMC_EV_P4_FIRST ||
955 pm->pm_event > PMC_EV_P4_LAST)
1097 return EINVAL;
956 return (EINVAL);
1098
1099 if ((pevent = p4_find_event(pm->pm_event)) == NULL)
957
958 if ((pevent = p4_find_event(pm->pm_event)) == NULL)
1100 return ESRCH;
959 return (ESRCH);
1101
1102 PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
1103 pevent->pm_event, pevent->pm_escr_eventselect,
1104 pevent->pm_cccr_select, pevent->pm_is_ti_event);
1105
1106 /*
1107 * Some PMC events are 'thread independent' and therefore
1108 * cannot be used for process-private modes if HTT is being
1109 * used.
1110 */
1111
1112 if (P4_EVENT_IS_TI(pevent) &&
1113 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
1114 p4_system_has_htt)
960
961 PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
962 pevent->pm_event, pevent->pm_escr_eventselect,
963 pevent->pm_cccr_select, pevent->pm_is_ti_event);
964
965 /*
966 * Some PMC events are 'thread independent' and therefore
967 * cannot be used for process-private modes if HTT is being
968 * used.
969 */
970
971 if (P4_EVENT_IS_TI(pevent) &&
972 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
973 p4_system_has_htt)
1115 return EINVAL;
974 return (EINVAL);
1116
975
1117 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
976 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1118
1119 found = 0;
1120
1121 /* look for a suitable ESCR for this event */
1122 for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
1123 if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
1124 break; /* out of ESCRs */
1125 /*
1126 * Check ESCR row disposition.
1127 *
1128 * If the request is for a system-mode PMC, then the
1129 * ESCR row should not be in process-virtual mode, and
1130 * should also be free on the current CPU.
1131 */
1132
1133 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1134 if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
1135 pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
1136 continue;
1137 }
1138
1139 /*
1140 * If the request is for a process-virtual PMC, and if
1141 * HTT is not enabled, we can use an ESCR row that is
1142 * either FREE or already in process mode.
1143 *
1144 * If HTT is enabled, then we need to ensure that a
1145 * given ESCR is never allocated to two PMCs that
1146 * could run simultaneously on the two logical CPUs of
1147 * a CPU package. We ensure this by only allocating
1148 * ESCRs from rows marked as 'FREE'.
1149 */
1150
1151 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
1152 if (p4_system_has_htt) {
1153 if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
1154 continue;
1155 } else
1156 if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
1157 continue;
1158 }
1159
1160 /*
1161 * We found a suitable ESCR for this event. Now check if
1162 * this ESCR can work with the PMC at row-index 'ri'.
1163 */
1164
1165 for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
1166 if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
1167 found = 1;
1168 break;
1169 }
1170 }
1171
1172 if (found == 0)
977
978 found = 0;
979
980 /* look for a suitable ESCR for this event */
981 for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
982 if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
983 break; /* out of ESCRs */
984 /*
985 * Check ESCR row disposition.
986 *
987 * If the request is for a system-mode PMC, then the
988 * ESCR row should not be in process-virtual mode, and
989 * should also be free on the current CPU.
990 */
991
992 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
993 if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
994 pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
995 continue;
996 }
997
998 /*
999 * If the request is for a process-virtual PMC, and if
1000 * HTT is not enabled, we can use an ESCR row that is
1001 * either FREE or already in process mode.
1002 *
1003 * If HTT is enabled, then we need to ensure that a
1004 * given ESCR is never allocated to two PMCs that
1005 * could run simultaneously on the two logical CPUs of
1006 * a CPU package. We ensure this by only allocating
1007 * ESCRs from rows marked as 'FREE'.
1008 */
1009
1010 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
1011 if (p4_system_has_htt) {
1012 if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
1013 continue;
1014 } else
1015 if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
1016 continue;
1017 }
1018
1019 /*
1020 * We found a suitable ESCR for this event. Now check if
1021 * this ESCR can work with the PMC at row-index 'ri'.
1022 */
1023
1024 for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
1025 if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
1026 found = 1;
1027 break;
1028 }
1029 }
1030
1031 if (found == 0)
1173 return ESRCH;
1032 return (ESRCH);
1174
1175 KASSERT((int) escr >= 0 && escr < P4_NESCR,
1176 ("[p4,%d] illegal ESCR value %d", __LINE__, escr));
1177
1178 /* mark ESCR row mode */
1179 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1180 pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
1181 P4_ESCR_MARK_ROW_STANDALONE(escr);
1182 } else {
1183 KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
1184 ("[p4,%d] escr[%d] already in use", __LINE__, escr));
1185 P4_ESCR_MARK_ROW_THREAD(escr);
1186 }
1187
1188 pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr;
1189 pm->pm_md.pm_p4.pm_p4_escr = escr;
1190
1191 cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
1192 escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);
1193
1194 /* CCCR fields */
1195 if (caps & PMC_CAP_THRESHOLD)
1196 cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
1197 P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
1198
1199 if (caps & PMC_CAP_EDGE)
1200 cccrvalue |= P4_CCCR_EDGE;
1201
1202 if (caps & PMC_CAP_INVERT)
1203 cccrvalue |= P4_CCCR_COMPLEMENT;
1204
1205 if (p4_system_has_htt)
1206 cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
1207 P4_CCCR_ACTIVE_THREAD_MASK;
1208 else /* no HTT; thread field should be '11b' */
1209 cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
1210
1211 if (caps & PMC_CAP_CASCADE)
1212 cccrvalue |= P4_CCCR_CASCADE;
1213
1214 /* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
1215 if (caps & PMC_CAP_INTERRUPT)
1216 cccrvalue |= P4_CCCR_OVF_PMI_T0;
1217
1218 /* ESCR fields */
1219 if (caps & PMC_CAP_QUALIFIER)
1220 escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
1221 P4_ESCR_EVENT_MASK_MASK;
1222 if (caps & PMC_CAP_TAGGING)
1223 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
1224 P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
1225 if (caps & PMC_CAP_QUALIFIER)
1226 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
1227 P4_ESCR_EVENT_MASK_MASK);
1228
1229 /* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
1230 tflags = 0;
1231 if (caps & PMC_CAP_SYSTEM)
1232 tflags |= P4_ESCR_T0_OS;
1233 if (caps & PMC_CAP_USER)
1234 tflags |= P4_ESCR_T0_USR;
1235 if (tflags == 0)
1236 tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1237 escrvalue |= tflags;
1238
1239 pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
1240 pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
1241
1242 PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
1243 "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
1244 cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
1245
1033
1034 KASSERT((int) escr >= 0 && escr < P4_NESCR,
1035 ("[p4,%d] illegal ESCR value %d", __LINE__, escr));
1036
1037 /* mark ESCR row mode */
1038 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1039 pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
1040 P4_ESCR_MARK_ROW_STANDALONE(escr);
1041 } else {
1042 KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
1043 ("[p4,%d] escr[%d] already in use", __LINE__, escr));
1044 P4_ESCR_MARK_ROW_THREAD(escr);
1045 }
1046
1047 pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr;
1048 pm->pm_md.pm_p4.pm_p4_escr = escr;
1049
1050 cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
1051 escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);
1052
1053 /* CCCR fields */
1054 if (caps & PMC_CAP_THRESHOLD)
1055 cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
1056 P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
1057
1058 if (caps & PMC_CAP_EDGE)
1059 cccrvalue |= P4_CCCR_EDGE;
1060
1061 if (caps & PMC_CAP_INVERT)
1062 cccrvalue |= P4_CCCR_COMPLEMENT;
1063
1064 if (p4_system_has_htt)
1065 cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
1066 P4_CCCR_ACTIVE_THREAD_MASK;
1067 else /* no HTT; thread field should be '11b' */
1068 cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
1069
1070 if (caps & PMC_CAP_CASCADE)
1071 cccrvalue |= P4_CCCR_CASCADE;
1072
1073 /* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
1074 if (caps & PMC_CAP_INTERRUPT)
1075 cccrvalue |= P4_CCCR_OVF_PMI_T0;
1076
1077 /* ESCR fields */
1078 if (caps & PMC_CAP_QUALIFIER)
1079 escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
1080 P4_ESCR_EVENT_MASK_MASK;
1081 if (caps & PMC_CAP_TAGGING)
1082 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
1083 P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
1084 if (caps & PMC_CAP_QUALIFIER)
1085 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
1086 P4_ESCR_EVENT_MASK_MASK);
1087
1088 /* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
1089 tflags = 0;
1090 if (caps & PMC_CAP_SYSTEM)
1091 tflags |= P4_ESCR_T0_OS;
1092 if (caps & PMC_CAP_USER)
1093 tflags |= P4_ESCR_T0_USR;
1094 if (tflags == 0)
1095 tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1096 escrvalue |= tflags;
1097
1098 pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
1099 pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
1100
1101 PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
1102 "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
1103 cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
1104
1246 return 0;
1105 return (0);
1247}
1248
1249/*
1250 * Release a PMC.
1251 */
1252
1253static int
1254p4_release_pmc(int cpu, int ri, struct pmc *pm)
1255{
1256 enum pmc_p4escr escr;
1106}
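/*
 * A condensed sketch of the HTT admission rules the allocation comment
 * above lays out: under HTT, a process-virtual PMC may only use a FREE
 * row and may not use a thread-independent (TI) event.  The rowdisp
 * array and flags are stand-ins for driver state, not the driver's own
 * data structures.
 */
#include <assert.h>

static int has_htt = 1;
static int rowdisp[18];		/* 0 == FREE, as in the comment above */

static int
virtual_alloc_ok(int ri, int is_ti_event)
{
	if (!has_htt)
		return (1);
	if (rowdisp[ri] != 0)	/* row must be FREE under HTT */
		return (0);
	if (is_ti_event)	/* TI events can't be bound to one thread */
		return (0);
	return (1);
}

int
main(void)
{
	assert(virtual_alloc_ok(1, 0) == 1);
	rowdisp[1] = 1;				/* row claimed elsewhere */
	assert(virtual_alloc_ok(1, 0) == 0);
	assert(virtual_alloc_ok(2, 1) == 0);	/* TI event declined */
	return (0);
}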
1107
1108/*
1109 * Release a PMC.
1110 */
1111
1112static int
1113p4_release_pmc(int cpu, int ri, struct pmc *pm)
1114{
1115 enum pmc_p4escr escr;
1257 struct pmc_hw *phw;
1258 struct p4_cpu *pc;
1259
1116 struct p4_cpu *pc;
1117
1260 if (p4_pmcdesc[ri].pm_descr.pd_class == PMC_CLASS_TSC)
1261 return 0;
1118 KASSERT(ri >= 0 && ri < P4_NPMCS,
1119 ("[p4,%d] illegal row-index %d", __LINE__, ri));
1262
1263 escr = pm->pm_md.pm_p4.pm_p4_escr;
1264
1265 PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
1266
1267 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1120
1121 escr = pm->pm_md.pm_p4.pm_p4_escr;
1122
1123 PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
1124
1125 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1268 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1269 phw = pc->pc_hwpmcs[ri];
1126 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1270
1127
1271 KASSERT(phw->phw_pmc == NULL,
1128 KASSERT(pc->pc_p4pmcs[ri].phw_pmc == NULL,
1272 ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));
1273
1274 P4_ESCR_UNMARK_ROW_STANDALONE(escr);
1275 KASSERT(pc->pc_escrs[escr] == ri,
1276 ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
1277 escr, ri));
1278 pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
1279 } else
1280 P4_ESCR_UNMARK_ROW_THREAD(escr);
1281
1129 ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));
1130
1131 P4_ESCR_UNMARK_ROW_STANDALONE(escr);
1132 KASSERT(pc->pc_escrs[escr] == ri,
1133 ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
1134 escr, ri));
1135 pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
1136 } else
1137 P4_ESCR_UNMARK_ROW_THREAD(escr);
1138
1282 return 0;
1139 return (0);
1283}
1284
1285/*
1286 * Start a PMC
1287 */
1288
1289static int
1290p4_start_pmc(int cpu, int ri)
1291{
1292 int rc;
1140}
1141
1142/*
1143 * Start a PMC
1144 */
1145
1146static int
1147p4_start_pmc(int cpu, int ri)
1148{
1149 int rc;
1293 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1294 struct pmc *pm;
1295 struct p4_cpu *pc;
1150 struct pmc *pm;
1151 struct p4_cpu *pc;
1296 struct pmc_hw *phw;
1297 struct p4pmc_descr *pd;
1152 struct p4pmc_descr *pd;
1153 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1298
1299 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1300 ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1301 KASSERT(ri >= 0 && ri < P4_NPMCS,
1302 ("[p4,%d] illegal row-index %d", __LINE__, ri));
1303
1154
1155 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1156 ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1157 KASSERT(ri >= 0 && ri < P4_NPMCS,
1158 ("[p4,%d] illegal row-index %d", __LINE__, ri));
1159
1304 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1305 phw = pc->pc_hwpmcs[ri];
1306 pm = phw->phw_pmc;
1307 pd = &p4_pmcdesc[ri];
1160 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1161 pm = pc->pc_p4pmcs[ri].phw_pmc;
1162 pd = &p4_pmcdesc[ri];
1308
1309 KASSERT(pm != NULL,
1163
1164 KASSERT(pm != NULL,
1310 ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__,
1311 cpu, ri));
1165 ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, cpu, ri));
1312
1313 PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);
1314
1166
1167 PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);
1168
1315 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) /* TSC are always on */
1316 return 0;
1317
1318 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
1319 ("[p4,%d] wrong PMC class %d", __LINE__,
1320 pd->pm_descr.pd_class));
1321
1322 /* retrieve the desired CCCR/ESCR values from the PMC */
1323 cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
1324 escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
1325 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
1326
1327 /* extract and zero the logical processor selection bits */
1328 cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
1329 escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1330 cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
1331 escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1332
1333 if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */
1334 cccrtbits <<= 1;
1335 escrtbits >>= 2;
1336 }
1337
1338 /* start system mode PMCs directly */
1339 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1340 wrmsr(escrmsr, escrvalue | escrtbits);
1341 wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
1342 return 0;
1343 }
1344
1345 /*
1346 * Thread mode PMCs
1347 *
1348 * On HTT machines, the same PMC could be scheduled on the
1349 * same physical CPU twice (once for each logical CPU), for
1350 * example, if two threads of a multi-threaded process get
1351 * scheduled on the same CPU.
1352 *
1353 */
1354
1355 mtx_lock_spin(&pc->pc_mtx);
1356
1357 rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1358 KASSERT(rc == 0 || rc == 1,
1359 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1360 rc));
1361
1362 if (rc == 0) { /* 1st CPU and the non-HTT case */
1363
1364 KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
1365 ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
1366 cpu, ri, pd->pm_cccr_msr));
1367
1368 /* write out the low 40 bits of the saved value to hardware */
1369 wrmsr(pd->pm_pmc_msr,
1370 P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
1371
1372 } else if (rc == 1) { /* 2nd CPU */
1373
1374 /*
1375 * Stop the PMC and retrieve the CCCR and ESCR values
1376 * from their MSRs, and turn on the additional T[0/1]
1377 * bits for the 2nd CPU.
1378 */
1379
1380 cccrvalue = rdmsr(pd->pm_cccr_msr);
1381 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1382
1383 /* check that the configuration bits read back match the PMC */
1384 KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
1385 (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
1386 ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
1387 "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
1388 cccrvalue & P4_CCCR_Tx_MASK,
1389 pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
1390 KASSERT(cccrvalue & P4_CCCR_ENABLE,
1391 ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
1392 __LINE__, rc, cpu, ri));
1393 KASSERT((cccrvalue & cccrtbits) == 0,
1394 ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d"
1395 "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
1396 cccrvalue, cccrtbits));
1397
1398 escrvalue = rdmsr(escrmsr);
1399
1400 KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
1401 (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
1402 ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
1403 "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
1404 escrvalue & P4_ESCR_Tx_MASK,
1405 pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
1406 KASSERT((escrvalue & escrtbits) == 0,
1407 ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
1408 "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
1409 rc, cpu, ri, escrmsr, escrvalue, escrtbits));
1410 }
1411
1412 /* Enable the correct bits for this CPU. */
1413 escrvalue |= escrtbits;
1414 cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
1415
1416 /* Save HW value at the time of starting hardware */
1417 P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
1418
1419 /* Program the ESCR and CCCR and start the PMC */
1420 wrmsr(escrmsr, escrvalue);
1421 wrmsr(pd->pm_cccr_msr, cccrvalue);
1422
1423 ++rc;
1424 P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1425
1426 mtx_unlock_spin(&pc->pc_mtx);
1427
1428 PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
1429 "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
1430 ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
1431 cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
1432
1169 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
1170 ("[p4,%d] wrong PMC class %d", __LINE__,
1171 pd->pm_descr.pd_class));
1172
1173 /* retrieve the desired CCCR/ESCR values from the PMC */
1174 cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
1175 escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
1176 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
1177
1178 /* extract and zero the logical processor selection bits */
1179 cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
1180 escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1181 cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
1182 escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);
1183
1184 if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */
1185 cccrtbits <<= 1;
1186 escrtbits >>= 2;
1187 }
1188
1189 /* start system mode PMCs directly */
1190 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1191 wrmsr(escrmsr, escrvalue | escrtbits);
1192 wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
1193 return 0;
1194 }
1195
1196 /*
1197 * Thread mode PMCs
1198 *
1199 * On HTT machines, the same PMC could be scheduled on the
1200 * same physical CPU twice (once for each logical CPU), for
1201 * example, if two threads of a multi-threaded process get
1202 * scheduled on the same CPU.
1203 *
1204 */
1205
1206 mtx_lock_spin(&pc->pc_mtx);
1207
1208 rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1209 KASSERT(rc == 0 || rc == 1,
1210 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1211 rc));
1212
1213 if (rc == 0) { /* 1st CPU and the non-HTT case */
1214
1215 KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
1216 ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
1217 cpu, ri, pd->pm_cccr_msr));
1218
1219 /* write out the low 40 bits of the saved value to hardware */
1220 wrmsr(pd->pm_pmc_msr,
1221 P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
1222
1223 } else if (rc == 1) { /* 2nd CPU */
1224
1225 /*
1226 * Stop the PMC and retrieve the CCCR and ESCR values
1227 * from their MSRs, and turn on the additional T[0/1]
1228 * bits for the 2nd CPU.
1229 */
1230
1231 cccrvalue = rdmsr(pd->pm_cccr_msr);
1232 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1233
1234 /* check that the configuration bits read back match the PMC */
1235 KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
1236 (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
1237 ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
1238 "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
1239 cccrvalue & P4_CCCR_Tx_MASK,
1240 pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
1241 KASSERT(cccrvalue & P4_CCCR_ENABLE,
1242 ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
1243 __LINE__, rc, cpu, ri));
1244 KASSERT((cccrvalue & cccrtbits) == 0,
1245 ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d"
1246 "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
1247 cccrvalue, cccrtbits));
1248
1249 escrvalue = rdmsr(escrmsr);
1250
1251 KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
1252 (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
1253 ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
1254 "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
1255 escrvalue & P4_ESCR_Tx_MASK,
1256 pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
1257 KASSERT((escrvalue & escrtbits) == 0,
1258 ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
1259 "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
1260 rc, cpu, ri, escrmsr, escrvalue, escrtbits));
1261 }
1262
1263 /* Enable the correct bits for this CPU. */
1264 escrvalue |= escrtbits;
1265 cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
1266
1267 /* Save HW value at the time of starting hardware */
1268 P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
1269
1270 /* Program the ESCR and CCCR and start the PMC */
1271 wrmsr(escrmsr, escrvalue);
1272 wrmsr(pd->pm_cccr_msr, cccrvalue);
1273
1274 ++rc;
1275 P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1276
1277 mtx_unlock_spin(&pc->pc_mtx);
1278
1279 PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
1280 "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
1281 ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
1282 cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
1283
1433 return 0;
1284 return (0);
1434}
1435
1436/*
1437 * Stop a PMC.
1438 */
1439
1440static int
1441p4_stop_pmc(int cpu, int ri)
1442{
1443 int rc;
1444 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1445 struct pmc *pm;
1446 struct p4_cpu *pc;
1285}
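/*
 * Why "cccrtbits <<= 1" and "escrtbits >>= 2" retarget a PMC at the
 * second logical CPU: per the Intel SDM layout (an assumption here,
 * since these positions are not defined in this hunk), the ESCR carries
 * T0_OS/T0_USR in bits 3/2 and T1_OS/T1_USR in bits 1/0, and the CCCR
 * carries OVF_PMI_T0/OVF_PMI_T1 in bits 26/27.
 */
#include <assert.h>
#include <stdint.h>

#define	ESCR_T0_OS	(1U << 3)
#define	ESCR_T0_USR	(1U << 2)
#define	CCCR_OVF_PMI_T0	(1U << 26)

int
main(void)
{
	uint32_t escrtbits = ESCR_T0_OS | ESCR_T0_USR;
	uint32_t cccrtbits = CCCR_OVF_PMI_T0;

	escrtbits >>= 2;	/* bits 3,2 -> T1_OS,T1_USR in bits 1,0 */
	cccrtbits <<= 1;	/* bit 26 -> OVF_PMI_T1 in bit 27 */
	assert(escrtbits == 0x3);
	assert(cccrtbits == (1U << 27));
	return (0);
}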
1286
1287/*
1288 * Stop a PMC.
1289 */
1290
1291static int
1292p4_stop_pmc(int cpu, int ri)
1293{
1294 int rc;
1295 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
1296 struct pmc *pm;
1297 struct p4_cpu *pc;
1447 struct pmc_hw *phw;
1448 struct p4pmc_descr *pd;
1449 pmc_value_t tmp;
1450
1451 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1452 ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1453 KASSERT(ri >= 0 && ri < P4_NPMCS,
1454 ("[p4,%d] illegal row index %d", __LINE__, ri));
1455
1298 struct p4pmc_descr *pd;
1299 pmc_value_t tmp;
1300
1301 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1302 ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
1303 KASSERT(ri >= 0 && ri < P4_NPMCS,
1304 ("[p4,%d] illegal row index %d", __LINE__, ri));
1305
1456 pd = &p4_pmcdesc[ri];
1306 pd = &p4_pmcdesc[ri];
1307 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1308 pm = pc->pc_p4pmcs[ri].phw_pmc;
1457
1309
1458 if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
1459 return 0;
1460
1461 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1462 phw = pc->pc_hwpmcs[ri];
1463
1464 KASSERT(phw != NULL,
1465 ("[p4,%d] null phw for cpu%d, ri%d", __LINE__, cpu, ri));
1466
1467 pm = phw->phw_pmc;
1468
1469 KASSERT(pm != NULL,
1470 ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));
1471
1472 PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
1473
1474 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1475 wrmsr(pd->pm_cccr_msr,
1476 pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
1310 KASSERT(pm != NULL,
1311 ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));
1312
1313 PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
1314
1315 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
1316 wrmsr(pd->pm_cccr_msr,
1317 pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
1477 return 0;
1318 return (0);
1478 }
1479
1480 /*
1481 * Thread mode PMCs.
1482 *
1483 * On HTT machines, this PMC may be in use by two threads
1484 * running on two logical CPUs. Thus we look at the
1485 * 'runcount' field and only turn off the appropriate T0/T1
1486 * bits (and keep the PMC running) if two logical CPUs were
1487 * using the PMC.
1488 *
1489 */
1490
1491 /* bits to mask */
1492 cccrtbits = P4_CCCR_OVF_PMI_T0;
1493 escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
1494 if (P4_CPU_IS_HTT_SECONDARY(cpu)) {
1495 cccrtbits <<= 1;
1496 escrtbits >>= 2;
1497 }
1498
1499 mtx_lock_spin(&pc->pc_mtx);
1500
1501 rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1502
1503 KASSERT(rc == 2 || rc == 1,
1504 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1505 rc));
1506
1507 --rc;
1508
1509 P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1510
1511 /* Stop this PMC */
1512 cccrvalue = rdmsr(pd->pm_cccr_msr);
1513 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1514
1515 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
1516 escrvalue = rdmsr(escrmsr);
1517
1518 /* The current CPU should be running on this PMC */
1519 KASSERT(escrvalue & escrtbits,
1520 ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
1521 "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
1522 escrvalue, escrtbits));
1523 KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
1524 (cccrvalue & cccrtbits),
1525 ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
1526 "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
1527
1528 /* get the current hardware reading */
1529 tmp = rdmsr(pd->pm_pmc_msr);
1530
1531 if (rc == 1) { /* need to keep the PMC running */
1532 escrvalue &= ~escrtbits;
1533 cccrvalue &= ~cccrtbits;
1534 wrmsr(escrmsr, escrvalue);
1535 wrmsr(pd->pm_cccr_msr, cccrvalue);
1536 }
1537
1538 mtx_unlock_spin(&pc->pc_mtx);
1539
1540 PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
1541 "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
1542 escrvalue, cccrvalue, tmp);
1543
1544 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
1545 tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
1546 else
1547 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
1548
1549 P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
1550
1551 return 0;
1552}
1553
1554/*
1555 * Handle an interrupt.
1556 *
1557 * The hardware sets the CCCR_OVF whenever a counter overflow occurs,
1558 * so the handler examines all 18 CCCR registers, processing the
1559 * counters that have overflowed.
1560 *
1561 * On HTT machines, the CCCR register is shared and will interrupt
1562 * both logical processors if so configured. Thus multiple logical
1563 * CPUs could enter the NMI service routine at the same time. These
1564 * will get serialized using a per-cpu spinlock dedicated for use in
1565 * the NMI handler.
1566 */
1567
1568static int
1569p4_intr(int cpu, struct trapframe *tf)
1570{
1571 uint32_t cccrval, ovf_mask, ovf_partner;
1319 }
1320
1321 /*
1322 * Thread mode PMCs.
1323 *
1324 * On HTT machines, this PMC may be in use by two threads
1325 * running on two logical CPUs. Thus we look at the
1326 * 'runcount' field and only turn off the appropriate T0/T1
1327 * bits (and keep the PMC running) if two logical CPUs were
1328 * using the PMC.
1329 *
1330 */
1331
1332 /* bits to mask */
1333 cccrtbits = P4_CCCR_OVF_PMI_T0;
1334 escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
1335 if (P4_CPU_IS_HTT_SECONDARY(cpu)) {
1336 cccrtbits <<= 1;
1337 escrtbits >>= 2;
1338 }
1339
1340 mtx_lock_spin(&pc->pc_mtx);
1341
1342 rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
1343
1344 KASSERT(rc == 2 || rc == 1,
1345 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
1346 rc));
1347
1348 --rc;
1349
1350 P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
1351
1352 /* Stop this PMC */
1353 cccrvalue = rdmsr(pd->pm_cccr_msr);
1354 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
1355
1356 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
1357 escrvalue = rdmsr(escrmsr);
1358
1359 /* The current CPU should be running on this PMC */
1360 KASSERT(escrvalue & escrtbits,
1361 ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
1362 "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
1363 escrvalue, escrtbits));
1364 KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
1365 (cccrvalue & cccrtbits),
1366 ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
1367 "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
1368
1369 /* get the current hardware reading */
1370 tmp = rdmsr(pd->pm_pmc_msr);
1371
1372 if (rc == 1) { /* need to keep the PMC running */
1373 escrvalue &= ~escrtbits;
1374 cccrvalue &= ~cccrtbits;
1375 wrmsr(escrmsr, escrvalue);
1376 wrmsr(pd->pm_cccr_msr, cccrvalue);
1377 }
1378
1379 mtx_unlock_spin(&pc->pc_mtx);
1380
1381 PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
1382 "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
1383 escrvalue, cccrvalue, tmp);
1384
1385 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
1386 tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
1387 else
1388 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
1389
1390 P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
1391
1392 return 0;
1393}
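/*
 * The stop-side runcount logic above in miniature: with both logical
 * CPUs sharing the counter (runcount == 2), the first stop clears only
 * that CPU's T-bits and leaves the counter enabled for the partner;
 * only the final stop leaves it disabled.  The bit values here are
 * illustrative stand-ins, not the hardware layout.
 */
#include <assert.h>
#include <stdint.h>

#define	ENABLE	(1U << 12)	/* CCCR enable (bit 12 assumed) */
#define	T0BITS	0x1U		/* stand-in for one CPU's T-bits */
#define	T1BITS	0x2U

int
main(void)
{
	uint32_t cccr = ENABLE | T0BITS | T1BITS;
	int runcount = 2;

	if (--runcount == 1) {		/* first stop: partner still counting */
		cccr &= ~T0BITS;
		assert(cccr & ENABLE);
	}
	if (--runcount == 0)		/* final stop: disable outright */
		cccr &= ~(ENABLE | T1BITS);
	assert((cccr & ENABLE) == 0);
	return (0);
}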
1394
1395/*
1396 * Handle an interrupt.
1397 *
1398 * The hardware sets the CCCR_OVF whenever a counter overflow occurs,
1399 * so the handler examines all 18 CCCR registers, processing the
1400 * counters that have overflowed.
1401 *
1402 * On HTT machines, the CCCR register is shared and will interrupt
1403 * both logical processors if so configured. Thus multiple logical
1404 * CPUs could enter the NMI service routine at the same time. These
1405 * will get serialized using a per-cpu spinlock dedicated for use in
1406 * the NMI handler.
1407 */
1408
1409static int
1410p4_intr(int cpu, struct trapframe *tf)
1411{
1412 uint32_t cccrval, ovf_mask, ovf_partner;
1572 int i, did_interrupt, error, ri;
1573 struct pmc_hw *phw;
1413 int did_interrupt, error, ri;
1574 struct p4_cpu *pc;
1575 struct pmc *pm;
1576 pmc_value_t v;
1577
1578 PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
1579 TRAPF_USERMODE(tf));
1580
1414 struct p4_cpu *pc;
1415 struct pmc *pm;
1416 pmc_value_t v;
1417
1418 PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
1419 TRAPF_USERMODE(tf));
1420
1581 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1421 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
1582
1583 ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
1584 P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
1585 ovf_mask |= P4_CCCR_OVF;
1586 if (p4_system_has_htt)
1587 ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
1588 P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
1589 else
1590 ovf_partner = 0;
1591 did_interrupt = 0;
1592
1593 if (p4_system_has_htt)
1594 P4_PCPU_ACQ_INTR_SPINLOCK(pc);
1595
1596 /*
1597 * Loop through all CCCRs, looking for ones that have
1598 * interrupted this CPU.
1599 */
1422
1423 ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
1424 P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
1425 ovf_mask |= P4_CCCR_OVF;
1426 if (p4_system_has_htt)
1427 ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
1428 P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
1429 else
1430 ovf_partner = 0;
1431 did_interrupt = 0;
1432
1433 if (p4_system_has_htt)
1434 P4_PCPU_ACQ_INTR_SPINLOCK(pc);
1435
1436 /*
1437 * Loop through all CCCRs, looking for ones that have
1438 * interrupted this CPU.
1439 */
1600 for (i = 0; i < P4_NPMCS-1; i++) {
1440 for (ri = 0; ri < P4_NPMCS; ri++) {
1601
1441
1602 ri = i + 1; /* row index */
1603
1604 /*
1605 * Check if our partner logical CPU has already marked
1606 * this PMC as having interrupted it. If so, reset
1607 * the flag and process the interrupt, but leave the
1608 * hardware alone.
1609 */
1610 if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
1611 P4_PCPU_SET_INTRFLAG(pc,ri,0);
1612 did_interrupt = 1;
1613
1614 /*
1615 * Ignore de-configured or stopped PMCs.
1616 * Ignore PMCs not in sampling mode.
1617 */
1442 /*
1443 * Check if our partner logical CPU has already marked
1444 * this PMC as having interrupted it. If so, reset
1445 * the flag and process the interrupt, but leave the
1446 * hardware alone.
1447 */
1448 if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
1449 P4_PCPU_SET_INTRFLAG(pc,ri,0);
1450 did_interrupt = 1;
1451
1452 /*
1453 * Ignore de-configured or stopped PMCs.
1454 * Ignore PMCs not in sampling mode.
1455 */
1618 phw = pc->pc_hwpmcs[ri];
1619 pm = phw->phw_pmc;
1456 pm = pc->pc_p4pmcs[ri].phw_pmc;
1620 if (pm == NULL ||
1621 pm->pm_state != PMC_STATE_RUNNING ||
1622 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
1623 continue;
1624 }
1625 (void) pmc_process_interrupt(cpu, pm, tf,
1626 TRAPF_USERMODE(tf));
1627 continue;
1628 }
1629
1630 /*
1631 * Fresh interrupt. Look for the CCCR_OVF bit
1632 * and the OVF_Tx bit for this logical
1633 * processor being set.
1634 */
1457 if (pm == NULL ||
1458 pm->pm_state != PMC_STATE_RUNNING ||
1459 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
1460 continue;
1461 }
1462 (void) pmc_process_interrupt(cpu, pm, tf,
1463 TRAPF_USERMODE(tf));
1464 continue;
1465 }
1466
1467 /*
1468 * Fresh interrupt. Look for the CCCR_OVF bit
1469 * and the OVF_Tx bit for this logical
1470 * processor being set.
1471 */
1635 cccrval = rdmsr(P4_CCCR_MSR_FIRST + i);
1472 cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri);
1636
1637 if ((cccrval & ovf_mask) != ovf_mask)
1638 continue;
1639
1640 /*
1641 * If the other logical CPU would also have been
1642 * interrupted due to the PMC being shared, record
1643 * this fact in the per-cpu saved interrupt flag
1644 * bitmask.
1645 */
1646 if (p4_system_has_htt && (cccrval & ovf_partner))
1647 P4_PCPU_SET_INTRFLAG(pc, ri, 1);
1648
1473
1474 if ((cccrval & ovf_mask) != ovf_mask)
1475 continue;
1476
1477 /*
1478 * If the other logical CPU would also have been
1479 * interrupted due to the PMC being shared, record
1480 * this fact in the per-cpu saved interrupt flag
1481 * bitmask.
1482 */
1483 if (p4_system_has_htt && (cccrval & ovf_partner))
1484 P4_PCPU_SET_INTRFLAG(pc, ri, 1);
1485
1649 v = rdmsr(P4_PERFCTR_MSR_FIRST + i);
1486 v = rdmsr(P4_PERFCTR_MSR_FIRST + ri);
1650
1651 PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);
1652
1653 /* Stop the counter, and reset the overflow bit */
1654 cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
1487
1488 PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);
1489
1490 /* Stop the counter, and reset the overflow bit */
1491 cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
1655 wrmsr(P4_CCCR_MSR_FIRST + i, cccrval);
1492 wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval);
1656
1657 did_interrupt = 1;
1658
1659 /*
1660 * Ignore de-configured or stopped PMCs. Ignore PMCs
1661 * not in sampling mode.
1662 */
1493
1494 did_interrupt = 1;
1495
1496 /*
1497 * Ignore de-configured or stopped PMCs. Ignore PMCs
1498 * not in sampling mode.
1499 */
1663 phw = pc->pc_hwpmcs[ri];
1664 pm = phw->phw_pmc;
1500 pm = pc->pc_p4pmcs[ri].phw_pmc;
1665
1666 if (pm == NULL ||
1667 pm->pm_state != PMC_STATE_RUNNING ||
1668 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
1669 continue;
1670 }
1671
1672 /*
1673 * Process the interrupt. Re-enable the PMC if
1674 * processing was successful.
1675 */
1676 error = pmc_process_interrupt(cpu, pm, tf,
1677 TRAPF_USERMODE(tf));
1678
1679 /*
1680 * Only the first processor executing the NMI handler
1681 * in an HTT pair will restart a PMC, and then
1682 * only if there were no errors.
1683 */
1684 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
1685 pm->pm_sc.pm_reloadcount);
1501
1502 if (pm == NULL ||
1503 pm->pm_state != PMC_STATE_RUNNING ||
1504 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
1505 continue;
1506 }
1507
1508 /*
1509 * Process the interrupt. Re-enable the PMC if
1510 * processing was successful.
1511 */
1512 error = pmc_process_interrupt(cpu, pm, tf,
1513 TRAPF_USERMODE(tf));
1514
1515 /*
1516 * Only the first processor executing the NMI handler
1517 * in an HTT pair will restart a PMC, and then
1518 * only if there were no errors.
1519 */
1520 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
1521 pm->pm_sc.pm_reloadcount);
1686 wrmsr(P4_PERFCTR_MSR_FIRST + i, v);
1522 wrmsr(P4_PERFCTR_MSR_FIRST + ri, v);
1687 if (error == 0)
1523 if (error == 0)
1688 wrmsr(P4_CCCR_MSR_FIRST + i,
1524 wrmsr(P4_CCCR_MSR_FIRST + ri,
1689 cccrval | P4_CCCR_ENABLE);
1690 }
1691
1692 /* allow the other CPU to proceed */
1693 if (p4_system_has_htt)
1694 P4_PCPU_REL_INTR_SPINLOCK(pc);
1695
1696 /*
1697 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
1698 * masked when a PMC interrupts the CPU. We need to unmask
1699 * the interrupt source explicitly.
1700 */
1701
1702 if (did_interrupt)
1703 pmc_x86_lapic_enable_pmc_interrupt();
1704
1705 atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
1706 &pmc_stats.pm_intr_ignored, 1);
1707
1708 return (did_interrupt);
1709}
1710
1711/*
1712 * Describe a CPU's PMC state.
1713 */
1714
1715static int
1716p4_describe(int cpu, int ri, struct pmc_info *pi,
1717 struct pmc **ppmc)
1718{
1719 int error;
1720 size_t copied;
1525 cccrval | P4_CCCR_ENABLE);
1526 }
1527
1528 /* allow the other CPU to proceed */
1529 if (p4_system_has_htt)
1530 P4_PCPU_REL_INTR_SPINLOCK(pc);
1531
1532 /*
1533 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
1534 * masked when a PMC interrupts the CPU. We need to unmask
1535 * the interrupt source explicitly.
1536 */
1537
1538 if (did_interrupt)
1539 pmc_x86_lapic_enable_pmc_interrupt();
1540
1541 atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
1542 &pmc_stats.pm_intr_ignored, 1);
1543
1544 return (did_interrupt);
1545}
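/*
 * The per-logical-CPU overflow test from the handler above, isolated:
 * a counter is "ours" only when both the OVF bit and the OVF_PMI bit
 * routed to this logical CPU are set.  CCCR bit positions (OVF bit 31,
 * OVF_PMI_T0 bit 26, OVF_PMI_T1 bit 27) follow the Intel SDM and are
 * an assumption, not taken from this hunk.
 */
#include <assert.h>
#include <stdint.h>

#define	CCCR_OVF	(1U << 31)
#define	CCCR_OVF_PMI_T0	(1U << 26)
#define	CCCR_OVF_PMI_T1	(1U << 27)

static int
cccr_interrupted_us(uint32_t cccrval, int htt_secondary)
{
	uint32_t ovf_mask = CCCR_OVF |
	    (htt_secondary ? CCCR_OVF_PMI_T1 : CCCR_OVF_PMI_T0);

	return ((cccrval & ovf_mask) == ovf_mask);
}

int
main(void)
{
	uint32_t v = CCCR_OVF | CCCR_OVF_PMI_T0;	/* routed to T0 */

	assert(cccr_interrupted_us(v, 0) == 1);
	assert(cccr_interrupted_us(v, 1) == 0);
	assert(cccr_interrupted_us(CCCR_OVF, 0) == 0);	/* OVF alone: no */
	return (0);
}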
1546
1547/*
1548 * Describe a CPU's PMC state.
1549 */
1550
1551static int
1552p4_describe(int cpu, int ri, struct pmc_info *pi,
1553 struct pmc **ppmc)
1554{
1555 int error;
1556 size_t copied;
1721 struct pmc_hw *phw;
1722 const struct p4pmc_descr *pd;
1723
1724 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1725 ("[p4,%d] illegal CPU %d", __LINE__, cpu));
1726 KASSERT(ri >= 0 && ri < P4_NPMCS,
1727 ("[p4,%d] row-index %d out of range", __LINE__, ri));
1728
1729 PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);
1730
1731 if (P4_CPU_IS_HTT_SECONDARY(cpu))
1557 const struct p4pmc_descr *pd;
1558
1559 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
1560 ("[p4,%d] illegal CPU %d", __LINE__, cpu));
1561 KASSERT(ri >= 0 && ri < P4_NPMCS,
1562 ("[p4,%d] row-index %d out of range", __LINE__, ri));
1563
1564 PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);
1565
1566 if (P4_CPU_IS_HTT_SECONDARY(cpu))
1732 return EINVAL;
1567 return (EINVAL);
1733
1568
1734 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
1735 pd = &p4_pmcdesc[ri];
1736
1737 if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
1569 pd = &p4_pmcdesc[ri];
1570
1571 if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
1738 PMC_NAME_MAX, &copied)) != 0)
1739 return error;
1572 PMC_NAME_MAX, &copied)) != 0)
1573 return (error);
1740
1741 pi->pm_class = pd->pm_descr.pd_class;
1742
1574
1575 pi->pm_class = pd->pm_descr.pd_class;
1576
1743 if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
1577 if (p4_pcpu[cpu]->pc_p4pmcs[ri].phw_state & PMC_PHW_FLAG_IS_ENABLED) {
1744 pi->pm_enabled = TRUE;
1578 pi->pm_enabled = TRUE;
1745 *ppmc = phw->phw_pmc;
1579 *ppmc = p4_pcpu[cpu]->pc_p4pmcs[ri].phw_pmc;
1746 } else {
1747 pi->pm_enabled = FALSE;
1748 *ppmc = NULL;
1749 }
1750
1580 } else {
1581 pi->pm_enabled = FALSE;
1582 *ppmc = NULL;
1583 }
1584
1751 return 0;
1585 return (0);
1752}
1753
1754/*
1755 * Get MSR# for use with RDPMC.
1756 */
1757
1758static int
1759p4_get_msr(int ri, uint32_t *msr)
1760{
1761 KASSERT(ri >= 0 && ri < P4_NPMCS,
1762 ("[p4,%d] ri %d out of range", __LINE__, ri));
1763
1764 *msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;
1765
1766 PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);
1767
1768 return 0;
1769}
1770
1771
1772int
1586}
1587
1588/*
1589 * Get MSR# for use with RDPMC.
1590 */
1591
1592static int
1593p4_get_msr(int ri, uint32_t *msr)
1594{
1595 KASSERT(ri >= 0 && ri < P4_NPMCS,
1596 ("[p4,%d] ri %d out of range", __LINE__, ri));
1597
1598 *msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;
1599
1600 PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);
1601
1602 return 0;
1603}
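/*
 * p4_get_msr() returns the counter's index relative to
 * P4_PERFCTR_MSR_FIRST, which is what the RDPMC instruction expects in
 * %ecx.  A user-space read sketch (i386/amd64, GCC/Clang inline asm),
 * valid only once the kernel has granted RDPMC access via CR4.PCE:
 */
#include <stdint.h>

static inline uint64_t
rdpmc_read(uint32_t idx)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx));
	return (((uint64_t)hi << 32) | lo);
}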
1604
1605
1606int
1773pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
1607pmc_p4_initialize(struct pmc_mdep *md, int ncpus)
1774{
1608{
1609 struct pmc_classdep *pcd;
1775 struct p4_event_descr *pe;
1776
1610 struct p4_event_descr *pe;
1611
1612 KASSERT(md != NULL, ("[p4,%d] md is NULL", __LINE__));
1777 KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0,
1778 ("[p4,%d] Initializing non-intel processor", __LINE__));
1779
1780 PMCDBG(MDP,INI,1, "%s", "p4-initialize");
1781
1613 KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0,
1614 ("[p4,%d] Initializing non-intel processor", __LINE__));
1615
1616 PMCDBG(MDP,INI,1, "%s", "p4-initialize");
1617
1782 switch (pmc_mdep->pmd_cputype) {
1618 /* Allocate space for pointers to per-cpu descriptors. */
1619 p4_pcpu = malloc(sizeof(struct p4_cpu **) * ncpus, M_PMC,
1620 M_ZERO|M_WAITOK);
1621
1622 /* Fill in the class dependent descriptor. */
1623 pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4];
1624
1625 switch (md->pmd_cputype) {
1783 case PMC_CPU_INTEL_PIV:
1784
1626 case PMC_CPU_INTEL_PIV:
1627
1785 pmc_mdep->pmd_npmc = P4_NPMCS;
1786 pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4;
1787 pmc_mdep->pmd_classes[1].pm_caps = P4_PMC_CAPS;
1788 pmc_mdep->pmd_classes[1].pm_width = 40;
1789 pmc_mdep->pmd_nclasspmcs[1] = 18;
1628 pcd->pcd_caps = P4_PMC_CAPS;
1629 pcd->pcd_class = PMC_CLASS_P4;
1630 pcd->pcd_num = P4_NPMCS;
1631 pcd->pcd_ri = md->pmd_npmc;
1632 pcd->pcd_width = 40;
1790
1633
1791 pmc_mdep->pmd_init = p4_init;
1792 pmc_mdep->pmd_cleanup = p4_cleanup;
1793 pmc_mdep->pmd_switch_in = p4_switch_in;
1794 pmc_mdep->pmd_switch_out = p4_switch_out;
1795 pmc_mdep->pmd_read_pmc = p4_read_pmc;
1796 pmc_mdep->pmd_write_pmc = p4_write_pmc;
1797 pmc_mdep->pmd_config_pmc = p4_config_pmc;
1798 pmc_mdep->pmd_get_config = p4_get_config;
1799 pmc_mdep->pmd_allocate_pmc = p4_allocate_pmc;
1800 pmc_mdep->pmd_release_pmc = p4_release_pmc;
1801 pmc_mdep->pmd_start_pmc = p4_start_pmc;
1802 pmc_mdep->pmd_stop_pmc = p4_stop_pmc;
1803 pmc_mdep->pmd_intr = p4_intr;
1804 pmc_mdep->pmd_describe = p4_describe;
1805 pmc_mdep->pmd_get_msr = p4_get_msr; /* i386 */
1634 pcd->pcd_allocate_pmc = p4_allocate_pmc;
1635 pcd->pcd_config_pmc = p4_config_pmc;
1636 pcd->pcd_describe = p4_describe;
1637 pcd->pcd_get_config = p4_get_config;
1638 pcd->pcd_get_msr = p4_get_msr;
1639 pcd->pcd_pcpu_fini = p4_pcpu_fini;
1640 pcd->pcd_pcpu_init = p4_pcpu_init;
1641 pcd->pcd_read_pmc = p4_read_pmc;
1642 pcd->pcd_release_pmc = p4_release_pmc;
1643 pcd->pcd_start_pmc = p4_start_pmc;
1644 pcd->pcd_stop_pmc = p4_stop_pmc;
1645 pcd->pcd_write_pmc = p4_write_pmc;
1806
1646
1807 /* model specific munging */
1647 md->pmd_pcpu_fini = NULL;
1648 md->pmd_pcpu_init = NULL;
1649 md->pmd_intr = p4_intr;
1650 md->pmd_npmc += P4_NPMCS;
1651
1652 /* model specific configuration */
1808 if ((cpu_id & 0xFFF) < 0xF27) {
1809
1810 /*
1811 * On P4 and Xeon with CPUID < (Family 15,
1812 * Model 2, Stepping 7), only one ESCR is
1813 * available for the IOQ_ALLOCATION event.
1814 */
1815
1816 pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
1817 pe->pm_escrs[1] = P4_ESCR_NONE;
1818 }
1819
1820 break;
1821
1822 default:
1823 KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__));
1824 return ENOSYS;
1825 }
1826
1653 if ((cpu_id & 0xFFF) < 0xF27) {
1654
1655 /*
1656 * On P4 and Xeon with CPUID < (Family 15,
1657 * Model 2, Stepping 7), only one ESCR is
1658 * available for the IOQ_ALLOCATION event.
1659 */
1660
1661 pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
1662 pe->pm_escrs[1] = P4_ESCR_NONE;
1663 }
1664
1665 break;
1666
1667 default:
1668 KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__));
1669 return ENOSYS;
1670 }
1671
1827 return 0;
1672 return (0);
1828}
1673}
1674
1675void
1676pmc_p4_finalize(struct pmc_mdep *md)
1677{
1678#if defined(INVARIANTS)
1679 int i, ncpus;
1680#endif
1681
1682 KASSERT(p4_pcpu != NULL,
1683 ("[p4,%d] NULL p4_pcpu", __LINE__));
1684
1685#if defined(INVARIANTS)
1686 ncpus = pmc_cpu_max();
1687 for (i = 0; i < ncpus; i++)
1688 KASSERT(p4_pcpu[i] == NULL, ("[p4,%d] non-null pcpu %d",
1689 __LINE__, i));
1690#endif
1691
1692 free(p4_pcpu, M_PMC);
1693 p4_pcpu = NULL;
1694}