1/*- 2 * Copyright (c) 2003-2007 Joseph Koshy 3 * Copyright (c) 2007 The FreeBSD Foundation 4 * All rights reserved. 5 * 6 * Portions of this software were developed by A. Joseph Koshy under 7 * sponsorship from the FreeBSD Foundation and Google, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/dev/hwpmc/hwpmc_piv.c 322543 2017-08-15 14:21:44Z kib $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <machine/intr_machdep.h>
#if (__FreeBSD_version >= 1100000)
#include <x86/apicvar.h>
#else
#include <machine/apicvar.h>
#endif
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

/*
 * PENTIUM 4 SUPPORT
 *
 * The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs
 * respectively.  Each PMC comprises two model-specific registers:
 * a counter configuration control register (CCCR) and a counter
 * register that holds the actual event counts.
 *
 * Configuring an event requires the use of one of 45 event selection
 * control registers (ESCR).  Events are associated with specific
 * ESCRs.  Each PMC group has a set of ESCRs it can use.
 *
 * - The BPU counter group (4 PMCs) can use the 16 ESCRs:
 *   BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
 *   PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
 *
 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
 *   TC_ESCR{0,1}, TBPU_ESCR{0,1}.
 *
 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
 *   FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
 *   DAC_ESCR{0,1}.
 *
 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
 *   ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
 *
 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
 * present) of a counter group.  Odd-numbered ESCRs can be used with
 * counters 2, 3 and 5 (if present) of a counter group.
 * The 'p4_escrs[]' table describes these restrictions in a form that
 * function 'p4_allocate()' uses for making allocation decisions.
 *
 * SYSTEM-MODE AND THREAD-MODE ALLOCATION
 *
 * In addition to remembering the state of PMC rows
 * ('FREE','STANDALONE', or 'THREAD'), we similarly need to track the
 * state of ESCR rows.  If an ESCR is allocated to a system-mode PMC
 * on a CPU we cannot allocate this to a thread-mode PMC.  On a
 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each
 * CPU is tracked by the pc_escrs[] array.
 *
 * Each system-mode PMC that is using an ESCR records its row-index in
 * the appropriate entry and system-mode allocation attempts check
 * that an ESCR is available using this array.  Process-mode PMCs do
 * not use the pc_escrs[] array, since the ESCR row itself would have
 * been marked as in 'THREAD' mode.
 *
 * HYPERTHREADING SUPPORT
 *
 * When HTT is enabled, the FreeBSD kernel treats the two 'logical'
 * cpus as independent CPUs and can schedule kernel threads on them
 * independently.  However, the two logical CPUs share the same set of
 * PMC resources.  We need to ensure that:
 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
 *   and,
 * - Threads of multi-threaded processes that get scheduled on the same
 *   physical CPU are handled correctly.
 *
 * HTT Detection
 *
 * Not all HTT capable systems will have HTT enabled.  We detect the
 * presence of HTT by detecting if 'p4_init()' was called for a secondary
 * CPU in a HTT pair.
 *
 * Note that hwpmc(4) cannot currently deal with a change in HTT status once
 * loaded.
 *
 * Handling HTT READ / WRITE / START / STOP
 *
 * PMC resources are shared across the CPUs in an HTT pair.  We
 * designate the lower numbered CPU in a HTT pair as the 'primary'
 * CPU.
 * In each primary CPU's state we keep track of a 'runcount'
 * which reflects the number of PMC-using processes that have been
 * scheduled on its secondary CPU.  Process-mode PMC operations will
 * actually 'start' or 'stop' hardware only if these are the first or
 * last processes respectively to use the hardware.  PMC values
 * written by a 'write' operation are saved and are transferred to
 * hardware at PMC 'start' time if the runcount is 0.  If the runcount
 * is greater than 0 at the time of a 'start' operation, we keep track
 * of the actual hardware value at the time of the 'start' operation
 * and use this to adjust the final readings at PMC 'stop' or 'read'
 * time.
 *
 * Execution sequences:
 *
 * Case 1:   CPUx   +...-                 (no overlap)
 *           CPUy         +...-
 *           RC   0 1   0 1   0
 *
 * Case 2:   CPUx   +........-            (partial overlap)
 *           CPUy       +........-
 *           RC   0 1   2    1   0
 *
 * Case 3:   CPUx   +..............-      (fully overlapped)
 *           CPUy       +.....-
 *           RC   0 1   2     1    0
 *
 *     Key:
 *     'CPU[xy]' : one of the two logical processors on a HTT CPU.
 *     'RC'      : run count (#threads per physical core).
 *     '+'       : point in time when a thread is put on a CPU.
 *     '-'       : point in time where a thread is taken off a CPU.
 *
 * Handling HTT CONFIG
 *
 * Different processes attached to the same PMC may get scheduled on
 * the two logical processors in the package.  We keep track of config
 * and de-config operations using the CFGFLAGS fields of the per-physical
 * cpu state.
163 */ 164 165#define P4_PMCS() \ 166 P4_PMC(BPU_COUNTER0) \ 167 P4_PMC(BPU_COUNTER1) \ 168 P4_PMC(BPU_COUNTER2) \ 169 P4_PMC(BPU_COUNTER3) \ 170 P4_PMC(MS_COUNTER0) \ 171 P4_PMC(MS_COUNTER1) \ 172 P4_PMC(MS_COUNTER2) \ 173 P4_PMC(MS_COUNTER3) \ 174 P4_PMC(FLAME_COUNTER0) \ 175 P4_PMC(FLAME_COUNTER1) \ 176 P4_PMC(FLAME_COUNTER2) \ 177 P4_PMC(FLAME_COUNTER3) \ 178 P4_PMC(IQ_COUNTER0) \ 179 P4_PMC(IQ_COUNTER1) \ 180 P4_PMC(IQ_COUNTER2) \ 181 P4_PMC(IQ_COUNTER3) \ 182 P4_PMC(IQ_COUNTER4) \ 183 P4_PMC(IQ_COUNTER5) \ 184 P4_PMC(NONE) 185 186enum pmc_p4pmc { 187#undef P4_PMC 188#define P4_PMC(N) P4_PMC_##N , 189 P4_PMCS() 190}; 191 192/* 193 * P4 ESCR descriptors 194 */ 195 196#define P4_ESCRS() \ 197 P4_ESCR(BSU_ESCR0, 0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 198 P4_ESCR(BSU_ESCR1, 0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 199 P4_ESCR(FSB_ESCR0, 0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 200 P4_ESCR(FSB_ESCR1, 0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 201 P4_ESCR(FIRM_ESCR0, 0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 202 P4_ESCR(FIRM_ESCR1, 0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 203 P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 204 P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 205 P4_ESCR(DAC_ESCR0, 0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 206 P4_ESCR(DAC_ESCR1, 0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 207 P4_ESCR(MOB_ESCR0, 0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 208 P4_ESCR(MOB_ESCR1, 0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 209 P4_ESCR(PMH_ESCR0, 0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 210 P4_ESCR(PMH_ESCR1, 0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 211 P4_ESCR(SAAT_ESCR0, 0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 212 P4_ESCR(SAAT_ESCR1, 0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 213 P4_ESCR(U2L_ESCR0, 0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 214 P4_ESCR(U2L_ESCR1, 0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 215 P4_ESCR(BPU_ESCR0, 0x3B2, BPU_COUNTER0, BPU_COUNTER1, 
NONE) \ 216 P4_ESCR(BPU_ESCR1, 0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 217 P4_ESCR(IS_ESCR0, 0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 218 P4_ESCR(IS_ESCR1, 0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 219 P4_ESCR(ITLB_ESCR0, 0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 220 P4_ESCR(ITLB_ESCR1, 0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 221 P4_ESCR(CRU_ESCR0, 0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 222 P4_ESCR(CRU_ESCR1, 0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 223 P4_ESCR(IQ_ESCR0, 0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 224 P4_ESCR(IQ_ESCR1, 0x3BB, IQ_COUNTER1, IQ_COUNTER3, IQ_COUNTER5) \ 225 P4_ESCR(RAT_ESCR0, 0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 226 P4_ESCR(RAT_ESCR1, 0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 227 P4_ESCR(SSU_ESCR0, 0x3BE, IQ_COUNTER0, IQ_COUNTER2, IQ_COUNTER4) \ 228 P4_ESCR(MS_ESCR0, 0x3C0, MS_COUNTER0, MS_COUNTER1, NONE) \ 229 P4_ESCR(MS_ESCR1, 0x3C1, MS_COUNTER2, MS_COUNTER3, NONE) \ 230 P4_ESCR(TBPU_ESCR0, 0x3C2, MS_COUNTER0, MS_COUNTER1, NONE) \ 231 P4_ESCR(TBPU_ESCR1, 0x3C3, MS_COUNTER2, MS_COUNTER3, NONE) \ 232 P4_ESCR(TC_ESCR0, 0x3C4, MS_COUNTER0, MS_COUNTER1, NONE) \ 233 P4_ESCR(TC_ESCR1, 0x3C5, MS_COUNTER2, MS_COUNTER3, NONE) \ 234 P4_ESCR(IX_ESCR0, 0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 235 P4_ESCR(IX_ESCR1, 0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 236 P4_ESCR(ALF_ESCR0, 0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 237 P4_ESCR(ALF_ESCR1, 0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 238 P4_ESCR(CRU_ESCR2, 0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 239 P4_ESCR(CRU_ESCR3, 0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 240 P4_ESCR(CRU_ESCR4, 0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 241 P4_ESCR(CRU_ESCR5, 0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 242 P4_ESCR(NONE, ~0, NONE, NONE, NONE) 243 244enum pmc_p4escr { 245#define P4_ESCR(N, MSR, P1, P2, P3) P4_ESCR_##N , 246 P4_ESCRS() 247#undef P4_ESCR 248}; 249 250struct pmc_p4escr_descr { 251 const char 
pm_escrname[PMC_NAME_MAX]; 252 u_short pm_escr_msr; 253 const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR]; 254}; 255 256static struct pmc_p4escr_descr p4_escrs[] = 257{ 258#define P4_ESCR(N, MSR, P1, P2, P3) \ 259 { \ 260 .pm_escrname = #N, \ 261 .pm_escr_msr = (MSR), \ 262 .pm_pmcs = \ 263 { \ 264 P4_PMC_##P1, \ 265 P4_PMC_##P2, \ 266 P4_PMC_##P3 \ 267 } \ 268 } , 269 270 P4_ESCRS() 271 272#undef P4_ESCR 273}; 274 275/* 276 * P4 Event descriptor 277 */ 278 279struct p4_event_descr { 280 const enum pmc_event pm_event; 281 const uint32_t pm_escr_eventselect; 282 const uint32_t pm_cccr_select; 283 const char pm_is_ti_event; 284 enum pmc_p4escr pm_escrs[P4_MAX_ESCR_PER_EVENT]; 285}; 286 287static struct p4_event_descr p4_events[] = { 288 289#define P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \ 290 { \ 291 .pm_event = PMC_EV_P4_##NAME, \ 292 .pm_escr_eventselect = (ESCREVENTSEL), \ 293 .pm_cccr_select = (CCCRSEL), \ 294 .pm_is_ti_event = (TI_EVENT), \ 295 .pm_escrs = \ 296 { \ 297 P4_ESCR_##ESCR0, \ 298 P4_ESCR_##ESCR1 \ 299 } \ 300 } 301 302P4_EVDESCR(TC_DELIVER_MODE, 0x01, 0x01, TRUE, TC_ESCR0, TC_ESCR1), 303P4_EVDESCR(BPU_FETCH_REQUEST, 0x03, 0x00, FALSE, BPU_ESCR0, BPU_ESCR1), 304P4_EVDESCR(ITLB_REFERENCE, 0x18, 0x03, FALSE, ITLB_ESCR0, ITLB_ESCR1), 305P4_EVDESCR(MEMORY_CANCEL, 0x02, 0x05, FALSE, DAC_ESCR0, DAC_ESCR1), 306P4_EVDESCR(MEMORY_COMPLETE, 0x08, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 307P4_EVDESCR(LOAD_PORT_REPLAY, 0x04, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 308P4_EVDESCR(STORE_PORT_REPLAY, 0x05, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 309P4_EVDESCR(MOB_LOAD_REPLAY, 0x03, 0x02, FALSE, MOB_ESCR0, MOB_ESCR1), 310P4_EVDESCR(PAGE_WALK_TYPE, 0x01, 0x04, TRUE, PMH_ESCR0, PMH_ESCR1), 311P4_EVDESCR(BSQ_CACHE_REFERENCE, 0x0C, 0x07, FALSE, BSU_ESCR0, BSU_ESCR1), 312P4_EVDESCR(IOQ_ALLOCATION, 0x03, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1), 313P4_EVDESCR(IOQ_ACTIVE_ENTRIES, 0x1A, 0x06, FALSE, FSB_ESCR1, NONE), 314P4_EVDESCR(FSB_DATA_ACTIVITY, 0x17, 0x06, 
TRUE, FSB_ESCR0, FSB_ESCR1), 315P4_EVDESCR(BSQ_ALLOCATION, 0x05, 0x07, FALSE, BSU_ESCR0, NONE), 316P4_EVDESCR(BSQ_ACTIVE_ENTRIES, 0x06, 0x07, FALSE, BSU_ESCR1, NONE), 317 /* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */ 318P4_EVDESCR(SSE_INPUT_ASSIST, 0x34, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 319P4_EVDESCR(PACKED_SP_UOP, 0x08, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 320P4_EVDESCR(PACKED_DP_UOP, 0x0C, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 321P4_EVDESCR(SCALAR_SP_UOP, 0x0A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 322P4_EVDESCR(SCALAR_DP_UOP, 0x0E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 323P4_EVDESCR(64BIT_MMX_UOP, 0x02, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 324P4_EVDESCR(128BIT_MMX_UOP, 0x1A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 325P4_EVDESCR(X87_FP_UOP, 0x04, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 326P4_EVDESCR(X87_SIMD_MOVES_UOP, 0x2E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 327P4_EVDESCR(GLOBAL_POWER_EVENTS, 0x13, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1), 328P4_EVDESCR(TC_MS_XFER, 0x05, 0x00, FALSE, MS_ESCR0, MS_ESCR1), 329P4_EVDESCR(UOP_QUEUE_WRITES, 0x09, 0x00, FALSE, MS_ESCR0, MS_ESCR1), 330P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE, 331 0x05, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1), 332P4_EVDESCR(RETIRED_BRANCH_TYPE, 0x04, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1), 333P4_EVDESCR(RESOURCE_STALL, 0x01, 0x01, FALSE, ALF_ESCR0, ALF_ESCR1), 334P4_EVDESCR(WC_BUFFER, 0x05, 0x05, TRUE, DAC_ESCR0, DAC_ESCR1), 335P4_EVDESCR(B2B_CYCLES, 0x16, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 336P4_EVDESCR(BNR, 0x08, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 337P4_EVDESCR(SNOOP, 0x06, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 338P4_EVDESCR(RESPONSE, 0x04, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 339P4_EVDESCR(FRONT_END_EVENT, 0x08, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 340P4_EVDESCR(EXECUTION_EVENT, 0x0C, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 341P4_EVDESCR(REPLAY_EVENT, 0x09, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 342P4_EVDESCR(INSTR_RETIRED, 0x02, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 343P4_EVDESCR(UOPS_RETIRED, 0x01, 0x04, 
FALSE, CRU_ESCR0, CRU_ESCR1), 344P4_EVDESCR(UOP_TYPE, 0x02, 0x02, FALSE, RAT_ESCR0, RAT_ESCR1), 345P4_EVDESCR(BRANCH_RETIRED, 0x06, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 346P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 347P4_EVDESCR(X87_ASSIST, 0x03, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 348P4_EVDESCR(MACHINE_CLEAR, 0x02, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3) 349 350#undef P4_EVDESCR 351}; 352 353#define P4_EVENT_IS_TI(E) ((E)->pm_is_ti_event == TRUE) 354 355#define P4_NEVENTS (PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1) 356 357/* 358 * P4 PMC descriptors 359 */ 360 361struct p4pmc_descr { 362 struct pmc_descr pm_descr; /* common information */ 363 enum pmc_p4pmc pm_pmcnum; /* PMC number */ 364 uint32_t pm_pmc_msr; /* PERFCTR MSR address */ 365 uint32_t pm_cccr_msr; /* CCCR MSR address */ 366}; 367 368static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = { 369#define P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \ 370 PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \ 371 PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE | \ 372 PMC_CAP_TAGGING | PMC_CAP_CASCADE) 373 374#define P4_PMCDESCR(N, PMC, CCCR) \ 375 { \ 376 .pm_descr = \ 377 { \ 378 .pd_name = #N, \ 379 .pd_class = PMC_CLASS_P4, \ 380 .pd_caps = P4_PMC_CAPS, \ 381 .pd_width = 40 \ 382 }, \ 383 .pm_pmcnum = P4_PMC_##N, \ 384 .pm_cccr_msr = (CCCR), \ 385 .pm_pmc_msr = (PMC) \ 386 } 387 388 P4_PMCDESCR(BPU_COUNTER0, 0x300, 0x360), 389 P4_PMCDESCR(BPU_COUNTER1, 0x301, 0x361), 390 P4_PMCDESCR(BPU_COUNTER2, 0x302, 0x362), 391 P4_PMCDESCR(BPU_COUNTER3, 0x303, 0x363), 392 P4_PMCDESCR(MS_COUNTER0, 0x304, 0x364), 393 P4_PMCDESCR(MS_COUNTER1, 0x305, 0x365), 394 P4_PMCDESCR(MS_COUNTER2, 0x306, 0x366), 395 P4_PMCDESCR(MS_COUNTER3, 0x307, 0x367), 396 P4_PMCDESCR(FLAME_COUNTER0, 0x308, 0x368), 397 P4_PMCDESCR(FLAME_COUNTER1, 0x309, 0x369), 398 P4_PMCDESCR(FLAME_COUNTER2, 0x30A, 0x36A), 399 P4_PMCDESCR(FLAME_COUNTER3, 0x30B, 0x36B), 400 P4_PMCDESCR(IQ_COUNTER0, 0x30C, 
0x36C), 401 P4_PMCDESCR(IQ_COUNTER1, 0x30D, 0x36D), 402 P4_PMCDESCR(IQ_COUNTER2, 0x30E, 0x36E), 403 P4_PMCDESCR(IQ_COUNTER3, 0x30F, 0x36F), 404 P4_PMCDESCR(IQ_COUNTER4, 0x310, 0x370), 405 P4_PMCDESCR(IQ_COUNTER5, 0x311, 0x371), 406 407#undef P4_PMCDESCR 408}; 409 410/* HTT support */ 411#define P4_NHTT 2 /* logical processors/chip */ 412 413static int p4_system_has_htt; 414 415/* 416 * Per-CPU data structure for P4 class CPUs 417 * 418 * [19 struct pmc_hw structures] 419 * [45 ESCRs status bytes] 420 * [per-cpu spin mutex] 421 * [19 flag fields for holding config flags and a runcount] 422 * [19*2 hw value fields] (Thread mode PMC support) 423 * or 424 * [19*2 EIP values] (Sampling mode PMCs) 425 * [19*2 pmc value fields] (Thread mode PMC support)) 426 */ 427 428struct p4_cpu { 429 struct pmc_hw pc_p4pmcs[P4_NPMCS]; 430 char pc_escrs[P4_NESCR]; 431 struct mtx pc_mtx; /* spin lock */ 432 uint32_t pc_intrflag; /* NMI handler flags */ 433 unsigned int pc_intrlock; /* NMI handler spin lock */ 434 unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */ 435 union { 436 pmc_value_t pc_hw[P4_NPMCS * P4_NHTT]; 437 uintptr_t pc_ip[P4_NPMCS * P4_NHTT]; 438 } pc_si; 439 pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT]; 440}; 441 442static struct p4_cpu **p4_pcpu; 443 444#define P4_PCPU_PMC_VALUE(PC,RI,CPU) (PC)->pc_pmc_values[(RI)*((CPU) & 1)] 445#define P4_PCPU_HW_VALUE(PC,RI,CPU) (PC)->pc_si.pc_hw[(RI)*((CPU) & 1)] 446#define P4_PCPU_SAVED_IP(PC,RI,CPU) (PC)->pc_si.pc_ip[(RI)*((CPU) & 1)] 447 448#define P4_PCPU_GET_FLAGS(PC,RI,MASK) ((PC)->pc_flags[(RI)] & (MASK)) 449#define P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do { \ 450 char _tmp; \ 451 _tmp = (PC)->pc_flags[(RI)]; \ 452 _tmp &= ~(MASK); \ 453 _tmp |= (VAL) & (MASK); \ 454 (PC)->pc_flags[(RI)] = _tmp; \ 455} while (0) 456 457#define P4_PCPU_GET_RUNCOUNT(PC,RI) P4_PCPU_GET_FLAGS(PC,RI,0x0F) 458#define P4_PCPU_SET_RUNCOUNT(PC,RI,V) P4_PCPU_SET_FLAGS(PC,RI,0x0F,V) 459 460#define P4_PCPU_GET_CFGFLAGS(PC,RI) 
(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4) 461#define P4_PCPU_SET_CFGFLAGS(PC,RI,C) P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4)) 462 463#define P4_CPU_TO_FLAG(C) (P4_CPU_IS_HTT_SECONDARY(cpu) ? 0x2 : 0x1) 464 465#define P4_PCPU_GET_INTRFLAG(PC,I) ((PC)->pc_intrflag & (1 << (I))) 466#define P4_PCPU_SET_INTRFLAG(PC,I,V) do { \ 467 uint32_t __mask; \ 468 __mask = 1 << (I); \ 469 if ((V)) \ 470 (PC)->pc_intrflag |= __mask; \ 471 else \ 472 (PC)->pc_intrflag &= ~__mask; \ 473 } while (0) 474 475/* 476 * A minimal spin lock implementation for use inside the NMI handler. 477 * 478 * We don't want to use a regular spin lock here, because curthread 479 * may not be consistent at the time the handler is invoked. 480 */ 481#define P4_PCPU_ACQ_INTR_SPINLOCK(PC) do { \ 482 while (!atomic_cmpset_acq_int(&pc->pc_intrlock, 0, 1)) \ 483 ia32_pause(); \ 484 } while (0) 485#define P4_PCPU_REL_INTR_SPINLOCK(PC) \ 486 atomic_store_rel_int(&pc->pc_intrlock, 0); 487 488/* ESCR row disposition */ 489static int p4_escrdisp[P4_NESCR]; 490 491#define P4_ESCR_ROW_DISP_IS_THREAD(E) (p4_escrdisp[(E)] > 0) 492#define P4_ESCR_ROW_DISP_IS_STANDALONE(E) (p4_escrdisp[(E)] < 0) 493#define P4_ESCR_ROW_DISP_IS_FREE(E) (p4_escrdisp[(E)] == 0) 494 495#define P4_ESCR_MARK_ROW_STANDALONE(E) do { \ 496 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\ 497 __LINE__)); \ 498 atomic_add_int(&p4_escrdisp[(E)], -1); \ 499 KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_max_active()), \ 500 ("[p4,%d] row disposition error", __LINE__)); \ 501} while (0) 502 503#define P4_ESCR_UNMARK_ROW_STANDALONE(E) do { \ 504 atomic_add_int(&p4_escrdisp[(E)], 1); \ 505 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\ 506 __LINE__)); \ 507} while (0) 508 509#define P4_ESCR_MARK_ROW_THREAD(E) do { \ 510 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \ 511 __LINE__)); \ 512 atomic_add_int(&p4_escrdisp[(E)], 1); \ 513} while (0) 514 515#define P4_ESCR_UNMARK_ROW_THREAD(E) do { \ 516 
atomic_add_int(&p4_escrdisp[(E)], -1); \ 517 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \ 518 __LINE__)); \ 519} while (0) 520 521#define P4_PMC_IS_STOPPED(cccr) ((rdmsr(cccr) & P4_CCCR_ENABLE) == 0) 522 523#define P4_CPU_IS_HTT_SECONDARY(cpu) \ 524 (p4_system_has_htt ? ((cpu) & 1) : 0) 525#define P4_TO_HTT_PRIMARY(cpu) \ 526 (p4_system_has_htt ? ((cpu) & ~1) : (cpu)) 527 528#define P4_CCCR_Tx_MASK (~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1| \ 529 P4_CCCR_ENABLE|P4_CCCR_OVF)) 530#define P4_ESCR_Tx_MASK (~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS| \ 531 P4_ESCR_T1_USR)) 532 533/* 534 * support routines 535 */ 536 537static struct p4_event_descr * 538p4_find_event(enum pmc_event ev) 539{ 540 int n; 541 542 for (n = 0; n < P4_NEVENTS; n++) 543 if (p4_events[n].pm_event == ev) 544 break; 545 if (n == P4_NEVENTS) 546 return (NULL); 547 return (&p4_events[n]); 548} 549 550/* 551 * Initialize per-cpu state 552 */ 553 554static int 555p4_pcpu_init(struct pmc_mdep *md, int cpu) 556{ 557 char *pescr; 558 int n, first_ri, phycpu; 559 struct pmc_hw *phw; 560 struct p4_cpu *p4c; 561 struct pmc_cpu *pc, *plc; 562 563 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 564 ("[p4,%d] insane cpu number %d", __LINE__, cpu)); 565 566 PMCDBG2(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu, 567 pmc_cpu_is_primary(cpu) != 0); 568 569 first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri; 570 571 /* 572 * The two CPUs in an HT pair share their per-cpu state. 573 * 574 * For HT capable CPUs, we assume that the two logical 575 * processors in the HT pair get two consecutive CPU ids 576 * starting with an even id #. 577 * 578 * The primary CPU (the even numbered CPU of the pair) would 579 * have been initialized prior to the initialization for the 580 * secondary. 
581 */ 582 583 if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) { 584 585 p4_system_has_htt = 1; 586 587 phycpu = P4_TO_HTT_PRIMARY(cpu); 588 pc = pmc_pcpu[phycpu]; 589 plc = pmc_pcpu[cpu]; 590 591 KASSERT(plc != pc, ("[p4,%d] per-cpu config error", __LINE__)); 592 593 PMCDBG3(MDP,INI,1, "p4-init cpu=%d phycpu=%d pc=%p", cpu, 594 phycpu, pc); 595 KASSERT(pc, ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", 596 __LINE__, cpu, phycpu)); 597 598 /* PMCs are shared with the physical CPU. */ 599 for (n = 0; n < P4_NPMCS; n++) 600 plc->pc_hwpmcs[n + first_ri] = 601 pc->pc_hwpmcs[n + first_ri]; 602 603 return (0); 604 } 605 606 p4c = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO); 607 608 if (p4c == NULL) 609 return (ENOMEM); 610 611 pc = pmc_pcpu[cpu]; 612 613 KASSERT(pc != NULL, ("[p4,%d] cpu %d null per-cpu", __LINE__, cpu)); 614 615 p4_pcpu[cpu] = p4c; 616 phw = p4c->pc_p4pmcs; 617 618 for (n = 0; n < P4_NPMCS; n++, phw++) { 619 phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | 620 PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n); 621 phw->phw_pmc = NULL; 622 pc->pc_hwpmcs[n + first_ri] = phw; 623 } 624 625 pescr = p4c->pc_escrs; 626 for (n = 0; n < P4_NESCR; n++) 627 *pescr++ = P4_INVALID_PMC_INDEX; 628 629 mtx_init(&p4c->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN); 630 631 return (0); 632} 633 634/* 635 * Destroy per-cpu state. 
636 */ 637 638static int 639p4_pcpu_fini(struct pmc_mdep *md, int cpu) 640{ 641 int first_ri, i; 642 struct p4_cpu *p4c; 643 struct pmc_cpu *pc; 644 645 PMCDBG1(MDP,INI,0, "p4-cleanup cpu=%d", cpu); 646 647 pc = pmc_pcpu[cpu]; 648 first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri; 649 650 for (i = 0; i < P4_NPMCS; i++) 651 pc->pc_hwpmcs[i + first_ri] = NULL; 652 653 if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) 654 return (0); 655 656 p4c = p4_pcpu[cpu]; 657 658 KASSERT(p4c != NULL, ("[p4,%d] NULL pcpu", __LINE__)); 659 660 /* Turn off all PMCs on this CPU */ 661 for (i = 0; i < P4_NPMCS - 1; i++) 662 wrmsr(P4_CCCR_MSR_FIRST + i, 663 rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE); 664 665 mtx_destroy(&p4c->pc_mtx); 666 667 free(p4c, M_PMC); 668 669 p4_pcpu[cpu] = NULL; 670 671 return (0); 672} 673 674/* 675 * Read a PMC 676 */ 677 678static int 679p4_read_pmc(int cpu, int ri, pmc_value_t *v) 680{ 681 struct pmc *pm; 682 pmc_value_t tmp; 683 struct p4_cpu *pc; 684 enum pmc_mode mode; 685 struct p4pmc_descr *pd; 686 687 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 688 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 689 KASSERT(ri >= 0 && ri < P4_NPMCS, 690 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 691 692 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 693 pm = pc->pc_p4pmcs[ri].phw_pmc; 694 pd = &p4_pmcdesc[ri]; 695 696 KASSERT(pm != NULL, 697 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, cpu, ri)); 698 699 KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm), 700 ("[p4,%d] class mismatch pd %d != id class %d", __LINE__, 701 pd->pm_descr.pd_class, PMC_TO_CLASS(pm))); 702 703 mode = PMC_TO_MODE(pm); 704 705 PMCDBG3(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode); 706 707 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 708 ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class)); 709 710 tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr); 711 712 if (PMC_IS_VIRTUAL_MODE(mode)) { 713 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */ 714 tmp 
+= (P4_PERFCTR_MASK + 1) - 715 P4_PCPU_HW_VALUE(pc,ri,cpu); 716 else 717 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu); 718 tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu); 719 } 720 721 if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */ 722 *v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); 723 else 724 *v = tmp; 725 726 PMCDBG1(MDP,REA,2, "p4-read -> %jx", *v); 727 728 return (0); 729} 730 731/* 732 * Write a PMC 733 */ 734 735static int 736p4_write_pmc(int cpu, int ri, pmc_value_t v) 737{ 738 enum pmc_mode mode; 739 struct pmc *pm; 740 struct p4_cpu *pc; 741 const struct pmc_hw *phw; 742 const struct p4pmc_descr *pd; 743 744 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 745 ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); 746 KASSERT(ri >= 0 && ri < P4_NPMCS, 747 ("[amd,%d] illegal row-index %d", __LINE__, ri)); 748 749 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 750 phw = &pc->pc_p4pmcs[ri]; 751 pm = phw->phw_pmc; 752 pd = &p4_pmcdesc[ri]; 753 754 KASSERT(pm != NULL, 755 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, 756 cpu, ri)); 757 758 mode = PMC_TO_MODE(pm); 759 760 PMCDBG4(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri, 761 mode, v); 762 763 /* 764 * write the PMC value to the register/saved value: for 765 * sampling mode PMCs, the value to be programmed into the PMC 766 * counter is -(C+1) where 'C' is the requested sample rate. 767 */ 768 if (PMC_IS_SAMPLING_MODE(mode)) 769 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v); 770 771 if (PMC_IS_SYSTEM_MODE(mode)) 772 wrmsr(pd->pm_pmc_msr, v); 773 else 774 P4_PCPU_PMC_VALUE(pc,ri,cpu) = v; 775 776 return (0); 777} 778 779/* 780 * Configure a PMC 'pm' on the given CPU and row-index. 781 * 782 * 'pm' may be NULL to indicate de-configuration. 783 * 784 * On HTT systems, a PMC may get configured twice, once for each 785 * "logical" CPU. We track this using the CFGFLAGS field of the 786 * per-cpu state; this field is a bit mask with one bit each for 787 * logical CPUs 0 & 1. 
788 */ 789 790static int 791p4_config_pmc(int cpu, int ri, struct pmc *pm) 792{ 793 struct pmc_hw *phw; 794 struct p4_cpu *pc; 795 int cfgflags, cpuflag; 796 797 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 798 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 799 800 KASSERT(ri >= 0 && ri < P4_NPMCS, 801 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 802 803 PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); 804 805 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 806 phw = &pc->pc_p4pmcs[ri]; 807 808 KASSERT(pm == NULL || phw->phw_pmc == NULL || 809 (p4_system_has_htt && phw->phw_pmc == pm), 810 ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__)); 811 812 mtx_lock_spin(&pc->pc_mtx); 813 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri); 814 815 KASSERT((cfgflags & ~0x3) == 0, 816 ("[p4,%d] illegal cfgflags cfg=%#x on cpu=%d ri=%d", __LINE__, 817 cfgflags, cpu, ri)); 818 819 KASSERT(cfgflags == 0 || phw->phw_pmc, 820 ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count", 821 __LINE__, cpu, ri)); 822 823 cpuflag = P4_CPU_TO_FLAG(cpu); 824 825 if (pm) { /* config */ 826 if (cfgflags == 0) 827 phw->phw_pmc = pm; 828 829 KASSERT(phw->phw_pmc == pm, 830 ("[p4,%d] cpu=%d ri=%d config %p != hw %p", 831 __LINE__, cpu, ri, pm, phw->phw_pmc)); 832 833 cfgflags |= cpuflag; 834 } else { /* unconfig */ 835 cfgflags &= ~cpuflag; 836 837 if (cfgflags == 0) 838 phw->phw_pmc = NULL; 839 } 840 841 KASSERT((cfgflags & ~0x3) == 0, 842 ("[p4,%d] illegal runcount cfg=%#x on cpu=%d ri=%d", __LINE__, 843 cfgflags, cpu, ri)); 844 845 P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags); 846 847 mtx_unlock_spin(&pc->pc_mtx); 848 849 return (0); 850} 851 852/* 853 * Retrieve a configured PMC pointer from hardware state. 
854 */ 855 856static int 857p4_get_config(int cpu, int ri, struct pmc **ppm) 858{ 859 int cfgflags; 860 struct p4_cpu *pc; 861 862 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 863 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 864 KASSERT(ri >= 0 && ri < P4_NPMCS, 865 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 866 867 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 868 869 mtx_lock_spin(&pc->pc_mtx); 870 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri); 871 mtx_unlock_spin(&pc->pc_mtx); 872 873 if (cfgflags & P4_CPU_TO_FLAG(cpu)) 874 *ppm = pc->pc_p4pmcs[ri].phw_pmc; /* PMC config'ed on this CPU */ 875 else 876 *ppm = NULL; 877 878 return 0; 879} 880 881/* 882 * Allocate a PMC. 883 * 884 * The allocation strategy differs between HTT and non-HTT systems. 885 * 886 * The non-HTT case: 887 * - Given the desired event and the PMC row-index, lookup the 888 * list of valid ESCRs for the event. 889 * - For each valid ESCR: 890 * - Check if the ESCR is free and the ESCR row is in a compatible 891 * mode (i.e., system or process)) 892 * - Check if the ESCR is usable with a P4 PMC at the desired row-index. 893 * If everything matches, we determine the appropriate bit values for the 894 * ESCR and CCCR registers. 895 * 896 * The HTT case: 897 * 898 * - Process mode PMCs require special care. The FreeBSD scheduler could 899 * schedule any two processes on the same physical CPU. We need to ensure 900 * that a given PMC row-index is never allocated to two different 901 * PMCs owned by different user-processes. 902 * This is ensured by always allocating a PMC from a 'FREE' PMC row 903 * if the system has HTT active. 904 * - A similar check needs to be done for ESCRs; we do not want two PMCs 905 * using the same ESCR to be scheduled at the same time. Thus ESCR 906 * allocation is also restricted to FREE rows if the system has HTT 907 * enabled. 
908 * - Thirdly, some events are 'thread-independent' terminology, i.e., 909 * the PMC hardware cannot distinguish between events caused by 910 * different logical CPUs. This makes it impossible to assign events 911 * to a given thread of execution. If the system has HTT enabled, 912 * these events are not allowed for process-mode PMCs. 913 */ 914 915static int 916p4_allocate_pmc(int cpu, int ri, struct pmc *pm, 917 const struct pmc_op_pmcallocate *a) 918{ 919 int found, n, m; 920 uint32_t caps, cccrvalue, escrvalue, tflags; 921 enum pmc_p4escr escr; 922 struct p4_cpu *pc; 923 struct p4_event_descr *pevent; 924 const struct p4pmc_descr *pd; 925 926 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 927 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 928 KASSERT(ri >= 0 && ri < P4_NPMCS, 929 ("[p4,%d] illegal row-index value %d", __LINE__, ri)); 930 931 pd = &p4_pmcdesc[ri]; 932 933 PMCDBG4(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x " 934 "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps, 935 pm->pm_caps); 936 937 /* check class */ 938 if (pd->pm_descr.pd_class != a->pm_class) 939 return (EINVAL); 940 941 /* check requested capabilities */ 942 caps = a->pm_caps; 943 if ((pd->pm_descr.pd_caps & caps) != caps) 944 return (EPERM); 945 946 /* 947 * If the system has HTT enabled, and the desired allocation 948 * mode is process-private, and the PMC row disposition is not 949 * FREE (0), decline the allocation. 
950 */ 951 952 if (p4_system_has_htt && 953 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) && 954 pmc_getrowdisp(ri) != 0) 955 return (EBUSY); 956 957 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 958 ("[p4,%d] unknown PMC class %d", __LINE__, 959 pd->pm_descr.pd_class)); 960 961 if (pm->pm_event < PMC_EV_P4_FIRST || 962 pm->pm_event > PMC_EV_P4_LAST) 963 return (EINVAL); 964 965 if ((pevent = p4_find_event(pm->pm_event)) == NULL) 966 return (ESRCH); 967 968 PMCDBG4(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}", 969 pevent->pm_event, pevent->pm_escr_eventselect, 970 pevent->pm_cccr_select, pevent->pm_is_ti_event); 971 972 /* 973 * Some PMC events are 'thread independent'and therefore 974 * cannot be used for process-private modes if HTT is being 975 * used. 976 */ 977 978 if (P4_EVENT_IS_TI(pevent) && 979 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) && 980 p4_system_has_htt) 981 return (EINVAL); 982 983 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 984 985 found = 0; 986 987 /* look for a suitable ESCR for this event */ 988 for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) { 989 if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE) 990 break; /* out of ESCRs */ 991 /* 992 * Check ESCR row disposition. 993 * 994 * If the request is for a system-mode PMC, then the 995 * ESCR row should not be in process-virtual mode, and 996 * should also be free on the current CPU. 997 */ 998 999 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1000 if (P4_ESCR_ROW_DISP_IS_THREAD(escr) || 1001 pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX) 1002 continue; 1003 } 1004 1005 /* 1006 * If the request is for a process-virtual PMC, and if 1007 * HTT is not enabled, we can use an ESCR row that is 1008 * either FREE or already in process mode. 1009 * 1010 * If HTT is enabled, then we need to ensure that a 1011 * given ESCR is never allocated to two PMCS that 1012 * could run simultaneously on the two logical CPUs of 1013 * a CPU package. 
We ensure this be only allocating 1014 * ESCRs from rows marked as 'FREE'. 1015 */ 1016 1017 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { 1018 if (p4_system_has_htt) { 1019 if (!P4_ESCR_ROW_DISP_IS_FREE(escr)) 1020 continue; 1021 } else 1022 if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr)) 1023 continue; 1024 } 1025 1026 /* 1027 * We found a suitable ESCR for this event. Now check if 1028 * this escr can work with the PMC at row-index 'ri'. 1029 */ 1030 1031 for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++) 1032 if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) { 1033 found = 1; 1034 break; 1035 } 1036 } 1037 1038 if (found == 0) 1039 return (ESRCH); 1040 1041 KASSERT((int) escr >= 0 && escr < P4_NESCR, 1042 ("[p4,%d] illegal ESCR value %d", __LINE__, escr)); 1043 1044 /* mark ESCR row mode */ 1045 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1046 pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */ 1047 P4_ESCR_MARK_ROW_STANDALONE(escr); 1048 } else { 1049 KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX, 1050 ("[p4,%d] escr[%d] already in use", __LINE__, escr)); 1051 P4_ESCR_MARK_ROW_THREAD(escr); 1052 } 1053 1054 pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr; 1055 pm->pm_md.pm_p4.pm_p4_escr = escr; 1056 1057 cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select); 1058 escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect); 1059 1060 /* CCCR fields */ 1061 if (caps & PMC_CAP_THRESHOLD) 1062 cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig & 1063 P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE; 1064 1065 if (caps & PMC_CAP_EDGE) 1066 cccrvalue |= P4_CCCR_EDGE; 1067 1068 if (caps & PMC_CAP_INVERT) 1069 cccrvalue |= P4_CCCR_COMPLEMENT; 1070 1071 if (p4_system_has_htt) 1072 cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig & 1073 P4_CCCR_ACTIVE_THREAD_MASK; 1074 else /* no HTT; thread field should be '11b' */ 1075 cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3); 1076 1077 if (caps & PMC_CAP_CASCADE) 1078 cccrvalue |= P4_CCCR_CASCADE; 1079 1080 /* On HTT systems the 
PMI T0 field may get moved to T1 at pmc start */ 1081 if (caps & PMC_CAP_INTERRUPT) 1082 cccrvalue |= P4_CCCR_OVF_PMI_T0; 1083 1084 /* ESCR fields */ 1085 if (caps & PMC_CAP_QUALIFIER) 1086 escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig & 1087 P4_ESCR_EVENT_MASK_MASK; 1088 if (caps & PMC_CAP_TAGGING) 1089 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig & 1090 P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE; 1091 if (caps & PMC_CAP_QUALIFIER) 1092 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig & 1093 P4_ESCR_EVENT_MASK_MASK); 1094 1095 /* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */ 1096 tflags = 0; 1097 if (caps & PMC_CAP_SYSTEM) 1098 tflags |= P4_ESCR_T0_OS; 1099 if (caps & PMC_CAP_USER) 1100 tflags |= P4_ESCR_T0_USR; 1101 if (tflags == 0) 1102 tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1103 escrvalue |= tflags; 1104 1105 pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue; 1106 pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue; 1107 1108 PMCDBG5(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x " 1109 "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select, 1110 cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue); 1111 1112 return (0); 1113} 1114 1115/* 1116 * release a PMC. 
1117 */ 1118 1119static int 1120p4_release_pmc(int cpu, int ri, struct pmc *pm) 1121{ 1122 enum pmc_p4escr escr; 1123 struct p4_cpu *pc; 1124 1125 KASSERT(ri >= 0 && ri < P4_NPMCS, 1126 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 1127 1128 escr = pm->pm_md.pm_p4.pm_p4_escr; 1129 1130 PMCDBG3(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr); 1131 1132 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1133 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1134 1135 KASSERT(pc->pc_p4pmcs[ri].phw_pmc == NULL, 1136 ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri)); 1137 1138 P4_ESCR_UNMARK_ROW_STANDALONE(escr); 1139 KASSERT(pc->pc_escrs[escr] == ri, 1140 ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__, 1141 escr, ri)); 1142 pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */ 1143 } else 1144 P4_ESCR_UNMARK_ROW_THREAD(escr); 1145 1146 return (0); 1147} 1148 1149/* 1150 * Start a PMC 1151 */ 1152 1153static int 1154p4_start_pmc(int cpu, int ri) 1155{ 1156 int rc; 1157 struct pmc *pm; 1158 struct p4_cpu *pc; 1159 struct p4pmc_descr *pd; 1160 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits; 1161 1162 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 1163 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 1164 KASSERT(ri >= 0 && ri < P4_NPMCS, 1165 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 1166 1167 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1168 pm = pc->pc_p4pmcs[ri].phw_pmc; 1169 pd = &p4_pmcdesc[ri]; 1170 1171 KASSERT(pm != NULL, 1172 ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, cpu, ri)); 1173 1174 PMCDBG2(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri); 1175 1176 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 1177 ("[p4,%d] wrong PMC class %d", __LINE__, 1178 pd->pm_descr.pd_class)); 1179 1180 /* retrieve the desired CCCR/ESCR values from the PMC */ 1181 cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue; 1182 escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue; 1183 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr; 1184 1185 /* extract and zero the logical 
processor selection bits */ 1186 cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0; 1187 escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1188 cccrvalue &= ~P4_CCCR_OVF_PMI_T0; 1189 escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1190 1191 if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */ 1192 cccrtbits <<= 1; 1193 escrtbits >>= 2; 1194 } 1195 1196 /* start system mode PMCs directly */ 1197 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1198 wrmsr(escrmsr, escrvalue | escrtbits); 1199 wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE); 1200 return 0; 1201 } 1202 1203 /* 1204 * Thread mode PMCs 1205 * 1206 * On HTT machines, the same PMC could be scheduled on the 1207 * same physical CPU twice (once for each logical CPU), for 1208 * example, if two threads of a multi-threaded process get 1209 * scheduled on the same CPU. 1210 * 1211 */ 1212 1213 mtx_lock_spin(&pc->pc_mtx); 1214 1215 rc = P4_PCPU_GET_RUNCOUNT(pc,ri); 1216 KASSERT(rc == 0 || rc == 1, 1217 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri, 1218 rc)); 1219 1220 if (rc == 0) { /* 1st CPU and the non-HTT case */ 1221 1222 KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr), 1223 ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__, 1224 cpu, ri, pd->pm_cccr_msr)); 1225 1226 /* write out the low 40 bits of the saved value to hardware */ 1227 wrmsr(pd->pm_pmc_msr, 1228 P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK); 1229 1230 } else if (rc == 1) { /* 2nd CPU */ 1231 1232 /* 1233 * Stop the PMC and retrieve the CCCR and ESCR values 1234 * from their MSRs, and turn on the additional T[0/1] 1235 * bits for the 2nd CPU. 
1236 */ 1237 1238 cccrvalue = rdmsr(pd->pm_cccr_msr); 1239 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE); 1240 1241 /* check that the configuration bits read back match the PMC */ 1242 KASSERT((cccrvalue & P4_CCCR_Tx_MASK) == 1243 (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK), 1244 ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d " 1245 "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri, 1246 cccrvalue & P4_CCCR_Tx_MASK, 1247 pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK)); 1248 KASSERT(cccrvalue & P4_CCCR_ENABLE, 1249 ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running", 1250 __LINE__, rc, cpu, ri)); 1251 KASSERT((cccrvalue & cccrtbits) == 0, 1252 ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d" 1253 "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri, 1254 cccrvalue, cccrtbits)); 1255 1256 escrvalue = rdmsr(escrmsr); 1257 1258 KASSERT((escrvalue & P4_ESCR_Tx_MASK) == 1259 (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK), 1260 ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d " 1261 "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri, 1262 escrvalue & P4_ESCR_Tx_MASK, 1263 pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK)); 1264 KASSERT((escrvalue & escrtbits) == 0, 1265 ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d " 1266 "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__, 1267 rc, cpu, ri, escrmsr, escrvalue, escrtbits)); 1268 } 1269 1270 /* Enable the correct bits for this CPU. 
*/ 1271 escrvalue |= escrtbits; 1272 cccrvalue |= cccrtbits | P4_CCCR_ENABLE; 1273 1274 /* Save HW value at the time of starting hardware */ 1275 P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr); 1276 1277 /* Program the ESCR and CCCR and start the PMC */ 1278 wrmsr(escrmsr, escrvalue); 1279 wrmsr(pd->pm_cccr_msr, cccrvalue); 1280 1281 ++rc; 1282 P4_PCPU_SET_RUNCOUNT(pc,ri,rc); 1283 1284 mtx_unlock_spin(&pc->pc_mtx); 1285 1286 PMCDBG6(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d " 1287 "escrmsr=0x%x escrvalue=0x%x", cpu, rc, 1288 ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue); 1289 PMCDBG2(MDP,STA,2,"cccr_config=0x%x v=%jx", 1290 cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu)); 1291 1292 return (0); 1293} 1294 1295/* 1296 * Stop a PMC. 1297 */ 1298 1299static int 1300p4_stop_pmc(int cpu, int ri) 1301{ 1302 int rc; 1303 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits; 1304 struct pmc *pm; 1305 struct p4_cpu *pc; 1306 struct p4pmc_descr *pd; 1307 pmc_value_t tmp; 1308 1309 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 1310 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 1311 KASSERT(ri >= 0 && ri < P4_NPMCS, 1312 ("[p4,%d] illegal row index %d", __LINE__, ri)); 1313 1314 pd = &p4_pmcdesc[ri]; 1315 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1316 pm = pc->pc_p4pmcs[ri].phw_pmc; 1317 1318 KASSERT(pm != NULL, 1319 ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri)); 1320 1321 PMCDBG2(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri); 1322 1323 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1324 wrmsr(pd->pm_cccr_msr, 1325 pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE); 1326 return (0); 1327 } 1328 1329 /* 1330 * Thread mode PMCs. 1331 * 1332 * On HTT machines, this PMC may be in use by two threads 1333 * running on two logical CPUS. Thus we look at the 1334 * 'runcount' field and only turn off the appropriate TO/T1 1335 * bits (and keep the PMC running) if two logical CPUs were 1336 * using the PMC. 
1337 * 1338 */ 1339 1340 /* bits to mask */ 1341 cccrtbits = P4_CCCR_OVF_PMI_T0; 1342 escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR; 1343 if (P4_CPU_IS_HTT_SECONDARY(cpu)) { 1344 cccrtbits <<= 1; 1345 escrtbits >>= 2; 1346 } 1347 1348 mtx_lock_spin(&pc->pc_mtx); 1349 1350 rc = P4_PCPU_GET_RUNCOUNT(pc,ri); 1351 1352 KASSERT(rc == 2 || rc == 1, 1353 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri, 1354 rc)); 1355 1356 --rc; 1357 1358 P4_PCPU_SET_RUNCOUNT(pc,ri,rc); 1359 1360 /* Stop this PMC */ 1361 cccrvalue = rdmsr(pd->pm_cccr_msr); 1362 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE); 1363 1364 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr; 1365 escrvalue = rdmsr(escrmsr); 1366 1367 /* The current CPU should be running on this PMC */ 1368 KASSERT(escrvalue & escrtbits, 1369 ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x " 1370 "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr, 1371 escrvalue, escrtbits)); 1372 KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) || 1373 (cccrvalue & cccrtbits), 1374 ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x " 1375 "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits)); 1376 1377 /* get the current hardware reading */ 1378 tmp = rdmsr(pd->pm_pmc_msr); 1379 1380 if (rc == 1) { /* need to keep the PMC running */ 1381 escrvalue &= ~escrtbits; 1382 cccrvalue &= ~cccrtbits; 1383 wrmsr(escrmsr, escrvalue); 1384 wrmsr(pd->pm_cccr_msr, cccrvalue); 1385 } 1386 1387 mtx_unlock_spin(&pc->pc_mtx); 1388 1389 PMCDBG5(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x " 1390 "escrval=0x%x", cpu, rc, ri, escrmsr, escrvalue); 1391 PMCDBG2(MDP,STO,2, "cccrval=0x%x v=%jx", cccrvalue, tmp); 1392 1393 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */ 1394 tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu); 1395 else 1396 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu); 1397 1398 P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp; 1399 1400 return 0; 1401} 1402 1403/* 1404 * Handle an interrupt. 
1405 * 1406 * The hardware sets the CCCR_OVF whenever a counter overflow occurs, 1407 * so the handler examines all the 18 CCCR registers, processing the 1408 * counters that have overflowed. 1409 * 1410 * On HTT machines, the CCCR register is shared and will interrupt 1411 * both logical processors if so configured. Thus multiple logical 1412 * CPUs could enter the NMI service routine at the same time. These 1413 * will get serialized using a per-cpu spinlock dedicated for use in 1414 * the NMI handler. 1415 */ 1416 1417static int 1418p4_intr(int cpu, struct trapframe *tf) 1419{ 1420 uint32_t cccrval, ovf_mask, ovf_partner; 1421 int did_interrupt, error, ri; 1422 struct p4_cpu *pc; 1423 struct pmc *pm; 1424 pmc_value_t v; 1425 1426 PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf, 1427 TRAPF_USERMODE(tf)); 1428 1429 pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1430 1431 ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ? 1432 P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0; 1433 ovf_mask |= P4_CCCR_OVF; 1434 if (p4_system_has_htt) 1435 ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ? 1436 P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1; 1437 else 1438 ovf_partner = 0; 1439 did_interrupt = 0; 1440 1441 if (p4_system_has_htt) 1442 P4_PCPU_ACQ_INTR_SPINLOCK(pc); 1443 1444 /* 1445 * Loop through all CCCRs, looking for ones that have 1446 * interrupted this CPU. 1447 */ 1448 for (ri = 0; ri < P4_NPMCS; ri++) { 1449 1450 /* 1451 * Check if our partner logical CPU has already marked 1452 * this PMC has having interrupted it. If so, reset 1453 * the flag and process the interrupt, but leave the 1454 * hardware alone. 1455 */ 1456 if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) { 1457 P4_PCPU_SET_INTRFLAG(pc,ri,0); 1458 did_interrupt = 1; 1459 1460 /* 1461 * Ignore de-configured or stopped PMCs. 1462 * Ignore PMCs not in sampling mode. 
1463 */ 1464 pm = pc->pc_p4pmcs[ri].phw_pmc; 1465 if (pm == NULL || 1466 pm->pm_state != PMC_STATE_RUNNING || 1467 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { 1468 continue; 1469 } 1470 (void) pmc_process_interrupt(cpu, PMC_HR, pm, tf, 1471 TRAPF_USERMODE(tf)); 1472 continue; 1473 } 1474 1475 /* 1476 * Fresh interrupt. Look for the CCCR_OVF bit 1477 * and the OVF_Tx bit for this logical 1478 * processor being set. 1479 */ 1480 cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri); 1481 1482 if ((cccrval & ovf_mask) != ovf_mask) 1483 continue; 1484 1485 /* 1486 * If the other logical CPU would also have been 1487 * interrupted due to the PMC being shared, record 1488 * this fact in the per-cpu saved interrupt flag 1489 * bitmask. 1490 */ 1491 if (p4_system_has_htt && (cccrval & ovf_partner)) 1492 P4_PCPU_SET_INTRFLAG(pc, ri, 1); 1493 1494 v = rdmsr(P4_PERFCTR_MSR_FIRST + ri); 1495 1496 PMCDBG2(MDP,INT, 2, "ri=%d v=%jx", ri, v); 1497 1498 /* Stop the counter, and reset the overflow bit */ 1499 cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE); 1500 wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval); 1501 1502 did_interrupt = 1; 1503 1504 /* 1505 * Ignore de-configured or stopped PMCs. Ignore PMCs 1506 * not in sampling mode. 1507 */ 1508 pm = pc->pc_p4pmcs[ri].phw_pmc; 1509 1510 if (pm == NULL || 1511 pm->pm_state != PMC_STATE_RUNNING || 1512 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { 1513 continue; 1514 } 1515 1516 /* 1517 * Process the interrupt. Re-enable the PMC if 1518 * processing was successful. 1519 */ 1520 error = pmc_process_interrupt(cpu, PMC_HR, pm, tf, 1521 TRAPF_USERMODE(tf)); 1522 1523 /* 1524 * Only the first processor executing the NMI handler 1525 * in a HTT pair will restart a PMC, and that too 1526 * only if there were no errors. 
1527 */ 1528 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE( 1529 pm->pm_sc.pm_reloadcount); 1530 wrmsr(P4_PERFCTR_MSR_FIRST + ri, v); 1531 if (error == 0) 1532 wrmsr(P4_CCCR_MSR_FIRST + ri, 1533 cccrval | P4_CCCR_ENABLE); 1534 } 1535 1536 /* allow the other CPU to proceed */ 1537 if (p4_system_has_htt) 1538 P4_PCPU_REL_INTR_SPINLOCK(pc); 1539 1540 /* 1541 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets 1542 * masked when a PMC interrupts the CPU. We need to unmask 1543 * the interrupt source explicitly. 1544 */ 1545 1546 if (did_interrupt) 1547 lapic_reenable_pmc(); 1548 1549 atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed : 1550 &pmc_stats.pm_intr_ignored, 1); 1551 1552 return (did_interrupt); 1553} 1554 1555/* 1556 * Describe a CPU's PMC state. 1557 */ 1558 1559static int 1560p4_describe(int cpu, int ri, struct pmc_info *pi, 1561 struct pmc **ppmc) 1562{ 1563 int error; 1564 size_t copied; 1565 const struct p4pmc_descr *pd; 1566 1567 KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), 1568 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 1569 KASSERT(ri >= 0 && ri < P4_NPMCS, 1570 ("[p4,%d] row-index %d out of range", __LINE__, ri)); 1571 1572 PMCDBG2(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri); 1573 1574 if (P4_CPU_IS_HTT_SECONDARY(cpu)) 1575 return (EINVAL); 1576 1577 pd = &p4_pmcdesc[ri]; 1578 1579 if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name, 1580 PMC_NAME_MAX, &copied)) != 0) 1581 return (error); 1582 1583 pi->pm_class = pd->pm_descr.pd_class; 1584 1585 if (p4_pcpu[cpu]->pc_p4pmcs[ri].phw_state & PMC_PHW_FLAG_IS_ENABLED) { 1586 pi->pm_enabled = TRUE; 1587 *ppmc = p4_pcpu[cpu]->pc_p4pmcs[ri].phw_pmc; 1588 } else { 1589 pi->pm_enabled = FALSE; 1590 *ppmc = NULL; 1591 } 1592 1593 return (0); 1594} 1595 1596/* 1597 * Get MSR# for use with RDPMC. 
1598 */ 1599 1600static int 1601p4_get_msr(int ri, uint32_t *msr) 1602{ 1603 KASSERT(ri >= 0 && ri < P4_NPMCS, 1604 ("[p4,%d] ri %d out of range", __LINE__, ri)); 1605 1606 *msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST; 1607 1608 PMCDBG2(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr); 1609 1610 return 0; 1611} 1612 1613 1614int 1615pmc_p4_initialize(struct pmc_mdep *md, int ncpus) 1616{ 1617 struct pmc_classdep *pcd; 1618 struct p4_event_descr *pe; 1619 1620 KASSERT(md != NULL, ("[p4,%d] md is NULL", __LINE__)); 1621 KASSERT(cpu_vendor_id == CPU_VENDOR_INTEL, 1622 ("[p4,%d] Initializing non-intel processor", __LINE__)); 1623 1624 PMCDBG0(MDP,INI,1, "p4-initialize"); 1625 1626 /* Allocate space for pointers to per-cpu descriptors. */ 1627 p4_pcpu = malloc(sizeof(*p4_pcpu) * ncpus, M_PMC, M_ZERO | M_WAITOK); 1628 1629 /* Fill in the class dependent descriptor. */ 1630 pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4]; 1631 1632 switch (md->pmd_cputype) { 1633 case PMC_CPU_INTEL_PIV: 1634 1635 pcd->pcd_caps = P4_PMC_CAPS; 1636 pcd->pcd_class = PMC_CLASS_P4; 1637 pcd->pcd_num = P4_NPMCS; 1638 pcd->pcd_ri = md->pmd_npmc; 1639 pcd->pcd_width = 40; 1640 1641 pcd->pcd_allocate_pmc = p4_allocate_pmc; 1642 pcd->pcd_config_pmc = p4_config_pmc; 1643 pcd->pcd_describe = p4_describe; 1644 pcd->pcd_get_config = p4_get_config; 1645 pcd->pcd_get_msr = p4_get_msr; 1646 pcd->pcd_pcpu_fini = p4_pcpu_fini; 1647 pcd->pcd_pcpu_init = p4_pcpu_init; 1648 pcd->pcd_read_pmc = p4_read_pmc; 1649 pcd->pcd_release_pmc = p4_release_pmc; 1650 pcd->pcd_start_pmc = p4_start_pmc; 1651 pcd->pcd_stop_pmc = p4_stop_pmc; 1652 pcd->pcd_write_pmc = p4_write_pmc; 1653 1654 md->pmd_pcpu_fini = NULL; 1655 md->pmd_pcpu_init = NULL; 1656 md->pmd_intr = p4_intr; 1657 md->pmd_npmc += P4_NPMCS; 1658 1659 /* model specific configuration */ 1660 if ((cpu_id & 0xFFF) < 0xF27) { 1661 1662 /* 1663 * On P4 and Xeon with CPUID < (Family 15, 1664 * Model 2, Stepping 7), only one ESCR is 1665 * available for the 
IOQ_ALLOCATION event. 1666 */ 1667 1668 pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION); 1669 pe->pm_escrs[1] = P4_ESCR_NONE; 1670 } 1671 1672 break; 1673 1674 default: 1675 KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__)); 1676 return ENOSYS; 1677 } 1678 1679 return (0); 1680} 1681 1682void 1683pmc_p4_finalize(struct pmc_mdep *md) 1684{ 1685#if defined(INVARIANTS) 1686 int i, ncpus; 1687#endif 1688 1689 KASSERT(p4_pcpu != NULL, 1690 ("[p4,%d] NULL p4_pcpu", __LINE__)); 1691 1692#if defined(INVARIANTS) 1693 ncpus = pmc_cpu_max(); 1694 for (i = 0; i < ncpus; i++) 1695 KASSERT(p4_pcpu[i] == NULL, ("[p4,%d] non-null pcpu %d", 1696 __LINE__, i)); 1697#endif 1698 1699 free(p4_pcpu, M_PMC); 1700 p4_pcpu = NULL; 1701} 1702