hwpmc_piv.c revision 145774
1/*- 2 * Copyright (c) 2003-2005 Joseph Koshy 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_piv.c 145774 2005-05-01 14:11:49Z jkoshy $"); 29 30#include <sys/param.h> 31#include <sys/lock.h> 32#include <sys/mutex.h> 33#include <sys/pmc.h> 34#include <sys/pmckern.h> 35#include <sys/smp.h> 36#include <sys/systm.h> 37 38#include <machine/apicreg.h> 39#include <machine/md_var.h> 40 41/* 42 * PENTIUM 4 SUPPORT 43 * 44 * The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs 45 * respectively. 
 * Each PMC comprises two model specific registers:
 * a counter configuration control register (CCCR) and a counter
 * register that holds the actual event counts.
 *
 * Configuring an event requires the use of one of 45 event selection
 * control registers (ESCR).  Events are associated with specific
 * ESCRs.  Each PMC group has a set of ESCRs it can use.
 *
 * - The BPU counter group (4 PMCs) can use the 16 ESCRs:
 *   BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
 *   PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
 *
 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
 *   TC_ESCR{0,1}, TBPU_ESCR{0,1}.
 *
 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
 *   FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
 *   DAC_ESCR{0,1}.
 *
 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
 *   ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
 *
 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
 * present) of a counter group.  Odd-numbered ESCRs can be used with
 * counters 2, 3 and 5 (if present) of a counter group.  The
 * 'p4_escrs[]' table describes these restrictions in a form that
 * function 'p4_allocate()' uses for making allocation decisions.
 *
 * SYSTEM-MODE AND THREAD-MODE ALLOCATION
 *
 * In addition to remembering the state of PMC rows
 * ('FREE','STANDALONE', or 'THREAD'), we similarly need to track the
 * state of ESCR rows.  If an ESCR is allocated to a system-mode PMC
 * on a CPU we cannot allocate this to a thread-mode PMC.  On a
 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each
 * CPU is tracked by the pc_escrs[] array.
 *
 * Each system-mode PMC that is using an ESCR records its row-index in
 * the appropriate entry and system-mode allocation attempts check
 * that an ESCR is available using this array.
 * Process-mode PMCs do not use the pc_escrs[] array, since the ESCR
 * row itself would have been marked as in 'THREAD' mode.
 *
 * HYPERTHREADING SUPPORT
 *
 * When HTT is enabled, the FreeBSD kernel treats the two 'logical'
 * cpus as independent CPUs and can schedule kernel threads on them
 * independently.  However, the two logical CPUs share the same set of
 * PMC resources.  We need to ensure that:
 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
 *   and,
 * - Threads of multi-threaded processes that get scheduled on the same
 *   physical CPU are handled correctly.
 *
 * HTT Detection
 *
 * Not all HTT capable systems will have HTT enabled since users may
 * have turned HTT support off using the appropriate sysctls
 * (machdep.hlt_logical_cpus or machdep.logical_cpus_mask).  We detect
 * the presence of HTT by remembering if 'p4_init()' was called for a
 * logical CPU.  Note that hwpmc(4) cannot deal with a change in HTT
 * status once it is loaded.
 *
 * Handling HTT READ / WRITE / START / STOP
 *
 * PMC resources are shared across multiple logical CPUs.  In each
 * physical CPU's state we keep track of a 'runcount' which reflects
 * the number of PMC-using processes that have been scheduled on the
 * logical CPUs of this physical CPU.  Process-mode PMC operations
 * will actually 'start' or 'stop' hardware only if these are the
 * first or last processes respectively to use the hardware.  PMC
 * values written by a 'write' operation are saved and are transferred
 * to hardware at PMC 'start' time if the runcount is 0.  If the
 * runcount is greater than 0 at the time of a 'start' operation, we
 * keep track of the actual hardware value at the time of the 'start'
 * operation and use this to adjust the final readings at PMC 'stop'
 * or 'read' time.
122 * 123 * Execution sequences: 124 * 125 * Case 1: CPUx +...- (no overlap) 126 * CPUy +...- 127 * RC 0 1 0 1 0 128 * 129 * Case 2: CPUx +........- (partial overlap) 130 * CPUy +........- 131 * RC 0 1 2 1 0 132 * 133 * Case 3: CPUx +..............- (fully overlapped) 134 * CPUy +.....- 135 * RC 0 1 2 1 0 136 * 137 * Here CPUx and CPUy are one of the two logical processors on a HTT CPU. 138 * 139 * Handling HTT CONFIG 140 * 141 * Different processes attached to the same PMC may get scheduled on 142 * the two logical processors in the package. We keep track of config 143 * and de-config operations using the CFGFLAGS fields of the per-physical 144 * cpu state. 145 */ 146 147#define P4_PMCS() \ 148 P4_PMC(BPU_COUNTER0) \ 149 P4_PMC(BPU_COUNTER1) \ 150 P4_PMC(BPU_COUNTER2) \ 151 P4_PMC(BPU_COUNTER3) \ 152 P4_PMC(MS_COUNTER0) \ 153 P4_PMC(MS_COUNTER1) \ 154 P4_PMC(MS_COUNTER2) \ 155 P4_PMC(MS_COUNTER3) \ 156 P4_PMC(FLAME_COUNTER0) \ 157 P4_PMC(FLAME_COUNTER1) \ 158 P4_PMC(FLAME_COUNTER2) \ 159 P4_PMC(FLAME_COUNTER3) \ 160 P4_PMC(IQ_COUNTER0) \ 161 P4_PMC(IQ_COUNTER1) \ 162 P4_PMC(IQ_COUNTER2) \ 163 P4_PMC(IQ_COUNTER3) \ 164 P4_PMC(IQ_COUNTER4) \ 165 P4_PMC(IQ_COUNTER5) \ 166 P4_PMC(NONE) 167 168enum pmc_p4pmc { 169#undef P4_PMC 170#define P4_PMC(N) P4_PMC_##N , 171 P4_PMCS() 172}; 173 174/* 175 * P4 ESCR descriptors 176 */ 177 178#define P4_ESCRS() \ 179 P4_ESCR(BSU_ESCR0, 0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 180 P4_ESCR(BSU_ESCR1, 0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 181 P4_ESCR(FSB_ESCR0, 0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 182 P4_ESCR(FSB_ESCR1, 0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 183 P4_ESCR(FIRM_ESCR0, 0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 184 P4_ESCR(FIRM_ESCR1, 0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 185 P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 186 P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 187 P4_ESCR(DAC_ESCR0, 0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 188 
P4_ESCR(DAC_ESCR1, 0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 189 P4_ESCR(MOB_ESCR0, 0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 190 P4_ESCR(MOB_ESCR1, 0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 191 P4_ESCR(PMH_ESCR0, 0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 192 P4_ESCR(PMH_ESCR1, 0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 193 P4_ESCR(SAAT_ESCR0, 0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 194 P4_ESCR(SAAT_ESCR1, 0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 195 P4_ESCR(U2L_ESCR0, 0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 196 P4_ESCR(U2L_ESCR1, 0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 197 P4_ESCR(BPU_ESCR0, 0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 198 P4_ESCR(BPU_ESCR1, 0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 199 P4_ESCR(IS_ESCR0, 0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 200 P4_ESCR(IS_ESCR1, 0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 201 P4_ESCR(ITLB_ESCR0, 0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 202 P4_ESCR(ITLB_ESCR1, 0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 203 P4_ESCR(CRU_ESCR0, 0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 204 P4_ESCR(CRU_ESCR1, 0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 205 P4_ESCR(IQ_ESCR0, 0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 206 P4_ESCR(IQ_ESCR1, 0x3BB, IQ_COUNTER1, IQ_COUNTER3, IQ_COUNTER5) \ 207 P4_ESCR(RAT_ESCR0, 0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 208 P4_ESCR(RAT_ESCR1, 0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 209 P4_ESCR(SSU_ESCR0, 0x3BE, IQ_COUNTER0, IQ_COUNTER2, IQ_COUNTER4) \ 210 P4_ESCR(MS_ESCR0, 0x3C0, MS_COUNTER0, MS_COUNTER1, NONE) \ 211 P4_ESCR(MS_ESCR1, 0x3C1, MS_COUNTER2, MS_COUNTER3, NONE) \ 212 P4_ESCR(TBPU_ESCR0, 0x3C2, MS_COUNTER0, MS_COUNTER1, NONE) \ 213 P4_ESCR(TBPU_ESCR1, 0x3C3, MS_COUNTER2, MS_COUNTER3, NONE) \ 214 P4_ESCR(TC_ESCR0, 0x3C4, MS_COUNTER0, MS_COUNTER1, NONE) \ 215 P4_ESCR(TC_ESCR1, 0x3C5, MS_COUNTER2, MS_COUNTER3, NONE) \ 216 P4_ESCR(IX_ESCR0, 0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 217 P4_ESCR(IX_ESCR1, 0x3C9, BPU_COUNTER2, 
BPU_COUNTER3, NONE) \ 218 P4_ESCR(ALF_ESCR0, 0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 219 P4_ESCR(ALF_ESCR1, 0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 220 P4_ESCR(CRU_ESCR2, 0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 221 P4_ESCR(CRU_ESCR3, 0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 222 P4_ESCR(CRU_ESCR4, 0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 223 P4_ESCR(CRU_ESCR5, 0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 224 P4_ESCR(NONE, ~0, NONE, NONE, NONE) 225 226enum pmc_p4escr { 227#define P4_ESCR(N, MSR, P1, P2, P3) P4_ESCR_##N , 228 P4_ESCRS() 229#undef P4_ESCR 230}; 231 232struct pmc_p4escr_descr { 233 const char pm_escrname[PMC_NAME_MAX]; 234 u_short pm_escr_msr; 235 const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR]; 236}; 237 238static struct pmc_p4escr_descr p4_escrs[] = 239{ 240#define P4_ESCR(N, MSR, P1, P2, P3) \ 241 { \ 242 .pm_escrname = #N, \ 243 .pm_escr_msr = (MSR), \ 244 .pm_pmcs = \ 245 { \ 246 P4_PMC_##P1, \ 247 P4_PMC_##P2, \ 248 P4_PMC_##P3 \ 249 } \ 250 } , 251 252 P4_ESCRS() 253 254#undef P4_ESCR 255}; 256 257/* 258 * P4 Event descriptor 259 */ 260 261struct p4_event_descr { 262 const enum pmc_event pm_event; 263 const uint32_t pm_escr_eventselect; 264 const uint32_t pm_cccr_select; 265 const char pm_is_ti_event; 266 enum pmc_p4escr pm_escrs[P4_MAX_ESCR_PER_EVENT]; 267}; 268 269static struct p4_event_descr p4_events[] = { 270 271#define P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \ 272 { \ 273 .pm_event = PMC_EV_P4_##NAME, \ 274 .pm_escr_eventselect = (ESCREVENTSEL), \ 275 .pm_cccr_select = (CCCRSEL), \ 276 .pm_is_ti_event = (TI_EVENT), \ 277 .pm_escrs = \ 278 { \ 279 P4_ESCR_##ESCR0, \ 280 P4_ESCR_##ESCR1 \ 281 } \ 282 } 283 284P4_EVDESCR(TC_DELIVER_MODE, 0x01, 0x01, TRUE, TC_ESCR0, TC_ESCR1), 285P4_EVDESCR(BPU_FETCH_REQUEST, 0x03, 0x00, FALSE, BPU_ESCR0, BPU_ESCR1), 286P4_EVDESCR(ITLB_REFERENCE, 0x18, 0x03, FALSE, ITLB_ESCR0, ITLB_ESCR1), 287P4_EVDESCR(MEMORY_CANCEL, 0x02, 0x05, FALSE, 
DAC_ESCR0, DAC_ESCR1), 288P4_EVDESCR(MEMORY_COMPLETE, 0x08, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 289P4_EVDESCR(LOAD_PORT_REPLAY, 0x04, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 290P4_EVDESCR(STORE_PORT_REPLAY, 0x05, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 291P4_EVDESCR(MOB_LOAD_REPLAY, 0x03, 0x02, FALSE, MOB_ESCR0, MOB_ESCR1), 292P4_EVDESCR(PAGE_WALK_TYPE, 0x01, 0x04, TRUE, PMH_ESCR0, PMH_ESCR1), 293P4_EVDESCR(BSQ_CACHE_REFERENCE, 0x0C, 0x07, FALSE, BSU_ESCR0, BSU_ESCR1), 294P4_EVDESCR(IOQ_ALLOCATION, 0x03, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1), 295P4_EVDESCR(IOQ_ACTIVE_ENTRIES, 0x1A, 0x06, FALSE, FSB_ESCR1, NONE), 296P4_EVDESCR(FSB_DATA_ACTIVITY, 0x17, 0x06, TRUE, FSB_ESCR0, FSB_ESCR1), 297P4_EVDESCR(BSQ_ALLOCATION, 0x05, 0x07, FALSE, BSU_ESCR0, NONE), 298P4_EVDESCR(BSQ_ACTIVE_ENTRIES, 0x06, 0x07, FALSE, BSU_ESCR1, NONE), 299 /* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */ 300P4_EVDESCR(SSE_INPUT_ASSIST, 0x34, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 301P4_EVDESCR(PACKED_SP_UOP, 0x08, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 302P4_EVDESCR(PACKED_DP_UOP, 0x0C, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 303P4_EVDESCR(SCALAR_SP_UOP, 0x0A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 304P4_EVDESCR(SCALAR_DP_UOP, 0x0E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 305P4_EVDESCR(64BIT_MMX_UOP, 0x02, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 306P4_EVDESCR(128BIT_MMX_UOP, 0x1A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 307P4_EVDESCR(X87_FP_UOP, 0x04, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 308P4_EVDESCR(X87_SIMD_MOVES_UOP, 0x2E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 309P4_EVDESCR(GLOBAL_POWER_EVENTS, 0x13, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1), 310P4_EVDESCR(TC_MS_XFER, 0x05, 0x00, FALSE, MS_ESCR0, MS_ESCR1), 311P4_EVDESCR(UOP_QUEUE_WRITES, 0x09, 0x00, FALSE, MS_ESCR0, MS_ESCR1), 312P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE, 313 0x05, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1), 314P4_EVDESCR(RETIRED_BRANCH_TYPE, 0x04, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1), 315P4_EVDESCR(RESOURCE_STALL, 0x01, 0x01, FALSE, ALF_ESCR0, 
ALF_ESCR1), 316P4_EVDESCR(WC_BUFFER, 0x05, 0x05, TRUE, DAC_ESCR0, DAC_ESCR1), 317P4_EVDESCR(B2B_CYCLES, 0x16, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 318P4_EVDESCR(BNR, 0x08, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 319P4_EVDESCR(SNOOP, 0x06, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 320P4_EVDESCR(RESPONSE, 0x04, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 321P4_EVDESCR(FRONT_END_EVENT, 0x08, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 322P4_EVDESCR(EXECUTION_EVENT, 0x0C, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 323P4_EVDESCR(REPLAY_EVENT, 0x09, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 324P4_EVDESCR(INSTR_RETIRED, 0x02, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 325P4_EVDESCR(UOPS_RETIRED, 0x01, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 326P4_EVDESCR(UOP_TYPE, 0x02, 0x02, FALSE, RAT_ESCR0, RAT_ESCR1), 327P4_EVDESCR(BRANCH_RETIRED, 0x06, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 328P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 329P4_EVDESCR(X87_ASSIST, 0x03, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 330P4_EVDESCR(MACHINE_CLEAR, 0x02, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3) 331 332#undef P4_EVDESCR 333}; 334 335#define P4_EVENT_IS_TI(E) ((E)->pm_is_ti_event == TRUE) 336 337#define P4_NEVENTS (PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1) 338 339/* 340 * P4 PMC descriptors 341 */ 342 343struct p4pmc_descr { 344 struct pmc_descr pm_descr; /* common information */ 345 enum pmc_p4pmc pm_pmcnum; /* PMC number */ 346 uint32_t pm_pmc_msr; /* PERFCTR MSR address */ 347 uint32_t pm_cccr_msr; /* CCCR MSR address */ 348}; 349 350static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = { 351 352 /* 353 * TSC descriptor 354 */ 355 356 { 357 .pm_descr = 358 { 359 .pd_name = "TSC", 360 .pd_class = PMC_CLASS_TSC, 361 .pd_caps = PMC_CAP_READ | PMC_CAP_WRITE, 362 .pd_width = 64 363 }, 364 .pm_pmcnum = ~0, 365 .pm_cccr_msr = ~0, 366 .pm_pmc_msr = 0x10, 367 }, 368 369 /* 370 * P4 PMCS 371 */ 372 373#define P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \ 374 PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \ 
375 PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE | \ 376 PMC_CAP_TAGGING | PMC_CAP_CASCADE) 377 378#define P4_PMCDESCR(N, PMC, CCCR) \ 379 { \ 380 .pm_descr = \ 381 { \ 382 .pd_name = #N, \ 383 .pd_class = PMC_CLASS_P4, \ 384 .pd_caps = P4_PMC_CAPS, \ 385 .pd_width = 40 \ 386 }, \ 387 .pm_pmcnum = P4_PMC_##N, \ 388 .pm_cccr_msr = (CCCR), \ 389 .pm_pmc_msr = (PMC) \ 390 } 391 392 P4_PMCDESCR(BPU_COUNTER0, 0x300, 0x360), 393 P4_PMCDESCR(BPU_COUNTER1, 0x301, 0x361), 394 P4_PMCDESCR(BPU_COUNTER2, 0x302, 0x362), 395 P4_PMCDESCR(BPU_COUNTER3, 0x303, 0x363), 396 P4_PMCDESCR(MS_COUNTER0, 0x304, 0x364), 397 P4_PMCDESCR(MS_COUNTER1, 0x305, 0x365), 398 P4_PMCDESCR(MS_COUNTER2, 0x306, 0x366), 399 P4_PMCDESCR(MS_COUNTER3, 0x307, 0x367), 400 P4_PMCDESCR(FLAME_COUNTER0, 0x308, 0x368), 401 P4_PMCDESCR(FLAME_COUNTER1, 0x309, 0x369), 402 P4_PMCDESCR(FLAME_COUNTER2, 0x30A, 0x36A), 403 P4_PMCDESCR(FLAME_COUNTER3, 0x30B, 0x36B), 404 P4_PMCDESCR(IQ_COUNTER0, 0x30C, 0x36C), 405 P4_PMCDESCR(IQ_COUNTER1, 0x30D, 0x36D), 406 P4_PMCDESCR(IQ_COUNTER2, 0x30E, 0x36E), 407 P4_PMCDESCR(IQ_COUNTER3, 0x30F, 0x36F), 408 P4_PMCDESCR(IQ_COUNTER4, 0x310, 0x370), 409 P4_PMCDESCR(IQ_COUNTER5, 0x311, 0x371), 410 411#undef P4_PMCDESCR 412}; 413 414/* HTT support */ 415#define P4_NHTT 2 /* logical processors/chip */ 416#define P4_HTT_CPU_INDEX_0 0 417#define P4_HTT_CPU_INDEX_1 1 418 419static int p4_system_has_htt; 420 421/* 422 * Per-CPU data structure for P4 class CPUs 423 * 424 * [common stuff] 425 * [19 struct pmc_hw pointers] 426 * [19 struct pmc_hw structures] 427 * [45 ESCRs status bytes] 428 * [per-cpu spin mutex] 429 * [19 flag fields for holding config flags and a runcount] 430 * [19*2 hw value fields] (Thread mode PMC support) 431 * or 432 * [19*2 EIP values] (Sampling mode PMCs) 433 * [19*2 pmc value fields] (Thread mode PMC support)) 434 */ 435 436struct p4_cpu { 437 struct pmc_cpu pc_common; 438 struct pmc_hw *pc_hwpmcs[P4_NPMCS]; 439 struct pmc_hw pc_p4pmcs[P4_NPMCS]; 440 char 
pc_escrs[P4_NESCR]; 441 struct mtx pc_mtx; /* spin lock */ 442 unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */ 443 union { 444 pmc_value_t pc_hw[P4_NPMCS * P4_NHTT]; 445 uintptr_t pc_ip[P4_NPMCS * P4_NHTT]; 446 } pc_si; 447 pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT]; 448}; 449 450#define P4_PCPU_PMC_VALUE(PC,RI,CPU) (PC)->pc_pmc_values[(RI)*((CPU) & 1)] 451#define P4_PCPU_HW_VALUE(PC,RI,CPU) (PC)->pc_si.pc_hw[(RI)*((CPU) & 1)] 452#define P4_PCPU_SAVED_IP(PC,RI,CPU) (PC)->pc_si.pc_ip[(RI)*((CPU) & 1)] 453 454#define P4_PCPU_GET_FLAGS(PC,RI,MASK) ((PC)->pc_flags[(RI)] & (MASK)) 455#define P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do { \ 456 char _tmp; \ 457 _tmp = (PC)->pc_flags[(RI)]; \ 458 _tmp &= ~(MASK); \ 459 _tmp |= (VAL) & (MASK); \ 460 (PC)->pc_flags[(RI)] = _tmp; \ 461} while (0) 462 463#define P4_PCPU_GET_RUNCOUNT(PC,RI) P4_PCPU_GET_FLAGS(PC,RI,0x0F) 464#define P4_PCPU_SET_RUNCOUNT(PC,RI,V) P4_PCPU_SET_FLAGS(PC,RI,0x0F,V) 465 466#define P4_PCPU_GET_CFGFLAGS(PC,RI) (P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4) 467#define P4_PCPU_SET_CFGFLAGS(PC,RI,C) P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4)) 468 469#define P4_CPU_TO_FLAG(C) (pmc_cpu_is_logical(cpu) ? 
0x2 : 0x1) 470 471/* ESCR row disposition */ 472static int p4_escrdisp[P4_NESCR]; 473 474#define P4_ESCR_ROW_DISP_IS_THREAD(E) (p4_escrdisp[(E)] > 0) 475#define P4_ESCR_ROW_DISP_IS_STANDALONE(E) (p4_escrdisp[(E)] < 0) 476#define P4_ESCR_ROW_DISP_IS_FREE(E) (p4_escrdisp[(E)] == 0) 477 478#define P4_ESCR_MARK_ROW_STANDALONE(E) do { \ 479 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\ 480 __LINE__)); \ 481 atomic_add_int(&p4_escrdisp[(E)], -1); \ 482 KASSERT(p4_escrdisp[(E)] >= (-mp_ncpus), ("[p4,%d] row " \ 483 "disposition error", __LINE__)); \ 484} while (0) 485 486#define P4_ESCR_UNMARK_ROW_STANDALONE(E) do { \ 487 atomic_add_int(&p4_escrdisp[(E)], 1); \ 488 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\ 489 __LINE__)); \ 490} while (0) 491 492#define P4_ESCR_MARK_ROW_THREAD(E) do { \ 493 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \ 494 __LINE__)); \ 495 atomic_add_int(&p4_escrdisp[(E)], 1); \ 496} while (0) 497 498#define P4_ESCR_UNMARK_ROW_THREAD(E) do { \ 499 atomic_add_int(&p4_escrdisp[(E)], -1); \ 500 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\ 501 __LINE__)); \ 502} while (0) 503 504#define P4_PMC_IS_STOPPED(cccr) ((rdmsr(cccr) & P4_CCCR_ENABLE) == 0) 505 506#define P4_TO_PHYSICAL_CPU(cpu) (pmc_cpu_is_logical(cpu) ? 
\ 507 ((cpu) & ~1) : (cpu)) 508 509#define P4_CCCR_Tx_MASK (~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1| \ 510 P4_CCCR_ENABLE|P4_CCCR_OVF)) 511#define P4_ESCR_Tx_MASK (~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS| \ 512 P4_ESCR_T1_USR)) 513 514/* 515 * support routines 516 */ 517 518static struct p4_event_descr * 519p4_find_event(enum pmc_event ev) 520{ 521 int n; 522 523 for (n = 0; n < P4_NEVENTS; n++) 524 if (p4_events[n].pm_event == ev) 525 break; 526 if (n == P4_NEVENTS) 527 return NULL; 528 return &p4_events[n]; 529} 530 531/* 532 * Initialize per-cpu state 533 */ 534 535static int 536p4_init(int cpu) 537{ 538 int n, phycpu; 539 char *pescr; 540 struct p4_cpu *pcs; 541 struct pmc_hw *phw; 542 543 KASSERT(cpu >= 0 && cpu < mp_ncpus, 544 ("[p4,%d] insane cpu number %d", __LINE__, cpu)); 545 546 PMCDBG(MDP,INI,0, "p4-init cpu=%d logical=%d", cpu, 547 pmc_cpu_is_logical(cpu) != 0); 548 549 /* 550 * A 'logical' CPU shares its per-cpu state with its physical 551 * CPU. The physical CPU would have been initialized prior to 552 * the initialization for this cpu. 
553 */ 554 555 if (pmc_cpu_is_logical(cpu)) { 556 phycpu = P4_TO_PHYSICAL_CPU(cpu); 557 pcs = (struct p4_cpu *) pmc_pcpu[phycpu]; 558 PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pcs=%p", 559 cpu, phycpu, pcs); 560 KASSERT(pcs, 561 ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", __LINE__, 562 cpu, phycpu)); 563 if (pcs == NULL) /* decline to init */ 564 return ENXIO; 565 p4_system_has_htt = 1; 566 pmc_pcpu[cpu] = (struct pmc_cpu *) pcs; 567 return 0; 568 } 569 570 MALLOC(pcs, struct p4_cpu *, sizeof(struct p4_cpu), M_PMC, 571 M_WAITOK|M_ZERO); 572 573 if (pcs == NULL) 574 return ENOMEM; 575 phw = pcs->pc_p4pmcs; 576 577 for (n = 0; n < P4_NPMCS; n++, phw++) { 578 phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | 579 PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n); 580 phw->phw_pmc = NULL; 581 pcs->pc_hwpmcs[n] = phw; 582 } 583 584 /* Mark the TSC as shareable */ 585 pcs->pc_hwpmcs[0]->phw_state |= PMC_PHW_FLAG_IS_SHAREABLE; 586 587 pescr = pcs->pc_escrs; 588 for (n = 0; n < P4_NESCR; n++) 589 *pescr++ = P4_INVALID_PMC_INDEX; 590 pmc_pcpu[cpu] = (struct pmc_cpu *) pcs; 591 592 mtx_init(&pcs->pc_mtx, "p4-pcpu", "pmc", MTX_SPIN); 593 594 return 0; 595} 596 597/* 598 * Destroy per-cpu state. 599 */ 600 601static int 602p4_cleanup(int cpu) 603{ 604 struct p4_cpu *pcs; 605 606 PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu); 607 608 /* 609 * Free up the per-cpu structure for the given cpu if 610 * allocated, and if this is a physical CPU. 611 */ 612 613 if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) != NULL && 614 !pmc_cpu_is_logical(cpu)) { 615 mtx_destroy(&pcs->pc_mtx); 616 FREE(pcs, M_PMC); 617 } 618 619 pmc_pcpu[cpu] = NULL; 620 621 return 0; 622} 623 624/* 625 * Context switch in. 
626 */ 627 628static int 629p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) 630{ 631 (void) pc; 632 633 PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp, 634 (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0); 635 636 /* enable the RDPMC instruction */ 637 if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) 638 load_cr4(rcr4() | CR4_PCE); 639 640 PMCDBG(MDP,SWI,2, "cr4=0x%x", rcr4()); 641 642 return 0; 643} 644 645/* 646 * Context switch out. 647 */ 648 649static int 650p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) 651{ 652 (void) pc; 653 (void) pp; /* can be null */ 654 655 PMCDBG(MDP,SWO,1, "pc=%p pp=%p", pc, pp); 656 657 /* always disallow the RDPMC instruction */ 658 load_cr4(rcr4() & ~CR4_PCE); 659 660 PMCDBG(MDP,SWO,2, "cr4=0x%x", rcr4()); 661 662 return 0; 663} 664 665/* 666 * Read a PMC 667 */ 668 669static int 670p4_read_pmc(int cpu, int ri, pmc_value_t *v) 671{ 672 enum pmc_mode mode; 673 struct p4pmc_descr *pd; 674 struct pmc *pm; 675 struct p4_cpu *pc; 676 struct pmc_hw *phw; 677 pmc_value_t tmp; 678 679 KASSERT(cpu >= 0 && cpu < mp_ncpus, 680 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 681 KASSERT(ri >= 0 && ri < P4_NPMCS, 682 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 683 684 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 685 phw = pc->pc_hwpmcs[ri]; 686 pd = &p4_pmcdesc[ri]; 687 pm = phw->phw_pmc; 688 689 KASSERT(pm != NULL, 690 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, 691 cpu, ri)); 692 693 KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm), 694 ("[p4,%d] class mismatch pd %d != id class %d", __LINE__, 695 pd->pm_descr.pd_class, PMC_TO_CLASS(pm))); 696 697 mode = PMC_TO_MODE(pm); 698 699 PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode); 700 701 if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC) { 702 KASSERT(PMC_IS_COUNTING_MODE(mode), 703 ("[p4,%d] TSC counter in non-counting mode", __LINE__)); 704 *v = rdtsc(); 705 PMCDBG(MDP,REA,2, "p4-read -> %jx", *v); 706 return 0; 707 } 708 709 
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 710 ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class)); 711 712 tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr); 713 714 if (PMC_IS_VIRTUAL_MODE(mode)) { 715 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */ 716 tmp += (P4_PERFCTR_MASK + 1) - 717 P4_PCPU_HW_VALUE(pc,ri,cpu); 718 else 719 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu); 720 tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu); 721 } 722 723 if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */ 724 *v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); 725 else 726 *v = tmp; 727 728 PMCDBG(MDP,REA,2, "p4-read -> %jx", *v); 729 return 0; 730} 731 732/* 733 * Write a PMC 734 */ 735 736static int 737p4_write_pmc(int cpu, int ri, pmc_value_t v) 738{ 739 enum pmc_mode mode; 740 struct pmc *pm; 741 struct p4_cpu *pc; 742 const struct pmc_hw *phw; 743 const struct p4pmc_descr *pd; 744 745 KASSERT(cpu >= 0 && cpu < mp_ncpus, 746 ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); 747 KASSERT(ri >= 0 && ri < P4_NPMCS, 748 ("[amd,%d] illegal row-index %d", __LINE__, ri)); 749 750 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 751 phw = pc->pc_hwpmcs[ri]; 752 pm = phw->phw_pmc; 753 pd = &p4_pmcdesc[ri]; 754 755 KASSERT(pm != NULL, 756 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, 757 cpu, ri)); 758 759 mode = PMC_TO_MODE(pm); 760 761 PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri, 762 mode, v); 763 764 /* 765 * The P4's TSC register is writeable, but we don't allow a 766 * write as changing the TSC's value could interfere with 767 * timekeeping and other system functions. 768 */ 769 if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC) 770 return 0; 771 772 /* 773 * write the PMC value to the register/saved value: for 774 * sampling mode PMCs, the value to be programmed into the PMC 775 * counter is -(C+1) where 'C' is the requested sample rate. 
776 */ 777 if (PMC_IS_SAMPLING_MODE(mode)) 778 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v); 779 780 if (PMC_IS_SYSTEM_MODE(mode)) 781 wrmsr(pd->pm_pmc_msr, v); 782 else 783 P4_PCPU_PMC_VALUE(pc,ri,cpu) = v; 784 785 return 0; 786} 787 788/* 789 * Configure a PMC 'pm' on the given CPU and row-index. 790 * 791 * 'pm' may be NULL to indicate de-configuration. 792 * 793 * On HTT systems, a PMC may get configured twice, once for each 794 * "logical" CPU. We track this using the CFGFLAGS field of the 795 * per-cpu state; this field is a bit mask with one bit each for 796 * logical CPUs 0 & 1. 797 */ 798 799static int 800p4_config_pmc(int cpu, int ri, struct pmc *pm) 801{ 802 struct pmc_hw *phw; 803 struct p4_cpu *pc; 804 int cfgflags, cpuflag; 805 806 KASSERT(cpu >= 0 && cpu < mp_ncpus, 807 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 808 KASSERT(ri >= 0 && ri < P4_NPMCS, 809 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 810 811 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 812 phw = pc->pc_hwpmcs[ri]; 813 814 KASSERT(pm == NULL || phw->phw_pmc == NULL || 815 (p4_system_has_htt && phw->phw_pmc == pm), 816 ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__)); 817 818 mtx_lock_spin(&pc->pc_mtx); 819 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri); 820 821 KASSERT(cfgflags >= 0 || cfgflags <= 3, 822 ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__, 823 cfgflags, cpu, ri)); 824 825 KASSERT(cfgflags == 0 || phw->phw_pmc, 826 ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count", 827 __LINE__, cpu, ri)); 828 829 PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgflags, 830 pm); 831 832 cpuflag = P4_CPU_TO_FLAG(cpu); 833 834 if (pm) { /* config */ 835 if (cfgflags == 0) 836 phw->phw_pmc = pm; 837 838 KASSERT(phw->phw_pmc == pm, 839 ("[p4,%d] cpu=%d ri=%d config %p != hw %p", 840 __LINE__, cpu, ri, pm, phw->phw_pmc)); 841 842 cfgflags |= cpuflag; 843 } else { /* unconfig */ 844 cfgflags &= ~cpuflag; 845 846 if (cfgflags == 0) 847 phw->phw_pmc = 
NULL; 848 } 849 850 KASSERT(cfgflags >= 0 || cfgflags <= 3, 851 ("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__, 852 cfgflags, cpu, ri)); 853 854 P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags); 855 856 mtx_unlock_spin(&pc->pc_mtx); 857 858 return 0; 859} 860 861/* 862 * Retrieve a configured PMC pointer from hardware state. 863 */ 864 865static int 866p4_get_config(int cpu, int ri, struct pmc **ppm) 867{ 868 struct p4_cpu *pc; 869 struct pmc_hw *phw; 870 int cfgflags; 871 872 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 873 phw = pc->pc_hwpmcs[ri]; 874 875 mtx_lock_spin(&pc->pc_mtx); 876 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri); 877 mtx_unlock_spin(&pc->pc_mtx); 878 879 if (cfgflags & P4_CPU_TO_FLAG(cpu)) 880 *ppm = phw->phw_pmc; /* PMC config'ed on this CPU */ 881 else 882 *ppm = NULL; 883 884 return 0; 885} 886 887/* 888 * Allocate a PMC. 889 * 890 * The allocation strategy differs between HTT and non-HTT systems. 891 * 892 * The non-HTT case: 893 * - Given the desired event and the PMC row-index, lookup the 894 * list of valid ESCRs for the event. 895 * - For each valid ESCR: 896 * - Check if the ESCR is free and the ESCR row is in a compatible 897 * mode (i.e., system or process)) 898 * - Check if the ESCR is usable with a P4 PMC at the desired row-index. 899 * If everything matches, we determine the appropriate bit values for the 900 * ESCR and CCCR registers. 901 * 902 * The HTT case: 903 * 904 * - Process mode PMCs require special care. The FreeBSD scheduler could 905 * schedule any two processes on the same physical CPU. We need to ensure 906 * that a given PMC row-index is never allocated to two different 907 * PMCs owned by different user-processes. 908 * This is ensured by always allocating a PMC from a 'FREE' PMC row 909 * if the system has HTT active. 910 * - A similar check needs to be done for ESCRs; we do not want two PMCs 911 * using the same ESCR to be scheduled at the same time. 
Thus ESCR 912 * allocation is also restricted to FREE rows if the system has HTT 913 * enabled. 914 * - Thirdly, some events are 'thread-independent' terminology, i.e., 915 * the PMC hardware cannot distinguish between events caused by 916 * different logical CPUs. This makes it impossible to assign events 917 * to a given thread of execution. If the system has HTT enabled, 918 * these events are not allowed for process-mode PMCs. 919 */ 920 921static int 922p4_allocate_pmc(int cpu, int ri, struct pmc *pm, 923 const struct pmc_op_pmcallocate *a) 924{ 925 int found, n, m; 926 uint32_t caps, cccrvalue, escrvalue, tflags; 927 enum pmc_p4escr escr; 928 struct p4_cpu *pc; 929 struct p4_event_descr *pevent; 930 const struct p4pmc_descr *pd; 931 932 KASSERT(cpu >= 0 && cpu < mp_ncpus, 933 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 934 KASSERT(ri >= 0 && ri < P4_NPMCS, 935 ("[p4,%d] illegal row-index value %d", __LINE__, ri)); 936 937 pd = &p4_pmcdesc[ri]; 938 939 PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x " 940 "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps, 941 pm->pm_caps); 942 943 /* check class */ 944 if (pd->pm_descr.pd_class != a->pm_class) 945 return EINVAL; 946 947 /* check requested capabilities */ 948 caps = a->pm_caps; 949 if ((pd->pm_descr.pd_caps & caps) != caps) 950 return EPERM; 951 952 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) { 953 /* TSC's are always allocated in system-wide counting mode */ 954 if (a->pm_ev != PMC_EV_TSC_TSC || 955 a->pm_mode != PMC_MODE_SC) 956 return EINVAL; 957 return 0; 958 } 959 960 /* 961 * If the system has HTT enabled, and the desired allocation 962 * mode is process-private, and the PMC row disposition is not 963 * FREE (0), decline the allocation. 
964 */ 965 966 if (p4_system_has_htt && 967 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) && 968 pmc_getrowdisp(ri) != 0) 969 return EBUSY; 970 971 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 972 ("[p4,%d] unknown PMC class %d", __LINE__, 973 pd->pm_descr.pd_class)); 974 975 if (pm->pm_event < PMC_EV_P4_FIRST || 976 pm->pm_event > PMC_EV_P4_LAST) 977 return EINVAL; 978 979 if ((pevent = p4_find_event(pm->pm_event)) == NULL) 980 return ESRCH; 981 982 PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}", 983 pevent->pm_event, pevent->pm_escr_eventselect, 984 pevent->pm_cccr_select, pevent->pm_is_ti_event); 985 986 /* 987 * Some PMC events are 'thread independent'and therefore 988 * cannot be used for process-private modes if HTT is being 989 * used. 990 */ 991 992 if (P4_EVENT_IS_TI(pevent) && 993 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) && 994 p4_system_has_htt) 995 return EINVAL; 996 997 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 998 999 found = 0; 1000 1001 /* look for a suitable ESCR for this event */ 1002 for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) { 1003 if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE) 1004 break; /* out of ESCRs */ 1005 /* 1006 * Check ESCR row disposition. 1007 * 1008 * If the request is for a system-mode PMC, then the 1009 * ESCR row should not be in process-virtual mode, and 1010 * should also be free on the current CPU. 1011 */ 1012 1013 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1014 if (P4_ESCR_ROW_DISP_IS_THREAD(escr) || 1015 pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX) 1016 continue; 1017 } 1018 1019 /* 1020 * If the request is for a process-virtual PMC, and if 1021 * HTT is not enabled, we can use an ESCR row that is 1022 * either FREE or already in process mode. 1023 * 1024 * If HTT is enabled, then we need to ensure that a 1025 * given ESCR is never allocated to two PMCS that 1026 * could run simultaneously on the two logical CPUs of 1027 * a CPU package. 
We ensure this be only allocating 1028 * ESCRs from rows marked as 'FREE'. 1029 */ 1030 1031 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { 1032 if (p4_system_has_htt) { 1033 if (!P4_ESCR_ROW_DISP_IS_FREE(escr)) 1034 continue; 1035 } else 1036 if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr)) 1037 continue; 1038 } 1039 1040 /* 1041 * We found a suitable ESCR for this event. Now check if 1042 * this escr can work with the PMC at row-index 'ri'. 1043 */ 1044 1045 for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++) 1046 if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) { 1047 found = 1; 1048 break; 1049 } 1050 } 1051 1052 if (found == 0) 1053 return ESRCH; 1054 1055 KASSERT((int) escr >= 0 && escr < P4_NESCR, 1056 ("[p4,%d] illegal ESCR value %d", __LINE__, escr)); 1057 1058 /* mark ESCR row mode */ 1059 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1060 pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */ 1061 P4_ESCR_MARK_ROW_STANDALONE(escr); 1062 } else { 1063 KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX, 1064 ("[p4,%d] escr[%d] already in use", __LINE__, escr)); 1065 P4_ESCR_MARK_ROW_THREAD(escr); 1066 } 1067 1068 pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr; 1069 pm->pm_md.pm_p4.pm_p4_escr = escr; 1070 1071 cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select); 1072 escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect); 1073 1074 /* CCCR fields */ 1075 if (caps & PMC_CAP_THRESHOLD) 1076 cccrvalue |= (a->pm_p4_cccrconfig & P4_CCCR_THRESHOLD_MASK) | 1077 P4_CCCR_COMPARE; 1078 1079 if (caps & PMC_CAP_EDGE) 1080 cccrvalue |= P4_CCCR_EDGE; 1081 1082 if (caps & PMC_CAP_INVERT) 1083 cccrvalue |= P4_CCCR_COMPLEMENT; 1084 1085 if (p4_system_has_htt) 1086 cccrvalue |= a->pm_p4_cccrconfig & P4_CCCR_ACTIVE_THREAD_MASK; 1087 else /* no HTT; thread field should be '11b' */ 1088 cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3); 1089 1090 if (caps & PMC_CAP_CASCADE) 1091 cccrvalue |= P4_CCCR_CASCADE; 1092 1093 /* On HTT systems the PMI T0 field may get moved to T1 
at pmc start */ 1094 if (caps & PMC_CAP_INTERRUPT) 1095 cccrvalue |= P4_CCCR_OVF_PMI_T0; 1096 1097 /* ESCR fields */ 1098 if (caps & PMC_CAP_QUALIFIER) 1099 escrvalue |= a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK; 1100 if (caps & PMC_CAP_TAGGING) 1101 escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_TAG_VALUE_MASK) | 1102 P4_ESCR_TAG_ENABLE; 1103 if (caps & PMC_CAP_QUALIFIER) 1104 escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK); 1105 1106 /* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */ 1107 tflags = 0; 1108 if (caps & PMC_CAP_SYSTEM) 1109 tflags |= P4_ESCR_T0_OS; 1110 if (caps & PMC_CAP_USER) 1111 tflags |= P4_ESCR_T0_USR; 1112 if (tflags == 0) 1113 tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1114 escrvalue |= tflags; 1115 1116 pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue; 1117 pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue; 1118 1119 PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x " 1120 "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select, 1121 cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue); 1122 1123 return 0; 1124} 1125 1126/* 1127 * release a PMC. 
1128 */ 1129 1130static int 1131p4_release_pmc(int cpu, int ri, struct pmc *pm) 1132{ 1133 enum pmc_p4escr escr; 1134 struct pmc_hw *phw; 1135 struct p4_cpu *pc; 1136 1137 if (p4_pmcdesc[ri].pm_descr.pd_class == PMC_CLASS_TSC) 1138 return 0; 1139 1140 escr = pm->pm_md.pm_p4.pm_p4_escr; 1141 1142 PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr); 1143 1144 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1145 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 1146 phw = pc->pc_hwpmcs[ri]; 1147 1148 KASSERT(phw->phw_pmc == NULL, 1149 ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri)); 1150 1151 P4_ESCR_UNMARK_ROW_STANDALONE(escr); 1152 KASSERT(pc->pc_escrs[escr] == ri, 1153 ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__, 1154 escr, ri)); 1155 pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */ 1156 } else 1157 P4_ESCR_UNMARK_ROW_THREAD(escr); 1158 1159 return 0; 1160} 1161 1162/* 1163 * Start a PMC 1164 */ 1165 1166static int 1167p4_start_pmc(int cpu, int ri) 1168{ 1169 int rc; 1170 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits; 1171 struct pmc *pm; 1172 struct p4_cpu *pc; 1173 struct pmc_hw *phw; 1174 struct p4pmc_descr *pd; 1175 1176 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1177 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 1178 KASSERT(ri >= 0 && ri < P4_NPMCS, 1179 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 1180 1181 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 1182 phw = pc->pc_hwpmcs[ri]; 1183 pm = phw->phw_pmc; 1184 pd = &p4_pmcdesc[ri]; 1185 1186 KASSERT(pm != NULL, 1187 ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, 1188 cpu, ri)); 1189 1190 PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri); 1191 1192 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) /* TSC are always on */ 1193 return 0; 1194 1195 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 1196 ("[p4,%d] wrong PMC class %d", __LINE__, 1197 pd->pm_descr.pd_class)); 1198 1199 /* retrieve the desired CCCR/ESCR values from the 
PMC */ 1200 cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue; 1201 escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue; 1202 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr; 1203 1204 /* extract and zero the logical processor selection bits */ 1205 cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0; 1206 escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1207 cccrvalue &= ~P4_CCCR_OVF_PMI_T0; 1208 escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1209 1210 if (pmc_cpu_is_logical(cpu)) { /* shift T0 bits to T1 position */ 1211 cccrtbits <<= 1; 1212 escrtbits >>= 2; 1213 } 1214 1215 /* start system mode PMCs directly */ 1216 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1217 wrmsr(escrmsr, escrvalue | escrtbits); 1218 wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE); 1219 return 0; 1220 } 1221 1222 /* 1223 * Thread mode PMCs 1224 * 1225 * On HTT machines, the same PMC could be scheduled on the 1226 * same physical CPU twice (once for each logical CPU), for 1227 * example, if two threads of a multi-threaded process get 1228 * scheduled on the same CPU. 1229 * 1230 */ 1231 1232 mtx_lock_spin(&pc->pc_mtx); 1233 1234 rc = P4_PCPU_GET_RUNCOUNT(pc,ri); 1235 KASSERT(rc == 0 || rc == 1, 1236 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri, 1237 rc)); 1238 1239 if (rc == 0) { /* 1st CPU and the non-HTT case */ 1240 1241 KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr), 1242 ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__, 1243 cpu, ri, pd->pm_cccr_msr)); 1244 1245 /* write out the low 40 bits of the saved value to hardware */ 1246 wrmsr(pd->pm_pmc_msr, 1247 P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK); 1248 1249 } else if (rc == 1) { /* 2nd CPU */ 1250 1251 /* 1252 * Stop the PMC and retrieve the CCCR and ESCR values 1253 * from their MSRs, and turn on the additional T[0/1] 1254 * bits for the 2nd CPU. 
1255 */ 1256 1257 cccrvalue = rdmsr(pd->pm_cccr_msr); 1258 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE); 1259 1260 /* check that the configuration bits read back match the PMC */ 1261 KASSERT((cccrvalue & P4_CCCR_Tx_MASK) == 1262 (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK), 1263 ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d " 1264 "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri, 1265 cccrvalue & P4_CCCR_Tx_MASK, 1266 pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK)); 1267 KASSERT(cccrvalue & P4_CCCR_ENABLE, 1268 ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running", 1269 __LINE__, rc, cpu, ri)); 1270 KASSERT((cccrvalue & cccrtbits) == 0, 1271 ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d" 1272 "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri, 1273 cccrvalue, cccrtbits)); 1274 1275 escrvalue = rdmsr(escrmsr); 1276 1277 KASSERT((escrvalue & P4_ESCR_Tx_MASK) == 1278 (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK), 1279 ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d " 1280 "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri, 1281 escrvalue & P4_ESCR_Tx_MASK, 1282 pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK)); 1283 KASSERT((escrvalue & escrtbits) == 0, 1284 ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d " 1285 "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__, 1286 rc, cpu, ri, escrmsr, escrvalue, escrtbits)); 1287 } 1288 1289 /* Enable the correct bits for this CPU. 
*/ 1290 escrvalue |= escrtbits; 1291 cccrvalue |= cccrtbits | P4_CCCR_ENABLE; 1292 1293 /* Save HW value at the time of starting hardware */ 1294 P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr); 1295 1296 /* Program the ESCR and CCCR and start the PMC */ 1297 wrmsr(escrmsr, escrvalue); 1298 wrmsr(pd->pm_cccr_msr, cccrvalue); 1299 1300 ++rc; 1301 P4_PCPU_SET_RUNCOUNT(pc,ri,rc); 1302 1303 mtx_unlock_spin(&pc->pc_mtx); 1304 1305 PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d " 1306 "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc, 1307 ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue, 1308 cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu)); 1309 1310 return 0; 1311} 1312 1313/* 1314 * Stop a PMC. 1315 */ 1316 1317static int 1318p4_stop_pmc(int cpu, int ri) 1319{ 1320 int rc; 1321 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits; 1322 struct pmc *pm; 1323 struct p4_cpu *pc; 1324 struct pmc_hw *phw; 1325 struct p4pmc_descr *pd; 1326 pmc_value_t tmp; 1327 1328 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1329 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 1330 KASSERT(ri >= 0 && ri < P4_NPMCS, 1331 ("[p4,%d] illegal row index %d", __LINE__, ri)); 1332 1333 pd = &p4_pmcdesc[ri]; 1334 1335 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) 1336 return 0; 1337 1338 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)]; 1339 phw = pc->pc_hwpmcs[ri]; 1340 1341 KASSERT(phw != NULL, 1342 ("[p4,%d] null phw for cpu%d, ri%d", __LINE__, cpu, ri)); 1343 1344 pm = phw->phw_pmc; 1345 1346 KASSERT(pm != NULL, 1347 ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri)); 1348 1349 PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri); 1350 1351 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1352 wrmsr(pd->pm_cccr_msr, 1353 pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE); 1354 return 0; 1355 } 1356 1357 /* 1358 * Thread mode PMCs. 1359 * 1360 * On HTT machines, this PMC may be in use by two threads 1361 * running on two logical CPUS. 
Thus we look at the 1362 * 'pm_runcount' field and only turn off the appropriate TO/T1 1363 * bits (and keep the PMC running) if two logical CPUs were 1364 * using the PMC. 1365 * 1366 */ 1367 1368 /* bits to mask */ 1369 cccrtbits = P4_CCCR_OVF_PMI_T0; 1370 escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR; 1371 if (pmc_cpu_is_logical(cpu)) { 1372 cccrtbits <<= 1; 1373 escrtbits >>= 2; 1374 } 1375 1376 mtx_lock_spin(&pc->pc_mtx); 1377 1378 rc = P4_PCPU_GET_RUNCOUNT(pc,ri); 1379 1380 KASSERT(rc == 2 || rc == 1, 1381 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri, 1382 rc)); 1383 1384 --rc; 1385 1386 P4_PCPU_SET_RUNCOUNT(pc,ri,rc); 1387 1388 /* Stop this PMC */ 1389 cccrvalue = rdmsr(pd->pm_cccr_msr); 1390 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE); 1391 1392 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr; 1393 escrvalue = rdmsr(escrmsr); 1394 1395 /* The current CPU should be running on this PMC */ 1396 KASSERT(escrvalue & escrtbits, 1397 ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x " 1398 "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr, 1399 escrvalue, escrtbits)); 1400 KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) || 1401 (cccrvalue & cccrtbits), 1402 ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x " 1403 "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits)); 1404 1405 /* get the current hardware reading */ 1406 tmp = rdmsr(pd->pm_pmc_msr); 1407 1408 if (rc == 1) { /* need to keep the PMC running */ 1409 escrvalue &= ~escrtbits; 1410 cccrvalue &= ~cccrtbits; 1411 wrmsr(escrmsr, escrvalue); 1412 wrmsr(pd->pm_cccr_msr, cccrvalue); 1413 } 1414 1415 mtx_unlock_spin(&pc->pc_mtx); 1416 1417 PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x " 1418 "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr, 1419 escrvalue, cccrvalue, tmp); 1420 1421 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */ 1422 tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu); 1423 else 1424 tmp -= 
P4_PCPU_HW_VALUE(pc,ri,cpu); 1425 1426 P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp; 1427 1428 return 0; 1429} 1430 1431/* 1432 * Handle an interrupt. 1433 * 1434 * The hardware sets the CCCR_OVF whenever a counter overflow occurs, so the handler 1435 * examines all the 18 CCCR registers, processing the counters that have overflowed. 1436 * 1437 * On HTT machines, multiple logical CPUs may try to enter the NMI service 1438 * routine at the same time. 1439 */ 1440 1441extern volatile lapic_t *lapic; 1442 1443static void 1444p4_lapic_enable_pmc_interrupt(void) 1445{ 1446 uint32_t value; 1447 1448 value = lapic->lvt_pcint; 1449 value &= ~APIC_LVT_M; 1450 lapic->lvt_pcint = value; 1451} 1452 1453 1454static int 1455p4_intr(int cpu, uintptr_t eip) 1456{ 1457 int i, pmc_interrupted; 1458 uint32_t cccrval, pmi_ovf_mask; 1459 struct p4_cpu *pc; 1460 struct pmc_hw *phw; 1461 struct pmc *pm; 1462 pmc_value_t v; 1463 1464 (void) eip; 1465 PMCDBG(MDP,INT, 1, "cpu=%d eip=%x pcint=0x%x", cpu, eip, 1466 lapic->lvt_pcint); 1467 1468 pmc_interrupted = 0; 1469 pc = (struct p4_cpu *) pmc_pcpu[cpu]; 1470 1471 pmi_ovf_mask = pmc_cpu_is_logical(cpu) ? 1472 P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0; 1473 pmi_ovf_mask |= P4_CCCR_OVF; 1474 1475 /* 1476 * Loop through all CCCRs, looking for ones that have the 1477 * OVF_PMI bit set for our logical CPU. 1478 */ 1479 1480 for (i = 1; i < P4_NPMCS; i++) { 1481 cccrval = rdmsr(P4_CCCR_MSR_FIRST + i - 1); 1482 1483 if ((cccrval & pmi_ovf_mask) != pmi_ovf_mask) 1484 continue; 1485 1486 v = rdmsr(P4_PERFCTR_MSR_FIRST + i - 1); 1487 1488 pmc_interrupted = 1; 1489 1490 PMCDBG(MDP,INT, 2, "ri=%d v=%jx", i, v); 1491 1492 /* Stop the counter, and turn off the overflow bit */ 1493 cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE); 1494 wrmsr(P4_CCCR_MSR_FIRST + i - 1, cccrval); 1495 1496 phw = pc->pc_hwpmcs[i]; 1497 pm = phw->phw_pmc; 1498 1499 /* 1500 * Ignore de-configured or stopped PMCs. 
1501 * Also ignore counting mode PMCs that may 1502 * have overflowed their counters. 1503 */ 1504 if (pm == NULL || 1505 pm->pm_state != PMC_STATE_RUNNING || 1506 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) 1507 continue; 1508 1509 /* 1510 * If the previous sample hasn't been read yet, the 1511 * sampling interrupt is coming in too fast for the 1512 * rest of the system to cope. Do not re-enable the 1513 * counter. 1514 */ 1515 1516 if (P4_PCPU_SAVED_IP(pc,i,cpu)) { 1517 atomic_add_int(&pmc_stats.pm_intr_ignored, 1); 1518 continue; 1519 } 1520 1521 /* 1522 * write the the reload count and restart the 1523 * hardware. 1524 */ 1525 1526 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE( 1527 pm->pm_sc.pm_reloadcount); 1528 wrmsr(P4_PERFCTR_MSR_FIRST + i - 1, v); 1529 wrmsr(P4_CCCR_MSR_FIRST + i - 1, 1530 cccrval | P4_CCCR_ENABLE); 1531 } 1532 1533 if (pmc_interrupted) { 1534 1535 /* 1536 * On Intel CPUs, the PMC 'pcint' entry in the LAPIC 1537 * gets masked when a PMC interrupts the CPU. We need 1538 * to unmask this. 1539 */ 1540 p4_lapic_enable_pmc_interrupt(); 1541 1542 /* XXX: Invoke helper (non-NMI) interrupt here */ 1543 } 1544 1545 return pmc_interrupted; 1546} 1547 1548/* 1549 * Describe a CPU's PMC state. 
1550 */ 1551 1552static int 1553p4_describe(int cpu, int ri, struct pmc_info *pi, 1554 struct pmc **ppmc) 1555{ 1556 int error; 1557 size_t copied; 1558 struct pmc_hw *phw; 1559 const struct p4pmc_descr *pd; 1560 1561 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1562 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 1563 KASSERT(ri >= 0 && ri < P4_NPMCS, 1564 ("[p4,%d] row-index %d out of range", __LINE__, ri)); 1565 1566 PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri); 1567 1568 if (pmc_cpu_is_logical(cpu)) 1569 return EINVAL; 1570 1571 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; 1572 pd = &p4_pmcdesc[ri]; 1573 1574 if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name, 1575 PMC_NAME_MAX, &copied)) != 0) 1576 return error; 1577 1578 pi->pm_class = pd->pm_descr.pd_class; 1579 1580 if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { 1581 pi->pm_enabled = TRUE; 1582 *ppmc = phw->phw_pmc; 1583 } else { 1584 pi->pm_enabled = FALSE; 1585 *ppmc = NULL; 1586 } 1587 1588 return 0; 1589} 1590 1591/* 1592 * Get MSR# for use with RDPMC. 
1593 */ 1594 1595static int 1596p4_get_msr(int ri, uint32_t *msr) 1597{ 1598 KASSERT(ri >= 0 && ri < P4_NPMCS, 1599 ("[p4,%d] ri %d out of range", __LINE__, ri)); 1600 1601 *msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST; 1602 1603 PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr); 1604 1605 return 0; 1606} 1607 1608 1609int 1610pmc_initialize_p4(struct pmc_mdep *pmc_mdep) 1611{ 1612 struct p4_event_descr *pe; 1613 1614 KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0, 1615 ("[p4,%d] Initializing non-intel processor", __LINE__)); 1616 1617 PMCDBG(MDP,INI,1, "%s", "p4-initialize"); 1618 1619 switch (pmc_mdep->pmd_cputype) { 1620 case PMC_CPU_INTEL_PIV: 1621 1622 pmc_mdep->pmd_npmc = P4_NPMCS; 1623 pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4; 1624 pmc_mdep->pmd_classes[1].pm_caps = P4_PMC_CAPS; 1625 pmc_mdep->pmd_classes[1].pm_width = 40; 1626 pmc_mdep->pmd_nclasspmcs[1] = 18; 1627 1628 pmc_mdep->pmd_init = p4_init; 1629 pmc_mdep->pmd_cleanup = p4_cleanup; 1630 pmc_mdep->pmd_switch_in = p4_switch_in; 1631 pmc_mdep->pmd_switch_out = p4_switch_out; 1632 pmc_mdep->pmd_read_pmc = p4_read_pmc; 1633 pmc_mdep->pmd_write_pmc = p4_write_pmc; 1634 pmc_mdep->pmd_config_pmc = p4_config_pmc; 1635 pmc_mdep->pmd_get_config = p4_get_config; 1636 pmc_mdep->pmd_allocate_pmc = p4_allocate_pmc; 1637 pmc_mdep->pmd_release_pmc = p4_release_pmc; 1638 pmc_mdep->pmd_start_pmc = p4_start_pmc; 1639 pmc_mdep->pmd_stop_pmc = p4_stop_pmc; 1640 pmc_mdep->pmd_intr = p4_intr; 1641 pmc_mdep->pmd_describe = p4_describe; 1642 pmc_mdep->pmd_get_msr = p4_get_msr; /* i386 */ 1643 1644 /* model specific munging */ 1645 if ((cpu_id & 0xFFF) < 0xF27) { 1646 1647 /* 1648 * On P4 and Xeon with CPUID < (Family 15, 1649 * Model 2, Stepping 7), only one ESCR is 1650 * available for the IOQ_ALLOCATION event. 
1651 */ 1652 1653 pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION); 1654 pe->pm_escrs[1] = P4_ESCR_NONE; 1655 } 1656 1657 break; 1658 1659 default: 1660 KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__)); 1661 return ENOSYS; 1662 } 1663 1664 return 0; 1665} 1666