hwpmc_piv.c revision 157210
1/*- 2 * Copyright (c) 2003-2005 Joseph Koshy 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27#include <sys/cdefs.h> 28__FBSDID("$FreeBSD: head/sys/dev/hwpmc/hwpmc_piv.c 157210 2006-03-28 14:09:21Z jkoshy $"); 29 30#include <sys/param.h> 31#include <sys/lock.h> 32#include <sys/mutex.h> 33#include <sys/pmc.h> 34#include <sys/pmckern.h> 35#include <sys/smp.h> 36#include <sys/systm.h> 37 38#include <machine/cpufunc.h> 39#include <machine/md_var.h> 40#include <machine/specialreg.h> 41 42/* 43 * PENTIUM 4 SUPPORT 44 * 45 * The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs 46 * respectively. Each PMC consists of two model specific registers: 47 * a counter configuration control register (CCCR) and a counter 48 * register that holds the actual event counts. 49 * 50 * Configuring an event requires the use of one of 45 event selection 51 * control registers (ESCR). Events are associated with specific 52 * ESCRs. Each PMC group has a set of ESCRs it can use. 53 * 54 * - The BPU counter group (4 PMCs) can use the 16 ESCRs: 55 * BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1}, 56 * PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}. 57 * 58 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1}, 59 * TC_ESCR{0,1}, TBPU_ESCR{0,1}. 60 * 61 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs: 62 * FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1}, 63 * DAC_ESCR{0,1}. 64 * 65 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1}, 66 * ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}. 67 * 68 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if 69 * present) of a counter group. Odd-numbered ESCRs can be used with 70 * counters 2, 3 and 5 (if present) of a counter group. The 71 * 'p4_escrs[]' table describes these restrictions in a form that 72 * function 'p4_allocate()' uses for making allocation decisions. 73 * 74 * SYSTEM-MODE AND THREAD-MODE ALLOCATION 75 * 76 * In addition to remembering the state of PMC rows 77 * ('FREE','STANDALONE', or 'THREAD'), we similarly need to track the 78 * state of ESCR rows. If an ESCR is allocated to a system-mode PMC 79 * on a CPU we cannot allocate it to a thread-mode PMC.
On a 80 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each 81 * CPU is tracked by the pc_escrs[] array. 82 * 83 * Each system-mode PMC that is using an ESCR records its row-index in 84 * the appropriate entry and system-mode allocation attempts check 85 * that an ESCR is available using this array. Process-mode PMCs do 86 * not use the pc_escrs[] array, since the ESCR row itself would have been 87 * marked as in 'THREAD' mode. 88 * 89 * HYPERTHREADING SUPPORT 90 * 91 * When HTT is enabled, the FreeBSD kernel treats the two 'logical' 92 * CPUs as independent CPUs and can schedule kernel threads on them 93 * independently. However, the two logical CPUs share the same set of 94 * PMC resources. We need to ensure that: 95 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly, 96 * and, 97 * - Threads of multi-threaded processes that get scheduled on the same 98 * physical CPU are handled correctly. 99 * 100 * HTT Detection 101 * 102 * Not all HTT capable systems will have HTT enabled. We detect the 103 * presence of HTT by checking whether 'p4_init()' was called for a secondary 104 * CPU in a HTT pair. 105 * 106 * Note that hwpmc(4) cannot currently deal with a change in HTT status once 107 * loaded. 108 * 109 * Handling HTT READ / WRITE / START / STOP 110 * 111 * PMC resources are shared across the CPUs in an HTT pair. We 112 * designate the lower numbered CPU in a HTT pair as the 'primary' 113 * CPU. In each primary CPU's state we keep track of a 'runcount' 114 * which reflects the number of PMC-using processes that have been 115 * scheduled on its secondary CPU. Process-mode PMC operations will 116 * actually 'start' or 'stop' hardware only if these are the first or 117 * last processes respectively to use the hardware. PMC values 118 * written by a 'write' operation are saved and are transferred to 119 * hardware at PMC 'start' time if the runcount is 0. If the runcount 120 * is greater than 0 at the time of a 'start' operation, we keep track 121 * of the actual hardware value at the time of the 'start' operation 122 * and use this to adjust the final readings at PMC 'stop' or 'read' 123 * time. 124 * 125 * Execution sequences: 126 * 127 * Case 1: CPUx +...- (no overlap) 128 * CPUy +...- 129 * RC 0 1 0 1 0 130 * 131 * Case 2: CPUx +........- (partial overlap) 132 * CPUy +........- 133 * RC 0 1 2 1 0 134 * 135 * Case 3: CPUx +..............- (fully overlapped) 136 * CPUy +.....- 137 * RC 0 1 2 1 0 138 * 139 * Key: 140 * 'CPU[xy]' : one of the two logical processors on a HTT CPU. 141 * 'RC' : run count (#threads per physical core). 142 * '+' : point in time when a thread is put on a CPU. 143 * '-' : point in time when a thread is taken off a CPU. 144 * 145 * Handling HTT CONFIG 146 * 147 * Different processes attached to the same PMC may get scheduled on 148 * the two logical processors in the package. We keep track of config 149 * and de-config operations using the CFGFLAGS fields of the per-physical 150 * cpu state. 151 * 152 * Handling TSCs 153 * 154 * TSCs are architectural state and each CPU in a HTT pair has its own 155 * TSC register.
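 *
 * Worked example for the READ / WRITE / START / STOP handling above
 * (illustrative numbers only): suppose a process-mode PMC is started
 * on CPUy while the runcount is already 1 because CPUx is using the
 * hardware, and the counter reads 1000 at that point; 1000 is saved
 * as the hardware value at 'start'. If the counter reads 4200 at the
 * matching 'stop', the delta 4200 - 1000 = 3200 is added to the saved
 * per-PMC value, so the overlap with CPUx is not double-counted. A
 * 40-bit wraparound in between is handled by adding
 * (P4_PERFCTR_MASK + 1) before subtracting, as done in p4_read_pmc()
 * and p4_stop_pmc().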
156 */ 157 158#define P4_PMCS() \ 159 P4_PMC(BPU_COUNTER0) \ 160 P4_PMC(BPU_COUNTER1) \ 161 P4_PMC(BPU_COUNTER2) \ 162 P4_PMC(BPU_COUNTER3) \ 163 P4_PMC(MS_COUNTER0) \ 164 P4_PMC(MS_COUNTER1) \ 165 P4_PMC(MS_COUNTER2) \ 166 P4_PMC(MS_COUNTER3) \ 167 P4_PMC(FLAME_COUNTER0) \ 168 P4_PMC(FLAME_COUNTER1) \ 169 P4_PMC(FLAME_COUNTER2) \ 170 P4_PMC(FLAME_COUNTER3) \ 171 P4_PMC(IQ_COUNTER0) \ 172 P4_PMC(IQ_COUNTER1) \ 173 P4_PMC(IQ_COUNTER2) \ 174 P4_PMC(IQ_COUNTER3) \ 175 P4_PMC(IQ_COUNTER4) \ 176 P4_PMC(IQ_COUNTER5) \ 177 P4_PMC(NONE) 178 179enum pmc_p4pmc { 180#undef P4_PMC 181#define P4_PMC(N) P4_PMC_##N , 182 P4_PMCS() 183}; 184 185/* 186 * P4 ESCR descriptors 187 */ 188 189#define P4_ESCRS() \ 190 P4_ESCR(BSU_ESCR0, 0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 191 P4_ESCR(BSU_ESCR1, 0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 192 P4_ESCR(FSB_ESCR0, 0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 193 P4_ESCR(FSB_ESCR1, 0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 194 P4_ESCR(FIRM_ESCR0, 0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 195 P4_ESCR(FIRM_ESCR1, 0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 196 P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 197 P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 198 P4_ESCR(DAC_ESCR0, 0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 199 P4_ESCR(DAC_ESCR1, 0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 200 P4_ESCR(MOB_ESCR0, 0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 201 P4_ESCR(MOB_ESCR1, 0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 202 P4_ESCR(PMH_ESCR0, 0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 203 P4_ESCR(PMH_ESCR1, 0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 204 P4_ESCR(SAAT_ESCR0, 0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 205 P4_ESCR(SAAT_ESCR1, 0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 206 P4_ESCR(U2L_ESCR0, 0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 207 P4_ESCR(U2L_ESCR1, 0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 208 P4_ESCR(BPU_ESCR0, 0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 209 P4_ESCR(BPU_ESCR1, 0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 210 P4_ESCR(IS_ESCR0, 0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 211 P4_ESCR(IS_ESCR1, 0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 212 P4_ESCR(ITLB_ESCR0, 0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 213 P4_ESCR(ITLB_ESCR1, 0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 214 P4_ESCR(CRU_ESCR0, 0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 215 P4_ESCR(CRU_ESCR1, 0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 216 P4_ESCR(IQ_ESCR0, 0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 217 P4_ESCR(IQ_ESCR1, 0x3BB, IQ_COUNTER1, IQ_COUNTER3, IQ_COUNTER5) \ 218 P4_ESCR(RAT_ESCR0, 0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 219 P4_ESCR(RAT_ESCR1, 0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 220 P4_ESCR(SSU_ESCR0, 0x3BE, IQ_COUNTER0, IQ_COUNTER2, IQ_COUNTER4) \ 221 P4_ESCR(MS_ESCR0, 0x3C0, MS_COUNTER0, MS_COUNTER1, NONE) \ 222 P4_ESCR(MS_ESCR1, 0x3C1, MS_COUNTER2, MS_COUNTER3, NONE) \ 223 P4_ESCR(TBPU_ESCR0, 0x3C2, MS_COUNTER0, MS_COUNTER1, NONE) \ 224 P4_ESCR(TBPU_ESCR1, 0x3C3, MS_COUNTER2, MS_COUNTER3, NONE) \ 225 P4_ESCR(TC_ESCR0, 0x3C4, MS_COUNTER0, MS_COUNTER1, NONE) \ 226 P4_ESCR(TC_ESCR1, 0x3C5, MS_COUNTER2, MS_COUNTER3, NONE) \ 227 P4_ESCR(IX_ESCR0, 0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 228 P4_ESCR(IX_ESCR1, 0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 229 P4_ESCR(ALF_ESCR0, 0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 230 P4_ESCR(ALF_ESCR1, 0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 231 P4_ESCR(CRU_ESCR2, 0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 
232 P4_ESCR(CRU_ESCR3, 0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 233 P4_ESCR(CRU_ESCR4, 0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \ 234 P4_ESCR(CRU_ESCR5, 0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \ 235 P4_ESCR(NONE, ~0, NONE, NONE, NONE) 236 237enum pmc_p4escr { 238#define P4_ESCR(N, MSR, P1, P2, P3) P4_ESCR_##N , 239 P4_ESCRS() 240#undef P4_ESCR 241}; 242 243struct pmc_p4escr_descr { 244 const char pm_escrname[PMC_NAME_MAX]; 245 u_short pm_escr_msr; 246 const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR]; 247}; 248 249static struct pmc_p4escr_descr p4_escrs[] = 250{ 251#define P4_ESCR(N, MSR, P1, P2, P3) \ 252 { \ 253 .pm_escrname = #N, \ 254 .pm_escr_msr = (MSR), \ 255 .pm_pmcs = \ 256 { \ 257 P4_PMC_##P1, \ 258 P4_PMC_##P2, \ 259 P4_PMC_##P3 \ 260 } \ 261 } , 262 263 P4_ESCRS() 264 265#undef P4_ESCR 266}; 267 268/* 269 * P4 Event descriptor 270 */ 271 272struct p4_event_descr { 273 const enum pmc_event pm_event; 274 const uint32_t pm_escr_eventselect; 275 const uint32_t pm_cccr_select; 276 const char pm_is_ti_event; 277 enum pmc_p4escr pm_escrs[P4_MAX_ESCR_PER_EVENT]; 278}; 279 280static struct p4_event_descr p4_events[] = { 281 282#define P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \ 283 { \ 284 .pm_event = PMC_EV_P4_##NAME, \ 285 .pm_escr_eventselect = (ESCREVENTSEL), \ 286 .pm_cccr_select = (CCCRSEL), \ 287 .pm_is_ti_event = (TI_EVENT), \ 288 .pm_escrs = \ 289 { \ 290 P4_ESCR_##ESCR0, \ 291 P4_ESCR_##ESCR1 \ 292 } \ 293 } 294 295P4_EVDESCR(TC_DELIVER_MODE, 0x01, 0x01, TRUE, TC_ESCR0, TC_ESCR1), 296P4_EVDESCR(BPU_FETCH_REQUEST, 0x03, 0x00, FALSE, BPU_ESCR0, BPU_ESCR1), 297P4_EVDESCR(ITLB_REFERENCE, 0x18, 0x03, FALSE, ITLB_ESCR0, ITLB_ESCR1), 298P4_EVDESCR(MEMORY_CANCEL, 0x02, 0x05, FALSE, DAC_ESCR0, DAC_ESCR1), 299P4_EVDESCR(MEMORY_COMPLETE, 0x08, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 300P4_EVDESCR(LOAD_PORT_REPLAY, 0x04, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 301P4_EVDESCR(STORE_PORT_REPLAY, 0x05, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1), 302P4_EVDESCR(MOB_LOAD_REPLAY, 0x03, 0x02, FALSE, MOB_ESCR0, MOB_ESCR1), 303P4_EVDESCR(PAGE_WALK_TYPE, 0x01, 0x04, TRUE, PMH_ESCR0, PMH_ESCR1), 304P4_EVDESCR(BSQ_CACHE_REFERENCE, 0x0C, 0x07, FALSE, BSU_ESCR0, BSU_ESCR1), 305P4_EVDESCR(IOQ_ALLOCATION, 0x03, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1), 306P4_EVDESCR(IOQ_ACTIVE_ENTRIES, 0x1A, 0x06, FALSE, FSB_ESCR1, NONE), 307P4_EVDESCR(FSB_DATA_ACTIVITY, 0x17, 0x06, TRUE, FSB_ESCR0, FSB_ESCR1), 308P4_EVDESCR(BSQ_ALLOCATION, 0x05, 0x07, FALSE, BSU_ESCR0, NONE), 309P4_EVDESCR(BSQ_ACTIVE_ENTRIES, 0x06, 0x07, FALSE, BSU_ESCR1, NONE), 310 /* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */ 311P4_EVDESCR(SSE_INPUT_ASSIST, 0x34, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 312P4_EVDESCR(PACKED_SP_UOP, 0x08, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 313P4_EVDESCR(PACKED_DP_UOP, 0x0C, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 314P4_EVDESCR(SCALAR_SP_UOP, 0x0A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 315P4_EVDESCR(SCALAR_DP_UOP, 0x0E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 316P4_EVDESCR(64BIT_MMX_UOP, 0x02, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 317P4_EVDESCR(128BIT_MMX_UOP, 0x1A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 318P4_EVDESCR(X87_FP_UOP, 0x04, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 319P4_EVDESCR(X87_SIMD_MOVES_UOP, 0x2E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1), 320P4_EVDESCR(GLOBAL_POWER_EVENTS, 0x13, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1), 321P4_EVDESCR(TC_MS_XFER, 0x05, 0x00, FALSE, MS_ESCR0, MS_ESCR1), 322P4_EVDESCR(UOP_QUEUE_WRITES, 0x09, 0x00, FALSE, MS_ESCR0, MS_ESCR1), 
323P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE, 324 0x05, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1), 325P4_EVDESCR(RETIRED_BRANCH_TYPE, 0x04, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1), 326P4_EVDESCR(RESOURCE_STALL, 0x01, 0x01, FALSE, ALF_ESCR0, ALF_ESCR1), 327P4_EVDESCR(WC_BUFFER, 0x05, 0x05, TRUE, DAC_ESCR0, DAC_ESCR1), 328P4_EVDESCR(B2B_CYCLES, 0x16, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 329P4_EVDESCR(BNR, 0x08, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 330P4_EVDESCR(SNOOP, 0x06, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 331P4_EVDESCR(RESPONSE, 0x04, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1), 332P4_EVDESCR(FRONT_END_EVENT, 0x08, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 333P4_EVDESCR(EXECUTION_EVENT, 0x0C, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 334P4_EVDESCR(REPLAY_EVENT, 0x09, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 335P4_EVDESCR(INSTR_RETIRED, 0x02, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 336P4_EVDESCR(UOPS_RETIRED, 0x01, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 337P4_EVDESCR(UOP_TYPE, 0x02, 0x02, FALSE, RAT_ESCR0, RAT_ESCR1), 338P4_EVDESCR(BRANCH_RETIRED, 0x06, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 339P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1), 340P4_EVDESCR(X87_ASSIST, 0x03, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3), 341P4_EVDESCR(MACHINE_CLEAR, 0x02, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3) 342 343#undef P4_EVDESCR 344}; 345 346#define P4_EVENT_IS_TI(E) ((E)->pm_is_ti_event == TRUE) 347 348#define P4_NEVENTS (PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1) 349 350/* 351 * P4 PMC descriptors 352 */ 353 354struct p4pmc_descr { 355 struct pmc_descr pm_descr; /* common information */ 356 enum pmc_p4pmc pm_pmcnum; /* PMC number */ 357 uint32_t pm_pmc_msr; /* PERFCTR MSR address */ 358 uint32_t pm_cccr_msr; /* CCCR MSR address */ 359}; 360 361static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = { 362 363 /* 364 * TSC descriptor 365 */ 366 367 { 368 .pm_descr = 369 { 370 .pd_name = "TSC", 371 .pd_class = PMC_CLASS_TSC, 372 .pd_caps = PMC_CAP_READ | PMC_CAP_WRITE, 373 .pd_width = 64 374 }, 375 .pm_pmcnum = ~0, 376 .pm_cccr_msr = ~0, 377 .pm_pmc_msr = 0x10, 378 }, 379 380 /* 381 * P4 PMCS 382 */ 383 384#define P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \ 385 PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \ 386 PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE | \ 387 PMC_CAP_TAGGING | PMC_CAP_CASCADE) 388 389#define P4_PMCDESCR(N, PMC, CCCR) \ 390 { \ 391 .pm_descr = \ 392 { \ 393 .pd_name = #N, \ 394 .pd_class = PMC_CLASS_P4, \ 395 .pd_caps = P4_PMC_CAPS, \ 396 .pd_width = 40 \ 397 }, \ 398 .pm_pmcnum = P4_PMC_##N, \ 399 .pm_cccr_msr = (CCCR), \ 400 .pm_pmc_msr = (PMC) \ 401 } 402 403 P4_PMCDESCR(BPU_COUNTER0, 0x300, 0x360), 404 P4_PMCDESCR(BPU_COUNTER1, 0x301, 0x361), 405 P4_PMCDESCR(BPU_COUNTER2, 0x302, 0x362), 406 P4_PMCDESCR(BPU_COUNTER3, 0x303, 0x363), 407 P4_PMCDESCR(MS_COUNTER0, 0x304, 0x364), 408 P4_PMCDESCR(MS_COUNTER1, 0x305, 0x365), 409 P4_PMCDESCR(MS_COUNTER2, 0x306, 0x366), 410 P4_PMCDESCR(MS_COUNTER3, 0x307, 0x367), 411 P4_PMCDESCR(FLAME_COUNTER0, 0x308, 0x368), 412 P4_PMCDESCR(FLAME_COUNTER1, 0x309, 0x369), 413 P4_PMCDESCR(FLAME_COUNTER2, 0x30A, 0x36A), 414 P4_PMCDESCR(FLAME_COUNTER3, 0x30B, 0x36B), 415 P4_PMCDESCR(IQ_COUNTER0, 0x30C, 0x36C), 416 P4_PMCDESCR(IQ_COUNTER1, 0x30D, 0x36D), 417 P4_PMCDESCR(IQ_COUNTER2, 0x30E, 0x36E), 418 P4_PMCDESCR(IQ_COUNTER3, 0x30F, 0x36F), 419 P4_PMCDESCR(IQ_COUNTER4, 0x310, 0x370), 420 P4_PMCDESCR(IQ_COUNTER5, 0x311, 0x371), 421 422#undef P4_PMCDESCR 423}; 424 425/* HTT support */ 426#define P4_NHTT 2 /* logical processors/chip */ 427 428static int 
p4_system_has_htt; 429 430/* 431 * Per-CPU data structure for P4 class CPUs 432 * 433 * [common stuff] 434 * [19 struct pmc_hw pointers] 435 * [19 struct pmc_hw structures] 436 * [45 ESCRs status bytes] 437 * [per-cpu spin mutex] 438 * [19 flag fields for holding config flags and a runcount] 439 * [19*2 hw value fields] (Thread mode PMC support) 440 * or 441 * [19*2 EIP values] (Sampling mode PMCs) 442 * [19*2 pmc value fields] (Thread mode PMC support) 443 */ 444 445struct p4_cpu { 446 struct pmc_cpu pc_common; 447 struct pmc_hw *pc_hwpmcs[P4_NPMCS]; 448 struct pmc_hw pc_p4pmcs[P4_NPMCS]; 449 char pc_escrs[P4_NESCR]; 450 struct mtx pc_mtx; /* spin lock */ 451 uint32_t pc_intrflag; /* NMI handler flags */ 452 unsigned int pc_intrlock; /* NMI handler spin lock */ 453 unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */ 454 union { 455 pmc_value_t pc_hw[P4_NPMCS * P4_NHTT]; 456 uintptr_t pc_ip[P4_NPMCS * P4_NHTT]; 457 } pc_si; 458 pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT]; 459}; 460 461/* 462 * A 'logical' CPU shares PMC resources with its partner 'physical' CPU, 463 * except the TSC, which is architectural and hence separate. The 464 * 'logical' CPU descriptor thus has pointers to the physical CPU's 465 * descriptor state except for the TSC (rowindex 0) which is not 466 * shared. 467 */ 468 469struct p4_logicalcpu { 470 struct pmc_cpu pc_common; 471 struct pmc_hw *pc_hwpmcs[P4_NPMCS]; 472 struct pmc_hw pc_tsc; 473}; 474 475#define P4_PCPU_PMC_VALUE(PC,RI,CPU) (PC)->pc_pmc_values[(RI)*((CPU) & 1)] 476#define P4_PCPU_HW_VALUE(PC,RI,CPU) (PC)->pc_si.pc_hw[(RI)*((CPU) & 1)] 477#define P4_PCPU_SAVED_IP(PC,RI,CPU) (PC)->pc_si.pc_ip[(RI)*((CPU) & 1)] 478 479#define P4_PCPU_GET_FLAGS(PC,RI,MASK) ((PC)->pc_flags[(RI)] & (MASK)) 480#define P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do { \ 481 char _tmp; \ 482 _tmp = (PC)->pc_flags[(RI)]; \ 483 _tmp &= ~(MASK); \ 484 _tmp |= (VAL) & (MASK); \ 485 (PC)->pc_flags[(RI)] = _tmp; \ 486} while (0) 487 488#define P4_PCPU_GET_RUNCOUNT(PC,RI) P4_PCPU_GET_FLAGS(PC,RI,0x0F) 489#define P4_PCPU_SET_RUNCOUNT(PC,RI,V) P4_PCPU_SET_FLAGS(PC,RI,0x0F,V) 490 491#define P4_PCPU_GET_CFGFLAGS(PC,RI) (P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4) 492#define P4_PCPU_SET_CFGFLAGS(PC,RI,C) P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4)) 493 494#define P4_CPU_TO_FLAG(C) (P4_CPU_IS_HTT_SECONDARY(cpu) ? 0x2 : 0x1) 495 496#define P4_PCPU_GET_INTRFLAG(PC,I) ((PC)->pc_intrflag & (1 << (I))) 497#define P4_PCPU_SET_INTRFLAG(PC,I,V) do { \ 498 uint32_t __mask; \ 499 __mask = 1 << (I); \ 500 if ((V)) \ 501 (PC)->pc_intrflag |= __mask; \ 502 else \ 503 (PC)->pc_intrflag &= ~__mask; \ 504 } while (0) 505 506/* 507 * A minimal spin lock implementation for use inside the NMI handler. 508 * 509 * We don't want to use a regular spin lock here, because curthread 510 * may not be consistent at the time the handler is invoked.
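 *
 * Intended usage (sketch only; the real call sites are in p4_intr()
 * below, which takes the lock only when 'p4_system_has_htt' is set):
 *
 *	P4_PCPU_ACQ_INTR_SPINLOCK(pc);
 *	... examine the CCCRs and the per-cpu interrupt flags ...
 *	P4_PCPU_REL_INTR_SPINLOCK(pc);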
511 */ 512#define P4_PCPU_ACQ_INTR_SPINLOCK(PC) do { \ 513 while (!atomic_cmpset_acq_int(&pc->pc_intrlock, 0, 1)) \ 514 ia32_pause(); \ 515 } while (0) 516#define P4_PCPU_REL_INTR_SPINLOCK(PC) \ 517 atomic_store_rel_int(&pc->pc_intrlock, 0); 518 519/* ESCR row disposition */ 520static int p4_escrdisp[P4_NESCR]; 521 522#define P4_ESCR_ROW_DISP_IS_THREAD(E) (p4_escrdisp[(E)] > 0) 523#define P4_ESCR_ROW_DISP_IS_STANDALONE(E) (p4_escrdisp[(E)] < 0) 524#define P4_ESCR_ROW_DISP_IS_FREE(E) (p4_escrdisp[(E)] == 0) 525 526#define P4_ESCR_MARK_ROW_STANDALONE(E) do { \ 527 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\ 528 __LINE__)); \ 529 atomic_add_int(&p4_escrdisp[(E)], -1); \ 530 KASSERT(p4_escrdisp[(E)] >= (-mp_ncpus), ("[p4,%d] row " \ 531 "disposition error", __LINE__)); \ 532} while (0) 533 534#define P4_ESCR_UNMARK_ROW_STANDALONE(E) do { \ 535 atomic_add_int(&p4_escrdisp[(E)], 1); \ 536 KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\ 537 __LINE__)); \ 538} while (0) 539 540#define P4_ESCR_MARK_ROW_THREAD(E) do { \ 541 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \ 542 __LINE__)); \ 543 atomic_add_int(&p4_escrdisp[(E)], 1); \ 544} while (0) 545 546#define P4_ESCR_UNMARK_ROW_THREAD(E) do { \ 547 atomic_add_int(&p4_escrdisp[(E)], -1); \ 548 KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \ 549 __LINE__)); \ 550} while (0) 551 552#define P4_PMC_IS_STOPPED(cccr) ((rdmsr(cccr) & P4_CCCR_ENABLE) == 0) 553 554#define P4_CPU_IS_HTT_SECONDARY(cpu) \ 555 (p4_system_has_htt ? ((cpu) & 1) : 0) 556#define P4_TO_HTT_PRIMARY(cpu) \ 557 (p4_system_has_htt ? ((cpu) & ~1) : (cpu)) 558 559#define P4_CCCR_Tx_MASK (~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1| \ 560 P4_CCCR_ENABLE|P4_CCCR_OVF)) 561#define P4_ESCR_Tx_MASK (~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS| \ 562 P4_ESCR_T1_USR)) 563 564/* 565 * support routines 566 */ 567 568static struct p4_event_descr * 569p4_find_event(enum pmc_event ev) 570{ 571 int n; 572 573 for (n = 0; n < P4_NEVENTS; n++) 574 if (p4_events[n].pm_event == ev) 575 break; 576 if (n == P4_NEVENTS) 577 return NULL; 578 return &p4_events[n]; 579} 580 581/* 582 * Initialize per-cpu state 583 */ 584 585static int 586p4_init(int cpu) 587{ 588 int n, phycpu; 589 char *pescr; 590 struct p4_cpu *pcs; 591 struct p4_logicalcpu *plcs; 592 struct pmc_hw *phw; 593 594 KASSERT(cpu >= 0 && cpu < mp_ncpus, 595 ("[p4,%d] insane cpu number %d", __LINE__, cpu)); 596 597 PMCDBG(MDP,INI,0, "p4-init cpu=%d logical=%d", cpu, 598 pmc_cpu_is_logical(cpu) != 0); 599 600 /* 601 * The two CPUs in an HT pair share their per-cpu state. 602 * 603 * For HT capable CPUs, we assume that the two logical 604 * processors in the HT pair get two consecutive CPU ids 605 * starting with an even id #. 606 * 607 * The primary CPU (the even numbered CPU of the pair) would 608 * have been initialized prior to the initialization for the 609 * secondary. 
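 *
 * For example, on a hypothetical two-package HTT system enumerated
 * this way, CPUs 0 and 1 would be one HT pair (CPU 0 primary) and
 * CPUs 2 and 3 the other (CPU 2 primary); p4_init(1) and p4_init(3)
 * would take the logical-CPU path below and share the state already
 * set up by p4_init(0) and p4_init(2) respectively.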
610 */ 611 612 if (pmc_cpu_is_logical(cpu) && (cpu & 1)) { 613 614 p4_system_has_htt = 1; 615 616 phycpu = P4_TO_HTT_PRIMARY(cpu); 617 pcs = (struct p4_cpu *) pmc_pcpu[phycpu]; 618 PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pcs=%p", 619 cpu, phycpu, pcs); 620 KASSERT(pcs, 621 ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", __LINE__, 622 cpu, phycpu)); 623 if (pcs == NULL) /* decline to init */ 624 return ENXIO; 625 626 MALLOC(plcs, struct p4_logicalcpu *, 627 sizeof(struct p4_logicalcpu), M_PMC, M_WAITOK|M_ZERO); 628 629 /* The TSC is architectural state and is not shared */ 630 plcs->pc_hwpmcs[0] = &plcs->pc_tsc; 631 plcs->pc_tsc.phw_state = PMC_PHW_FLAG_IS_ENABLED | 632 PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) | 633 PMC_PHW_FLAG_IS_SHAREABLE; 634 635 /* Other PMCs are shared with the physical CPU */ 636 for (n = 1; n < P4_NPMCS; n++) 637 plcs->pc_hwpmcs[n] = pcs->pc_hwpmcs[n]; 638 639 pmc_pcpu[cpu] = (struct pmc_cpu *) plcs; 640 return 0; 641 } 642 643 MALLOC(pcs, struct p4_cpu *, sizeof(struct p4_cpu), M_PMC, 644 M_WAITOK|M_ZERO); 645 646 if (pcs == NULL) 647 return ENOMEM; 648 phw = pcs->pc_p4pmcs; 649 650 for (n = 0; n < P4_NPMCS; n++, phw++) { 651 phw->phw_state = PMC_PHW_FLAG_IS_ENABLED | 652 PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n); 653 phw->phw_pmc = NULL; 654 pcs->pc_hwpmcs[n] = phw; 655 } 656 657 /* Mark the TSC as shareable */ 658 pcs->pc_hwpmcs[0]->phw_state |= PMC_PHW_FLAG_IS_SHAREABLE; 659 660 pescr = pcs->pc_escrs; 661 for (n = 0; n < P4_NESCR; n++) 662 *pescr++ = P4_INVALID_PMC_INDEX; 663 pmc_pcpu[cpu] = (struct pmc_cpu *) pcs; 664 665 mtx_init(&pcs->pc_mtx, "p4-pcpu", "pmc", MTX_SPIN); 666 667 return 0; 668} 669 670/* 671 * Destroy per-cpu state. 672 */ 673 674static int 675p4_cleanup(int cpu) 676{ 677 int i; 678 struct p4_cpu *pcs; 679 680 PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu); 681 682 if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) == NULL) 683 return 0; 684 685 /* Turn off all PMCs on this CPU */ 686 for (i = 0; i < P4_NPMCS - 1; i++) 687 wrmsr(P4_CCCR_MSR_FIRST + i, 688 rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE); 689 690 /* 691 * If the CPU is physical we need to teardown the 692 * full MD state. 693 */ 694 if (!P4_CPU_IS_HTT_SECONDARY(cpu)) 695 mtx_destroy(&pcs->pc_mtx); 696 697 FREE(pcs, M_PMC); 698 699 pmc_pcpu[cpu] = NULL; 700 701 return 0; 702} 703 704/* 705 * Context switch in. 706 */ 707 708static int 709p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) 710{ 711 (void) pc; 712 713 PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp, 714 (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0); 715 716 /* enable the RDPMC instruction */ 717 if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) 718 load_cr4(rcr4() | CR4_PCE); 719 720 PMCDBG(MDP,SWI,2, "cr4=0x%x", (uint32_t) rcr4()); 721 722 return 0; 723} 724 725/* 726 * Context switch out. 
727 */ 728 729static int 730p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) 731{ 732 (void) pc; 733 (void) pp; /* can be null */ 734 735 PMCDBG(MDP,SWO,1, "pc=%p pp=%p", pc, pp); 736 737 /* always disallow the RDPMC instruction */ 738 load_cr4(rcr4() & ~CR4_PCE); 739 740 PMCDBG(MDP,SWO,2, "cr4=0x%x", (uint32_t) rcr4()); 741 742 return 0; 743} 744 745/* 746 * Read a PMC 747 */ 748 749static int 750p4_read_pmc(int cpu, int ri, pmc_value_t *v) 751{ 752 enum pmc_mode mode; 753 struct p4pmc_descr *pd; 754 struct pmc *pm; 755 struct p4_cpu *pc; 756 struct pmc_hw *phw; 757 pmc_value_t tmp; 758 759 KASSERT(cpu >= 0 && cpu < mp_ncpus, 760 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 761 KASSERT(ri >= 0 && ri < P4_NPMCS, 762 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 763 764 765 if (ri == 0) { /* TSC */ 766#ifdef DEBUG 767 pc = (struct p4_cpu *) pmc_pcpu[cpu]; 768 phw = pc->pc_hwpmcs[ri]; 769 pm = phw->phw_pmc; 770 771 KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__, 772 cpu, ri)); 773 KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC, 774 ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__, cpu, ri, 775 PMC_TO_CLASS(pm))); 776 KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)), 777 ("[p4,%d] TSC counter in non-counting mode", __LINE__)); 778#endif 779 *v = rdtsc(); 780 PMCDBG(MDP,REA,2, "p4-read -> %jx", *v); 781 return 0; 782 } 783 784 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 785 phw = pc->pc_hwpmcs[ri]; 786 pd = &p4_pmcdesc[ri]; 787 pm = phw->phw_pmc; 788 789 KASSERT(pm != NULL, 790 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, 791 cpu, ri)); 792 793 KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm), 794 ("[p4,%d] class mismatch pd %d != id class %d", __LINE__, 795 pd->pm_descr.pd_class, PMC_TO_CLASS(pm))); 796 797 mode = PMC_TO_MODE(pm); 798 799 PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode); 800 801 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 802 ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class)); 803 804 tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr); 805 806 if (PMC_IS_VIRTUAL_MODE(mode)) { 807 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */ 808 tmp += (P4_PERFCTR_MASK + 1) - 809 P4_PCPU_HW_VALUE(pc,ri,cpu); 810 else 811 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu); 812 tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu); 813 } 814 815 if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */ 816 *v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); 817 else 818 *v = tmp; 819 820 PMCDBG(MDP,REA,2, "p4-read -> %jx", *v); 821 return 0; 822} 823 824/* 825 * Write a PMC 826 */ 827 828static int 829p4_write_pmc(int cpu, int ri, pmc_value_t v) 830{ 831 enum pmc_mode mode; 832 struct pmc *pm; 833 struct p4_cpu *pc; 834 const struct pmc_hw *phw; 835 const struct p4pmc_descr *pd; 836 837 KASSERT(cpu >= 0 && cpu < mp_ncpus, 838 ("[amd,%d] illegal CPU value %d", __LINE__, cpu)); 839 KASSERT(ri >= 0 && ri < P4_NPMCS, 840 ("[amd,%d] illegal row-index %d", __LINE__, ri)); 841 842 843 /* 844 * The P4's TSC register is writeable, but we don't allow a 845 * write as changing the TSC's value could interfere with 846 * timekeeping and other system functions. 
847 */ 848 if (ri == 0) { 849#ifdef DEBUG 850 pc = (struct p4_cpu *) pmc_pcpu[cpu]; 851 phw = pc->pc_hwpmcs[ri]; 852 pm = phw->phw_pmc; 853 KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__, 854 cpu, ri)); 855 KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC, 856 ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__, 857 cpu, ri, PMC_TO_CLASS(pm))); 858#endif 859 return 0; 860 } 861 862 /* Shared PMCs */ 863 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 864 phw = pc->pc_hwpmcs[ri]; 865 pm = phw->phw_pmc; 866 pd = &p4_pmcdesc[ri]; 867 868 KASSERT(pm != NULL, 869 ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, 870 cpu, ri)); 871 872 mode = PMC_TO_MODE(pm); 873 874 PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri, 875 mode, v); 876 877 /* 878 * write the PMC value to the register/saved value: for 879 * sampling mode PMCs, the value to be programmed into the PMC 880 * counter is -(C+1) where 'C' is the requested sample rate. 881 */ 882 if (PMC_IS_SAMPLING_MODE(mode)) 883 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v); 884 885 if (PMC_IS_SYSTEM_MODE(mode)) 886 wrmsr(pd->pm_pmc_msr, v); 887 else 888 P4_PCPU_PMC_VALUE(pc,ri,cpu) = v; 889 890 return 0; 891} 892 893/* 894 * Configure a PMC 'pm' on the given CPU and row-index. 895 * 896 * 'pm' may be NULL to indicate de-configuration. 897 * 898 * On HTT systems, a PMC may get configured twice, once for each 899 * "logical" CPU. We track this using the CFGFLAGS field of the 900 * per-cpu state; this field is a bit mask with one bit each for 901 * logical CPUs 0 & 1. 902 */ 903 904static int 905p4_config_pmc(int cpu, int ri, struct pmc *pm) 906{ 907 struct pmc_hw *phw; 908 struct p4_cpu *pc; 909 int cfgflags, cpuflag; 910 911 KASSERT(cpu >= 0 && cpu < mp_ncpus, 912 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 913 KASSERT(ri >= 0 && ri < P4_NPMCS, 914 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 915 916 PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm); 917 918 if (ri == 0) { /* TSC */ 919 pc = (struct p4_cpu *) pmc_pcpu[cpu]; 920 phw = pc->pc_hwpmcs[ri]; 921 922 KASSERT(pm == NULL || phw->phw_pmc == NULL, 923 ("[p4,%d] hwpmc doubly config'ed", __LINE__)); 924 phw->phw_pmc = pm; 925 return 0; 926 } 927 928 /* Shared PMCs */ 929 930 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 931 phw = pc->pc_hwpmcs[ri]; 932 933 KASSERT(pm == NULL || phw->phw_pmc == NULL || 934 (p4_system_has_htt && phw->phw_pmc == pm), 935 ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__)); 936 937 mtx_lock_spin(&pc->pc_mtx); 938 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri); 939 940 KASSERT(cfgflags >= 0 || cfgflags <= 3, 941 ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__, 942 cfgflags, cpu, ri)); 943 944 KASSERT(cfgflags == 0 || phw->phw_pmc, 945 ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count", 946 __LINE__, cpu, ri)); 947 948 cpuflag = P4_CPU_TO_FLAG(cpu); 949 950 if (pm) { /* config */ 951 if (cfgflags == 0) 952 phw->phw_pmc = pm; 953 954 KASSERT(phw->phw_pmc == pm, 955 ("[p4,%d] cpu=%d ri=%d config %p != hw %p", 956 __LINE__, cpu, ri, pm, phw->phw_pmc)); 957 958 cfgflags |= cpuflag; 959 } else { /* unconfig */ 960 cfgflags &= ~cpuflag; 961 962 if (cfgflags == 0) 963 phw->phw_pmc = NULL; 964 } 965 966 KASSERT(cfgflags >= 0 || cfgflags <= 3, 967 ("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__, 968 cfgflags, cpu, ri)); 969 970 P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags); 971 972 mtx_unlock_spin(&pc->pc_mtx); 973 974 return 0; 975} 976 977/* 978 * Retrieve a configured PMC pointer from hardware state. 
979 */ 980 981static int 982p4_get_config(int cpu, int ri, struct pmc **ppm) 983{ 984 struct p4_cpu *pc; 985 struct pmc_hw *phw; 986 int cfgflags; 987 988 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 989 phw = pc->pc_hwpmcs[ri]; 990 991 mtx_lock_spin(&pc->pc_mtx); 992 cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri); 993 mtx_unlock_spin(&pc->pc_mtx); 994 995 if (cfgflags & P4_CPU_TO_FLAG(cpu)) 996 *ppm = phw->phw_pmc; /* PMC config'ed on this CPU */ 997 else 998 *ppm = NULL; 999 1000 return 0; 1001} 1002 1003/* 1004 * Allocate a PMC. 1005 * 1006 * The allocation strategy differs between HTT and non-HTT systems. 1007 * 1008 * The non-HTT case: 1009 * - Given the desired event and the PMC row-index, look up the 1010 * list of valid ESCRs for the event. 1011 * - For each valid ESCR: 1012 * - Check if the ESCR is free and the ESCR row is in a compatible 1013 * mode (i.e., system or process) 1014 * - Check if the ESCR is usable with a P4 PMC at the desired row-index. 1015 * If everything matches, we determine the appropriate bit values for the 1016 * ESCR and CCCR registers. 1017 * 1018 * The HTT case: 1019 * 1020 * - Process mode PMCs require special care. The FreeBSD scheduler could 1021 * schedule any two processes on the same physical CPU. We need to ensure 1022 * that a given PMC row-index is never allocated to two different 1023 * PMCs owned by different user-processes. 1024 * This is ensured by always allocating a PMC from a 'FREE' PMC row 1025 * if the system has HTT active. 1026 * - A similar check needs to be done for ESCRs; we do not want two PMCs 1027 * using the same ESCR to be scheduled at the same time. Thus ESCR 1028 * allocation is also restricted to FREE rows if the system has HTT 1029 * enabled. 1030 * - Thirdly, some events are 'thread-independent', i.e., 1031 * the PMC hardware cannot distinguish between events caused by 1032 * different logical CPUs. This makes it impossible to assign events 1033 * to a given thread of execution. If the system has HTT enabled, 1034 * these events are not allowed for process-mode PMCs. 1035 */ 1036 1037static int 1038p4_allocate_pmc(int cpu, int ri, struct pmc *pm, 1039 const struct pmc_op_pmcallocate *a) 1040{ 1041 int found, n, m; 1042 uint32_t caps, cccrvalue, escrvalue, tflags; 1043 enum pmc_p4escr escr; 1044 struct p4_cpu *pc; 1045 struct p4_event_descr *pevent; 1046 const struct p4pmc_descr *pd; 1047 1048 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1049 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 1050 KASSERT(ri >= 0 && ri < P4_NPMCS, 1051 ("[p4,%d] illegal row-index value %d", __LINE__, ri)); 1052 1053 pd = &p4_pmcdesc[ri]; 1054 1055 PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x " 1056 "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps, 1057 pm->pm_caps); 1058 1059 /* check class */ 1060 if (pd->pm_descr.pd_class != a->pm_class) 1061 return EINVAL; 1062 1063 /* check requested capabilities */ 1064 caps = a->pm_caps; 1065 if ((pd->pm_descr.pd_caps & caps) != caps) 1066 return EPERM; 1067 1068 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) { 1069 /* TSCs are always allocated in system-wide counting mode */ 1070 if (a->pm_ev != PMC_EV_TSC_TSC || 1071 a->pm_mode != PMC_MODE_SC) 1072 return EINVAL; 1073 return 0; 1074 } 1075 1076 /* 1077 * If the system has HTT enabled, and the desired allocation 1078 * mode is process-private, and the PMC row disposition is not 1079 * FREE (0), decline the allocation.
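 *
 * For example, if row-index 2 already holds a system-wide PMC on some
 * CPU (row disposition 'STANDALONE'), or is already in use by another
 * process-private PMC ('THREAD'), a new process-private allocation
 * for row 2 on an HTT system is refused with EBUSY by the check below.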
1080 */ 1081 1082 if (p4_system_has_htt && 1083 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) && 1084 pmc_getrowdisp(ri) != 0) 1085 return EBUSY; 1086 1087 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 1088 ("[p4,%d] unknown PMC class %d", __LINE__, 1089 pd->pm_descr.pd_class)); 1090 1091 if (pm->pm_event < PMC_EV_P4_FIRST || 1092 pm->pm_event > PMC_EV_P4_LAST) 1093 return EINVAL; 1094 1095 if ((pevent = p4_find_event(pm->pm_event)) == NULL) 1096 return ESRCH; 1097 1098 PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}", 1099 pevent->pm_event, pevent->pm_escr_eventselect, 1100 pevent->pm_cccr_select, pevent->pm_is_ti_event); 1101 1102 /* 1103 * Some PMC events are 'thread independent' and therefore 1104 * cannot be used for process-private modes if HTT is being 1105 * used. 1106 */ 1107 1108 if (P4_EVENT_IS_TI(pevent) && 1109 PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) && 1110 p4_system_has_htt) 1111 return EINVAL; 1112 1113 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1114 1115 found = 0; 1116 1117 /* look for a suitable ESCR for this event */ 1118 for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) { 1119 if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE) 1120 break; /* out of ESCRs */ 1121 /* 1122 * Check ESCR row disposition. 1123 * 1124 * If the request is for a system-mode PMC, then the 1125 * ESCR row should not be in process-virtual mode, and 1126 * should also be free on the current CPU. 1127 */ 1128 1129 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1130 if (P4_ESCR_ROW_DISP_IS_THREAD(escr) || 1131 pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX) 1132 continue; 1133 } 1134 1135 /* 1136 * If the request is for a process-virtual PMC, and if 1137 * HTT is not enabled, we can use an ESCR row that is 1138 * either FREE or already in process mode. 1139 * 1140 * If HTT is enabled, then we need to ensure that a 1141 * given ESCR is never allocated to two PMCs that 1142 * could run simultaneously on the two logical CPUs of 1143 * a CPU package. We ensure this by only allocating 1144 * ESCRs from rows marked as 'FREE'. 1145 */ 1146 1147 if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) { 1148 if (p4_system_has_htt) { 1149 if (!P4_ESCR_ROW_DISP_IS_FREE(escr)) 1150 continue; 1151 } else 1152 if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr)) 1153 continue; 1154 } 1155 1156 /* 1157 * We found a suitable ESCR for this event. Now check if 1158 * this ESCR can work with the PMC at row-index 'ri'.
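 *
 * For example, CRU_ESCR0 lists IQ_COUNTER0, IQ_COUNTER1 and
 * IQ_COUNTER4 in p4_escrs[], so a request bound to the row-index of
 * IQ_COUNTER2 would fail this check for CRU_ESCR0; the outer loop
 * would then try the event's alternate ESCR (CRU_ESCR1 for events
 * such as INSTR_RETIRED), which does list IQ_COUNTER2.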
1159 */ 1160 1161 for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++) 1162 if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) { 1163 found = 1; 1164 break; 1165 } 1166 } 1167 1168 if (found == 0) 1169 return ESRCH; 1170 1171 KASSERT((int) escr >= 0 && escr < P4_NESCR, 1172 ("[p4,%d] illegal ESCR value %d", __LINE__, escr)); 1173 1174 /* mark ESCR row mode */ 1175 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1176 pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */ 1177 P4_ESCR_MARK_ROW_STANDALONE(escr); 1178 } else { 1179 KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX, 1180 ("[p4,%d] escr[%d] already in use", __LINE__, escr)); 1181 P4_ESCR_MARK_ROW_THREAD(escr); 1182 } 1183 1184 pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr; 1185 pm->pm_md.pm_p4.pm_p4_escr = escr; 1186 1187 cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select); 1188 escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect); 1189 1190 /* CCCR fields */ 1191 if (caps & PMC_CAP_THRESHOLD) 1192 cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig & 1193 P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE; 1194 1195 if (caps & PMC_CAP_EDGE) 1196 cccrvalue |= P4_CCCR_EDGE; 1197 1198 if (caps & PMC_CAP_INVERT) 1199 cccrvalue |= P4_CCCR_COMPLEMENT; 1200 1201 if (p4_system_has_htt) 1202 cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig & 1203 P4_CCCR_ACTIVE_THREAD_MASK; 1204 else /* no HTT; thread field should be '11b' */ 1205 cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3); 1206 1207 if (caps & PMC_CAP_CASCADE) 1208 cccrvalue |= P4_CCCR_CASCADE; 1209 1210 /* On HTT systems the PMI T0 field may get moved to T1 at pmc start */ 1211 if (caps & PMC_CAP_INTERRUPT) 1212 cccrvalue |= P4_CCCR_OVF_PMI_T0; 1213 1214 /* ESCR fields */ 1215 if (caps & PMC_CAP_QUALIFIER) 1216 escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig & 1217 P4_ESCR_EVENT_MASK_MASK; 1218 if (caps & PMC_CAP_TAGGING) 1219 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig & 1220 P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE; 1221 if (caps & PMC_CAP_QUALIFIER) 1222 escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig & 1223 P4_ESCR_EVENT_MASK_MASK); 1224 1225 /* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */ 1226 tflags = 0; 1227 if (caps & PMC_CAP_SYSTEM) 1228 tflags |= P4_ESCR_T0_OS; 1229 if (caps & PMC_CAP_USER) 1230 tflags |= P4_ESCR_T0_USR; 1231 if (tflags == 0) 1232 tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1233 escrvalue |= tflags; 1234 1235 pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue; 1236 pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue; 1237 1238 PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x " 1239 "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select, 1240 cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue); 1241 1242 return 0; 1243} 1244 1245/* 1246 * release a PMC. 
1247 */ 1248 1249static int 1250p4_release_pmc(int cpu, int ri, struct pmc *pm) 1251{ 1252 enum pmc_p4escr escr; 1253 struct pmc_hw *phw; 1254 struct p4_cpu *pc; 1255 1256 if (p4_pmcdesc[ri].pm_descr.pd_class == PMC_CLASS_TSC) 1257 return 0; 1258 1259 escr = pm->pm_md.pm_p4.pm_p4_escr; 1260 1261 PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr); 1262 1263 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1264 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1265 phw = pc->pc_hwpmcs[ri]; 1266 1267 KASSERT(phw->phw_pmc == NULL, 1268 ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri)); 1269 1270 P4_ESCR_UNMARK_ROW_STANDALONE(escr); 1271 KASSERT(pc->pc_escrs[escr] == ri, 1272 ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__, 1273 escr, ri)); 1274 pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */ 1275 } else 1276 P4_ESCR_UNMARK_ROW_THREAD(escr); 1277 1278 return 0; 1279} 1280 1281/* 1282 * Start a PMC 1283 */ 1284 1285static int 1286p4_start_pmc(int cpu, int ri) 1287{ 1288 int rc; 1289 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits; 1290 struct pmc *pm; 1291 struct p4_cpu *pc; 1292 struct pmc_hw *phw; 1293 struct p4pmc_descr *pd; 1294 1295 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1296 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 1297 KASSERT(ri >= 0 && ri < P4_NPMCS, 1298 ("[p4,%d] illegal row-index %d", __LINE__, ri)); 1299 1300 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1301 phw = pc->pc_hwpmcs[ri]; 1302 pm = phw->phw_pmc; 1303 pd = &p4_pmcdesc[ri]; 1304 1305 KASSERT(pm != NULL, 1306 ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, 1307 cpu, ri)); 1308 1309 PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri); 1310 1311 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) /* TSC are always on */ 1312 return 0; 1313 1314 KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4, 1315 ("[p4,%d] wrong PMC class %d", __LINE__, 1316 pd->pm_descr.pd_class)); 1317 1318 /* retrieve the desired CCCR/ESCR values from the PMC */ 1319 cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue; 1320 escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue; 1321 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr; 1322 1323 /* extract and zero the logical processor selection bits */ 1324 cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0; 1325 escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1326 cccrvalue &= ~P4_CCCR_OVF_PMI_T0; 1327 escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR); 1328 1329 if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */ 1330 cccrtbits <<= 1; 1331 escrtbits >>= 2; 1332 } 1333 1334 /* start system mode PMCs directly */ 1335 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1336 wrmsr(escrmsr, escrvalue | escrtbits); 1337 wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE); 1338 return 0; 1339 } 1340 1341 /* 1342 * Thread mode PMCs 1343 * 1344 * On HTT machines, the same PMC could be scheduled on the 1345 * same physical CPU twice (once for each logical CPU), for 1346 * example, if two threads of a multi-threaded process get 1347 * scheduled on the same CPU. 
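 *
 * Concretely (paraphrasing the code below): when the runcount is 0,
 * this 'start' programs the saved 40-bit value into the counter and
 * enables it. When the runcount is already 1, the counter is briefly
 * disabled, the T0/T1 OS/USR and PMI bits for this logical CPU are
 * OR'ed into the ESCR and CCCR values read back from hardware, and
 * the counter is re-enabled without reprogramming its count. In both
 * cases the current hardware reading is saved so that later reads and
 * stops can subtract it.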
1348 * 1349 */ 1350 1351 mtx_lock_spin(&pc->pc_mtx); 1352 1353 rc = P4_PCPU_GET_RUNCOUNT(pc,ri); 1354 KASSERT(rc == 0 || rc == 1, 1355 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri, 1356 rc)); 1357 1358 if (rc == 0) { /* 1st CPU and the non-HTT case */ 1359 1360 KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr), 1361 ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__, 1362 cpu, ri, pd->pm_cccr_msr)); 1363 1364 /* write out the low 40 bits of the saved value to hardware */ 1365 wrmsr(pd->pm_pmc_msr, 1366 P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK); 1367 1368 } else if (rc == 1) { /* 2nd CPU */ 1369 1370 /* 1371 * Stop the PMC and retrieve the CCCR and ESCR values 1372 * from their MSRs, and turn on the additional T[0/1] 1373 * bits for the 2nd CPU. 1374 */ 1375 1376 cccrvalue = rdmsr(pd->pm_cccr_msr); 1377 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE); 1378 1379 /* check that the configuration bits read back match the PMC */ 1380 KASSERT((cccrvalue & P4_CCCR_Tx_MASK) == 1381 (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK), 1382 ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d " 1383 "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri, 1384 cccrvalue & P4_CCCR_Tx_MASK, 1385 pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK)); 1386 KASSERT(cccrvalue & P4_CCCR_ENABLE, 1387 ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running", 1388 __LINE__, rc, cpu, ri)); 1389 KASSERT((cccrvalue & cccrtbits) == 0, 1390 ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d" 1391 "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri, 1392 cccrvalue, cccrtbits)); 1393 1394 escrvalue = rdmsr(escrmsr); 1395 1396 KASSERT((escrvalue & P4_ESCR_Tx_MASK) == 1397 (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK), 1398 ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d " 1399 "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri, 1400 escrvalue & P4_ESCR_Tx_MASK, 1401 pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK)); 1402 KASSERT((escrvalue & escrtbits) == 0, 1403 ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d " 1404 "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__, 1405 rc, cpu, ri, escrmsr, escrvalue, escrtbits)); 1406 } 1407 1408 /* Enable the correct bits for this CPU. */ 1409 escrvalue |= escrtbits; 1410 cccrvalue |= cccrtbits | P4_CCCR_ENABLE; 1411 1412 /* Save HW value at the time of starting hardware */ 1413 P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr); 1414 1415 /* Program the ESCR and CCCR and start the PMC */ 1416 wrmsr(escrmsr, escrvalue); 1417 wrmsr(pd->pm_cccr_msr, cccrvalue); 1418 1419 ++rc; 1420 P4_PCPU_SET_RUNCOUNT(pc,ri,rc); 1421 1422 mtx_unlock_spin(&pc->pc_mtx); 1423 1424 PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d " 1425 "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc, 1426 ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue, 1427 cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu)); 1428 1429 return 0; 1430} 1431 1432/* 1433 * Stop a PMC. 
1434 */ 1435 1436static int 1437p4_stop_pmc(int cpu, int ri) 1438{ 1439 int rc; 1440 uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits; 1441 struct pmc *pm; 1442 struct p4_cpu *pc; 1443 struct pmc_hw *phw; 1444 struct p4pmc_descr *pd; 1445 pmc_value_t tmp; 1446 1447 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1448 ("[p4,%d] illegal CPU value %d", __LINE__, cpu)); 1449 KASSERT(ri >= 0 && ri < P4_NPMCS, 1450 ("[p4,%d] illegal row index %d", __LINE__, ri)); 1451 1452 pd = &p4_pmcdesc[ri]; 1453 1454 if (pd->pm_descr.pd_class == PMC_CLASS_TSC) 1455 return 0; 1456 1457 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1458 phw = pc->pc_hwpmcs[ri]; 1459 1460 KASSERT(phw != NULL, 1461 ("[p4,%d] null phw for cpu%d, ri%d", __LINE__, cpu, ri)); 1462 1463 pm = phw->phw_pmc; 1464 1465 KASSERT(pm != NULL, 1466 ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri)); 1467 1468 PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri); 1469 1470 if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) { 1471 wrmsr(pd->pm_cccr_msr, 1472 pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE); 1473 return 0; 1474 } 1475 1476 /* 1477 * Thread mode PMCs. 1478 * 1479 * On HTT machines, this PMC may be in use by two threads 1480 * running on two logical CPUs. Thus we look at the 1481 * 'pm_runcount' field and only turn off the appropriate T0/T1 1482 * bits (and keep the PMC running) if two logical CPUs were 1483 * using the PMC. 1484 * 1485 */ 1486 1487 /* bits to mask */ 1488 cccrtbits = P4_CCCR_OVF_PMI_T0; 1489 escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR; 1490 if (P4_CPU_IS_HTT_SECONDARY(cpu)) { 1491 cccrtbits <<= 1; 1492 escrtbits >>= 2; 1493 } 1494 1495 mtx_lock_spin(&pc->pc_mtx); 1496 1497 rc = P4_PCPU_GET_RUNCOUNT(pc,ri); 1498 1499 KASSERT(rc == 2 || rc == 1, 1500 ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri, 1501 rc)); 1502 1503 --rc; 1504 1505 P4_PCPU_SET_RUNCOUNT(pc,ri,rc); 1506 1507 /* Stop this PMC */ 1508 cccrvalue = rdmsr(pd->pm_cccr_msr); 1509 wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE); 1510 1511 escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr; 1512 escrvalue = rdmsr(escrmsr); 1513 1514 /* The current CPU should be running on this PMC */ 1515 KASSERT(escrvalue & escrtbits, 1516 ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x " 1517 "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr, 1518 escrvalue, escrtbits)); 1519 KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) || 1520 (cccrvalue & cccrtbits), 1521 ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x " 1522 "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits)); 1523 1524 /* get the current hardware reading */ 1525 tmp = rdmsr(pd->pm_pmc_msr); 1526 1527 if (rc == 1) { /* need to keep the PMC running */ 1528 escrvalue &= ~escrtbits; 1529 cccrvalue &= ~cccrtbits; 1530 wrmsr(escrmsr, escrvalue); 1531 wrmsr(pd->pm_cccr_msr, cccrvalue); 1532 } 1533 1534 mtx_unlock_spin(&pc->pc_mtx); 1535 1536 PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x " 1537 "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr, 1538 escrvalue, cccrvalue, tmp); 1539 1540 if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */ 1541 tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu); 1542 else 1543 tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu); 1544 1545 P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp; 1546 1547 return 0; 1548} 1549 1550/* 1551 * Handle an interrupt.
1552 * 1553 * The hardware sets the CCCR_OVF whenever a counter overflow occurs, 1554 * so the handler examines all 18 CCCR registers, processing the 1555 * counters that have overflowed. 1556 * 1557 * On HTT machines, the CCCR register is shared and will interrupt 1558 * both logical processors if so configured. Thus multiple logical 1559 * CPUs could enter the NMI service routine at the same time. These 1560 * will get serialized using a per-cpu spinlock dedicated for use in 1561 * the NMI handler. 1562 */ 1563 1564static int 1565p4_intr(int cpu, uintptr_t eip, int usermode) 1566{ 1567 int i, did_interrupt, error, ri; 1568 uint32_t cccrval, ovf_mask, ovf_partner; 1569 struct p4_cpu *pc; 1570 struct pmc_hw *phw; 1571 struct pmc *pm; 1572 pmc_value_t v; 1573 1574 PMCDBG(MDP,INT, 1, "cpu=%d eip=%p um=%d", cpu, (void *) eip, usermode); 1575 1576 pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)]; 1577 1578 ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ? 1579 P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0; 1580 ovf_mask |= P4_CCCR_OVF; 1581 if (p4_system_has_htt) 1582 ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ? P4_CCCR_OVF_PMI_T0 : 1583 P4_CCCR_OVF_PMI_T1; 1584 else 1585 ovf_partner = 0; 1586 did_interrupt = 0; 1587 1588 if (p4_system_has_htt) 1589 P4_PCPU_ACQ_INTR_SPINLOCK(pc); 1590 1591 /* 1592 * Loop through all CCCRs, looking for ones that have 1593 * interrupted this CPU. 1594 */ 1595 for (i = 0; i < P4_NPMCS-1; i++) { 1596 1597 ri = i + 1; /* row index */ 1598 1599 /* 1600 * Check if our partner logical CPU has already marked 1601 * this PMC as having interrupted it. If so, reset 1602 * the flag and process the interrupt, but leave the 1603 * hardware alone. 1604 */ 1605 if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) { 1606 P4_PCPU_SET_INTRFLAG(pc,ri,0); 1607 did_interrupt = 1; 1608 1609 /* 1610 * Ignore de-configured or stopped PMCs. 1611 * Ignore PMCs not in sampling mode. 1612 */ 1613 phw = pc->pc_hwpmcs[ri]; 1614 pm = phw->phw_pmc; 1615 if (pm == NULL || 1616 pm->pm_state != PMC_STATE_RUNNING || 1617 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { 1618 continue; 1619 } 1620 (void) pmc_process_interrupt(cpu, pm, eip, usermode); 1621 continue; 1622 } 1623 1624 /* 1625 * Fresh interrupt. Look for the CCCR_OVF bit 1626 * and the OVF_Tx bit for this logical 1627 * processor being set. 1628 */ 1629 cccrval = rdmsr(P4_CCCR_MSR_FIRST + i); 1630 1631 if ((cccrval & ovf_mask) != ovf_mask) 1632 continue; 1633 1634 /* 1635 * If the other logical CPU would also have been 1636 * interrupted due to the PMC being shared, record 1637 * this fact in the per-cpu saved interrupt flag 1638 * bitmask. 1639 */ 1640 if (p4_system_has_htt && (cccrval & ovf_partner)) 1641 P4_PCPU_SET_INTRFLAG(pc, ri, 1); 1642 1643 v = rdmsr(P4_PERFCTR_MSR_FIRST + i); 1644 1645 PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v); 1646 1647 /* Stop the counter, and reset the overflow bit */ 1648 cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE); 1649 wrmsr(P4_CCCR_MSR_FIRST + i, cccrval); 1650 1651 did_interrupt = 1; 1652 1653 /* 1654 * Ignore de-configured or stopped PMCs. Ignore PMCs 1655 * not in sampling mode. 1656 */ 1657 phw = pc->pc_hwpmcs[ri]; 1658 pm = phw->phw_pmc; 1659 1660 if (pm == NULL || 1661 pm->pm_state != PMC_STATE_RUNNING || 1662 !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { 1663 continue; 1664 } 1665 1666 /* 1667 * Process the interrupt. Re-enable the PMC if 1668 * processing was successful.
1669 */ 1670 error = pmc_process_interrupt(cpu, pm, eip, usermode); 1671 1672 /* 1673 * Only the first processor executing the NMI handler 1674 * in a HTT pair will restart a PMC, and that too 1675 * only if there were no errors. 1676 */ 1677 v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE( 1678 pm->pm_sc.pm_reloadcount); 1679 wrmsr(P4_PERFCTR_MSR_FIRST + i, v); 1680 if (error == 0) 1681 wrmsr(P4_CCCR_MSR_FIRST + i, 1682 cccrval | P4_CCCR_ENABLE); 1683 } 1684 1685 /* allow the other CPU to proceed */ 1686 if (p4_system_has_htt) 1687 P4_PCPU_REL_INTR_SPINLOCK(pc); 1688 1689 /* 1690 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets 1691 * masked when a PMC interrupts the CPU. We need to unmask 1692 * the interrupt source explicitly. 1693 */ 1694 1695 if (did_interrupt) 1696 pmc_x86_lapic_enable_pmc_interrupt(); 1697 1698 atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed : 1699 &pmc_stats.pm_intr_ignored, 1); 1700 1701 return did_interrupt; 1702} 1703 1704/* 1705 * Describe a CPU's PMC state. 1706 */ 1707 1708static int 1709p4_describe(int cpu, int ri, struct pmc_info *pi, 1710 struct pmc **ppmc) 1711{ 1712 int error; 1713 size_t copied; 1714 struct pmc_hw *phw; 1715 const struct p4pmc_descr *pd; 1716 1717 KASSERT(cpu >= 0 && cpu < mp_ncpus, 1718 ("[p4,%d] illegal CPU %d", __LINE__, cpu)); 1719 KASSERT(ri >= 0 && ri < P4_NPMCS, 1720 ("[p4,%d] row-index %d out of range", __LINE__, ri)); 1721 1722 PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri); 1723 1724 if (P4_CPU_IS_HTT_SECONDARY(cpu)) 1725 return EINVAL; 1726 1727 phw = pmc_pcpu[cpu]->pc_hwpmcs[ri]; 1728 pd = &p4_pmcdesc[ri]; 1729 1730 if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name, 1731 PMC_NAME_MAX, &copied)) != 0) 1732 return error; 1733 1734 pi->pm_class = pd->pm_descr.pd_class; 1735 1736 if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) { 1737 pi->pm_enabled = TRUE; 1738 *ppmc = phw->phw_pmc; 1739 } else { 1740 pi->pm_enabled = FALSE; 1741 *ppmc = NULL; 1742 } 1743 1744 return 0; 1745} 1746 1747/* 1748 * Get MSR# for use with RDPMC. 
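 *
 * The value returned is the counter's offset from P4_PERFCTR_MSR_FIRST
 * rather than a raw MSR number. Assuming P4_PERFCTR_MSR_FIRST is 0x300
 * (the address of BPU_COUNTER0 in p4_pmcdesc[] above), the row-index
 * of BPU_COUNTER0 yields 0 and that of IQ_COUNTER5 (MSR 0x311) yields
 * 0x11; this is the counter index that userland code can pass to the
 * RDPMC instruction.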
1749 */ 1750 1751static int 1752p4_get_msr(int ri, uint32_t *msr) 1753{ 1754 KASSERT(ri >= 0 && ri < P4_NPMCS, 1755 ("[p4,%d] ri %d out of range", __LINE__, ri)); 1756 1757 *msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST; 1758 1759 PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr); 1760 1761 return 0; 1762} 1763 1764 1765int 1766pmc_initialize_p4(struct pmc_mdep *pmc_mdep) 1767{ 1768 struct p4_event_descr *pe; 1769 1770 KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0, 1771 ("[p4,%d] Initializing non-intel processor", __LINE__)); 1772 1773 PMCDBG(MDP,INI,1, "%s", "p4-initialize"); 1774 1775 switch (pmc_mdep->pmd_cputype) { 1776 case PMC_CPU_INTEL_PIV: 1777 1778 pmc_mdep->pmd_npmc = P4_NPMCS; 1779 pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4; 1780 pmc_mdep->pmd_classes[1].pm_caps = P4_PMC_CAPS; 1781 pmc_mdep->pmd_classes[1].pm_width = 40; 1782 pmc_mdep->pmd_nclasspmcs[1] = 18; 1783 1784 pmc_mdep->pmd_init = p4_init; 1785 pmc_mdep->pmd_cleanup = p4_cleanup; 1786 pmc_mdep->pmd_switch_in = p4_switch_in; 1787 pmc_mdep->pmd_switch_out = p4_switch_out; 1788 pmc_mdep->pmd_read_pmc = p4_read_pmc; 1789 pmc_mdep->pmd_write_pmc = p4_write_pmc; 1790 pmc_mdep->pmd_config_pmc = p4_config_pmc; 1791 pmc_mdep->pmd_get_config = p4_get_config; 1792 pmc_mdep->pmd_allocate_pmc = p4_allocate_pmc; 1793 pmc_mdep->pmd_release_pmc = p4_release_pmc; 1794 pmc_mdep->pmd_start_pmc = p4_start_pmc; 1795 pmc_mdep->pmd_stop_pmc = p4_stop_pmc; 1796 pmc_mdep->pmd_intr = p4_intr; 1797 pmc_mdep->pmd_describe = p4_describe; 1798 pmc_mdep->pmd_get_msr = p4_get_msr; /* i386 */ 1799 1800 /* model specific munging */ 1801 if ((cpu_id & 0xFFF) < 0xF27) { 1802 1803 /* 1804 * On P4 and Xeon with CPUID < (Family 15, 1805 * Model 2, Stepping 7), only one ESCR is 1806 * available for the IOQ_ALLOCATION event. 1807 */ 1808 1809 pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION); 1810 pe->pm_escrs[1] = P4_ESCR_NONE; 1811 } 1812 1813 break; 1814 1815 default: 1816 KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__)); 1817 return ENOSYS; 1818 } 1819 1820 return 0; 1821} 1822
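/*
 * Illustrative sketch (not part of the driver): once pmc_initialize_p4()
 * has filled in the 'pmc_mdep' structure, the machine-independent
 * hwpmc(4) core drives this class through the method pointers set
 * above, roughly as follows:
 *
 *	struct pmc_mdep *md;	(set up by the MD initialization code)
 *	pmc_value_t v;
 *
 *	md->pmd_init(cpu);			(p4_init)
 *	md->pmd_config_pmc(cpu, ri, pm);	(p4_config_pmc)
 *	md->pmd_start_pmc(cpu, ri);		(p4_start_pmc)
 *	md->pmd_read_pmc(cpu, ri, &v);		(p4_read_pmc)
 *	md->pmd_stop_pmc(cpu, ri);		(p4_stop_pmc)
 *
 * The exact calling sequence and the locking around these calls are
 * the responsibility of sys/dev/hwpmc/hwpmc_mod.c and are only
 * summarized here.
 */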