#include <machine/md_var.h>
#include <machine/specialreg.h>

/*
 * PENTIUM 4 SUPPORT
 *
 * The P4 has 18 PMCs, divided into 4 groups of 4, 4, 4 and 6 PMCs
 * respectively.  Each PMC comprises two model-specific registers:
 * a counter configuration control register (CCCR) and a counter
 * register that holds the actual event counts.
 *
 * Configuring an event requires the use of one of 45 event selection
 * control registers (ESCR).  Events are associated with specific
 * ESCRs.  Each PMC group has a set of ESCRs it can use.
 *
 * - The BPU counter group (4 PMCs) can use the 16 ESCRs:
 *   BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
 *   PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
 *
 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
 *   TC_ESCR{0,1}, TBPU_ESCR{0,1}.
 *
 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
 *   FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
 *   DAC_ESCR{0,1}.
 *
 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
 *   ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
 *
 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
 * present) of a counter group.  Odd-numbered ESCRs can be used with
 * counters 2, 3 and 5 (if present) of a counter group.  The
 * 'p4_escrs[]' table describes these restrictions in a form that
 * function 'p4_allocate()' uses for making allocation decisions.
 *
 * SYSTEM-MODE AND THREAD-MODE ALLOCATION
 *
 * In addition to remembering the state of PMC rows
 * ('FREE', 'STANDALONE', or 'THREAD'), we similarly need to track the
 * state of ESCR rows.  If an ESCR is allocated to a system-mode PMC
 * on a CPU, we cannot allocate it to a thread-mode PMC.  On a
 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each
 * CPU is tracked by the pc_escrs[] array.
 *
 * Each system-mode PMC that is using an ESCR records its row-index in
 * the appropriate entry, and system-mode allocation attempts check
 * that an ESCR is available using this array.  Process-mode PMCs do
 * not use the pc_escrs[] array, since the ESCR row itself would have
 * been marked as being in 'THREAD' mode.
 *
 * HYPERTHREADING SUPPORT
 *
 * When HTT is enabled, the FreeBSD kernel treats the two 'logical'
 * CPUs as independent CPUs and can schedule kernel threads on them
 * independently.  However, the two logical CPUs share the same set of
 * PMC resources.  We need to ensure that:
 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
 *   and,
 * - Threads of multi-threaded processes that get scheduled on the same
 *   physical CPU are handled correctly.
 *
 * HTT Detection
 *
 * Not all HTT capable systems will have HTT enabled.  We detect the
 * presence of HTT by detecting if 'p4_init()' was called for a secondary
 * CPU in an HTT pair.
 *
 * Note that hwpmc(4) cannot currently deal with a change in HTT status
 * once loaded.
 *
 * Handling HTT READ / WRITE / START / STOP
 *
 * PMC resources are shared across the CPUs in an HTT pair.  We
 * designate the lower numbered CPU in an HTT pair as the 'primary'
 * CPU.  In each primary CPU's state we keep track of a 'runcount'
 * which reflects the number of PMC-using processes that have been
 * scheduled on its secondary CPU.
 * Process-mode PMC operations will actually 'start' or 'stop'
 * hardware only if these are the first or last processes
 * respectively to use the hardware.  PMC values written by a 'write'
 * operation are saved and are transferred to hardware at PMC 'start'
 * time if the runcount is 0.  If the runcount is greater than 0 at
 * the time of a 'start' operation, we keep track of the actual
 * hardware value at the time of the 'start' operation and use this
 * to adjust the final readings at PMC 'stop' or 'read' time.
 *
 * Execution sequences:
 *
 * Case 1:  CPUx   +...-                (no overlap)
 *          CPUy          +...-
 *          RC    0 1   0 1    0
 *
 * Case 2:  CPUx   +........-           (partial overlap)
 *          CPUy       +........-
 *          RC    0 1  2    1    0
 *
 * Case 3:  CPUx   +..............-     (fully overlapped)
 *          CPUy       +.....-
 *          RC    0 1  2     1     0
 *
 * Key:
 * 'CPU[xy]' : one of the two logical processors on an HTT CPU.
 * 'RC'      : run count (#threads per physical core).
 * '+'       : point in time when a thread is put on a CPU.
 * '-'       : point in time where a thread is taken off a CPU.
 *
 * Handling HTT CONFIG
 *
 * Different processes attached to the same PMC may get scheduled on
 * the two logical processors in the package.  We keep track of config
 * and de-config operations using the CFGFLAGS fields of the
 * per-physical cpu state.
 */

#define	P4_PMCS()				\
	P4_PMC(BPU_COUNTER0)			\
	P4_PMC(BPU_COUNTER1)			\
	P4_PMC(BPU_COUNTER2)			\
	P4_PMC(BPU_COUNTER3)			\
	P4_PMC(MS_COUNTER0)			\
	P4_PMC(MS_COUNTER1)			\
	P4_PMC(MS_COUNTER2)			\
	P4_PMC(MS_COUNTER3)			\
	P4_PMC(FLAME_COUNTER0)			\
	P4_PMC(FLAME_COUNTER1)			\
	P4_PMC(FLAME_COUNTER2)			\
	P4_PMC(FLAME_COUNTER3)			\
	P4_PMC(IQ_COUNTER0)			\
	P4_PMC(IQ_COUNTER1)			\
	P4_PMC(IQ_COUNTER2)			\
	P4_PMC(IQ_COUNTER3)			\
	P4_PMC(IQ_COUNTER4)			\
	P4_PMC(IQ_COUNTER5)			\
	P4_PMC(NONE)

enum pmc_p4pmc {
#undef	P4_PMC
#define	P4_PMC(N)	P4_PMC_##N ,
	P4_PMCS()
};
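
/*
 * Illustrative sketch of the X-macro expansion above (the values
 * simply follow from declaration order; this is not additional
 * source):
 *
 *	enum pmc_p4pmc {
 *		P4_PMC_BPU_COUNTER0,	-- 0
 *		P4_PMC_BPU_COUNTER1,	-- 1
 *		...
 *		P4_PMC_IQ_COUNTER5,	-- 17
 *		P4_PMC_NONE		-- 18
 *	};
 */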

/*
 * P4 ESCR descriptors
 */

#define	P4_ESCRS()							\
    P4_ESCR(BSU_ESCR0,	 0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BSU_ESCR1,	 0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FSB_ESCR0,	 0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(FSB_ESCR1,	 0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FIRM_ESCR0,	 0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FIRM_ESCR1,	 0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(DAC_ESCR0,	 0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(DAC_ESCR1,	 0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(MOB_ESCR0,	 0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(MOB_ESCR1,	 0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(PMH_ESCR0,	 0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(PMH_ESCR1,	 0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(SAAT_ESCR0,	 0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(SAAT_ESCR1,	 0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(U2L_ESCR0,	 0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(U2L_ESCR1,	 0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(BPU_ESCR0,	 0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BPU_ESCR1,	 0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(IS_ESCR0,	 0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IS_ESCR1,	 0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ITLB_ESCR0,	 0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(ITLB_ESCR1,	 0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(CRU_ESCR0,	 0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR1,	 0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(IQ_ESCR0,	 0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(IQ_ESCR1,	 0x3BB, IQ_COUNTER1, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(RAT_ESCR0,	 0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(RAT_ESCR1,	 0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(SSU_ESCR0,	 0x3BE, IQ_COUNTER0, IQ_COUNTER2, IQ_COUNTER4)	\
    P4_ESCR(MS_ESCR0,	 0x3C0, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(MS_ESCR1,	 0x3C1, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TBPU_ESCR0,	 0x3C2, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TBPU_ESCR1,	 0x3C3, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TC_ESCR0,	 0x3C4, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TC_ESCR1,	 0x3C5, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(IX_ESCR0,	 0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IX_ESCR1,	 0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ALF_ESCR0,	 0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(ALF_ESCR1,	 0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR2,	 0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR3,	 0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR4,	 0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR5,	 0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(NONE,	 ~0,    NONE, NONE, NONE)

enum pmc_p4escr {
#define	P4_ESCR(N, MSR, P1, P2, P3)	P4_ESCR_##N ,
	P4_ESCRS()
#undef	P4_ESCR
};

struct pmc_p4escr_descr {
	const char	pm_escrname[PMC_NAME_MAX];
	u_short		pm_escr_msr;
	const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
};

static struct pmc_p4escr_descr p4_escrs[] =
{
#define	P4_ESCR(N, MSR, P1, P2, P3)		\
	{					\
		.pm_escrname = #N,		\
		.pm_escr_msr = (MSR),		\
		.pm_pmcs =			\
		{				\
			P4_PMC_##P1,		\
			P4_PMC_##P2,		\
			P4_PMC_##P3		\
		}				\
	} ,

	P4_ESCRS()

#undef	P4_ESCR
};
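
/*
 * For example (an illustrative reading of the table above, not new
 * data), the descriptor generated for CRU_ESCR0 is
 *
 *	{ .pm_escrname = "CRU_ESCR0", .pm_escr_msr = 0x3B8,
 *	  .pm_pmcs = { P4_PMC_IQ_COUNTER0, P4_PMC_IQ_COUNTER1,
 *		       P4_PMC_IQ_COUNTER4 } }
 *
 * so 'p4_allocate_pmc()' below will pair CRU_ESCR0 only with IQ
 * counters 0, 1 or 4.
 */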

/*
 * P4 Event descriptor
 */

struct p4_event_descr {
	const enum pmc_event pm_event;
	const uint32_t	pm_escr_eventselect;
	const uint32_t	pm_cccr_select;
	const char	pm_is_ti_event;
	enum pmc_p4escr	pm_escrs[P4_MAX_ESCR_PER_EVENT];
};

static struct p4_event_descr p4_events[] = {

#define	P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \
	{								\
		.pm_event            = PMC_EV_P4_##NAME,		\
		.pm_escr_eventselect = (ESCREVENTSEL),			\
		.pm_cccr_select      = (CCCRSEL),			\
		.pm_is_ti_event      = (TI_EVENT),			\
		.pm_escrs            =					\
		{							\
			P4_ESCR_##ESCR0,				\
			P4_ESCR_##ESCR1					\
		}							\
	}

P4_EVDESCR(TC_DELIVER_MODE,	0x01, 0x01, TRUE,  TC_ESCR0,	TC_ESCR1),
P4_EVDESCR(BPU_FETCH_REQUEST,	0x03, 0x00, FALSE, BPU_ESCR0,	BPU_ESCR1),
P4_EVDESCR(ITLB_REFERENCE,	0x18, 0x03, FALSE, ITLB_ESCR0,	ITLB_ESCR1),
P4_EVDESCR(MEMORY_CANCEL,	0x02, 0x05, FALSE, DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(MEMORY_COMPLETE,	0x08, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(LOAD_PORT_REPLAY,	0x04, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(STORE_PORT_REPLAY,	0x05, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(MOB_LOAD_REPLAY,	0x03, 0x02, FALSE, MOB_ESCR0,	MOB_ESCR1),
P4_EVDESCR(PAGE_WALK_TYPE,	0x01, 0x04, TRUE,  PMH_ESCR0,	PMH_ESCR1),
P4_EVDESCR(BSQ_CACHE_REFERENCE,	0x0C, 0x07, FALSE, BSU_ESCR0,	BSU_ESCR1),
P4_EVDESCR(IOQ_ALLOCATION,	0x03, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(IOQ_ACTIVE_ENTRIES,	0x1A, 0x06, FALSE, FSB_ESCR1,	NONE),
P4_EVDESCR(FSB_DATA_ACTIVITY,	0x17, 0x06, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BSQ_ALLOCATION,	0x05, 0x07, FALSE, BSU_ESCR0,	NONE),
P4_EVDESCR(BSQ_ACTIVE_ENTRIES,	0x06, 0x07, FALSE, BSU_ESCR1,	NONE),
	/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
P4_EVDESCR(SSE_INPUT_ASSIST,	0x34, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_SP_UOP,	0x08, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_DP_UOP,	0x0C, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_SP_UOP,	0x0A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_DP_UOP,	0x0E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(64BIT_MMX_UOP,	0x02, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(128BIT_MMX_UOP,	0x1A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_FP_UOP,		0x04, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_SIMD_MOVES_UOP,	0x2E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(GLOBAL_POWER_EVENTS,	0x13, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(TC_MS_XFER,		0x05, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(UOP_QUEUE_WRITES,	0x09, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
				0x05, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RETIRED_BRANCH_TYPE,	0x04, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RESOURCE_STALL,	0x01, 0x01, FALSE, ALF_ESCR0,	ALF_ESCR1),
P4_EVDESCR(WC_BUFFER,		0x05, 0x05, TRUE,  DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(B2B_CYCLES,		0x16, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BNR,			0x08, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(SNOOP,		0x06, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(RESPONSE,		0x04, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(FRONT_END_EVENT,	0x08, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(EXECUTION_EVENT,	0x0C, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(REPLAY_EVENT,	0x09, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(INSTR_RETIRED,	0x02, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOPS_RETIRED,	0x01, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOP_TYPE,		0x02, 0x02, FALSE, RAT_ESCR0,	RAT_ESCR1),
P4_EVDESCR(BRANCH_RETIRED,	0x06, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
P4_EVDESCR(X87_ASSIST,		0x03, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MACHINE_CLEAR,	0x02, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3)

#undef	P4_EVDESCR
};

#define	P4_EVENT_IS_TI(E)	((E)->pm_is_ti_event == TRUE)

#define	P4_NEVENTS	(PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1)

/*
 * P4 PMC descriptors
 */

struct p4pmc_descr {
	struct pmc_descr pm_descr;	/* common information */
	enum pmc_p4pmc	pm_pmcnum;	/* PMC number */
	uint32_t	pm_pmc_msr;	/* PERFCTR MSR address */
	uint32_t	pm_cccr_msr;	/* CCCR MSR address */
};

static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
#define	P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM |  \
	PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
	PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE |		  \
	PMC_CAP_TAGGING | PMC_CAP_CASCADE)

#define	P4_PMCDESCR(N, PMC, CCCR)			\
	{						\
		.pm_descr =				\
		{					\
			.pd_name  = #N,			\
			.pd_class = PMC_CLASS_P4,	\
			.pd_caps  = P4_PMC_CAPS,	\
			.pd_width = 40			\
		},					\
		.pm_pmcnum   = P4_PMC_##N,		\
		.pm_cccr_msr = (CCCR),			\
		.pm_pmc_msr  = (PMC)			\
	}

	P4_PMCDESCR(BPU_COUNTER0,	0x300,	0x360),
	P4_PMCDESCR(BPU_COUNTER1,	0x301,	0x361),
	P4_PMCDESCR(BPU_COUNTER2,	0x302,	0x362),
	P4_PMCDESCR(BPU_COUNTER3,	0x303,	0x363),
	P4_PMCDESCR(MS_COUNTER0,	0x304,	0x364),
	P4_PMCDESCR(MS_COUNTER1,	0x305,	0x365),
	P4_PMCDESCR(MS_COUNTER2,	0x306,	0x366),
	P4_PMCDESCR(MS_COUNTER3,	0x307,	0x367),
	P4_PMCDESCR(FLAME_COUNTER0,	0x308,	0x368),
	P4_PMCDESCR(FLAME_COUNTER1,	0x309,	0x369),
	P4_PMCDESCR(FLAME_COUNTER2,	0x30A,	0x36A),
	P4_PMCDESCR(FLAME_COUNTER3,	0x30B,	0x36B),
	P4_PMCDESCR(IQ_COUNTER0,	0x30C,	0x36C),
	P4_PMCDESCR(IQ_COUNTER1,	0x30D,	0x36D),
	P4_PMCDESCR(IQ_COUNTER2,	0x30E,	0x36E),
	P4_PMCDESCR(IQ_COUNTER3,	0x30F,	0x36F),
	P4_PMCDESCR(IQ_COUNTER4,	0x310,	0x370),
	P4_PMCDESCR(IQ_COUNTER5,	0x311,	0x371),

#undef	P4_PMCDESCR
};
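
/*
 * Note the regular MSR numbering above (an observation on the table,
 * assuming P4_PERFCTR_MSR_FIRST == 0x300 and P4_CCCR_MSR_FIRST ==
 * 0x360 as used elsewhere in this file): for row index 'ri', the
 * PERFCTR MSR is P4_PERFCTR_MSR_FIRST + ri and the matching CCCR MSR
 * is P4_CCCR_MSR_FIRST + ri.  'p4_intr()' and 'p4_get_msr()' below
 * rely on this layout.
 */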

/* HTT support */
#define	P4_NHTT		2	/* logical processors/chip */

static int p4_system_has_htt;

/*
 * Per-CPU data structure for P4 class CPUs
 *
 * [18 struct pmc_hw structures]
 * [45 ESCR status bytes]
 * [per-cpu spin mutex]
 * [18 flag fields for holding config flags and a runcount]
 * [18*2 hw value fields]	(Thread mode PMC support)
 *    or
 * [18*2 EIP values]		(Sampling mode PMCs)
 * [18*2 pmc value fields]	(Thread mode PMC support)
 */

struct p4_cpu {
	struct pmc_hw	pc_p4pmcs[P4_NPMCS];
	char		pc_escrs[P4_NESCR];
	struct mtx	pc_mtx;		/* spin lock */
	uint32_t	pc_intrflag;	/* NMI handler flags */
	unsigned int	pc_intrlock;	/* NMI handler spin lock */
	unsigned char	pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
	union {
		pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
		uintptr_t   pc_ip[P4_NPMCS * P4_NHTT];
	}		pc_si;
	pmc_value_t	pc_pmc_values[P4_NPMCS * P4_NHTT];
};

static struct p4_cpu **p4_pcpu;

#define	P4_PCPU_PMC_VALUE(PC,RI,CPU)	(PC)->pc_pmc_values[(RI)*((CPU) & 1)]
#define	P4_PCPU_HW_VALUE(PC,RI,CPU)	(PC)->pc_si.pc_hw[(RI)*((CPU) & 1)]
#define	P4_PCPU_SAVED_IP(PC,RI,CPU)	(PC)->pc_si.pc_ip[(RI)*((CPU) & 1)]

#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do {	\
	char _tmp;				\
	_tmp = (PC)->pc_flags[(RI)];		\
	_tmp &= ~(MASK);			\
	_tmp |= (VAL) & (MASK);			\
	(PC)->pc_flags[(RI)] = _tmp;		\
} while (0)

#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) << 4))
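
/*
 * Layout sketch of one pc_flags[] byte as maintained by the macros
 * above (illustrative):
 *
 *	bits 7-4: CFGFLAGS, one bit per logical CPU that has
 *	          configured this row (0x1: primary, 0x2: secondary)
 *	bits 3-0: RUNCOUNT, the number of logical CPUs currently
 *	          running this PMC (0, 1 or 2)
 *
 * For example,
 *
 *	P4_PCPU_SET_RUNCOUNT(pc, ri, 2);
 *	P4_PCPU_SET_CFGFLAGS(pc, ri, 0x3);
 *
 * leaves pc_flags[ri] == 0x32.
 */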

#define	P4_CPU_TO_FLAG(C)	(P4_CPU_IS_HTT_SECONDARY((C)) ? 0x2 : 0x1)

#define	P4_PCPU_GET_INTRFLAG(PC,I)	((PC)->pc_intrflag & (1 << (I)))
#define	P4_PCPU_SET_INTRFLAG(PC,I,V)	do {		\
		uint32_t __mask;			\
		__mask = 1 << (I);			\
		if ((V))				\
			(PC)->pc_intrflag |= __mask;	\
		else					\
			(PC)->pc_intrflag &= ~__mask;	\
	} while (0)

/*
 * A minimal spin lock implementation for use inside the NMI handler.
 *
 * We don't want to use a regular spin lock here, because curthread
 * may not be consistent at the time the handler is invoked.
 */
#define	P4_PCPU_ACQ_INTR_SPINLOCK(PC) do {				\
		while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1)) \
			ia32_pause();					\
	} while (0)
#define	P4_PCPU_REL_INTR_SPINLOCK(PC)					\
	atomic_store_rel_int(&(PC)->pc_intrlock, 0)

/* ESCR row disposition */
static int p4_escrdisp[P4_NESCR];

#define	P4_ESCR_ROW_DISP_IS_THREAD(E)		(p4_escrdisp[(E)] > 0)
#define	P4_ESCR_ROW_DISP_IS_STANDALONE(E)	(p4_escrdisp[(E)] < 0)
#define	P4_ESCR_ROW_DISP_IS_FREE(E)		(p4_escrdisp[(E)] == 0)

#define	P4_ESCR_MARK_ROW_STANDALONE(E) do {				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_max_active()),		\
	    ("[p4,%d] row disposition error", __LINE__));		\
} while (0)

#define	P4_ESCR_UNMARK_ROW_STANDALONE(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
} while (0)

#define	P4_ESCR_MARK_ROW_THREAD(E) do {					\
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
} while (0)

#define	P4_ESCR_UNMARK_ROW_THREAD(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
} while (0)

#define	P4_PMC_IS_STOPPED(cccr)	((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)

#define	P4_CPU_IS_HTT_SECONDARY(cpu)					\
	(p4_system_has_htt ? ((cpu) & 1) : 0)
#define	P4_TO_HTT_PRIMARY(cpu)						\
	(p4_system_has_htt ? ((cpu) & ~1) : (cpu))

#define	P4_CCCR_Tx_MASK	(~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1|	\
	P4_CCCR_ENABLE|P4_CCCR_OVF))
#define	P4_ESCR_Tx_MASK	(~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS|	\
	P4_ESCR_T1_USR))

/*
 * support routines
 */

static struct p4_event_descr *
p4_find_event(enum pmc_event ev)
{
	int n;

	for (n = 0; n < P4_NEVENTS; n++)
		if (p4_events[n].pm_event == ev)
			break;
	if (n == P4_NEVENTS)
		return (NULL);
	return (&p4_events[n]);
}

/*
 * Initialize per-cpu state
 */

static int
p4_pcpu_init(struct pmc_mdep *md, int cpu)
{
	char *pescr;
	int n, first_ri, phycpu;
	struct pmc_hw *phw;
	struct p4_cpu *p4c;
	struct pmc_cpu *pc, *plc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu,
	    pmc_cpu_is_primary(cpu) != 0);

	first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;

	/*
	 * The two CPUs in an HT pair share their per-cpu state.
	 *
	 * For HT capable CPUs, we assume that the two logical
	 * processors in the HT pair get two consecutive CPU ids
	 * starting with an even id #.
	 *
	 * The primary CPU (the even numbered CPU of the pair) would
	 * have been initialized prior to the initialization for the
	 * secondary.
	 */

	if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) {

		p4_system_has_htt = 1;

		phycpu = P4_TO_HTT_PRIMARY(cpu);
		pc = pmc_pcpu[phycpu];
		plc = pmc_pcpu[cpu];

		KASSERT(plc != pc, ("[p4,%d] per-cpu config error", __LINE__));

		PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pc=%p", cpu,
		    phycpu, pc);
		KASSERT(pc, ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d",
		    __LINE__, cpu, phycpu));

		/* PMCs are shared with the physical CPU. */
		for (n = 0; n < P4_NPMCS; n++)
			plc->pc_hwpmcs[n + first_ri] =
			    pc->pc_hwpmcs[n + first_ri];

		return (0);
	}

	p4c = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO);

	if (p4c == NULL)
		return (ENOMEM);

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[p4,%d] cpu %d null per-cpu", __LINE__, cpu));

	p4_pcpu[cpu] = p4c;
	phw = p4c->pc_p4pmcs;

	for (n = 0; n < P4_NPMCS; n++, phw++) {
		phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
		phw->phw_pmc = NULL;
		pc->pc_hwpmcs[n + first_ri] = phw;
	}

	pescr = p4c->pc_escrs;
	for (n = 0; n < P4_NESCR; n++)
		*pescr++ = P4_INVALID_PMC_INDEX;

	mtx_init(&p4c->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN);

	return (0);
}

/*
 * Destroy per-cpu state.
 */

static int
p4_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int first_ri, i;
	struct p4_cpu *p4c;
	struct pmc_cpu *pc;

	PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);

	pc = pmc_pcpu[cpu];
	first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;

	for (i = 0; i < P4_NPMCS; i++)
		pc->pc_hwpmcs[i + first_ri] = NULL;

	if (!pmc_cpu_is_primary(cpu) && (cpu & 1))
		return (0);

	p4c = p4_pcpu[cpu];

	KASSERT(p4c != NULL, ("[p4,%d] NULL pcpu", __LINE__));

	/* Turn off all PMCs on this CPU */
	for (i = 0; i < P4_NPMCS - 1; i++)
		wrmsr(P4_CCCR_MSR_FIRST + i,
		    rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE);

	mtx_destroy(&p4c->pc_mtx);

	free(p4c, M_PMC);

	p4_pcpu[cpu] = NULL;

	return (0);
}

/*
 * Read a PMC
 */

static int
p4_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;
	struct p4_cpu *pc;
	enum pmc_mode mode;
	struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	pm = pc->pc_p4pmcs[ri].phw_pmc;
	pd = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, cpu, ri));

	KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
	    ("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
	    pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));

	mode = PMC_TO_MODE(pm);

	PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));

	tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);

	if (PMC_IS_VIRTUAL_MODE(mode)) {
		if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
			tmp += (P4_PERFCTR_MASK + 1) -
			    P4_PCPU_HW_VALUE(pc,ri,cpu);
		else
			tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
		tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
	}

	if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
		*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
	else
		*v = tmp;

	PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);

	return (0);
}
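
/*
 * A worked example of the wraparound arithmetic above (illustrative
 * numbers only): the counters are 40 bits wide, so P4_PERFCTR_MASK +
 * 1 is 2^40.  If the counter read 2^40 - 100 when the PMC was
 * started (the saved P4_PCPU_HW_VALUE) and reads 50 now, the number
 * of events counted since 'start' is
 *
 *	50 + (2^40 - (2^40 - 100)) = 150
 *
 * which is then added to the software-maintained PMC value.
 */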

/*
 * Write a PMC
 */

static int
p4_write_pmc(int cpu, int ri, pmc_value_t v)
{
	enum pmc_mode mode;
	struct pmc *pm;
	struct p4_cpu *pc;
	const struct pmc_hw *phw;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	phw = &pc->pc_p4pmcs[ri];
	pm = phw->phw_pmc;
	pd = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
	    cpu, ri));

	mode = PMC_TO_MODE(pm);

	PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
	    mode, v);

	/*
	 * Write the PMC value to the register/saved value: for
	 * sampling mode PMCs, the value to be programmed into the PMC
	 * counter is -(C+1) where 'C' is the requested sample rate.
	 */
	if (PMC_IS_SAMPLING_MODE(mode))
		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);

	if (PMC_IS_SYSTEM_MODE(mode))
		wrmsr(pd->pm_pmc_msr, v);
	else
		P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;

	return (0);
}

/*
 * Configure a PMC 'pm' on the given CPU and row-index.
 *
 * 'pm' may be NULL to indicate de-configuration.
 *
 * On HTT systems, a PMC may get configured twice, once for each
 * "logical" CPU.  We track this using the CFGFLAGS field of the
 * per-cpu state; this field is a bit mask with one bit each for
 * logical CPUs 0 & 1.
 */

static int
p4_config_pmc(int cpu, int ri, struct pmc *pm)
{
	struct pmc_hw *phw;
	struct p4_cpu *pc;
	int cfgflags, cpuflag;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	phw = &pc->pc_p4pmcs[ri];

	KASSERT(pm == NULL || phw->phw_pmc == NULL ||
	    (p4_system_has_htt && phw->phw_pmc == pm),
	    ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));

	mtx_lock_spin(&pc->pc_mtx);
	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);

	KASSERT(cfgflags >= 0 && cfgflags <= 3,
	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
	    cfgflags, cpu, ri));

	KASSERT(cfgflags == 0 || phw->phw_pmc,
	    ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
	    __LINE__, cpu, ri));

	cpuflag = P4_CPU_TO_FLAG(cpu);

	if (pm) {		/* config */
		if (cfgflags == 0)
			phw->phw_pmc = pm;

		KASSERT(phw->phw_pmc == pm,
		    ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
		    __LINE__, cpu, ri, pm, phw->phw_pmc));

		cfgflags |= cpuflag;
	} else {		/* unconfig */
		cfgflags &= ~cpuflag;

		if (cfgflags == 0)
			phw->phw_pmc = NULL;
	}

	KASSERT(cfgflags >= 0 && cfgflags <= 3,
	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
	    cfgflags, cpu, ri));

	P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);

	mtx_unlock_spin(&pc->pc_mtx);

	return (0);
}
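
/*
 * A trace of the CFGFLAGS protocol above (illustrative): on an HTT
 * pair, configuring the same 'pm' at row 'ri' first from the primary
 * and then from the secondary logical CPU takes cfgflags through
 * 0x0 -> 0x1 -> 0x3; de-configuring from both CPUs, in either order,
 * returns it to 0x0, and only then is phw->phw_pmc reset to NULL.
 */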

/*
 * Retrieve a configured PMC pointer from hardware state.
 */

static int
p4_get_config(int cpu, int ri, struct pmc **ppm)
{
	int cfgflags;
	struct p4_cpu *pc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

	mtx_lock_spin(&pc->pc_mtx);
	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
	mtx_unlock_spin(&pc->pc_mtx);

	if (cfgflags & P4_CPU_TO_FLAG(cpu))
		*ppm = pc->pc_p4pmcs[ri].phw_pmc; /* PMC config'ed on this CPU */
	else
		*ppm = NULL;

	return (0);
}

/*
 * Allocate a PMC.
 *
 * The allocation strategy differs between HTT and non-HTT systems.
 *
 * The non-HTT case:
 * - Given the desired event and the PMC row-index, lookup the
 *   list of valid ESCRs for the event.
 * - For each valid ESCR:
 *   - Check if the ESCR is free and the ESCR row is in a compatible
 *     mode (i.e., system or process).
 *   - Check if the ESCR is usable with a P4 PMC at the desired row-index.
 * If everything matches, we determine the appropriate bit values for
 * the ESCR and CCCR registers.
 *
 * The HTT case:
 *
 * - Process mode PMCs require special care.  The FreeBSD scheduler
 *   could schedule any two processes on the same physical CPU.  We
 *   need to ensure that a given PMC row-index is never allocated to
 *   two different PMCs owned by different user-processes.  This is
 *   ensured by always allocating a PMC from a 'FREE' PMC row if the
 *   system has HTT active.
 * - A similar check needs to be done for ESCRs; we do not want two
 *   PMCs using the same ESCR to be scheduled at the same time.  Thus
 *   ESCR allocation is also restricted to FREE rows if the system has
 *   HTT enabled.
 * - Thirdly, some events are 'thread-independent', i.e., the PMC
 *   hardware cannot distinguish between events caused by different
 *   logical CPUs.  This makes it impossible to assign such events to
 *   a given thread of execution.  If the system has HTT enabled,
 *   these events are not allowed for process-mode PMCs.
 */

static int
p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	int found, n, m;
	uint32_t caps, cccrvalue, escrvalue, tflags;
	enum pmc_p4escr escr;
	struct p4_cpu *pc;
	struct p4_event_descr *pevent;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index value %d", __LINE__, ri));

	pd = &p4_pmcdesc[ri];

	PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
	    "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
	    pm->pm_caps);

	/* check class */
	if (pd->pm_descr.pd_class != a->pm_class)
		return (EINVAL);

	/* check requested capabilities */
	caps = a->pm_caps;
	if ((pd->pm_descr.pd_caps & caps) != caps)
		return (EPERM);

	/*
	 * If the system has HTT enabled, and the desired allocation
	 * mode is process-private, and the PMC row disposition is not
	 * FREE (0), decline the allocation.
	 */

	if (p4_system_has_htt &&
	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
	    pmc_getrowdisp(ri) != 0)
		return (EBUSY);

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] unknown PMC class %d", __LINE__,
	    pd->pm_descr.pd_class));

	if (pm->pm_event < PMC_EV_P4_FIRST ||
	    pm->pm_event > PMC_EV_P4_LAST)
		return (EINVAL);

	if ((pevent = p4_find_event(pm->pm_event)) == NULL)
		return (ESRCH);

	PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
	    pevent->pm_event, pevent->pm_escr_eventselect,
	    pevent->pm_cccr_select, pevent->pm_is_ti_event);

	/*
	 * Some PMC events are 'thread independent' and therefore
	 * cannot be used for process-private modes if HTT is being
	 * used.
	 */

	if (P4_EVENT_IS_TI(pevent) &&
	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
	    p4_system_has_htt)
		return (EINVAL);

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

	found = 0;

	/* look for a suitable ESCR for this event */
	for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
		if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
			break;	/* out of ESCRs */
		/*
		 * Check ESCR row disposition.
		 *
		 * If the request is for a system-mode PMC, then the
		 * ESCR row should not be in process-virtual mode, and
		 * should also be free on the current CPU.
		 */

		if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
			if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
			    pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
				continue;
		}

		/*
		 * If the request is for a process-virtual PMC, and if
		 * HTT is not enabled, we can use an ESCR row that is
		 * either FREE or already in process mode.
		 *
		 * If HTT is enabled, then we need to ensure that a
		 * given ESCR is never allocated to two PMCs that
		 * could run simultaneously on the two logical CPUs of
		 * a CPU package.  We ensure this by only allocating
		 * ESCRs from rows marked as 'FREE'.
		 */

		if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
			if (p4_system_has_htt) {
				if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
					continue;
			} else
				if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
					continue;
		}

		/*
		 * We found a suitable ESCR for this event.  Now check
		 * if this ESCR can work with the PMC at row-index 'ri'.
		 */

		for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
			if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
				found = 1;
				break;
			}
	}

	if (found == 0)
		return (ESRCH);

	KASSERT((int) escr >= 0 && escr < P4_NESCR,
	    ("[p4,%d] illegal ESCR value %d", __LINE__, escr));

	/* mark ESCR row mode */
	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
		P4_ESCR_MARK_ROW_STANDALONE(escr);
	} else {
		KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
		    ("[p4,%d] escr[%d] already in use", __LINE__, escr));
		P4_ESCR_MARK_ROW_THREAD(escr);
	}

	pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr;
	pm->pm_md.pm_p4.pm_p4_escr    = escr;

	cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
	escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);

	/* CCCR fields */
	if (caps & PMC_CAP_THRESHOLD)
		cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
		    P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;

	if (caps & PMC_CAP_EDGE)
		cccrvalue |= P4_CCCR_EDGE;

	if (caps & PMC_CAP_INVERT)
		cccrvalue |= P4_CCCR_COMPLEMENT;

	if (p4_system_has_htt)
		cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
		    P4_CCCR_ACTIVE_THREAD_MASK;
	else			/* no HTT; thread field should be '11b' */
		cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);

	if (caps & PMC_CAP_CASCADE)
		cccrvalue |= P4_CCCR_CASCADE;

	/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
	if (caps & PMC_CAP_INTERRUPT)
		cccrvalue |= P4_CCCR_OVF_PMI_T0;

	/* ESCR fields */
	if (caps & PMC_CAP_QUALIFIER)
		escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
		    P4_ESCR_EVENT_MASK_MASK;
	if (caps & PMC_CAP_TAGGING)
		escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
		    P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;

	/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
	tflags = 0;
	if (caps & PMC_CAP_SYSTEM)
		tflags |= P4_ESCR_T0_OS;
	if (caps & PMC_CAP_USER)
		tflags |= P4_ESCR_T0_USR;
	if (tflags == 0)
		tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
	escrvalue |= tflags;

	pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
	pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;

	PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
	    "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
	    cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);

	return (0);
}
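
/*
 * A sketch of the register images built above (assuming a counting
 * mode allocation with caps PMC_CAP_SYSTEM | PMC_CAP_USER on a
 * non-HTT machine): cccrvalue holds the event's ESCR select field
 * plus an ACTIVE_THREAD field of 11b, and escrvalue holds the event
 * select field plus P4_ESCR_T0_OS | P4_ESCR_T0_USR.  P4_CCCR_ENABLE
 * is deliberately left clear here; 'p4_start_pmc()' sets it.
 */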

/*
 * Release a PMC.
 */

static int
p4_release_pmc(int cpu, int ri, struct pmc *pm)
{
	enum pmc_p4escr escr;
	struct p4_cpu *pc;

	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	escr = pm->pm_md.pm_p4.pm_p4_escr;

	PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);

	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

		KASSERT(pc->pc_p4pmcs[ri].phw_pmc == NULL,
		    ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));

		P4_ESCR_UNMARK_ROW_STANDALONE(escr);
		KASSERT(pc->pc_escrs[escr] == ri,
		    ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
		    escr, ri));
		pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
	} else
		P4_ESCR_UNMARK_ROW_THREAD(escr);

	return (0);
}

/*
 * Start a PMC
 */

static int
p4_start_pmc(int cpu, int ri)
{
	int rc;
	struct pmc *pm;
	struct p4_cpu *pc;
	struct p4pmc_descr *pd;
	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	pm = pc->pc_p4pmcs[ri].phw_pmc;
	pd = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, cpu, ri));

	PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] wrong PMC class %d", __LINE__,
	    pd->pm_descr.pd_class));

	/* retrieve the desired CCCR/ESCR values from the PMC */
	cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
	escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;

	/* extract and zero the logical processor selection bits */
	cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
	escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
	cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
	escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);

	if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */
		cccrtbits <<= 1;
		escrtbits >>= 2;
	}

	/* start system mode PMCs directly */
	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(escrmsr, escrvalue | escrtbits);
		wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
		return (0);
	}

	/*
	 * Thread mode PMCs
	 *
	 * On HTT machines, the same PMC could be scheduled on the
	 * same physical CPU twice (once for each logical CPU), for
	 * example, if two threads of a multi-threaded process get
	 * scheduled on the same CPU.
	 */

	mtx_lock_spin(&pc->pc_mtx);

	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
	KASSERT(rc == 0 || rc == 1,
	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
	    rc));

	if (rc == 0) {		/* 1st CPU and the non-HTT case */

		KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
		    ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
		    cpu, ri, pd->pm_cccr_msr));

		/* write out the low 40 bits of the saved value to hardware */
		wrmsr(pd->pm_pmc_msr,
		    P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);

	} else if (rc == 1) {	/* 2nd CPU */

		/*
		 * Stop the PMC and retrieve the CCCR and ESCR values
		 * from their MSRs, and turn on the additional T[0/1]
		 * bits for the 2nd CPU.
		 */

		cccrvalue = rdmsr(pd->pm_cccr_msr);
		wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);

		/* check that the configuration bits read back match the PMC */
		KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
		    (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
		    ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
		    "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
		    cccrvalue & P4_CCCR_Tx_MASK,
		    pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
		KASSERT(cccrvalue & P4_CCCR_ENABLE,
		    ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
		    __LINE__, rc, cpu, ri));
		KASSERT((cccrvalue & cccrtbits) == 0,
		    ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
		    "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
		    cccrvalue, cccrtbits));

		escrvalue = rdmsr(escrmsr);

		KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
		    (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
		    ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
		    "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
		    escrvalue & P4_ESCR_Tx_MASK,
		    pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
		KASSERT((escrvalue & escrtbits) == 0,
		    ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
		    "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
		    rc, cpu, ri, escrmsr, escrvalue, escrtbits));
	}

	/* Enable the correct bits for this CPU. */
	escrvalue |= escrtbits;
	cccrvalue |= cccrtbits | P4_CCCR_ENABLE;

	/* Save HW value at the time of starting hardware */
	P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);

	/* Program the ESCR and CCCR and start the PMC */
	wrmsr(escrmsr, escrvalue);
	wrmsr(pd->pm_cccr_msr, cccrvalue);

	++rc;
	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);

	mtx_unlock_spin(&pc->pc_mtx);

	PMCDBG(MDP,STA,2, "p4-start cpu=%d rc=%d ri=%d escr=%d "
	    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
	    ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
	    cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));

	return (0);
}
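
/*
 * Tying this back to the execution sequences in the file header (an
 * illustrative trace of Case 2): the second 'start' finds runcount
 * 1, so the counter is briefly disabled, the second logical CPU's
 * T-bits are merged into the CCCR/ESCR images, and the counter is
 * restarted; the hardware count at that instant is recorded in
 * P4_PCPU_HW_VALUE() so that the eventual 'read' or 'stop' can
 * subtract it.
 */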

/*
 * Stop a PMC.
 */

static int
p4_stop_pmc(int cpu, int ri)
{
	int rc;
	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
	struct pmc *pm;
	struct p4_cpu *pc;
	struct p4pmc_descr *pd;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row index %d", __LINE__, ri));

	pd = &p4_pmcdesc[ri];
	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	pm = pc->pc_p4pmcs[ri].phw_pmc;

	KASSERT(pm != NULL,
	    ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));

	PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);

	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(pd->pm_cccr_msr,
		    pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
		return (0);
	}

	/*
	 * Thread mode PMCs.
	 *
	 * On HTT machines, this PMC may be in use by two threads
	 * running on two logical CPUs.  Thus we look at the
	 * 'runcount' field and only turn off the appropriate T0/T1
	 * bits (and keep the PMC running) if two logical CPUs were
	 * using the PMC.
	 */

	/* bits to mask */
	cccrtbits = P4_CCCR_OVF_PMI_T0;
	escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
	if (P4_CPU_IS_HTT_SECONDARY(cpu)) {
		cccrtbits <<= 1;
		escrtbits >>= 2;
	}

	mtx_lock_spin(&pc->pc_mtx);

	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);

	KASSERT(rc == 2 || rc == 1,
	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
	    rc));

	--rc;

	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);

	/* Stop this PMC */
	cccrvalue = rdmsr(pd->pm_cccr_msr);
	wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);

	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
	escrvalue = rdmsr(escrmsr);

	/* The current CPU should be running on this PMC */
	KASSERT(escrvalue & escrtbits,
	    ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
	    "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
	    escrvalue, escrtbits));
	KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
	    (cccrvalue & cccrtbits),
	    ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
	    "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));

	/* get the current hardware reading */
	tmp = rdmsr(pd->pm_pmc_msr);

	if (rc == 1) {		/* need to keep the PMC running */
		escrvalue &= ~escrtbits;
		cccrvalue &= ~cccrtbits;
		wrmsr(escrmsr, escrvalue);
		wrmsr(pd->pm_cccr_msr, cccrvalue);
	}

	mtx_unlock_spin(&pc->pc_mtx);

	PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
	    "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
	    escrvalue, cccrvalue, tmp);

	if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
		tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
	else
		tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);

	P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;

	return (0);
}

/*
 * Handle an interrupt.
 *
 * The hardware sets the CCCR_OVF flag whenever a counter overflow
 * occurs, so the handler examines all 18 CCCR registers, processing
 * the counters that have overflowed.
 *
 * On HTT machines, the CCCR register is shared and will interrupt
 * both logical processors if so configured.  Thus multiple logical
 * CPUs could enter the NMI service routine at the same time.  These
 * will get serialized using a per-cpu spinlock dedicated for use in
 * the NMI handler.
 */

static int
p4_intr(int cpu, struct trapframe *tf)
{
	uint32_t cccrval, ovf_mask, ovf_partner;
	int did_interrupt, error, ri;
	struct p4_cpu *pc;
	struct pmc *pm;
	pmc_value_t v;

	PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

	ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
	    P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
	ovf_mask |= P4_CCCR_OVF;
	if (p4_system_has_htt)
		ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
		    P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
	else
		ovf_partner = 0;
	did_interrupt = 0;

	if (p4_system_has_htt)
		P4_PCPU_ACQ_INTR_SPINLOCK(pc);

	/*
	 * Loop through all CCCRs, looking for ones that have
	 * interrupted this CPU.
	 */
	for (ri = 0; ri < P4_NPMCS; ri++) {

		/*
		 * Check if our partner logical CPU has already marked
		 * this PMC as having interrupted it.  If so, reset
		 * the flag and process the interrupt, but leave the
		 * hardware alone.
		 */
		if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
			P4_PCPU_SET_INTRFLAG(pc,ri,0);
			did_interrupt = 1;

			/*
			 * Ignore de-configured or stopped PMCs.
			 * Ignore PMCs not in sampling mode.
			 */
			pm = pc->pc_p4pmcs[ri].phw_pmc;
			if (pm == NULL ||
			    pm->pm_state != PMC_STATE_RUNNING ||
			    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
				continue;
			}
			(void) pmc_process_interrupt(cpu, pm, tf,
			    TRAPF_USERMODE(tf));
			continue;
		}

		/*
		 * Fresh interrupt.  Look for the CCCR_OVF bit
		 * and the OVF_Tx bit for this logical
		 * processor being set.
		 */
		cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri);

		if ((cccrval & ovf_mask) != ovf_mask)
			continue;

		/*
		 * If the other logical CPU would also have been
		 * interrupted due to the PMC being shared, record
		 * this fact in the per-cpu saved interrupt flag
		 * bitmask.
		 */
		if (p4_system_has_htt && (cccrval & ovf_partner))
			P4_PCPU_SET_INTRFLAG(pc, ri, 1);

		v = rdmsr(P4_PERFCTR_MSR_FIRST + ri);

		PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);

		/* Stop the counter, and reset the overflow bit */
		cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
		wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval);

		did_interrupt = 1;

		/*
		 * Ignore de-configured or stopped PMCs.  Ignore PMCs
		 * not in sampling mode.
		 */
		pm = pc->pc_p4pmcs[ri].phw_pmc;

		if (pm == NULL ||
		    pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
			continue;
		}

		/*
		 * Process the interrupt.  Re-enable the PMC if
		 * processing was successful.
		 */
		error = pmc_process_interrupt(cpu, pm, tf,
		    TRAPF_USERMODE(tf));

		/*
		 * Only the first processor executing the NMI handler
		 * in an HTT pair will restart a PMC, and that too
		 * only if there were no errors.
		 */
		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
		    pm->pm_sc.pm_reloadcount);
		wrmsr(P4_PERFCTR_MSR_FIRST + ri, v);
		if (error == 0)
			wrmsr(P4_CCCR_MSR_FIRST + ri,
			    cccrval | P4_CCCR_ENABLE);
	}

	/* allow the other CPU to proceed */
	if (p4_system_has_htt)
		P4_PCPU_REL_INTR_SPINLOCK(pc);

	/*
	 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
	 * masked when a PMC interrupts the CPU.  We need to unmask
	 * the interrupt source explicitly.
	 */

	if (did_interrupt)
		pmc_x86_lapic_enable_pmc_interrupt();

	atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
	    &pmc_stats.pm_intr_ignored, 1);

	return (did_interrupt);
}
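
/*
 * Sketch of the partner-CPU hand-off implemented above
 * (illustrative): when a shared CCCR posts the PMI to both logical
 * CPUs, the first CPU into the handler sees both OVF_PMI_Tx bits,
 * services and reprograms the counter, and sets the per-row
 * intrflag; the partner CPU, serialized by the NMI spinlock, then
 * consumes the flag and processes its copy of the interrupt while
 * leaving the hardware untouched.
 */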

/*
 * Describe a CPU's PMC state.
 */

static int
p4_describe(int cpu, int ri, struct pmc_info *pi,
    struct pmc **ppmc)
{
	int error;
	size_t copied;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] row-index %d out of range", __LINE__, ri));

	PMCDBG(MDP,OPS,1, "p4-describe cpu=%d ri=%d", cpu, ri);

	if (P4_CPU_IS_HTT_SECONDARY(cpu))
		return (EINVAL);

	pd = &p4_pmcdesc[ri];

	if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
	    PMC_NAME_MAX, &copied)) != 0)
		return (error);

	pi->pm_class = pd->pm_descr.pd_class;

	if (p4_pcpu[cpu]->pc_p4pmcs[ri].phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = p4_pcpu[cpu]->pc_p4pmcs[ri].phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

/*
 * Get MSR# for use with RDPMC.
 */

static int
p4_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] ri %d out of range", __LINE__, ri));

	*msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;

	PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);

	return (0);
}

int
pmc_p4_initialize(struct pmc_mdep *md, int ncpus)
{
	struct pmc_classdep *pcd;
	struct p4_event_descr *pe;

	KASSERT(md != NULL, ("[p4,%d] md is NULL", __LINE__));
| 45#include <machine/md_var.h> 46#include <machine/specialreg.h> 47 48/* 49 * PENTIUM 4 SUPPORT 50 * 51 * The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs 52 * respectively. Each PMC comprises of two model specific registers: 53 * a counter configuration control register (CCCR) and a counter 54 * register that holds the actual event counts. 55 * 56 * Configuring an event requires the use of one of 45 event selection 57 * control registers (ESCR). Events are associated with specific 58 * ESCRs. Each PMC group has a set of ESCRs it can use. 59 * 60 * - The BPU counter group (4 PMCs) can use the 16 ESCRs: 61 * BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1}, 62 * PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,}, BSU_ESCR{0,1}. 63 * 64 * - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1}, 65 * TC_ESCR{0,1}, TBPU_ESCR{0,1}. 66 * 67 * - The FLAME counter group (4 PMCs) can use the 10 ESCRs: 68 * FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1}, 69 * DAC_ESCR{0,1}. 70 * 71 * - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1}, 72 * ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}. 73 * 74 * Even-numbered ESCRs can be used with counters 0, 1 and 4 (if 75 * present) of a counter group. Odd-numbers ESCRs can be used with 76 * counters 2, 3 and 5 (if present) of a counter group. The 77 * 'p4_escrs[]' table describes these restrictions in a form that 78 * function 'p4_allocate()' uses for making allocation decisions. 79 * 80 * SYSTEM-MODE AND THREAD-MODE ALLOCATION 81 * 82 * In addition to remembering the state of PMC rows 83 * ('FREE','STANDALONE', or 'THREAD'), we similar need to track the 84 * state of ESCR rows. If an ESCR is allocated to a system-mode PMC 85 * on a CPU we cannot allocate this to a thread-mode PMC. On a 86 * multi-cpu (multiple physical CPUs) system, ESCR allocation on each 87 * CPU is tracked by the pc_escrs[] array. 88 * 89 * Each system-mode PMC that is using an ESCR records its row-index in 90 * the appropriate entry and system-mode allocation attempts check 91 * that an ESCR is available using this array. Process-mode PMCs do 92 * not use the pc_escrs[] array, since ESCR row itself would have been 93 * marked as in 'THREAD' mode. 94 * 95 * HYPERTHREADING SUPPORT 96 * 97 * When HTT is enabled, the FreeBSD kernel treats the two 'logical' 98 * cpus as independent CPUs and can schedule kernel threads on them 99 * independently. However, the two logical CPUs share the same set of 100 * PMC resources. We need to ensure that: 101 * - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly, 102 * and, 103 * - Threads of multi-threaded processes that get scheduled on the same 104 * physical CPU are handled correctly. 105 * 106 * HTT Detection 107 * 108 * Not all HTT capable systems will have HTT enabled. We detect the 109 * presence of HTT by detecting if 'p4_init()' was called for a secondary 110 * CPU in a HTT pair. 111 * 112 * Note that hwpmc(4) cannot currently deal with a change in HTT status once 113 * loaded. 114 * 115 * Handling HTT READ / WRITE / START / STOP 116 * 117 * PMC resources are shared across the CPUs in an HTT pair. We 118 * designate the lower numbered CPU in a HTT pair as the 'primary' 119 * CPU. In each primary CPU's state we keep track of a 'runcount' 120 * which reflects the number of PMC-using processes that have been 121 * scheduled on its secondary CPU. 
Process-mode PMC operations will 122 * actually 'start' or 'stop' hardware only if these are the first or 123 * last processes respectively to use the hardware. PMC values 124 * written by a 'write' operation are saved and are transferred to 125 * hardware at PMC 'start' time if the runcount is 0. If the runcount 126 * is greater than 0 at the time of a 'start' operation, we keep track 127 * of the actual hardware value at the time of the 'start' operation 128 * and use this to adjust the final readings at PMC 'stop' or 'read' 129 * time. 130 * 131 * Execution sequences: 132 * 133 * Case 1: CPUx +...- (no overlap) 134 * CPUy +...- 135 * RC 0 1 0 1 0 136 * 137 * Case 2: CPUx +........- (partial overlap) 138 * CPUy +........- 139 * RC 0 1 2 1 0 140 * 141 * Case 3: CPUx +..............- (fully overlapped) 142 * CPUy +.....- 143 * RC 0 1 2 1 0 144 * 145 * Key: 146 * 'CPU[xy]' : one of the two logical processors on a HTT CPU. 147 * 'RC' : run count (#threads per physical core). 148 * '+' : point in time when a thread is put on a CPU. 149 * '-' : point in time where a thread is taken off a CPU. 150 * 151 * Handling HTT CONFIG 152 * 153 * Different processes attached to the same PMC may get scheduled on 154 * the two logical processors in the package. We keep track of config 155 * and de-config operations using the CFGFLAGS fields of the per-physical 156 * cpu state. 157 */ 158 159#define P4_PMCS() \ 160 P4_PMC(BPU_COUNTER0) \ 161 P4_PMC(BPU_COUNTER1) \ 162 P4_PMC(BPU_COUNTER2) \ 163 P4_PMC(BPU_COUNTER3) \ 164 P4_PMC(MS_COUNTER0) \ 165 P4_PMC(MS_COUNTER1) \ 166 P4_PMC(MS_COUNTER2) \ 167 P4_PMC(MS_COUNTER3) \ 168 P4_PMC(FLAME_COUNTER0) \ 169 P4_PMC(FLAME_COUNTER1) \ 170 P4_PMC(FLAME_COUNTER2) \ 171 P4_PMC(FLAME_COUNTER3) \ 172 P4_PMC(IQ_COUNTER0) \ 173 P4_PMC(IQ_COUNTER1) \ 174 P4_PMC(IQ_COUNTER2) \ 175 P4_PMC(IQ_COUNTER3) \ 176 P4_PMC(IQ_COUNTER4) \ 177 P4_PMC(IQ_COUNTER5) \ 178 P4_PMC(NONE) 179 180enum pmc_p4pmc { 181#undef P4_PMC 182#define P4_PMC(N) P4_PMC_##N , 183 P4_PMCS() 184}; 185 186/* 187 * P4 ESCR descriptors 188 */ 189 190#define P4_ESCRS() \ 191 P4_ESCR(BSU_ESCR0, 0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 192 P4_ESCR(BSU_ESCR1, 0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 193 P4_ESCR(FSB_ESCR0, 0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 194 P4_ESCR(FSB_ESCR1, 0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 195 P4_ESCR(FIRM_ESCR0, 0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 196 P4_ESCR(FIRM_ESCR1, 0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 197 P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 198 P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 199 P4_ESCR(DAC_ESCR0, 0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 200 P4_ESCR(DAC_ESCR1, 0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 201 P4_ESCR(MOB_ESCR0, 0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 202 P4_ESCR(MOB_ESCR1, 0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 203 P4_ESCR(PMH_ESCR0, 0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 204 P4_ESCR(PMH_ESCR1, 0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 205 P4_ESCR(SAAT_ESCR0, 0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 206 P4_ESCR(SAAT_ESCR1, 0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 207 P4_ESCR(U2L_ESCR0, 0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \ 208 P4_ESCR(U2L_ESCR1, 0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \ 209 P4_ESCR(BPU_ESCR0, 0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 210 P4_ESCR(BPU_ESCR1, 0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE) \ 211 P4_ESCR(IS_ESCR0, 0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE) \ 212 P4_ESCR(IS_ESCR1, 0x3B5, 
/*
 * P4 ESCR descriptors
 */

#define	P4_ESCRS()							\
    P4_ESCR(BSU_ESCR0,	0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BSU_ESCR1,	0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FSB_ESCR0,	0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(FSB_ESCR1,	0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FIRM_ESCR0,	0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FIRM_ESCR1,	0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(DAC_ESCR0,	0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(DAC_ESCR1,	0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(MOB_ESCR0,	0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(MOB_ESCR1,	0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(PMH_ESCR0,	0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(PMH_ESCR1,	0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(SAAT_ESCR0,	0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(SAAT_ESCR1,	0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(U2L_ESCR0,	0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(U2L_ESCR1,	0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(BPU_ESCR0,	0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BPU_ESCR1,	0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(IS_ESCR0,	0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IS_ESCR1,	0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ITLB_ESCR0,	0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(ITLB_ESCR1,	0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(CRU_ESCR0,	0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR1,	0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(IQ_ESCR0,	0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(IQ_ESCR1,	0x3BB, IQ_COUNTER1, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(RAT_ESCR0,	0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(RAT_ESCR1,	0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(SSU_ESCR0,	0x3BE, IQ_COUNTER0, IQ_COUNTER2, IQ_COUNTER4)	\
    P4_ESCR(MS_ESCR0,	0x3C0, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(MS_ESCR1,	0x3C1, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TBPU_ESCR0,	0x3C2, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TBPU_ESCR1,	0x3C3, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TC_ESCR0,	0x3C4, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TC_ESCR1,	0x3C5, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(IX_ESCR0,	0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IX_ESCR1,	0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ALF_ESCR0,	0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(ALF_ESCR1,	0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR2,	0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR3,	0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR4,	0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR5,	0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(NONE,	~0,    NONE, NONE, NONE)

enum pmc_p4escr {
#define	P4_ESCR(N, MSR, P1, P2, P3)	P4_ESCR_##N ,
	P4_ESCRS()
#undef	P4_ESCR
};

struct pmc_p4escr_descr {
	const char	pm_escrname[PMC_NAME_MAX];
	u_short		pm_escr_msr;
	const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
};

static struct pmc_p4escr_descr p4_escrs[] =
{
#define	P4_ESCR(N, MSR, P1, P2, P3)		\
	{					\
		.pm_escrname = #N,		\
		.pm_escr_msr = (MSR),		\
		.pm_pmcs =			\
		{				\
			P4_PMC_##P1,		\
			P4_PMC_##P2,		\
			P4_PMC_##P3		\
		}				\
	} ,

	P4_ESCRS()

#undef	P4_ESCR
};
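/*
 * For example, the BSU_ESCR0 row above expands to:
 *
 *	{ .pm_escrname = "BSU_ESCR0", .pm_escr_msr = 0x3A0,
 *	  .pm_pmcs = { P4_PMC_BPU_COUNTER0, P4_PMC_BPU_COUNTER1,
 *		       P4_PMC_NONE } }
 *
 * so an allocation request pairing BSU_ESCR0 with any PMC other than
 * BPU counters 0 or 1 will fail the descriptor check in
 * 'p4_allocate_pmc()'.
 */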
/*
 * P4 Event descriptor
 */

struct p4_event_descr {
	const enum pmc_event pm_event;
	const uint32_t	pm_escr_eventselect;
	const uint32_t	pm_cccr_select;
	const char	pm_is_ti_event;
	enum pmc_p4escr	pm_escrs[P4_MAX_ESCR_PER_EVENT];
};

static struct p4_event_descr p4_events[] = {

#define	P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \
	{								\
		.pm_event            = PMC_EV_P4_##NAME,		\
		.pm_escr_eventselect = (ESCREVENTSEL),			\
		.pm_cccr_select      = (CCCRSEL),			\
		.pm_is_ti_event      = (TI_EVENT),			\
		.pm_escrs            =					\
		{							\
			P4_ESCR_##ESCR0,				\
			P4_ESCR_##ESCR1					\
		}							\
	}

P4_EVDESCR(TC_DELIVER_MODE,	0x01, 0x01, TRUE,  TC_ESCR0,	TC_ESCR1),
P4_EVDESCR(BPU_FETCH_REQUEST,	0x03, 0x00, FALSE, BPU_ESCR0,	BPU_ESCR1),
P4_EVDESCR(ITLB_REFERENCE,	0x18, 0x03, FALSE, ITLB_ESCR0,	ITLB_ESCR1),
P4_EVDESCR(MEMORY_CANCEL,	0x02, 0x05, FALSE, DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(MEMORY_COMPLETE,	0x08, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(LOAD_PORT_REPLAY,	0x04, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(STORE_PORT_REPLAY,	0x05, 0x02, FALSE, SAAT_ESCR0,	SAAT_ESCR1),
P4_EVDESCR(MOB_LOAD_REPLAY,	0x03, 0x02, FALSE, MOB_ESCR0,	MOB_ESCR1),
P4_EVDESCR(PAGE_WALK_TYPE,	0x01, 0x04, TRUE,  PMH_ESCR0,	PMH_ESCR1),
P4_EVDESCR(BSQ_CACHE_REFERENCE,	0x0C, 0x07, FALSE, BSU_ESCR0,	BSU_ESCR1),
P4_EVDESCR(IOQ_ALLOCATION,	0x03, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(IOQ_ACTIVE_ENTRIES,	0x1A, 0x06, FALSE, FSB_ESCR1,	NONE),
P4_EVDESCR(FSB_DATA_ACTIVITY,	0x17, 0x06, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BSQ_ALLOCATION,	0x05, 0x07, FALSE, BSU_ESCR0,	NONE),
P4_EVDESCR(BSQ_ACTIVE_ENTRIES,	0x06, 0x07, FALSE, BSU_ESCR1,	NONE),
	/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
P4_EVDESCR(SSE_INPUT_ASSIST,	0x34, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_SP_UOP,	0x08, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(PACKED_DP_UOP,	0x0C, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_SP_UOP,	0x0A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(SCALAR_DP_UOP,	0x0E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(64BIT_MMX_UOP,	0x02, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(128BIT_MMX_UOP,	0x1A, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_FP_UOP,		0x04, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(X87_SIMD_MOVES_UOP,	0x2E, 0x01, TRUE,  FIRM_ESCR0,	FIRM_ESCR1),
P4_EVDESCR(GLOBAL_POWER_EVENTS,	0x13, 0x06, FALSE, FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(TC_MS_XFER,		0x05, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(UOP_QUEUE_WRITES,	0x09, 0x00, FALSE, MS_ESCR0,	MS_ESCR1),
P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
				0x05, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RETIRED_BRANCH_TYPE,	0x04, 0x02, FALSE, TBPU_ESCR0,	TBPU_ESCR1),
P4_EVDESCR(RESOURCE_STALL,	0x01, 0x01, FALSE, ALF_ESCR0,	ALF_ESCR1),
P4_EVDESCR(WC_BUFFER,		0x05, 0x05, TRUE,  DAC_ESCR0,	DAC_ESCR1),
P4_EVDESCR(B2B_CYCLES,		0x16, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(BNR,			0x08, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(SNOOP,		0x06, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(RESPONSE,		0x04, 0x03, TRUE,  FSB_ESCR0,	FSB_ESCR1),
P4_EVDESCR(FRONT_END_EVENT,	0x08, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(EXECUTION_EVENT,	0x0C, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(REPLAY_EVENT,	0x09, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(INSTR_RETIRED,	0x02, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOPS_RETIRED,	0x01, 0x04, FALSE, CRU_ESCR0,	CRU_ESCR1),
P4_EVDESCR(UOP_TYPE,		0x02, 0x02, FALSE, RAT_ESCR0,	RAT_ESCR1),
P4_EVDESCR(BRANCH_RETIRED,	0x06, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
P4_EVDESCR(X87_ASSIST,		0x03, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3),
P4_EVDESCR(MACHINE_CLEAR,	0x02, 0x05, FALSE, CRU_ESCR2,	CRU_ESCR3)

#undef	P4_EVDESCR
};

#define	P4_EVENT_IS_TI(E)	((E)->pm_is_ti_event == TRUE)

#define	P4_NEVENTS	(PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1)
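/*
 * Editor's sketch (hypothetical helper, not part of the driver):
 * given an event descriptor and a PMC number, decide whether some
 * candidate ESCR for the event can drive that PMC, using the two
 * tables above.  This is the same two-level lookup performed inline
 * by 'p4_allocate_pmc()' below.
 */
#if 0
static int
example_event_can_use_pmc(const struct p4_event_descr *pe,
    enum pmc_p4pmc pmc)
{
	int n, m;
	enum pmc_p4escr escr;

	for (n = 0; n < P4_MAX_ESCR_PER_EVENT; n++) {
		if ((escr = pe->pm_escrs[n]) == P4_ESCR_NONE)
			break;		/* out of candidate ESCRs */
		for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
			if (p4_escrs[escr].pm_pmcs[m] == pmc)
				return (1);	/* usable pairing */
	}
	return (0);
}
#endif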
/*
 * P4 PMC descriptors
 */

struct p4pmc_descr {
	struct pmc_descr pm_descr;	/* common information */
	enum pmc_p4pmc	pm_pmcnum;	/* PMC number */
	uint32_t	pm_pmc_msr;	/* PERFCTR MSR address */
	uint32_t	pm_cccr_msr;	/* CCCR MSR address */
};

static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
#define	P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM |  \
	PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
	PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE |		  \
	PMC_CAP_TAGGING | PMC_CAP_CASCADE)

#define	P4_PMCDESCR(N, PMC, CCCR)			\
	{						\
		.pm_descr =				\
		{					\
			.pd_name  = #N,			\
			.pd_class = PMC_CLASS_P4,	\
			.pd_caps  = P4_PMC_CAPS,	\
			.pd_width = 40			\
		},					\
		.pm_pmcnum   = P4_PMC_##N,		\
		.pm_cccr_msr = (CCCR),			\
		.pm_pmc_msr  = (PMC)			\
	}

	P4_PMCDESCR(BPU_COUNTER0,	0x300,	0x360),
	P4_PMCDESCR(BPU_COUNTER1,	0x301,	0x361),
	P4_PMCDESCR(BPU_COUNTER2,	0x302,	0x362),
	P4_PMCDESCR(BPU_COUNTER3,	0x303,	0x363),
	P4_PMCDESCR(MS_COUNTER0,	0x304,	0x364),
	P4_PMCDESCR(MS_COUNTER1,	0x305,	0x365),
	P4_PMCDESCR(MS_COUNTER2,	0x306,	0x366),
	P4_PMCDESCR(MS_COUNTER3,	0x307,	0x367),
	P4_PMCDESCR(FLAME_COUNTER0,	0x308,	0x368),
	P4_PMCDESCR(FLAME_COUNTER1,	0x309,	0x369),
	P4_PMCDESCR(FLAME_COUNTER2,	0x30A,	0x36A),
	P4_PMCDESCR(FLAME_COUNTER3,	0x30B,	0x36B),
	P4_PMCDESCR(IQ_COUNTER0,	0x30C,	0x36C),
	P4_PMCDESCR(IQ_COUNTER1,	0x30D,	0x36D),
	P4_PMCDESCR(IQ_COUNTER2,	0x30E,	0x36E),
	P4_PMCDESCR(IQ_COUNTER3,	0x30F,	0x36F),
	P4_PMCDESCR(IQ_COUNTER4,	0x310,	0x370),
	P4_PMCDESCR(IQ_COUNTER5,	0x311,	0x371),

#undef	P4_PMCDESCR
};

/* HTT support */
#define	P4_NHTT		2	/* logical processors/chip */

static int p4_system_has_htt;

/*
 * Per-CPU data structure for P4 class CPUs
 *
 * [19 struct pmc_hw structures]
 * [45 ESCR status bytes]
 * [per-cpu spin mutex]
 * [19 flag fields for holding config flags and a runcount]
 * [19*2 hw value fields]	(Thread mode PMC support)
 *    or
 * [19*2 EIP values]		(Sampling mode PMCs)
 * [19*2 pmc value fields]	(Thread mode PMC support)
 */

struct p4_cpu {
	struct pmc_hw	pc_p4pmcs[P4_NPMCS];
	char		pc_escrs[P4_NESCR];
	struct mtx	pc_mtx;		/* spin lock */
	uint32_t	pc_intrflag;	/* NMI handler flags */
	unsigned int	pc_intrlock;	/* NMI handler spin lock */
	unsigned char	pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
	union {
		pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
		uintptr_t   pc_ip[P4_NPMCS * P4_NHTT];
	}		pc_si;
	pmc_value_t	pc_pmc_values[P4_NPMCS * P4_NHTT];
};

static struct p4_cpu **p4_pcpu;

/*
 * Saved values are kept per (row, logical CPU): the HTT secondary of
 * a pair indexes into the upper half of each array.
 */
#define	P4_PCPU_PMC_VALUE(PC,RI,CPU)					\
	(PC)->pc_pmc_values[(RI) + ((CPU) & 1) * P4_NPMCS]
#define	P4_PCPU_HW_VALUE(PC,RI,CPU)					\
	(PC)->pc_si.pc_hw[(RI) + ((CPU) & 1) * P4_NPMCS]
#define	P4_PCPU_SAVED_IP(PC,RI,CPU)					\
	(PC)->pc_si.pc_ip[(RI) + ((CPU) & 1) * P4_NPMCS]

#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do {	\
	char _tmp;				\
	_tmp = (PC)->pc_flags[(RI)];		\
	_tmp &= ~(MASK);			\
	_tmp |= (VAL) & (MASK);			\
	(PC)->pc_flags[(RI)] = _tmp;		\
} while (0)

#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) << 4))

#define	P4_CPU_TO_FLAG(C)		(P4_CPU_IS_HTT_SECONDARY(C) ? 0x2 : 0x1)

#define	P4_PCPU_GET_INTRFLAG(PC,I)	((PC)->pc_intrflag & (1 << (I)))
#define	P4_PCPU_SET_INTRFLAG(PC,I,V)	do {		\
	uint32_t __mask;				\
	__mask = 1 << (I);				\
	if ((V))					\
		(PC)->pc_intrflag |= __mask;		\
	else						\
		(PC)->pc_intrflag &= ~__mask;		\
} while (0)
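/*
 * Editor's worked example of the packing above: each pc_flags[] byte
 * holds the runcount in the low nibble and the CFGFLAGS bit mask in
 * the high nibble.  A row configured on both logical CPUs
 * (CFGFLAGS = 0x3) with one running thread (runcount = 1) stores
 * 0x31, so
 *
 *	P4_PCPU_GET_RUNCOUNT() -> 0x31 & 0x0F        = 1
 *	P4_PCPU_GET_CFGFLAGS() -> (0x31 & 0xF0) >> 4 = 0x3
 */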
/*
 * A minimal spin lock implementation for use inside the NMI handler.
 *
 * We don't want to use a regular spin lock here, because curthread
 * may not be consistent at the time the handler is invoked.
 */
#define	P4_PCPU_ACQ_INTR_SPINLOCK(PC) do {				\
	while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1))	\
		ia32_pause();						\
} while (0)
#define	P4_PCPU_REL_INTR_SPINLOCK(PC)					\
	atomic_store_rel_int(&(PC)->pc_intrlock, 0)

/* ESCR row disposition */
static int p4_escrdisp[P4_NESCR];

#define	P4_ESCR_ROW_DISP_IS_THREAD(E)		(p4_escrdisp[(E)] > 0)
#define	P4_ESCR_ROW_DISP_IS_STANDALONE(E)	(p4_escrdisp[(E)] < 0)
#define	P4_ESCR_ROW_DISP_IS_FREE(E)		(p4_escrdisp[(E)] == 0)

#define	P4_ESCR_MARK_ROW_STANDALONE(E) do {				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_max_active()),		\
	    ("[p4,%d] row disposition error", __LINE__));		\
} while (0)

#define	P4_ESCR_UNMARK_ROW_STANDALONE(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
} while (0)

#define	P4_ESCR_MARK_ROW_THREAD(E) do {					\
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
} while (0)

#define	P4_ESCR_UNMARK_ROW_THREAD(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\
	    __LINE__));							\
} while (0)

#define	P4_PMC_IS_STOPPED(cccr)	((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)

#define	P4_CPU_IS_HTT_SECONDARY(cpu)					\
	(p4_system_has_htt ? ((cpu) & 1) : 0)
#define	P4_TO_HTT_PRIMARY(cpu)						\
	(p4_system_has_htt ? ((cpu) & ~1) : (cpu))

#define	P4_CCCR_Tx_MASK	(~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1|	\
	P4_CCCR_ENABLE|P4_CCCR_OVF))
#define	P4_ESCR_Tx_MASK	(~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS|	\
	P4_ESCR_T1_USR))
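/*
 * Editor's note: with HTT detected, logical CPUs are assumed to come
 * in consecutive even/odd pairs (see 'p4_pcpu_init()' below), so for
 * example CPUs 2 and 3 form one package:
 *
 *	P4_TO_HTT_PRIMARY(2) == P4_TO_HTT_PRIMARY(3) == 2
 *	P4_CPU_IS_HTT_SECONDARY() is 1 only for CPU 3
 *
 * Without HTT every CPU is its own primary.
 */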
/*
 * support routines
 */

static struct p4_event_descr *
p4_find_event(enum pmc_event ev)
{
	int n;

	for (n = 0; n < P4_NEVENTS; n++)
		if (p4_events[n].pm_event == ev)
			break;
	if (n == P4_NEVENTS)
		return (NULL);
	return (&p4_events[n]);
}

/*
 * Initialize per-cpu state
 */

static int
p4_pcpu_init(struct pmc_mdep *md, int cpu)
{
	char *pescr;
	int n, first_ri, phycpu;
	struct pmc_hw *phw;
	struct p4_cpu *p4c;
	struct pmc_cpu *pc, *plc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] insane cpu number %d", __LINE__, cpu));

	PMCDBG(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu,
	    pmc_cpu_is_primary(cpu) != 0);

	first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;

	/*
	 * The two CPUs in an HT pair share their per-cpu state.
	 *
	 * For HT capable CPUs, we assume that the two logical
	 * processors in the HT pair get two consecutive CPU ids
	 * starting with an even id #.
	 *
	 * The primary CPU (the even numbered CPU of the pair) would
	 * have been initialized prior to the initialization for the
	 * secondary.
	 */

	if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) {

		p4_system_has_htt = 1;

		phycpu = P4_TO_HTT_PRIMARY(cpu);
		pc = pmc_pcpu[phycpu];
		plc = pmc_pcpu[cpu];

		KASSERT(plc != pc, ("[p4,%d] per-cpu config error", __LINE__));

		PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pc=%p", cpu,
		    phycpu, pc);
		KASSERT(pc, ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d",
		    __LINE__, cpu, phycpu));

		/* PMCs are shared with the physical CPU. */
		for (n = 0; n < P4_NPMCS; n++)
			plc->pc_hwpmcs[n + first_ri] =
			    pc->pc_hwpmcs[n + first_ri];

		return (0);
	}

	p4c = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO);

	if (p4c == NULL)
		return (ENOMEM);

	pc = pmc_pcpu[cpu];

	KASSERT(pc != NULL, ("[p4,%d] cpu %d null per-cpu", __LINE__, cpu));

	p4_pcpu[cpu] = p4c;
	phw = p4c->pc_p4pmcs;

	for (n = 0; n < P4_NPMCS; n++, phw++) {
		phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
		    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
		phw->phw_pmc = NULL;
		pc->pc_hwpmcs[n + first_ri] = phw;
	}

	pescr = p4c->pc_escrs;
	for (n = 0; n < P4_NESCR; n++)
		*pescr++ = P4_INVALID_PMC_INDEX;

	mtx_init(&p4c->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN);

	return (0);
}

/*
 * Destroy per-cpu state.
 */

static int
p4_pcpu_fini(struct pmc_mdep *md, int cpu)
{
	int first_ri, i;
	struct p4_cpu *p4c;
	struct pmc_cpu *pc;

	PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);

	pc = pmc_pcpu[cpu];
	first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;

	for (i = 0; i < P4_NPMCS; i++)
		pc->pc_hwpmcs[i + first_ri] = NULL;

	if (!pmc_cpu_is_primary(cpu) && (cpu & 1))
		return (0);

	p4c = p4_pcpu[cpu];

	KASSERT(p4c != NULL, ("[p4,%d] NULL pcpu", __LINE__));

	/* Turn off all PMCs on this CPU */
	for (i = 0; i < P4_NPMCS - 1; i++)
		wrmsr(P4_CCCR_MSR_FIRST + i,
		    rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE);

	mtx_destroy(&p4c->pc_mtx);

	free(p4c, M_PMC);

	p4_pcpu[cpu] = NULL;

	return (0);
}

/*
 * Read a PMC
 */

static int
p4_read_pmc(int cpu, int ri, pmc_value_t *v)
{
	struct pmc *pm;
	pmc_value_t tmp;
	struct p4_cpu *pc;
	enum pmc_mode mode;
	struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	pm = pc->pc_p4pmcs[ri].phw_pmc;
	pd = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, cpu, ri));

	KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
	    ("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
	    pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));

	mode = PMC_TO_MODE(pm);

	PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));

	tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);

	if (PMC_IS_VIRTUAL_MODE(mode)) {
		if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
			tmp += (P4_PERFCTR_MASK + 1) -
			    P4_PCPU_HW_VALUE(pc,ri,cpu);
		else
			tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
		tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
	}

	if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
		*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
	else
		*v = tmp;

	PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);

	return (0);
}
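/*
 * Editor's worked example of the wraparound adjustment above,
 * assuming P4_PERFCTR_MASK is the 40-bit mask 0xFFFFFFFFFF: if the
 * counter read 0xFFFFFFFFF0 when this logical CPU started the PMC
 * and reads 0x10 now, then
 *
 *	tmp = 0x10 + (P4_PERFCTR_MASK + 1) - 0xFFFFFFFFF0 = 0x20
 *
 * i.e. 32 events were counted across the 2^40 wrap, before the saved
 * software value is added in.
 */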
/*
 * Write a PMC
 */

static int
p4_write_pmc(int cpu, int ri, pmc_value_t v)
{
	enum pmc_mode mode;
	struct pmc *pm;
	struct p4_cpu *pc;
	const struct pmc_hw *phw;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc  = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	phw = &pc->pc_p4pmcs[ri];
	pm  = phw->phw_pmc;
	pd  = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
	    cpu, ri));

	mode = PMC_TO_MODE(pm);

	PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
	    mode, v);

	/*
	 * Write the PMC value to the register/saved value: for
	 * sampling mode PMCs, the value to be programmed into the PMC
	 * counter is -(C+1) where 'C' is the requested sample rate.
	 */
	if (PMC_IS_SAMPLING_MODE(mode))
		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);

	if (PMC_IS_SYSTEM_MODE(mode))
		wrmsr(pd->pm_pmc_msr, v);
	else
		P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;

	return (0);
}

/*
 * Configure a PMC 'pm' on the given CPU and row-index.
 *
 * 'pm' may be NULL to indicate de-configuration.
 *
 * On HTT systems, a PMC may get configured twice, once for each
 * "logical" CPU.  We track this using the CFGFLAGS field of the
 * per-cpu state; this field is a bit mask with one bit each for
 * logical CPUs 0 & 1.
 */

static int
p4_config_pmc(int cpu, int ri, struct pmc *pm)
{
	struct pmc_hw *phw;
	struct p4_cpu *pc;
	int cfgflags, cpuflag;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));

	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);

	pc  = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	phw = &pc->pc_p4pmcs[ri];

	KASSERT(pm == NULL || phw->phw_pmc == NULL ||
	    (p4_system_has_htt && phw->phw_pmc == pm),
	    ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));

	mtx_lock_spin(&pc->pc_mtx);
	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);

	KASSERT(cfgflags >= 0 && cfgflags <= 3,
	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
	    cfgflags, cpu, ri));

	KASSERT(cfgflags == 0 || phw->phw_pmc,
	    ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
	    __LINE__, cpu, ri));

	cpuflag = P4_CPU_TO_FLAG(cpu);

	if (pm) {		/* config */
		if (cfgflags == 0)
			phw->phw_pmc = pm;

		KASSERT(phw->phw_pmc == pm,
		    ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
		    __LINE__, cpu, ri, pm, phw->phw_pmc));

		cfgflags |= cpuflag;
	} else {		/* unconfig */
		cfgflags &= ~cpuflag;

		if (cfgflags == 0)
			phw->phw_pmc = NULL;
	}

	KASSERT(cfgflags >= 0 && cfgflags <= 3,
	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
	    cfgflags, cpu, ri));

	P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);

	mtx_unlock_spin(&pc->pc_mtx);

	return (0);
}
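/*
 * Editor's walk-through of the CFGFLAGS protocol above, for one PMC
 * row on an HTT pair: configuring on the primary CPU sets bit 0x1
 * (phw_pmc is bound on the 0 -> non-zero transition); configuring on
 * the secondary adds bit 0x2, giving 0x3.  De-configuring each CPU
 * clears its own bit, and phw_pmc is detached only when the mask
 * returns to zero.
 */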
/*
 * Retrieve a configured PMC pointer from hardware state.
 */

static int
p4_get_config(int cpu, int ri, struct pmc **ppm)
{
	int cfgflags;
	struct p4_cpu *pc;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

	mtx_lock_spin(&pc->pc_mtx);
	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
	mtx_unlock_spin(&pc->pc_mtx);

	if (cfgflags & P4_CPU_TO_FLAG(cpu))
		*ppm = pc->pc_p4pmcs[ri].phw_pmc; /* PMC config'ed on this CPU */
	else
		*ppm = NULL;

	return (0);
}
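/*
 * Editor's summary of the HTT allocation rules detailed below, for a
 * process-mode (thread) PMC request when HTT is active:
 *
 *	PMC row disposition	ESCR row disposition	result
 *	FREE			FREE			allocate
 *	FREE			THREAD or STANDALONE	try next ESCR
 *	THREAD or STANDALONE	(any)			EBUSY
 */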
/*
 * Allocate a PMC.
 *
 * The allocation strategy differs between HTT and non-HTT systems.
 *
 * The non-HTT case:
 * - Given the desired event and the PMC row-index, lookup the
 *   list of valid ESCRs for the event.
 * - For each valid ESCR:
 *   - Check if the ESCR is free and the ESCR row is in a compatible
 *     mode (i.e., system or process).
 *   - Check if the ESCR is usable with a P4 PMC at the desired row-index.
 * If everything matches, we determine the appropriate bit values for the
 * ESCR and CCCR registers.
 *
 * The HTT case:
 *
 * - Process-mode PMCs require special care.  The FreeBSD scheduler could
 *   schedule any two processes on the same physical CPU.  We need to
 *   ensure that a given PMC row-index is never allocated to two different
 *   PMCs owned by different user-processes.
 *   This is ensured by always allocating a PMC from a 'FREE' PMC row
 *   if the system has HTT active.
 * - A similar check needs to be done for ESCRs; we do not want two PMCs
 *   using the same ESCR to be scheduled at the same time.  Thus ESCR
 *   allocation is also restricted to FREE rows if the system has HTT
 *   enabled.
 * - Thirdly, some events are 'thread independent' (in Intel's
 *   terminology), i.e., the PMC hardware cannot distinguish between
 *   events caused by different logical CPUs.  This makes it impossible
 *   to assign events to a given thread of execution.  If the system
 *   has HTT enabled, these events are not allowed for process-mode
 *   PMCs.
 */

static int
p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
    const struct pmc_op_pmcallocate *a)
{
	int found, n, m;
	uint32_t caps, cccrvalue, escrvalue, tflags;
	enum pmc_p4escr escr;
	struct p4_cpu *pc;
	struct p4_event_descr *pevent;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index value %d", __LINE__, ri));

	pd = &p4_pmcdesc[ri];

	PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
	    "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
	    pm->pm_caps);

	/* check class */
	if (pd->pm_descr.pd_class != a->pm_class)
		return (EINVAL);

	/* check requested capabilities */
	caps = a->pm_caps;
	if ((pd->pm_descr.pd_caps & caps) != caps)
		return (EPERM);

	/*
	 * If the system has HTT enabled, and the desired allocation
	 * mode is process-private, and the PMC row disposition is not
	 * FREE (0), decline the allocation.
	 */

	if (p4_system_has_htt &&
	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
	    pmc_getrowdisp(ri) != 0)
		return (EBUSY);

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] unknown PMC class %d", __LINE__,
	    pd->pm_descr.pd_class));

	if (pm->pm_event < PMC_EV_P4_FIRST ||
	    pm->pm_event > PMC_EV_P4_LAST)
		return (EINVAL);

	if ((pevent = p4_find_event(pm->pm_event)) == NULL)
		return (ESRCH);

	PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
	    pevent->pm_event, pevent->pm_escr_eventselect,
	    pevent->pm_cccr_select, pevent->pm_is_ti_event);

	/*
	 * Some PMC events are 'thread independent' and therefore
	 * cannot be used for process-private modes if HTT is being
	 * used.
	 */

	if (P4_EVENT_IS_TI(pevent) &&
	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
	    p4_system_has_htt)
		return (EINVAL);

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

	found = 0;

	/* look for a suitable ESCR for this event */
	for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
		if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
			break;	/* out of ESCRs */
		/*
		 * Check ESCR row disposition.
		 *
		 * If the request is for a system-mode PMC, then the
		 * ESCR row should not be in process-virtual mode, and
		 * should also be free on the current CPU.
		 */

		if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
			if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
			    pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
				continue;
		}

		/*
		 * If the request is for a process-virtual PMC, and if
		 * HTT is not enabled, we can use an ESCR row that is
		 * either FREE or already in process mode.
		 *
		 * If HTT is enabled, then we need to ensure that a
		 * given ESCR is never allocated to two PMCs that
		 * could run simultaneously on the two logical CPUs of
		 * a CPU package.  We ensure this by only allocating
		 * ESCRs from rows marked as 'FREE'.
		 */

		if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
			if (p4_system_has_htt) {
				if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
					continue;
			} else
				if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
					continue;
		}

		/*
		 * We found a suitable ESCR for this event.  Now check
		 * if this ESCR can work with the PMC at row-index 'ri'.
		 */
		for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
			if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
				found = 1;
				break;
			}
	}

	if (found == 0)
		return (ESRCH);

	KASSERT((int) escr >= 0 && escr < P4_NESCR,
	    ("[p4,%d] illegal ESCR value %d", __LINE__, escr));

	/* mark ESCR row mode */
	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
		P4_ESCR_MARK_ROW_STANDALONE(escr);
	} else {
		KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
		    ("[p4,%d] escr[%d] already in use", __LINE__, escr));
		P4_ESCR_MARK_ROW_THREAD(escr);
	}

	pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr;
	pm->pm_md.pm_p4.pm_p4_escr    = escr;

	cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
	escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);

	/* CCCR fields */
	if (caps & PMC_CAP_THRESHOLD)
		cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
		    P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;

	if (caps & PMC_CAP_EDGE)
		cccrvalue |= P4_CCCR_EDGE;

	if (caps & PMC_CAP_INVERT)
		cccrvalue |= P4_CCCR_COMPLEMENT;

	if (p4_system_has_htt)
		cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
		    P4_CCCR_ACTIVE_THREAD_MASK;
	else		/* no HTT; thread field should be '11b' */
		cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);

	if (caps & PMC_CAP_CASCADE)
		cccrvalue |= P4_CCCR_CASCADE;

	/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
	if (caps & PMC_CAP_INTERRUPT)
		cccrvalue |= P4_CCCR_OVF_PMI_T0;

	/* ESCR fields */
	if (caps & PMC_CAP_QUALIFIER)
		escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
		    P4_ESCR_EVENT_MASK_MASK;
	if (caps & PMC_CAP_TAGGING)
		escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
		    P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;

	/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
	tflags = 0;
	if (caps & PMC_CAP_SYSTEM)
		tflags |= P4_ESCR_T0_OS;
	if (caps & PMC_CAP_USER)
		tflags |= P4_ESCR_T0_USR;
	if (tflags == 0)
		tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
	escrvalue |= tflags;

	pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
	pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;

	PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
	    "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
	    cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);

	return (0);
}
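/*
 * Editor's example of the assembly above, for a non-HTT system-wide
 * INSTR_RETIRED allocation with PMC_CAP_SYSTEM|PMC_CAP_USER: the CCCR
 * value gets the event's ESCR-select field (0x04 from the event
 * table) plus the 'count for either logical thread' field (0x3), and
 * the ESCR value gets the event-select field (0x02) plus both
 * P4_ESCR_T0_OS and P4_ESCR_T0_USR, i.e. count in kernel and user
 * mode.  Note that the values are only stored in 'pm->pm_md' here;
 * they reach the MSRs later, in 'p4_start_pmc()'.
 */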
/*
 * Release a PMC.
 */

static int
p4_release_pmc(int cpu, int ri, struct pmc *pm)
{
	enum pmc_p4escr escr;
	struct p4_cpu *pc;

	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	escr = pm->pm_md.pm_p4.pm_p4_escr;

	PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);

	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

		KASSERT(pc->pc_p4pmcs[ri].phw_pmc == NULL,
		    ("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));

		P4_ESCR_UNMARK_ROW_STANDALONE(escr);
		KASSERT(pc->pc_escrs[escr] == ri,
		    ("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
		    escr, ri));
		pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
	} else
		P4_ESCR_UNMARK_ROW_THREAD(escr);

	return (0);
}

/*
 * Start a PMC
 */

static int
p4_start_pmc(int cpu, int ri)
{
	int rc;
	struct pmc *pm;
	struct p4_cpu *pc;
	struct p4pmc_descr *pd;
	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row-index %d", __LINE__, ri));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	pm = pc->pc_p4pmcs[ri].phw_pmc;
	pd = &p4_pmcdesc[ri];

	KASSERT(pm != NULL,
	    ("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, cpu, ri));

	PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);

	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
	    ("[p4,%d] wrong PMC class %d", __LINE__,
	    pd->pm_descr.pd_class));

	/* retrieve the desired CCCR/ESCR values from the PMC */
	cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
	escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;

	/* extract and zero the logical processor selection bits */
	cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
	escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
	cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
	escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);

	if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */
		cccrtbits <<= 1;
		escrtbits >>= 2;
	}

	/* start system mode PMCs directly */
	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(escrmsr, escrvalue | escrtbits);
		wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
		return (0);
	}

	/*
	 * Thread mode PMCs
	 *
	 * On HTT machines, the same PMC could be scheduled on the
	 * same physical CPU twice (once for each logical CPU), for
	 * example, if two threads of a multi-threaded process get
	 * scheduled on the same CPU.
	 */
	mtx_lock_spin(&pc->pc_mtx);

	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
	KASSERT(rc == 0 || rc == 1,
	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
	    rc));

	if (rc == 0) {		/* 1st CPU and the non-HTT case */

		KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
		    ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
		    cpu, ri, pd->pm_cccr_msr));

		/* write out the low 40 bits of the saved value to hardware */
		wrmsr(pd->pm_pmc_msr,
		    P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);

	} else if (rc == 1) {		/* 2nd CPU */

		/*
		 * Stop the PMC and retrieve the CCCR and ESCR values
		 * from their MSRs, and turn on the additional T[0/1]
		 * bits for the 2nd CPU.
		 */

		cccrvalue = rdmsr(pd->pm_cccr_msr);
		wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);

		/* check that the configuration bits read back match the PMC */
		KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
		    (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
		    ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
		    "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
		    cccrvalue & P4_CCCR_Tx_MASK,
		    pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
		KASSERT(cccrvalue & P4_CCCR_ENABLE,
		    ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
		    __LINE__, rc, cpu, ri));
		KASSERT((cccrvalue & cccrtbits) == 0,
		    ("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
		    "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
		    cccrvalue, cccrtbits));

		escrvalue = rdmsr(escrmsr);

		KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
		    (pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
		    ("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
		    "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
		    escrvalue & P4_ESCR_Tx_MASK,
		    pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
		KASSERT((escrvalue & escrtbits) == 0,
		    ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
		    "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
		    rc, cpu, ri, escrmsr, escrvalue, escrtbits));
	}

	/* Enable the correct bits for this CPU. */
	escrvalue |= escrtbits;
	cccrvalue |= cccrtbits | P4_CCCR_ENABLE;

	/* Save HW value at the time of starting hardware */
	P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);

	/* Program the ESCR and CCCR and start the PMC */
	wrmsr(escrmsr, escrvalue);
	wrmsr(pd->pm_cccr_msr, cccrvalue);

	++rc;
	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);

	mtx_unlock_spin(&pc->pc_mtx);

	PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
	    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
	    ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
	    cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));

	return (0);
}
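/*
 * Editor's note on the T0 -> T1 shift in p4_start_pmc() and
 * p4_stop_pmc(): the shifts assume the usual P4 register layout, in
 * which the CCCR's OVF_PMI enable bit for logical processor 1 sits
 * one bit above the bit for logical processor 0 (hence '<<= 1'),
 * while in the ESCR the (T1_OS, T1_USR) pair sits two bit positions
 * below (T0_OS, T0_USR) (hence '>>= 2').
 */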
/*
 * Stop a PMC.
 */

static int
p4_stop_pmc(int cpu, int ri)
{
	int rc;
	uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
	struct pmc *pm;
	struct p4_cpu *pc;
	struct p4pmc_descr *pd;
	pmc_value_t tmp;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU value %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] illegal row index %d", __LINE__, ri));

	pd = &p4_pmcdesc[ri];
	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
	pm = pc->pc_p4pmcs[ri].phw_pmc;

	KASSERT(pm != NULL,
	    ("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));

	PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);

	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(pd->pm_cccr_msr,
		    pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
		return (0);
	}

	/*
	 * Thread mode PMCs.
	 *
	 * On HTT machines, this PMC may be in use by two threads
	 * running on two logical CPUs.  Thus we look at the
	 * 'runcount' field and only turn off the appropriate T0/T1
	 * bits (and keep the PMC running) if two logical CPUs were
	 * using the PMC.
	 */

	/* bits to mask */
	cccrtbits = P4_CCCR_OVF_PMI_T0;
	escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
	if (P4_CPU_IS_HTT_SECONDARY(cpu)) {
		cccrtbits <<= 1;
		escrtbits >>= 2;
	}

	mtx_lock_spin(&pc->pc_mtx);

	rc = P4_PCPU_GET_RUNCOUNT(pc,ri);

	KASSERT(rc == 2 || rc == 1,
	    ("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
	    rc));

	--rc;

	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);

	/* Stop this PMC */
	cccrvalue = rdmsr(pd->pm_cccr_msr);
	wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);

	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
	escrvalue = rdmsr(escrmsr);

	/* The current CPU should be running on this PMC */
	KASSERT(escrvalue & escrtbits,
	    ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
	    "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
	    escrvalue, escrtbits));
	KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
	    (cccrvalue & cccrtbits),
	    ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
	    "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));

	/* get the current hardware reading */
	tmp = rdmsr(pd->pm_pmc_msr);

	if (rc == 1) {		/* need to keep the PMC running */
		escrvalue &= ~escrtbits;
		cccrvalue &= ~cccrtbits;
		wrmsr(escrmsr, escrvalue);
		wrmsr(pd->pm_cccr_msr, cccrvalue);
	}

	mtx_unlock_spin(&pc->pc_mtx);

	PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
	    "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
	    escrvalue, cccrvalue, tmp);

	if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
		tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
	else
		tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);

	P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;

	return (0);
}
/*
 * Handle an interrupt.
 *
 * The hardware sets the CCCR_OVF flag whenever a counter overflow
 * occurs, so the handler examines all 18 CCCR registers, processing
 * the counters that have overflowed.
 *
 * On HTT machines, the CCCR register is shared and will interrupt
 * both logical processors if so configured.  Thus multiple logical
 * CPUs could enter the NMI service routine at the same time.  These
 * will get serialized using a per-cpu spinlock dedicated for use in
 * the NMI handler.
 */

static int
p4_intr(int cpu, struct trapframe *tf)
{
	uint32_t cccrval, ovf_mask, ovf_partner;
	int did_interrupt, error, ri;
	struct p4_cpu *pc;
	struct pmc *pm;
	pmc_value_t v;

	PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
	    TRAPF_USERMODE(tf));

	pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];

	ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
	    P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
	ovf_mask |= P4_CCCR_OVF;
	if (p4_system_has_htt)
		ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
		    P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
	else
		ovf_partner = 0;
	did_interrupt = 0;

	if (p4_system_has_htt)
		P4_PCPU_ACQ_INTR_SPINLOCK(pc);

	/*
	 * Loop through all CCCRs, looking for ones that have
	 * interrupted this CPU.
	 */
	for (ri = 0; ri < P4_NPMCS; ri++) {

		/*
		 * Check if our partner logical CPU has already marked
		 * this PMC as having interrupted it.  If so, reset
		 * the flag and process the interrupt, but leave the
		 * hardware alone.
		 */
		if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
			P4_PCPU_SET_INTRFLAG(pc,ri,0);
			did_interrupt = 1;

			/*
			 * Ignore de-configured or stopped PMCs.
			 * Ignore PMCs not in sampling mode.
			 */
			pm = pc->pc_p4pmcs[ri].phw_pmc;
			if (pm == NULL ||
			    pm->pm_state != PMC_STATE_RUNNING ||
			    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
				continue;
			}
			(void) pmc_process_interrupt(cpu, pm, tf,
			    TRAPF_USERMODE(tf));
			continue;
		}

		/*
		 * Fresh interrupt.  Look for the CCCR_OVF bit
		 * and the OVF_Tx bit for this logical
		 * processor being set.
		 */
		cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri);

		if ((cccrval & ovf_mask) != ovf_mask)
			continue;

		/*
		 * If the other logical CPU would also have been
		 * interrupted due to the PMC being shared, record
		 * this fact in the per-cpu saved interrupt flag
		 * bitmask.
		 */
		if (p4_system_has_htt && (cccrval & ovf_partner))
			P4_PCPU_SET_INTRFLAG(pc, ri, 1);

		v = rdmsr(P4_PERFCTR_MSR_FIRST + ri);

		PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);

		/* Stop the counter, and reset the overflow bit */
		cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
		wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval);

		did_interrupt = 1;

		/*
		 * Ignore de-configured or stopped PMCs.  Ignore PMCs
		 * not in sampling mode.
		 */
		pm = pc->pc_p4pmcs[ri].phw_pmc;

		if (pm == NULL ||
		    pm->pm_state != PMC_STATE_RUNNING ||
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
			continue;
		}

		/*
		 * Process the interrupt.  Re-enable the PMC if
		 * processing was successful.
		 */
		error = pmc_process_interrupt(cpu, pm, tf,
		    TRAPF_USERMODE(tf));

		/*
		 * Only the first processor executing the NMI handler
		 * in an HTT pair will restart a PMC, and then only
		 * if there were no errors.
		 */
		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
		    pm->pm_sc.pm_reloadcount);
		wrmsr(P4_PERFCTR_MSR_FIRST + ri, v);
		if (error == 0)
			wrmsr(P4_CCCR_MSR_FIRST + ri,
			    cccrval | P4_CCCR_ENABLE);
	}

	/* allow the other CPU to proceed */
	if (p4_system_has_htt)
		P4_PCPU_REL_INTR_SPINLOCK(pc);

	/*
	 * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
	 * masked when a PMC interrupts the CPU.  We need to unmask
	 * the interrupt source explicitly.
	 */

	if (did_interrupt)
		pmc_x86_lapic_enable_pmc_interrupt();

	atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
	    &pmc_stats.pm_intr_ignored, 1);

	return (did_interrupt);
}

/*
 * Describe a CPU's PMC state.
 */

static int
p4_describe(int cpu, int ri, struct pmc_info *pi,
    struct pmc **ppmc)
{
	int error;
	size_t copied;
	const struct p4pmc_descr *pd;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] row-index %d out of range", __LINE__, ri));

	PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);

	if (P4_CPU_IS_HTT_SECONDARY(cpu))
		return (EINVAL);

	pd = &p4_pmcdesc[ri];

	if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
	    PMC_NAME_MAX, &copied)) != 0)
		return (error);

	pi->pm_class = pd->pm_descr.pd_class;

	if (p4_pcpu[cpu]->pc_p4pmcs[ri].phw_state & PMC_PHW_FLAG_IS_ENABLED) {
		pi->pm_enabled = TRUE;
		*ppmc          = p4_pcpu[cpu]->pc_p4pmcs[ri].phw_pmc;
	} else {
		pi->pm_enabled = FALSE;
		*ppmc          = NULL;
	}

	return (0);
}

/*
 * Get MSR# for use with RDPMC.
 */

static int
p4_get_msr(int ri, uint32_t *msr)
{
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] ri %d out of range", __LINE__, ri));

	*msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;

	PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);

	return (0);
}
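/*
 * Editor's sketch of how a userland consumer might use the offset
 * returned by 'p4_get_msr()': it is the RDPMC counter index for the
 * row, usable with the RDPMC instruction provided the kernel has
 * enabled user-mode RDPMC (CR4.PCE).  Illustrative only; not part of
 * the driver.
 */
#if 0
static inline uint64_t
example_rdpmc(uint32_t c)
{
	uint32_t lo, hi;

	/* RDPMC takes the counter index in %ecx, returns %edx:%eax */
	__asm __volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (c));
	return (((uint64_t)hi << 32) | lo);
}
#endif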
int
pmc_p4_initialize(struct pmc_mdep *md, int ncpus)
{
	struct pmc_classdep *pcd;
	struct p4_event_descr *pe;

	KASSERT(md != NULL, ("[p4,%d] md is NULL", __LINE__));