1/** 2 * @file op_model_p4.c 3 * P4 model-specific MSR operations 4 * 5 * @remark Copyright 2002 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author Graydon Hoare 9 */ 10 11#include <linux/oprofile.h> 12#include <linux/smp.h> 13#include <asm/msr.h> 14#include <asm/ptrace.h> 15#include <asm/fixmap.h> 16#include <asm/apic.h> 17#include <asm/nmi.h> 18 19#include "op_x86_model.h" 20#include "op_counter.h" 21 22#define NUM_EVENTS 39 23 24#define NUM_COUNTERS_NON_HT 8 25#define NUM_ESCRS_NON_HT 45 26#define NUM_CCCRS_NON_HT 18 27#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) 28 29#define NUM_COUNTERS_HT2 4 30#define NUM_ESCRS_HT2 23 31#define NUM_CCCRS_HT2 9 32#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) 33 34static unsigned int num_counters = NUM_COUNTERS_NON_HT; 35static unsigned int num_controls = NUM_CONTROLS_NON_HT; 36 37/* this has to be checked dynamically since the 38 hyper-threadedness of a chip is discovered at 39 kernel boot-time. */ 40static inline void setup_num_counters(void) 41{ 42#ifdef CONFIG_SMP 43 if (smp_num_siblings == 2){ 44 num_counters = NUM_COUNTERS_HT2; 45 num_controls = NUM_CONTROLS_HT2; 46 } 47#endif 48} 49 50static int inline addr_increment(void) 51{ 52#ifdef CONFIG_SMP 53 return smp_num_siblings == 2 ? 2 : 1; 54#else 55 return 1; 56#endif 57} 58 59 60/* tables to simulate simplified hardware view of p4 registers */ 61struct p4_counter_binding { 62 int virt_counter; 63 int counter_address; 64 int cccr_address; 65}; 66 67struct p4_event_binding { 68 int escr_select; /* value to put in CCCR */ 69 int event_select; /* value to put in ESCR */ 70 struct { 71 int virt_counter; /* for this counter... */ 72 int escr_address; /* use this ESCR */ 73 } bindings[2]; 74}; 75 76/* nb: these CTR_* defines are a duplicate of defines in 77 event/i386.p4*events. 
*/ 78 79 80#define CTR_BPU_0 (1 << 0) 81#define CTR_MS_0 (1 << 1) 82#define CTR_FLAME_0 (1 << 2) 83#define CTR_IQ_4 (1 << 3) 84#define CTR_BPU_2 (1 << 4) 85#define CTR_MS_2 (1 << 5) 86#define CTR_FLAME_2 (1 << 6) 87#define CTR_IQ_5 (1 << 7) 88 89static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { 90 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, 91 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, 92 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, 93 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, 94 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, 95 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, 96 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, 97 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } 98}; 99 100#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT 101 102/* p4 event codes in libop/op_event.h are indices into this table. */ 103 104static struct p4_event_binding p4_events[NUM_EVENTS] = { 105 106 { /* BRANCH_RETIRED */ 107 0x05, 0x06, 108 { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, 109 {CTR_IQ_5, MSR_P4_CRU_ESCR3} } 110 }, 111 112 { /* MISPRED_BRANCH_RETIRED */ 113 0x04, 0x03, 114 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 115 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 116 }, 117 118 { /* TC_DELIVER_MODE */ 119 0x01, 0x01, 120 { { CTR_MS_0, MSR_P4_TC_ESCR0}, 121 { CTR_MS_2, MSR_P4_TC_ESCR1} } 122 }, 123 124 { /* BPU_FETCH_REQUEST */ 125 0x00, 0x03, 126 { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, 127 { CTR_BPU_2, MSR_P4_BPU_ESCR1} } 128 }, 129 130 { /* ITLB_REFERENCE */ 131 0x03, 0x18, 132 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, 133 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } 134 }, 135 136 { /* MEMORY_CANCEL */ 137 0x05, 0x02, 138 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, 139 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } 140 }, 141 142 { /* MEMORY_COMPLETE */ 143 0x02, 0x08, 144 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 145 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 146 }, 147 148 { /* LOAD_PORT_REPLAY */ 149 0x02, 0x04, 150 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 151 
{ CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 152 }, 153 154 { /* STORE_PORT_REPLAY */ 155 0x02, 0x05, 156 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 157 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 158 }, 159 160 { /* MOB_LOAD_REPLAY */ 161 0x02, 0x03, 162 { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, 163 { CTR_BPU_2, MSR_P4_MOB_ESCR1} } 164 }, 165 166 { /* PAGE_WALK_TYPE */ 167 0x04, 0x01, 168 { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, 169 { CTR_BPU_2, MSR_P4_PMH_ESCR1} } 170 }, 171 172 { /* BSQ_CACHE_REFERENCE */ 173 0x07, 0x0c, 174 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 175 { CTR_BPU_2, MSR_P4_BSU_ESCR1} } 176 }, 177 178 { /* IOQ_ALLOCATION */ 179 0x06, 0x03, 180 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 181 { 0, 0 } } 182 }, 183 184 { /* IOQ_ACTIVE_ENTRIES */ 185 0x06, 0x1a, 186 { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, 187 { 0, 0 } } 188 }, 189 190 { /* FSB_DATA_ACTIVITY */ 191 0x06, 0x17, 192 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 193 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 194 }, 195 196 { /* BSQ_ALLOCATION */ 197 0x07, 0x05, 198 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 199 { 0, 0 } } 200 }, 201 202 { /* BSQ_ACTIVE_ENTRIES */ 203 0x07, 0x06, 204 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, 205 { 0, 0 } } 206 }, 207 208 { /* X87_ASSIST */ 209 0x05, 0x03, 210 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 211 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 212 }, 213 214 { /* SSE_INPUT_ASSIST */ 215 0x01, 0x34, 216 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 217 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 218 }, 219 220 { /* PACKED_SP_UOP */ 221 0x01, 0x08, 222 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 223 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 224 }, 225 226 { /* PACKED_DP_UOP */ 227 0x01, 0x0c, 228 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 229 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 230 }, 231 232 { /* SCALAR_SP_UOP */ 233 0x01, 0x0a, 234 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 235 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 236 }, 237 238 { /* SCALAR_DP_UOP */ 239 0x01, 0x0e, 240 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 241 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 242 }, 243 244 { /* 64BIT_MMX_UOP */ 245 0x01, 0x02, 246 
{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 247 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 248 }, 249 250 { /* 128BIT_MMX_UOP */ 251 0x01, 0x1a, 252 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 253 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 254 }, 255 256 { /* X87_FP_UOP */ 257 0x01, 0x04, 258 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 259 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 260 }, 261 262 { /* X87_SIMD_MOVES_UOP */ 263 0x01, 0x2e, 264 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 265 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 266 }, 267 268 { /* MACHINE_CLEAR */ 269 0x05, 0x02, 270 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 271 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 272 }, 273 274 { /* GLOBAL_POWER_EVENTS */ 275 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, 276 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 277 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 278 }, 279 280 { /* TC_MS_XFER */ 281 0x00, 0x05, 282 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 283 { CTR_MS_2, MSR_P4_MS_ESCR1} } 284 }, 285 286 { /* UOP_QUEUE_WRITES */ 287 0x00, 0x09, 288 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 289 { CTR_MS_2, MSR_P4_MS_ESCR1} } 290 }, 291 292 { /* FRONT_END_EVENT */ 293 0x05, 0x08, 294 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 295 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 296 }, 297 298 { /* EXECUTION_EVENT */ 299 0x05, 0x0c, 300 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 301 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 302 }, 303 304 { /* REPLAY_EVENT */ 305 0x05, 0x09, 306 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 307 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 308 }, 309 310 { /* INSTR_RETIRED */ 311 0x04, 0x02, 312 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 313 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 314 }, 315 316 { /* UOPS_RETIRED */ 317 0x04, 0x01, 318 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 319 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 320 }, 321 322 { /* UOP_TYPE */ 323 0x02, 0x02, 324 { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, 325 { CTR_IQ_5, MSR_P4_RAT_ESCR1} } 326 }, 327 328 { /* RETIRED_MISPRED_BRANCH_TYPE */ 329 0x02, 0x05, 330 { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, 331 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 332 }, 333 334 { /* RETIRED_BRANCH_TYPE */ 335 0x02, 0x04, 336 { { 
CTR_MS_0, MSR_P4_TBPU_ESCR0}, 337 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 338 } 339}; 340 341 342#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) 343 344#define ESCR_RESERVED_BITS 0x80000003 345#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) 346#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) 347#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) 348#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) 349#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) 350#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) 351#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) 352#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 353#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 354 355#define CCCR_RESERVED_BITS 0x38030FFF 356#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) 357#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) 358#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) 359#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) 360#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) 361#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) 362#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) 363#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 364#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 365#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) 366#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) 367 368#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) 369#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 
1 : 0) 370#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) 371#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) 372#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) 373 374 375/* this assigns a "stagger" to the current CPU, which is used throughout 376 the code in this module as an extra array offset, to select the "even" 377 or "odd" part of all the divided resources. */ 378static unsigned int get_stagger(void) 379{ 380#ifdef CONFIG_SMP 381 int cpu = smp_processor_id(); 382 return (cpu != first_cpu(cpu_sibling_map[cpu])); 383#endif 384 return 0; 385} 386 387 388/* finally, mediate access to a real hardware counter 389 by passing a "virtual" counter numer to this macro, 390 along with your stagger setting. */ 391#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) 392 393static unsigned long reset_value[NUM_COUNTERS_NON_HT]; 394 395 396static void p4_fill_in_addresses(struct op_msrs * const msrs) 397{ 398 unsigned int i; 399 unsigned int addr, cccraddr, stag; 400 401 setup_num_counters(); 402 stag = get_stagger(); 403 404 /* initialize some registers */ 405 for (i = 0; i < num_counters; ++i) { 406 msrs->counters[i].addr = 0; 407 } 408 for (i = 0; i < num_controls; ++i) { 409 msrs->controls[i].addr = 0; 410 } 411 412 /* the counter & cccr registers we pay attention to */ 413 for (i = 0; i < num_counters; ++i) { 414 addr = p4_counters[VIRT_CTR(stag, i)].counter_address; 415 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; 416 if (reserve_perfctr_nmi(addr)){ 417 msrs->counters[i].addr = addr; 418 msrs->controls[i].addr = cccraddr; 419 } 420 } 421 422 /* 43 ESCR registers in three or four discontiguous group */ 423 for (addr = MSR_P4_BSU_ESCR0 + stag; 424 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { 425 if (reserve_evntsel_nmi(addr)) 426 msrs->controls[i].addr = addr; 427 } 428 429 /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 430 
	 * to avoid special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			if (reserve_evntsel_nmi(addr))
				msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		if (reserve_evntsel_nmi(addr))
			msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		}
	}
}


/* Program the ESCR and CCCR pair for one virtual counter: looks up the
 * event binding for counter_config[ctr].event, sets user/kernel mode bits
 * (stagger selects the T0 vs T1 bit positions), event select/mask in the
 * ESCR, and the ESCR select + PMI routing in the CCCR.  Logs and returns
 * without touching hardware if the event code is out of range or has no
 * binding for this counter. */
static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;	/* each event has at most 2 bindings */
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	/* event codes are 1-based indices into p4_events */
	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			ESCR_READ(escr, high, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			ESCR_WRITE(escr, high, ev, i);

			/* modify CCCR: route the PMI to this thread's bit */
			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0) {
				CCCR_SET_PMI_OVF_0(cccr);
			} else {
				CCCR_SET_PMI_OVF_1(cccr);
			}
			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
			return;
		}
	}

	/* no binding matched this virtual counter */
	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}


/* Reset all reserved CCCRs and ESCRs to a known state, then program and
 * arm every enabled counter.  reset_value[] records the period so the NMI
 * handler can rewrite the counter after each overflow (0 = disabled). */
static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the cccrs we will use */
	for (i = 0 ; i < num_counters ; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
			continue;
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear all escrs (including those outside our concern) */
	for (i = num_counters; i < num_controls; i++) {
		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
			continue;
		wrmsr(msrs->controls[i].addr, 0, 0);
	}

	/* setup all counters */
	for (i = 0 ; i < num_counters ; ++i) {
		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
		} else {
			reset_value[i] = 0;
		}
	}
}


/* NMI handler body: for each active counter, detect overflow via either
 * the CCCR OVF flag or the counter's sign bit, record a sample, and
 * re-arm.  Returns 1 to report the NMI as handled. */
static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;


		/* map virtual counter index to the hardware slot
		   for this thread */
		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
		CTR_READ(ctr, high, real);
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			oprofile_add_sample(regs, i);
			CTR_WRITE(reset_value[i], real);
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
			/* NOTE(review): counter is written again after the
			   CCCR write — looks intentionally redundant
			   (re-arm ordering around the OVF clear); confirm
			   before simplifying */
			CTR_WRITE(reset_value[i], real);
		}
	}

	/* P4 quirk: you have to re-unmask the apic vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}


/* Enable every active counter by setting its CCCR ENABLE bit. */
static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_ENABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}


/* Disable every active counter by clearing its CCCR ENABLE bit. */
static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}

/* Release the perfctr/evntsel NMI reservations taken in
 * p4_fill_in_addresses(). */
static void p4_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0 ; i < num_counters ; ++i) {
		if (CTR_IS_RESERVED(msrs,i))
			release_perfctr_nmi(msrs->counters[i].addr);
	}
	/* some of the control registers are specially reserved in
	 * conjunction with the counter registers (hence the starting offset).
	 * This saves a few bits.
	 */
	for (i = num_counters ; i < num_controls ; ++i) {
		if (CTRL_IS_RESERVED(msrs,i))
			release_evntsel_nmi(msrs->controls[i].addr);
	}
}


#ifdef CONFIG_SMP
/* model spec for a hyper-threaded P4: resources split between siblings */
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters = NUM_COUNTERS_HT2,
	.num_controls = NUM_CONTROLS_HT2,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};
#endif

/* model spec for a non-HT P4: full complement of counters/controls */
struct op_x86_model_spec const op_p4_spec = {
	.num_counters = NUM_COUNTERS_NON_HT,
	.num_controls = NUM_CONTROLS_NON_HT,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop,
	.shutdown = &p4_shutdown
};