#ifdef CONFIG_CPU_SUP_INTEL

/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS		4

/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE		24

#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE	PAGE_SIZE

/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */

struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
};

struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8,  r9,  r10, r11;
	u64 r12, r13, r14, r15;
	u64 status, dla, dse, lat;
};

/*
 * A debug store configuration.
 *
 * We only support architectures that use 64bit fields.
 */
struct debug_store {
	u64	bts_buffer_base;
	u64	bts_index;
	u64	bts_absolute_maximum;
	u64	bts_interrupt_threshold;
	u64	pebs_buffer_base;
	u64	pebs_index;
	u64	pebs_absolute_maximum;
	u64	pebs_interrupt_threshold;
	u64	pebs_event_reset[MAX_PEBS_EVENTS];
};

static void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}

static void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}

static void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	get_online_cpus();

	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

		if (!ds)
			continue;

		per_cpu(cpu_hw_events, cpu).ds = NULL;

		kfree((void *)(unsigned long)ds->pebs_buffer_base);
		kfree((void *)(unsigned long)ds->bts_buffer_base);
		kfree(ds);
	}

	put_online_cpus();
}

static int reserve_ds_buffers(void)
{
	int cpu, err = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return 0;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		struct debug_store *ds;
		void *buffer;
		int max, thresh;

		err = -ENOMEM;
		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
		if (unlikely(!ds))
			break;
		per_cpu(cpu_hw_events, cpu).ds = ds;

		if (x86_pmu.bts) {
			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
			if (unlikely(!buffer))
				break;

			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
			thresh = max / 16;

			ds->bts_buffer_base = (u64)(unsigned long)buffer;
			ds->bts_index = ds->bts_buffer_base;
			ds->bts_absolute_maximum = ds->bts_buffer_base +
				max * BTS_RECORD_SIZE;
			ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
				thresh * BTS_RECORD_SIZE;
		}

		if (x86_pmu.pebs) {
			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
			if (unlikely(!buffer))
				break;

			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
			ds->pebs_index = ds->pebs_buffer_base;
			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
				max * x86_pmu.pebs_record_size;
			/*
			 * Always use single record PEBS
			 */
			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
				x86_pmu.pebs_record_size;
		}

		err = 0;
	}

	if (err)
		release_ds_buffers();
	else {
		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();

	return err;
}
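/*
 * Worked example of the sizing above (illustrative, assuming
 * PAGE_SIZE == 4096): BTS_BUFFER_SIZE is 64KiB, so max = 65536 / 24
 * = 2730 records and thresh = max / 16 = 170 records; the BTS
 * interrupt threshold therefore sits 170 * 24 = 4080 bytes below the
 * absolute maximum, leaving slack to drain before the buffer wraps.
 * PEBS instead puts the threshold one record past the base, so the
 * PMI fires for every record written ("single record PEBS").
 */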
/*
 * BTS
 */

static struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);

static void intel_pmu_enable_bts(u64 config)
{
	unsigned long debugctlmsr;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr |= DEBUGCTLMSR_TR;
	debugctlmsr |= DEBUGCTLMSR_BTS;
	debugctlmsr |= DEBUGCTLMSR_BTINT;

	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;

	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
		debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;

	update_debugctlmsr(debugctlmsr);
}

static void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}

static void intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!event)
		return;

	if (!ds)
		return;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return;

	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;
	regs.ip = 0;

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
		return;

	for (; at < top; at++) {
		data.ip = at->from;
		data.addr = at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
}
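/*
 * A note on the drain above (illustrative numbers): every BTS record
 * becomes one perf sample of identical layout, so a single
 * perf_output_begin() reservation of header.size * (top - at) bytes
 * covers the entire batch; e.g. with header.size == 48 and 100
 * pending records, 4800 bytes are reserved once rather than
 * negotiating output space per record.
 */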
/*
 * PEBS
 */

static struct event_constraint intel_core_pebs_events[] = {
	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
	PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
	PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
	EVENT_CONSTRAINT_END
};

static struct event_constraint intel_nehalem_pebs_events[] = {
	PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
	PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
	PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
	PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
	EVENT_CONSTRAINT_END
};

static struct event_constraint *
intel_pebs_constraints(struct perf_event *event)
{
	struct event_constraint *c;

	if (!event->attr.precise_ip)
		return NULL;

	if (x86_pmu.pebs_constraints) {
		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
			if ((event->hw.config & c->cmask) == c->code)
				return c;
		}
	}

	return &emptyconstraint;
}

static void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	cpuc->pebs_enabled |= 1ULL << hwc->idx;
	WARN_ON_ONCE(cpuc->enabled);

	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
		intel_pmu_lbr_enable(event);
}

static void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;

	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
		intel_pmu_lbr_disable(event);
}

static void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}

static void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}

#include <asm/insn.h>

static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
	return ip > PAGE_OFFSET;
#else
	return (long)ip < 0;
#endif
}
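/*
 * Illustration of the check above (addresses are examples only): on
 * x86_64 the kernel half of the canonical address space has the top
 * bit set, so 0xffffffff81000000 is negative as a long, while a user
 * address like 0x00007f1234560000 is not; on 32-bit kernels the
 * kernel mapping simply starts above PAGE_OFFSET.
 */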
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;

	/*
	 * We don't need to fix up if the PEBS assist is fault-like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PAGE_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		regs->ip = from;
		return 1;
	}

	do {
		struct insn insn;
		u8 buf[MAX_INSN_SIZE];
		void *kaddr;

		old_to = to;
		if (!kernel_ip(ip)) {
			int bytes, size = MAX_INSN_SIZE;

			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
			if (bytes != size)
				return 0;

			kaddr = buf;
		} else
			kaddr = (void *)to;

		kernel_insn_init(&insn, kaddr);
		insn_get_length(&insn);
		to += insn.length;
	} while (to < ip);

	if (to == ip) {
		regs->ip = old_to;
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}

static int intel_pmu_save_and_restart(struct perf_event *event);
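/*
 * Worked example of the rewind above (addresses are hypothetical):
 * with a trap-like PEBS assist the reported IP points one instruction
 * *past* the one that overflowed the counter. Suppose LBR0 holds
 * from = 0x400100, to = 0x400200 and the PEBS IP is 0x400207.
 * Decoding forward from 0x400200, a 3-byte insn ends at 0x400203 and
 * a 4-byte insn ends at 0x400207 == ip, so old_to == 0x400203 is the
 * start of the offending instruction and regs->ip is rewound to it.
 */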
static void __intel_pmu_pebs_event(struct perf_event *event,
				   struct pt_regs *iregs, void *__pebs)
{
	/*
	 * We cast to pebs_record_core since that is a subset of
	 * both formats and we don't use the other fields in this
	 * routine.
	 */
	struct pebs_record_core *pebs = __pebs;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!intel_pmu_save_and_restart(event))
		return;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;

	/*
	 * We use the interrupt regs as a base because the PEBS record
	 * does not contain a full regs set, specifically it seems to
	 * lack segment descriptors, which get used by things like
	 * user_mode().
	 *
	 * In the simple case fix up only the IP and BP,SP regs, for
	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
	 */
	regs = *iregs;
	regs.ip = pebs->ip;
	regs.bp = pebs->bp;
	regs.sp = pebs->sp;

	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;

	if (perf_event_overflow(event, 1, &data, &regs))
		x86_pmu_stop(event);
}

static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	int n;

	if (!ds || !x86_pmu.pebs)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise_ip)
		return;

	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > 1);
	at += n - 1;

	__intel_pmu_pebs_event(event, iregs, at);
}

static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct pebs_record_nhm *at, *top;
	struct perf_event *event = NULL;
	u64 status = 0;
	int bit, n;

	if (!ds || !x86_pmu.pebs)
		return;

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	ds->pebs_index = ds->pebs_buffer_base;

	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);

	for ( ; at < top; at++) {
		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
			event = cpuc->events[bit];
			if (!test_bit(bit, cpuc->active_mask))
				continue;

			WARN_ON_ONCE(!event);

			if (!event->attr.precise_ip)
				continue;

			if (__test_and_set_bit(bit, (unsigned long *)&status))
				continue;

			break;
		}

		if (!event || bit >= MAX_PEBS_EVENTS)
			continue;

		__intel_pmu_pebs_event(event, iregs, at);
	}
}
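/*
 * Attribution example for the drain above (illustrative): a record
 * whose status word is 0b0101 may belong to counter 0 or counter 2.
 * Assuming both are active precise events, the inner loop hands the
 * record to the lowest-numbered bit not yet claimed in the local
 * 'status' mask, so two back-to-back records with status 0b0101 get
 * credited to counter 0 and counter 2 respectively.
 */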
/*
 * BTS, PEBS probe and setup
 */

static void intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	if (x86_pmu.pebs) {
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			x86_pmu.pebs_constraints = intel_core_pebs_events;
			break;

		case 1:
			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
			break;

		default:
			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
			break;
		}
	}
}

#else /* CONFIG_CPU_SUP_INTEL */

static int reserve_ds_buffers(void)
{
	return 0;
}

static void release_ds_buffers(void)
{
}

#endif /* CONFIG_CPU_SUP_INTEL */