#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 *
 * ARMv7 support: Jean Pihet <jpihet@mvista.com>
 * 2010 (c) MontaVista Software, LLC.
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>

#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

static struct platform_device *pmu_device;

/*
 * Hardware lock to serialize accesses to PMU registers. Needed for the
 * read/modify/write sequences.
 */
DEFINE_SPINLOCK(pmu_lock);

/*
 * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
 * another platform that supports more, we need to increase this to be the
 * largest of all platforms.
 *
 * ARMv7 supports up to 32 events:
 *	cycle counter CCNT + 31 event counters CNT0..30.
 *	Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
 */
#define ARMPMU_MAX_HWEVENTS	33

/* The events for a given CPU. */
struct cpu_hw_events {
	/*
	 * The events that are active on the CPU for the given index. Index 0
	 * is reserved.
	 */
	struct perf_event *events[ARMPMU_MAX_HWEVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];

	/*
	 * A 1 bit for an index indicates that the counter is actively being
	 * used.
	 */
	unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

/* PMU names. */
static const char *arm_pmu_names[] = {
	[ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
	[ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
	[ARM_PERF_PMU_ID_V6] = "v6",
	[ARM_PERF_PMU_ID_V6MP] = "v6mpcore",
	[ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8",
	[ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9",
};

struct arm_pmu {
	enum arm_perf_pmu_ids id;
	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
	void		(*enable)(struct hw_perf_event *evt, int idx);
	void		(*disable)(struct hw_perf_event *evt, int idx);
	int		(*event_map)(int evt);
	u64		(*raw_event)(u64);
	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
					 struct hw_perf_event *hwc);
	u32		(*read_counter)(int idx);
	void		(*write_counter)(int idx, u32 val);
	void		(*start)(void);
	void		(*stop)(void);
	int		num_events;
	u64		max_period;
};

/* Set at runtime when we know what CPU type we are. */
static const struct arm_pmu *armpmu;

enum arm_perf_pmu_ids
armpmu_get_pmu_id(void)
{
	int id = -ENODEV;

	if (armpmu != NULL)
		id = armpmu->id;

	return id;
}
EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);

int
armpmu_get_max_events(void)
{
	int max_events = 0;

	if (armpmu != NULL)
		max_events = armpmu->num_events;

	return max_events;
}
EXPORT_SYMBOL_GPL(armpmu_get_max_events);

#define HW_OP_UNSUPPORTED	0xFFFF

#define C(_x) \
	PERF_COUNT_HW_CACHE_##_x

#define CACHE_OP_UNSUPPORTED	0xFFFF

static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
				     [PERF_COUNT_HW_CACHE_OP_MAX]
				     [PERF_COUNT_HW_CACHE_RESULT_MAX];

static int
armpmu_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}

static int
armpmu_event_set_period(struct perf_event *event,
			struct hw_perf_event *hwc,
			int idx)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)armpmu->max_period)
		left = armpmu->max_period;

	local64_set(&hwc->prev_count, (u64)-left);

	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

static u64
armpmu_event_update(struct perf_event *event,
		    struct hw_perf_event *hwc,
		    int idx)
{
	int shift = 64 - 32;
	s64 prev_raw_count, new_raw_count;
	u64 delta;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = armpmu->read_counter(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void
armpmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	WARN_ON(idx < 0);

	clear_bit(idx, cpuc->active_mask);
	armpmu->disable(hwc, idx);

	barrier();

	armpmu_event_update(event, hwc, idx);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void
armpmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * Don't read disabled counters!
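	 * An event that is not currently scheduled onto a hardware counter
	 * has hwc->idx == -1 (see __hw_perf_event_init()).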
	 */
	if (hwc->idx < 0)
		return;

	armpmu_event_update(event, hwc, hwc->idx);
}

static void
armpmu_unthrottle(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * Set the period again. Some counters can't be stopped, so when we
	 * were throttled we simply disabled the IRQ source and the counter
	 * may have been left counting. If we don't do this step then we may
	 * get an interrupt too soon or *way* too late if the overflow has
	 * happened since disabling.
	 */
	armpmu_event_set_period(event, hwc, hwc->idx);
	armpmu->enable(hwc, hwc->idx);
}

static int
armpmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	/* If we don't have space for the counter then finish early. */
	idx = armpmu->get_event_idx(cpuc, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	armpmu->disable(hwc, idx);
	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	/* Set the period for the event. */
	armpmu_event_set_period(event, hwc, idx);

	/* Enable the event. */
	armpmu->enable(hwc, idx);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	return err;
}

static struct pmu pmu = {
	.enable	    = armpmu_enable,
	.disable    = armpmu_disable,
	.unthrottle = armpmu_unthrottle,
	.read	    = armpmu_read,
};

static int
validate_event(struct cpu_hw_events *cpuc,
	       struct perf_event *event)
{
	struct hw_perf_event fake_event = event->hw;

	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
		return 1;

	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct cpu_hw_events fake_pmu;

	memset(&fake_pmu, 0, sizeof(fake_pmu));

	if (!validate_event(&fake_pmu, leader))
		return -ENOSPC;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -ENOSPC;
	}

	if (!validate_event(&fake_pmu, event))
		return -ENOSPC;

	return 0;
}

static int
armpmu_reserve_hardware(void)
{
	int i, err = -ENODEV, irq;

	pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
	if (IS_ERR(pmu_device)) {
		pr_warning("unable to reserve pmu\n");
		return PTR_ERR(pmu_device);
	}

	init_pmu(ARM_PMU_DEVICE_CPU);

	if (pmu_device->num_resources < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	for (i = 0; i < pmu_device->num_resources; ++i) {
		irq = platform_get_irq(pmu_device, i);
		if (irq < 0)
			continue;

		err = request_irq(irq, armpmu->handle_irq,
				  IRQF_DISABLED | IRQF_NOBALANCING,
				  "armpmu", NULL);
		if (err) {
			pr_warning("unable to request IRQ%d for ARM perf "
				   "counters\n", irq);
			break;
		}
	}

	if (err) {
		for (i = i - 1; i >= 0; --i) {
			irq = platform_get_irq(pmu_device, i);
			if (irq >= 0)
				free_irq(irq, NULL);
		}
		release_pmu(pmu_device);
		pmu_device = NULL;
	}

	return err;
}

static void
armpmu_release_hardware(void)
{
	int i, irq;

	for (i = pmu_device->num_resources - 1; i >= 0; --i) {
		irq = platform_get_irq(pmu_device, i);
		if (irq >= 0)
			free_irq(irq, NULL);
	}
	armpmu->stop();

	release_pmu(pmu_device);
	pmu_device = NULL;
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);

static void
hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
		armpmu_release_hardware();
		mutex_unlock(&pmu_reserve_mutex);
	}
}

static int
__hw_perf_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int mapping, err;

	/* Decode the generic type into an ARM event identifier. */
	if (PERF_TYPE_HARDWARE == event->attr.type) {
		mapping = armpmu->event_map(event->attr.config);
	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
		mapping = armpmu_map_cache_event(event->attr.config);
	} else if (PERF_TYPE_RAW == event->attr.type) {
		mapping = armpmu->raw_event(event->attr.config);
	} else {
		pr_debug("event type %x not supported\n", event->attr.type);
		return -EOPNOTSUPP;
	}

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapping;
	}

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 * The ARM performance counters are on all of the time so if someone
	 * has asked us for some excludes then we have to fail.
	 */
	if (event->attr.exclude_kernel || event->attr.exclude_user ||
	    event->attr.exclude_hv || event->attr.exclude_idle) {
		pr_debug("ARM performance counters do not support "
			 "mode exclusion\n");
		return -EPERM;
	}

	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet. For SMP systems, each core has its own PMU so we can't do any
	 * clever allocation or constraints checking at this point.
	 */
	hwc->idx = -1;

	/*
	 * Store the event encoding into the config_base field. config and
	 * event_base are unused as the only 2 things we need to know are
	 * the event mapping and the counter to use. The counter to use is
	 * also the index and the config_base is the event type.
	 */
473 */ 474 hwc->config_base = (unsigned long)mapping; 475 hwc->config = 0; 476 hwc->event_base = 0; 477 478 if (!hwc->sample_period) { 479 hwc->sample_period = armpmu->max_period; 480 hwc->last_period = hwc->sample_period; 481 local64_set(&hwc->period_left, hwc->sample_period); 482 } 483 484 err = 0; 485 if (event->group_leader != event) { 486 err = validate_group(event); 487 if (err) 488 return -EINVAL; 489 } 490 491 return err; 492} 493 494const struct pmu * 495hw_perf_event_init(struct perf_event *event) 496{ 497 int err = 0; 498 499 if (!armpmu) 500 return ERR_PTR(-ENODEV); 501 502 event->destroy = hw_perf_event_destroy; 503 504 if (!atomic_inc_not_zero(&active_events)) { 505 if (atomic_read(&active_events) > perf_max_events) { 506 atomic_dec(&active_events); 507 return ERR_PTR(-ENOSPC); 508 } 509 510 mutex_lock(&pmu_reserve_mutex); 511 if (atomic_read(&active_events) == 0) { 512 err = armpmu_reserve_hardware(); 513 } 514 515 if (!err) 516 atomic_inc(&active_events); 517 mutex_unlock(&pmu_reserve_mutex); 518 } 519 520 if (err) 521 return ERR_PTR(err); 522 523 err = __hw_perf_event_init(event); 524 if (err) 525 hw_perf_event_destroy(event); 526 527 return err ? ERR_PTR(err) : &pmu; 528} 529 530void 531hw_perf_enable(void) 532{ 533 /* Enable all of the perf events on hardware. */ 534 int idx; 535 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 536 537 if (!armpmu) 538 return; 539 540 for (idx = 0; idx <= armpmu->num_events; ++idx) { 541 struct perf_event *event = cpuc->events[idx]; 542 543 if (!event) 544 continue; 545 546 armpmu->enable(&event->hw, idx); 547 } 548 549 armpmu->start(); 550} 551 552void 553hw_perf_disable(void) 554{ 555 if (armpmu) 556 armpmu->stop(); 557} 558 559/* 560 * ARMv6 Performance counter handling code. 561 * 562 * ARMv6 has 2 configurable performance counters and a single cycle counter. 563 * They all share a single reset bit but can be written to zero so we can use 564 * that for a reset. 565 * 566 * The counters can't be individually enabled or disabled so when we remove 567 * one event and replace it with another we could get spurious counts from the 568 * wrong event. However, we can take advantage of the fact that the 569 * performance counters can export events to the event bus, and the event bus 570 * itself can be monitored. This requires that we *don't* export the events to 571 * the event bus. The procedure for disabling a configurable counter is: 572 * - change the counter to count the ETMEXTOUT[0] signal (0x20). This 573 * effectively stops the counter from counting. 574 * - disable the counter's interrupt generation (each counter has it's 575 * own interrupt enable bit). 576 * Once stopped, the counter value can be written as 0 to reset. 577 * 578 * To enable a counter: 579 * - enable the counter's interrupt generation. 580 * - set the new event type. 581 * 582 * Note: the dedicated cycle counter only counts cycles and can't be 583 * enabled/disabled independently of the others. When we want to disable the 584 * cycle counter, we have to just disable the interrupt reporting and start 585 * ignoring that counter. When re-enabling, we have to reset the value and 586 * enable the interrupt. 
587 */ 588 589enum armv6_perf_types { 590 ARMV6_PERFCTR_ICACHE_MISS = 0x0, 591 ARMV6_PERFCTR_IBUF_STALL = 0x1, 592 ARMV6_PERFCTR_DDEP_STALL = 0x2, 593 ARMV6_PERFCTR_ITLB_MISS = 0x3, 594 ARMV6_PERFCTR_DTLB_MISS = 0x4, 595 ARMV6_PERFCTR_BR_EXEC = 0x5, 596 ARMV6_PERFCTR_BR_MISPREDICT = 0x6, 597 ARMV6_PERFCTR_INSTR_EXEC = 0x7, 598 ARMV6_PERFCTR_DCACHE_HIT = 0x9, 599 ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, 600 ARMV6_PERFCTR_DCACHE_MISS = 0xB, 601 ARMV6_PERFCTR_DCACHE_WBACK = 0xC, 602 ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, 603 ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, 604 ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, 605 ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, 606 ARMV6_PERFCTR_WBUF_DRAINED = 0x12, 607 ARMV6_PERFCTR_CPU_CYCLES = 0xFF, 608 ARMV6_PERFCTR_NOP = 0x20, 609}; 610 611enum armv6_counters { 612 ARMV6_CYCLE_COUNTER = 1, 613 ARMV6_COUNTER0, 614 ARMV6_COUNTER1, 615}; 616 617/* 618 * The hardware events that we support. We do support cache operations but 619 * we have harvard caches and no way to combine instruction and data 620 * accesses/misses in hardware. 621 */ 622static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { 623 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, 624 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, 625 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 626 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 627 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, 628 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, 629 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 630}; 631 632static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 633 [PERF_COUNT_HW_CACHE_OP_MAX] 634 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 635 [C(L1D)] = { 636 /* 637 * The performance counters don't differentiate between read 638 * and write accesses/misses so this isn't strictly correct, 639 * but it's the best we can do. Writes and reads get 640 * combined. 641 */ 642 [C(OP_READ)] = { 643 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 644 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 645 }, 646 [C(OP_WRITE)] = { 647 [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 648 [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 649 }, 650 [C(OP_PREFETCH)] = { 651 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 652 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 653 }, 654 }, 655 [C(L1I)] = { 656 [C(OP_READ)] = { 657 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 658 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, 659 }, 660 [C(OP_WRITE)] = { 661 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 662 [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, 663 }, 664 [C(OP_PREFETCH)] = { 665 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 666 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 667 }, 668 }, 669 [C(LL)] = { 670 [C(OP_READ)] = { 671 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 672 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 673 }, 674 [C(OP_WRITE)] = { 675 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 676 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 677 }, 678 [C(OP_PREFETCH)] = { 679 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 680 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 681 }, 682 }, 683 [C(DTLB)] = { 684 /* 685 * The ARM performance counters can count micro DTLB misses, 686 * micro ITLB misses and main TLB misses. There isn't an event 687 * for TLB misses, so use the micro misses here and if users 688 * want the main TLB misses they can use a raw counter. 
689 */ 690 [C(OP_READ)] = { 691 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 692 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, 693 }, 694 [C(OP_WRITE)] = { 695 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 696 [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, 697 }, 698 [C(OP_PREFETCH)] = { 699 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 700 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 701 }, 702 }, 703 [C(ITLB)] = { 704 [C(OP_READ)] = { 705 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 706 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, 707 }, 708 [C(OP_WRITE)] = { 709 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 710 [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, 711 }, 712 [C(OP_PREFETCH)] = { 713 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 714 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 715 }, 716 }, 717 [C(BPU)] = { 718 [C(OP_READ)] = { 719 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 720 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 721 }, 722 [C(OP_WRITE)] = { 723 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 724 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 725 }, 726 [C(OP_PREFETCH)] = { 727 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 728 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 729 }, 730 }, 731}; 732 733enum armv6mpcore_perf_types { 734 ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, 735 ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, 736 ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, 737 ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, 738 ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, 739 ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, 740 ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, 741 ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, 742 ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, 743 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, 744 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, 745 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, 746 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, 747 ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, 748 ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, 749 ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, 750 ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, 751 ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, 752 ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, 753 ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, 754}; 755 756/* 757 * The hardware events that we support. We do support cache operations but 758 * we have harvard caches and no way to combine instruction and data 759 * accesses/misses in hardware. 
760 */ 761static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { 762 [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, 763 [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, 764 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 765 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 766 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, 767 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, 768 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 769}; 770 771static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 772 [PERF_COUNT_HW_CACHE_OP_MAX] 773 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 774 [C(L1D)] = { 775 [C(OP_READ)] = { 776 [C(RESULT_ACCESS)] = 777 ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, 778 [C(RESULT_MISS)] = 779 ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, 780 }, 781 [C(OP_WRITE)] = { 782 [C(RESULT_ACCESS)] = 783 ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, 784 [C(RESULT_MISS)] = 785 ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, 786 }, 787 [C(OP_PREFETCH)] = { 788 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 789 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 790 }, 791 }, 792 [C(L1I)] = { 793 [C(OP_READ)] = { 794 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 795 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, 796 }, 797 [C(OP_WRITE)] = { 798 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 799 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, 800 }, 801 [C(OP_PREFETCH)] = { 802 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 803 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 804 }, 805 }, 806 [C(LL)] = { 807 [C(OP_READ)] = { 808 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 809 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 810 }, 811 [C(OP_WRITE)] = { 812 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 813 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 814 }, 815 [C(OP_PREFETCH)] = { 816 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 817 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 818 }, 819 }, 820 [C(DTLB)] = { 821 /* 822 * The ARM performance counters can count micro DTLB misses, 823 * micro ITLB misses and main TLB misses. There isn't an event 824 * for TLB misses, so use the micro misses here and if users 825 * want the main TLB misses they can use a raw counter. 
826 */ 827 [C(OP_READ)] = { 828 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 829 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, 830 }, 831 [C(OP_WRITE)] = { 832 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 833 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, 834 }, 835 [C(OP_PREFETCH)] = { 836 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 837 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 838 }, 839 }, 840 [C(ITLB)] = { 841 [C(OP_READ)] = { 842 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 843 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, 844 }, 845 [C(OP_WRITE)] = { 846 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 847 [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, 848 }, 849 [C(OP_PREFETCH)] = { 850 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 851 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 852 }, 853 }, 854 [C(BPU)] = { 855 [C(OP_READ)] = { 856 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 857 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 858 }, 859 [C(OP_WRITE)] = { 860 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 861 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 862 }, 863 [C(OP_PREFETCH)] = { 864 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 865 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 866 }, 867 }, 868}; 869 870static inline unsigned long 871armv6_pmcr_read(void) 872{ 873 u32 val; 874 asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); 875 return val; 876} 877 878static inline void 879armv6_pmcr_write(unsigned long val) 880{ 881 asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); 882} 883 884#define ARMV6_PMCR_ENABLE (1 << 0) 885#define ARMV6_PMCR_CTR01_RESET (1 << 1) 886#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) 887#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) 888#define ARMV6_PMCR_COUNT0_IEN (1 << 4) 889#define ARMV6_PMCR_COUNT1_IEN (1 << 5) 890#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) 891#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) 892#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) 893#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) 894#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 895#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) 896#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 897#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) 898 899#define ARMV6_PMCR_OVERFLOWED_MASK \ 900 (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ 901 ARMV6_PMCR_CCOUNT_OVERFLOW) 902 903static inline int 904armv6_pmcr_has_overflowed(unsigned long pmcr) 905{ 906 return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); 907} 908 909static inline int 910armv6_pmcr_counter_has_overflowed(unsigned long pmcr, 911 enum armv6_counters counter) 912{ 913 int ret = 0; 914 915 if (ARMV6_CYCLE_COUNTER == counter) 916 ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; 917 else if (ARMV6_COUNTER0 == counter) 918 ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; 919 else if (ARMV6_COUNTER1 == counter) 920 ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; 921 else 922 WARN_ONCE(1, "invalid counter number (%d)\n", counter); 923 924 return ret; 925} 926 927static inline u32 928armv6pmu_read_counter(int counter) 929{ 930 unsigned long value = 0; 931 932 if (ARMV6_CYCLE_COUNTER == counter) 933 asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); 934 else if (ARMV6_COUNTER0 == counter) 935 asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); 936 else if (ARMV6_COUNTER1 == counter) 937 asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); 938 else 939 WARN_ONCE(1, "invalid counter number (%d)\n", counter); 940 941 return value; 942} 943 944static inline void 945armv6pmu_write_counter(int counter, 946 u32 value) 947{ 948 if (ARMV6_CYCLE_COUNTER 
		asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}

void
armv6pmu_enable_event(struct hw_perf_event *hwc,
		      int idx)
{
	unsigned long val, mask, evt, flags;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask = 0;
		evt = ARMV6_PMCR_CCOUNT_IEN;
	} else if (ARMV6_COUNTER0 == idx) {
		mask = ARMV6_PMCR_EVT_COUNT0_MASK;
		evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
		      ARMV6_PMCR_COUNT0_IEN;
	} else if (ARMV6_COUNTER1 == idx) {
		mask = ARMV6_PMCR_EVT_COUNT1_MASK;
		evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
		      ARMV6_PMCR_COUNT1_IEN;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Mask out the current event and set the counter to count the event
	 * that we're interested in.
	 */
	spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static irqreturn_t
armv6pmu_handle_irq(int irq_num,
		    void *dev)
{
	unsigned long pmcr = armv6_pmcr_read();
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	if (!armv6_pmcr_has_overflowed(pmcr))
		return IRQ_NONE;

	regs = get_irq_regs();

	/*
	 * The interrupts are cleared by writing the overflow flags back to
	 * the control register. All of the other bits don't have any effect
	 * if they are rewritten, so write the whole value back.
	 */
	armv6_pmcr_write(pmcr);

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
1047 */ 1048 perf_event_do_pending(); 1049 1050 return IRQ_HANDLED; 1051} 1052 1053static void 1054armv6pmu_start(void) 1055{ 1056 unsigned long flags, val; 1057 1058 spin_lock_irqsave(&pmu_lock, flags); 1059 val = armv6_pmcr_read(); 1060 val |= ARMV6_PMCR_ENABLE; 1061 armv6_pmcr_write(val); 1062 spin_unlock_irqrestore(&pmu_lock, flags); 1063} 1064 1065void 1066armv6pmu_stop(void) 1067{ 1068 unsigned long flags, val; 1069 1070 spin_lock_irqsave(&pmu_lock, flags); 1071 val = armv6_pmcr_read(); 1072 val &= ~ARMV6_PMCR_ENABLE; 1073 armv6_pmcr_write(val); 1074 spin_unlock_irqrestore(&pmu_lock, flags); 1075} 1076 1077static inline int 1078armv6pmu_event_map(int config) 1079{ 1080 int mapping = armv6_perf_map[config]; 1081 if (HW_OP_UNSUPPORTED == mapping) 1082 mapping = -EOPNOTSUPP; 1083 return mapping; 1084} 1085 1086static inline int 1087armv6mpcore_pmu_event_map(int config) 1088{ 1089 int mapping = armv6mpcore_perf_map[config]; 1090 if (HW_OP_UNSUPPORTED == mapping) 1091 mapping = -EOPNOTSUPP; 1092 return mapping; 1093} 1094 1095static u64 1096armv6pmu_raw_event(u64 config) 1097{ 1098 return config & 0xff; 1099} 1100 1101static int 1102armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, 1103 struct hw_perf_event *event) 1104{ 1105 /* Always place a cycle counter into the cycle counter. */ 1106 if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { 1107 if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) 1108 return -EAGAIN; 1109 1110 return ARMV6_CYCLE_COUNTER; 1111 } else { 1112 /* 1113 * For anything other than a cycle counter, try and use 1114 * counter0 and counter1. 1115 */ 1116 if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { 1117 return ARMV6_COUNTER1; 1118 } 1119 1120 if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { 1121 return ARMV6_COUNTER0; 1122 } 1123 1124 /* The counters are all in use. */ 1125 return -EAGAIN; 1126 } 1127} 1128 1129static void 1130armv6pmu_disable_event(struct hw_perf_event *hwc, 1131 int idx) 1132{ 1133 unsigned long val, mask, evt, flags; 1134 1135 if (ARMV6_CYCLE_COUNTER == idx) { 1136 mask = ARMV6_PMCR_CCOUNT_IEN; 1137 evt = 0; 1138 } else if (ARMV6_COUNTER0 == idx) { 1139 mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; 1140 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; 1141 } else if (ARMV6_COUNTER1 == idx) { 1142 mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; 1143 evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; 1144 } else { 1145 WARN_ONCE(1, "invalid counter number (%d)\n", idx); 1146 return; 1147 } 1148 1149 /* 1150 * Mask out the current event and set the counter to count the number 1151 * of ETM bus signal assertion cycles. The external reporting should 1152 * be disabled and so this should never increment. 1153 */ 1154 spin_lock_irqsave(&pmu_lock, flags); 1155 val = armv6_pmcr_read(); 1156 val &= ~mask; 1157 val |= evt; 1158 armv6_pmcr_write(val); 1159 spin_unlock_irqrestore(&pmu_lock, flags); 1160} 1161 1162static void 1163armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, 1164 int idx) 1165{ 1166 unsigned long val, mask, flags, evt = 0; 1167 1168 if (ARMV6_CYCLE_COUNTER == idx) { 1169 mask = ARMV6_PMCR_CCOUNT_IEN; 1170 } else if (ARMV6_COUNTER0 == idx) { 1171 mask = ARMV6_PMCR_COUNT0_IEN; 1172 } else if (ARMV6_COUNTER1 == idx) { 1173 mask = ARMV6_PMCR_COUNT1_IEN; 1174 } else { 1175 WARN_ONCE(1, "invalid counter number (%d)\n", idx); 1176 return; 1177 } 1178 1179 /* 1180 * Unlike UP ARMv6, we don't have a way of stopping the counters. 
	spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static const struct arm_pmu armv6pmu = {
	.id = ARM_PERF_PMU_ID_V6,
	.handle_irq = armv6pmu_handle_irq,
	.enable = armv6pmu_enable_event,
	.disable = armv6pmu_disable_event,
	.event_map = armv6pmu_event_map,
	.raw_event = armv6pmu_raw_event,
	.read_counter = armv6pmu_read_counter,
	.write_counter = armv6pmu_write_counter,
	.get_event_idx = armv6pmu_get_event_idx,
	.start = armv6pmu_start,
	.stop = armv6pmu_stop,
	.num_events = 3,
	.max_period = (1LLU << 32) - 1,
};

/*
 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
 * that some of the events have different enumerations and that there is no
 * *hack* to stop the programmable counters. To stop the counters we simply
 * disable the interrupt reporting and update the event. When unthrottling we
 * reset the period and enable the interrupt reporting.
 */
static const struct arm_pmu armv6mpcore_pmu = {
	.id = ARM_PERF_PMU_ID_V6MP,
	.handle_irq = armv6pmu_handle_irq,
	.enable = armv6pmu_enable_event,
	.disable = armv6mpcore_pmu_disable_event,
	.event_map = armv6mpcore_pmu_event_map,
	.raw_event = armv6pmu_raw_event,
	.read_counter = armv6pmu_read_counter,
	.write_counter = armv6pmu_write_counter,
	.get_event_idx = armv6pmu_get_event_idx,
	.start = armv6pmu_start,
	.stop = armv6pmu_stop,
	.num_events = 3,
	.max_period = (1LLU << 32) - 1,
};

/*
 * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
 *
 * Copied from ARMv6 code, with the low level code inspired
 * by the ARMv7 OProfile code.
 *
 * Cortex-A8 has up to 4 configurable performance counters and
 * a single cycle counter.
 * Cortex-A9 has up to 31 configurable performance counters and
 * a single cycle counter.
 *
 * All counters can be enabled/disabled and IRQ masked separately. The cycle
 * counter can be reset separately from the event counters, which are reset
 * as a group.
 */

/* Common ARMv7 event types */
enum armv7_perf_types {
	ARMV7_PERFCTR_PMNC_SW_INCR = 0x00,
	ARMV7_PERFCTR_IFETCH_MISS = 0x01,
	ARMV7_PERFCTR_ITLB_MISS = 0x02,
	ARMV7_PERFCTR_DCACHE_REFILL = 0x03,
	ARMV7_PERFCTR_DCACHE_ACCESS = 0x04,
	ARMV7_PERFCTR_DTLB_REFILL = 0x05,
	ARMV7_PERFCTR_DREAD = 0x06,
	ARMV7_PERFCTR_DWRITE = 0x07,

	ARMV7_PERFCTR_EXC_TAKEN = 0x09,
	ARMV7_PERFCTR_EXC_EXECUTED = 0x0A,
	ARMV7_PERFCTR_CID_WRITE = 0x0B,
	/*
	 * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS.
	 * It counts:
	 *	- all branch instructions,
	 *	- instructions that explicitly write the PC,
	 *	- exception generating instructions.
	 */
1264 */ 1265 ARMV7_PERFCTR_PC_WRITE = 0x0C, 1266 ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, 1267 ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, 1268 ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, 1269 ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, 1270 1271 ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, 1272 1273 ARMV7_PERFCTR_CPU_CYCLES = 0xFF 1274}; 1275 1276/* ARMv7 Cortex-A8 specific event types */ 1277enum armv7_a8_perf_types { 1278 ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, 1279 1280 ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, 1281 1282 ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, 1283 ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, 1284 ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, 1285 ARMV7_PERFCTR_L2_ACCESS = 0x43, 1286 ARMV7_PERFCTR_L2_CACH_MISS = 0x44, 1287 ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, 1288 ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, 1289 ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, 1290 ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, 1291 ARMV7_PERFCTR_L1_DATA_MISS = 0x49, 1292 ARMV7_PERFCTR_L1_INST_MISS = 0x4A, 1293 ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, 1294 ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, 1295 ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, 1296 ARMV7_PERFCTR_L2_NEON = 0x4E, 1297 ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, 1298 ARMV7_PERFCTR_L1_INST = 0x50, 1299 ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, 1300 ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, 1301 ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, 1302 ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, 1303 ARMV7_PERFCTR_OP_EXECUTED = 0x55, 1304 ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, 1305 ARMV7_PERFCTR_CYCLES_INST = 0x57, 1306 ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, 1307 ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, 1308 ARMV7_PERFCTR_NEON_CYCLES = 0x5A, 1309 1310 ARMV7_PERFCTR_PMU0_EVENTS = 0x70, 1311 ARMV7_PERFCTR_PMU1_EVENTS = 0x71, 1312 ARMV7_PERFCTR_PMU_EVENTS = 0x72, 1313}; 1314 1315/* ARMv7 Cortex-A9 specific event types */ 1316enum armv7_a9_perf_types { 1317 ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, 1318 ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, 1319 ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, 1320 1321 ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, 1322 ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, 1323 1324 ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, 1325 ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, 1326 ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, 1327 ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, 1328 ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, 1329 ARMV7_PERFCTR_DATA_EVICTION = 0x65, 1330 ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, 1331 ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, 1332 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, 1333 1334 ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, 1335 1336 ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, 1337 ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, 1338 ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, 1339 ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, 1340 ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, 1341 1342 ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, 1343 ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, 1344 ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, 1345 ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, 1346 ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, 1347 ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, 1348 ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, 1349 1350 ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, 1351 ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, 1352 1353 ARMV7_PERFCTR_ISB_INST = 0x90, 1354 ARMV7_PERFCTR_DSB_INST = 0x91, 1355 ARMV7_PERFCTR_DMB_INST = 0x92, 1356 ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, 1357 1358 ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, 
	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1,
	ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2,
	ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3,
	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4,
	ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5
};

/*
 * Cortex-A8 HW events mapping
 *
 * The hardware events that we support. We do support cache operations but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
	[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
	[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
	[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
};

static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		/*
		 * The performance counters don't differentiate between read
		 * and write accesses/misses so this isn't strictly correct,
		 * but it's the best we can do. Writes and reads get
		 * combined.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
			[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
			[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
			[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
			[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		/*
		 * Only ITLB misses and DTLB refills are supported.
		 * If users want the DTLB refills misses a raw counter
		 * must be used.
		 */
1439 */ 1440 [C(OP_READ)] = { 1441 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1442 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1443 }, 1444 [C(OP_WRITE)] = { 1445 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1446 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1447 }, 1448 [C(OP_PREFETCH)] = { 1449 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1450 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1451 }, 1452 }, 1453 [C(ITLB)] = { 1454 [C(OP_READ)] = { 1455 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1456 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1457 }, 1458 [C(OP_WRITE)] = { 1459 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1460 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1461 }, 1462 [C(OP_PREFETCH)] = { 1463 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1464 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1465 }, 1466 }, 1467 [C(BPU)] = { 1468 [C(OP_READ)] = { 1469 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1470 [C(RESULT_MISS)] 1471 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1472 }, 1473 [C(OP_WRITE)] = { 1474 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1475 [C(RESULT_MISS)] 1476 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1477 }, 1478 [C(OP_PREFETCH)] = { 1479 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1480 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1481 }, 1482 }, 1483}; 1484 1485/* 1486 * Cortex-A9 HW events mapping 1487 */ 1488static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { 1489 [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, 1490 [PERF_COUNT_HW_INSTRUCTIONS] = 1491 ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, 1492 [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, 1493 [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, 1494 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, 1495 [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1496 [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, 1497}; 1498 1499static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 1500 [PERF_COUNT_HW_CACHE_OP_MAX] 1501 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 1502 [C(L1D)] = { 1503 /* 1504 * The performance counters don't differentiate between read 1505 * and write accesses/misses so this isn't strictly correct, 1506 * but it's the best we can do. Writes and reads get 1507 * combined. 
1508 */ 1509 [C(OP_READ)] = { 1510 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 1511 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 1512 }, 1513 [C(OP_WRITE)] = { 1514 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 1515 [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 1516 }, 1517 [C(OP_PREFETCH)] = { 1518 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1519 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1520 }, 1521 }, 1522 [C(L1I)] = { 1523 [C(OP_READ)] = { 1524 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1525 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, 1526 }, 1527 [C(OP_WRITE)] = { 1528 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1529 [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, 1530 }, 1531 [C(OP_PREFETCH)] = { 1532 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1533 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1534 }, 1535 }, 1536 [C(LL)] = { 1537 [C(OP_READ)] = { 1538 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1539 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1540 }, 1541 [C(OP_WRITE)] = { 1542 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1543 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1544 }, 1545 [C(OP_PREFETCH)] = { 1546 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1547 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1548 }, 1549 }, 1550 [C(DTLB)] = { 1551 /* 1552 * Only ITLB misses and DTLB refills are supported. 1553 * If users want the DTLB refills misses a raw counter 1554 * must be used. 1555 */ 1556 [C(OP_READ)] = { 1557 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1558 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1559 }, 1560 [C(OP_WRITE)] = { 1561 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1562 [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1563 }, 1564 [C(OP_PREFETCH)] = { 1565 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1566 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1567 }, 1568 }, 1569 [C(ITLB)] = { 1570 [C(OP_READ)] = { 1571 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1572 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1573 }, 1574 [C(OP_WRITE)] = { 1575 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1576 [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1577 }, 1578 [C(OP_PREFETCH)] = { 1579 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1580 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1581 }, 1582 }, 1583 [C(BPU)] = { 1584 [C(OP_READ)] = { 1585 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1586 [C(RESULT_MISS)] 1587 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1588 }, 1589 [C(OP_WRITE)] = { 1590 [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1591 [C(RESULT_MISS)] 1592 = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1593 }, 1594 [C(OP_PREFETCH)] = { 1595 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1596 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1597 }, 1598 }, 1599}; 1600 1601/* 1602 * Perf Events counters 1603 */ 1604enum armv7_counters { 1605 ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ 1606 ARMV7_COUNTER0 = 2, /* First event counter */ 1607}; 1608 1609/* 1610 * The cycle counter is ARMV7_CYCLE_COUNTER. 1611 * The first event counter is ARMV7_COUNTER0. 1612 * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). 
1613 */ 1614#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) 1615 1616/* 1617 * ARMv7 low level PMNC access 1618 */ 1619 1620/* 1621 * Per-CPU PMNC: config reg 1622 */ 1623#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ 1624#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ 1625#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ 1626#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ 1627#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ 1628#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ 1629#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ 1630#define ARMV7_PMNC_N_MASK 0x1f 1631#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ 1632 1633/* 1634 * Available counters 1635 */ 1636#define ARMV7_CNT0 0 /* First event counter */ 1637#define ARMV7_CCNT 31 /* Cycle counter */ 1638 1639/* Perf Event to low level counters mapping */ 1640#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) 1641 1642/* 1643 * CNTENS: counters enable reg 1644 */ 1645#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1646#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) 1647 1648/* 1649 * CNTENC: counters disable reg 1650 */ 1651#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1652#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) 1653 1654/* 1655 * INTENS: counters overflow interrupt enable reg 1656 */ 1657#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1658#define ARMV7_INTENS_C (1 << ARMV7_CCNT) 1659 1660/* 1661 * INTENC: counters overflow interrupt disable reg 1662 */ 1663#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1664#define ARMV7_INTENC_C (1 << ARMV7_CCNT) 1665 1666/* 1667 * EVTSEL: Event selection reg 1668 */ 1669#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ 1670 1671/* 1672 * SELECT: Counter selection reg 1673 */ 1674#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ 1675 1676/* 1677 * FLAG: counters overflow flag status reg 1678 */ 1679#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1680#define ARMV7_FLAG_C (1 << ARMV7_CCNT) 1681#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ 1682#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK 1683 1684static inline unsigned long armv7_pmnc_read(void) 1685{ 1686 u32 val; 1687 asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); 1688 return val; 1689} 1690 1691static inline void armv7_pmnc_write(unsigned long val) 1692{ 1693 val &= ARMV7_PMNC_MASK; 1694 asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); 1695} 1696 1697static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) 1698{ 1699 return pmnc & ARMV7_OVERFLOWED_MASK; 1700} 1701 1702static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, 1703 enum armv7_counters counter) 1704{ 1705 int ret; 1706 1707 if (counter == ARMV7_CYCLE_COUNTER) 1708 ret = pmnc & ARMV7_FLAG_C; 1709 else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) 1710 ret = pmnc & ARMV7_FLAG_P(counter); 1711 else 1712 pr_err("CPU%u checking wrong counter %d overflow status\n", 1713 smp_processor_id(), counter); 1714 1715 return ret; 1716} 1717 1718static inline int armv7_pmnc_select_counter(unsigned int idx) 1719{ 1720 u32 val; 1721 1722 if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { 1723 pr_err("CPU%u selecting wrong PMNC counter" 1724 " %d\n", smp_processor_id(), idx); 1725 return -1; 1726 } 1727 1728 val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; 1729 asm volatile("mcr 

	return idx;
}

static inline u32 armv7pmu_read_counter(int idx)
{
	unsigned long value = 0;

	if (idx == ARMV7_CYCLE_COUNTER)
		asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
		if (armv7_pmnc_select_counter(idx) == idx)
			asm volatile("mrc p15, 0, %0, c9, c13, 2"
				     : "=r" (value));
	} else
		pr_err("CPU%u reading wrong counter %d\n",
		       smp_processor_id(), idx);

	return value;
}

static inline void armv7pmu_write_counter(int idx, u32 value)
{
	if (idx == ARMV7_CYCLE_COUNTER)
		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
	else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
		if (armv7_pmnc_select_counter(idx) == idx)
			asm volatile("mcr p15, 0, %0, c9, c13, 2"
				     : : "r" (value));
	} else
		pr_err("CPU%u writing wrong counter %d\n",
		       smp_processor_id(), idx);
}

static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
{
	if (armv7_pmnc_select_counter(idx) == idx) {
		val &= ARMV7_EVTSEL_MASK;
		asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
	}
}

static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
{
	u32 val;

	if ((idx != ARMV7_CYCLE_COUNTER) &&
	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
		pr_err("CPU%u enabling wrong PMNC counter"
		       " %d\n", smp_processor_id(), idx);
		return -1;
	}

	if (idx == ARMV7_CYCLE_COUNTER)
		val = ARMV7_CNTENS_C;
	else
		val = ARMV7_CNTENS_P(idx);

	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));

	return idx;
}

static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
{
	u32 val;

	if ((idx != ARMV7_CYCLE_COUNTER) &&
	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
		pr_err("CPU%u disabling wrong PMNC counter"
		       " %d\n", smp_processor_id(), idx);
		return -1;
	}

	if (idx == ARMV7_CYCLE_COUNTER)
		val = ARMV7_CNTENC_C;
	else
		val = ARMV7_CNTENC_P(idx);

	asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));

	return idx;
}

static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
{
	u32 val;

	if ((idx != ARMV7_CYCLE_COUNTER) &&
	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
		pr_err("CPU%u enabling wrong PMNC counter"
		       " interrupt enable %d\n", smp_processor_id(), idx);
		return -1;
	}

	if (idx == ARMV7_CYCLE_COUNTER)
		val = ARMV7_INTENS_C;
	else
		val = ARMV7_INTENS_P(idx);

	asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));

	return idx;
}

static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
{
	u32 val;

	if ((idx != ARMV7_CYCLE_COUNTER) &&
	    ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
		pr_err("CPU%u disabling wrong PMNC counter"
		       " interrupt enable %d\n", smp_processor_id(), idx);
		return -1;
	}

	if (idx == ARMV7_CYCLE_COUNTER)
		val = ARMV7_INTENC_C;
	else
		val = ARMV7_INTENC_P(idx);

	asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));

	return idx;
}

static inline u32 armv7_pmnc_getreset_flags(void)
{
	u32 val;

	/* Read */
	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));

	/* Write to clear flags */
	val &= ARMV7_FLAG_MASK;
	asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));

	return val;
}

#ifdef DEBUG
static void armv7_pmnc_dump_regs(void)
{
	u32 val;
	unsigned int cnt;

	printk(KERN_INFO "PMNC registers dump:\n");

	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
	printk(KERN_INFO "PMNC  =0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
	printk(KERN_INFO "CNTENS=0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
	printk(KERN_INFO "INTENS=0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
	printk(KERN_INFO "FLAGS =0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
	printk(KERN_INFO "SELECT=0x%08x\n", val);

	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
	printk(KERN_INFO "CCNT  =0x%08x\n", val);

	for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) {
		armv7_pmnc_select_counter(cnt);
		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
		       cnt - ARMV7_EVENT_CNT_TO_CNTx, val);
		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
		       cnt - ARMV7_EVENT_CNT_TO_CNTx, val);
	}
}
#endif

void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags;

	/*
	 * Enable counter and interrupt, and set the counter to count
	 * the event that we're interested in.
	 */
	spin_lock_irqsave(&pmu_lock, flags);

	/*
	 * Disable counter
	 */
	armv7_pmnc_disable_counter(idx);

	/*
	 * Set event (if destined for PMNx counters)
	 * We don't need to set the event if it's a cycle count
	 */
	if (idx != ARMV7_CYCLE_COUNTER)
		armv7_pmnc_write_evtsel(idx, hwc->config_base);

	/*
	 * Enable interrupt for this counter
	 */
	armv7_pmnc_enable_intens(idx);

	/*
	 * Enable counter
	 */
	armv7_pmnc_enable_counter(idx);

	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags;

	/*
	 * Disable counter and interrupt
	 */
	spin_lock_irqsave(&pmu_lock, flags);

	/*
	 * Disable counter
	 */
	armv7_pmnc_disable_counter(idx);

	/*
	 * Disable interrupt for this counter
	 */
	armv7_pmnc_disable_intens(idx);

	spin_unlock_irqrestore(&pmu_lock, flags);
}

static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
	unsigned long pmnc;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	/*
	 * Get and reset the IRQ flags
	 */
	pmnc = armv7_pmnc_getreset_flags();

	/*
	 * Did an overflow occur?
	 */
static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
{
	unsigned long pmnc;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	/*
	 * Get and reset the IRQ flags
	 */
	pmnc = armv7_pmnc_getreset_flags();

	/*
	 * Did an overflow occur?
	 */
	if (!armv7_pmnc_has_overflowed(pmnc))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	perf_event_do_pending();

	return IRQ_HANDLED;
}

static void armv7pmu_start(void)
{
	unsigned long flags;

	spin_lock_irqsave(&pmu_lock, flags);
	/* Enable all counters */
	armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void armv7pmu_stop(void)
{
	unsigned long flags;

	spin_lock_irqsave(&pmu_lock, flags);
	/* Disable all counters */
	armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static inline int armv7_a8_pmu_event_map(int config)
{
	int mapping = armv7_a8_perf_map[config];
	if (HW_OP_UNSUPPORTED == mapping)
		mapping = -EOPNOTSUPP;
	return mapping;
}

static inline int armv7_a9_pmu_event_map(int config)
{
	int mapping = armv7_a9_perf_map[config];
	if (HW_OP_UNSUPPORTED == mapping)
		mapping = -EOPNOTSUPP;
	return mapping;
}

static u64 armv7pmu_raw_event(u64 config)
{
	return config & 0xff;
}

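/*
 * Counter allocation: the CPU cycle event always lives in the dedicated
 * cycle counter; every other event takes the first free CNTx counter.
 */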
static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
				  struct hw_perf_event *event)
{
	int idx;

	/* Always place a cycle counter into the cycle counter. */
	if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
		if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
			return -EAGAIN;

		return ARMV7_CYCLE_COUNTER;
	} else {
		/*
		 * For anything other than a cycle counter, try to use
		 * the event counters.
		 */
		for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
			if (!test_and_set_bit(idx, cpuc->used_mask))
				return idx;
		}

		/* The counters are all in use. */
		return -EAGAIN;
	}
}

static struct arm_pmu armv7pmu = {
	.handle_irq	= armv7pmu_handle_irq,
	.enable		= armv7pmu_enable_event,
	.disable	= armv7pmu_disable_event,
	.raw_event	= armv7pmu_raw_event,
	.read_counter	= armv7pmu_read_counter,
	.write_counter	= armv7pmu_write_counter,
	.get_event_idx	= armv7pmu_get_event_idx,
	.start		= armv7pmu_start,
	.stop		= armv7pmu_stop,
	.max_period	= (1LLU << 32) - 1,
};

static u32 __init armv7_reset_read_pmnc(void)
{
	u32 nb_cnt;

	/* Initialize & Reset PMNC: C and P bits */
	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);

	/* Read the number of CNTx counters supported from PMNC */
	nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK;

	/* Add the CPU cycles counter and return */
	return nb_cnt + 1;
}

/*
 * ARMv5 [xscale] Performance counter handling code.
 *
 * Based on xscale OProfile code.
 *
 * There are two variants of the xscale PMU that we support:
 *	- xscale1pmu: 2 event counters and a cycle counter
 *	- xscale2pmu: 4 event counters and a cycle counter
 * The two variants share event definitions, but have different
 * PMU structures.
 */

enum xscale_perf_types {
	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
	XSCALE_PERFCTR_DATA_STALL		= 0x02,
	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
	XSCALE_PERFCTR_BRANCH			= 0x05,
	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0A,
	XSCALE_PERFCTR_DCACHE_MISS		= 0x0B,
	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0C,
	XSCALE_PERFCTR_PC_CHANGED		= 0x0D,
	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
	XSCALE_PERFCTR_BCU_FULL			= 0x11,
	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
	XSCALE_PERFCTR_RMW			= 0x16,
	/* XSCALE_PERFCTR_CCNT is not hardware defined */
	XSCALE_PERFCTR_CCNT			= 0xFE,
	XSCALE_PERFCTR_UNUSED			= 0xFF,
};

enum xscale_counters {
	XSCALE_CYCLE_COUNTER	= 1,
	XSCALE_COUNTER0,
	XSCALE_COUNTER1,
	XSCALE_COUNTER2,
	XSCALE_COUNTER3,
};

static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = XSCALE_PERFCTR_CCNT,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = XSCALE_PERFCTR_INSTRUCTION,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = XSCALE_PERFCTR_BRANCH_MISS,
	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
};

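/*
 * Generic cache events are mapped onto the D-cache, I-cache and TLB miss
 * counters where the hardware provides them; combinations the PMU cannot
 * count are marked CACHE_OP_UNSUPPORTED.
 */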
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					   [PERF_COUNT_HW_CACHE_OP_MAX]
					   [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= XSCALE_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DCACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ICACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_DTLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= XSCALE_PERFCTR_ITLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};

#define XSCALE_PMU_ENABLE	0x001
#define XSCALE_PMN_RESET	0x002
#define XSCALE_CCNT_RESET	0x004
#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
#define XSCALE_PMU_CNT64	0x008

static inline int
xscalepmu_event_map(int config)
{
	int mapping = xscale_perf_map[config];
	if (HW_OP_UNSUPPORTED == mapping)
		mapping = -EOPNOTSUPP;
	return mapping;
}

static u64
xscalepmu_raw_event(u64 config)
{
	return config & 0xff;
}

#define XSCALE1_OVERFLOWED_MASK	0x700
#define XSCALE1_CCOUNT_OVERFLOW	0x400
#define XSCALE1_COUNT0_OVERFLOW	0x100
#define XSCALE1_COUNT1_OVERFLOW	0x200
#define XSCALE1_CCOUNT_INT_EN	0x040
#define XSCALE1_COUNT0_INT_EN	0x010
#define XSCALE1_COUNT1_INT_EN	0x020
#define XSCALE1_COUNT0_EVT_SHFT	12
#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT	20
#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)

static inline u32
xscale1pmu_read_pmnc(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
	return val;
}

static inline void
xscale1pmu_write_pmnc(u32 val)
{
	/* upper 4bits and 7, 11 are write-as-0 */
	val &= 0xffff77f;
	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
}

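/*
 * On xscale1 the overflow flags for the cycle counter and the two event
 * counters live in the PMNC itself (see the *_OVERFLOW definitions above),
 * so the interrupt handler can test them straight from the value it read.
 */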
static inline int
xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
				    enum xscale_counters counter)
{
	int ret = 0;

	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
		break;
	case XSCALE_COUNTER0:
		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
		break;
	case XSCALE_COUNTER1:
		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
	}

	return ret;
}

static irqreturn_t
xscale1pmu_handle_irq(int irq_num, void *dev)
{
	unsigned long pmnc;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	pmnc = xscale1pmu_read_pmnc();

	/*
	 * Write the value back to clear the overflow flags. Overflow
	 * flags remain in pmnc for use below. We also disable the PMU
	 * while we process the interrupt.
	 */
	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);

	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
		return IRQ_NONE;

	regs = get_irq_regs();

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	perf_event_do_pending();

	/*
	 * Re-enable the PMU.
	 */
	pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
	xscale1pmu_write_pmnc(pmnc);

	return IRQ_HANDLED;
}

static void
xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long val, mask, evt, flags;

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		mask = 0;
		evt = XSCALE1_CCOUNT_INT_EN;
		break;
	case XSCALE_COUNTER0:
		mask = XSCALE1_COUNT0_EVT_MASK;
		evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
			XSCALE1_COUNT0_INT_EN;
		break;
	case XSCALE_COUNTER1:
		mask = XSCALE1_COUNT1_EVT_MASK;
		evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
			XSCALE1_COUNT1_INT_EN;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val &= ~mask;
	val |= evt;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long val, mask, evt, flags;

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		mask = XSCALE1_CCOUNT_INT_EN;
		evt = 0;
		break;
	case XSCALE_COUNTER0:
		mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
		break;
	case XSCALE_COUNTER1:
		mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
		evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val &= ~mask;
	val |= evt;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static int
xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
			 struct hw_perf_event *event)
{
	if (XSCALE_PERFCTR_CCNT == event->config_base) {
		if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
			return -EAGAIN;

		return XSCALE_CYCLE_COUNTER;
	} else {
		if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
			return XSCALE_COUNTER1;

		if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
			return XSCALE_COUNTER0;

		return -EAGAIN;
	}
}

static void
xscale1pmu_start(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val |= XSCALE_PMU_ENABLE;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale1pmu_stop(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale1pmu_read_pmnc();
	val &= ~XSCALE_PMU_ENABLE;
	xscale1pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static inline u32
xscale1pmu_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
		break;
	}

	return val;
}

static inline void
xscale1pmu_write_counter(int counter, u32 val)
{
	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
		break;
	}
}

static const struct arm_pmu xscale1pmu = {
	.id		= ARM_PERF_PMU_ID_XSCALE1,
	.handle_irq	= xscale1pmu_handle_irq,
	.enable		= xscale1pmu_enable_event,
	.disable	= xscale1pmu_disable_event,
	.event_map	= xscalepmu_event_map,
	.raw_event	= xscalepmu_raw_event,
	.read_counter	= xscale1pmu_read_counter,
	.write_counter	= xscale1pmu_write_counter,
	.get_event_idx	= xscale1pmu_get_event_idx,
	.start		= xscale1pmu_start,
	.stop		= xscale1pmu_stop,
	.num_events	= 3,
	.max_period	= (1LLU << 32) - 1,
};

#define XSCALE2_OVERFLOWED_MASK	0x01f
#define XSCALE2_CCOUNT_OVERFLOW	0x001
#define XSCALE2_COUNT0_OVERFLOW	0x002
#define XSCALE2_COUNT1_OVERFLOW	0x004
#define XSCALE2_COUNT2_OVERFLOW	0x008
#define XSCALE2_COUNT3_OVERFLOW	0x010
#define XSCALE2_CCOUNT_INT_EN	0x001
#define XSCALE2_COUNT0_INT_EN	0x002
#define XSCALE2_COUNT1_INT_EN	0x004
#define XSCALE2_COUNT2_INT_EN	0x008
#define XSCALE2_COUNT3_INT_EN	0x010
#define XSCALE2_COUNT0_EVT_SHFT	0
#define XSCALE2_COUNT0_EVT_MASK	(0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT	8
#define XSCALE2_COUNT1_EVT_MASK	(0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT	16
#define XSCALE2_COUNT2_EVT_MASK	(0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT	24
#define XSCALE2_COUNT3_EVT_MASK	(0xff << XSCALE2_COUNT3_EVT_SHFT)

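/*
 * Unlike xscale1, the xscale2 PMU splits its state across several CP14
 * registers: a control register, an interrupt enable register, an overflow
 * flag register and an event select register, each with its own accessor
 * below.
 */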
static inline u32
xscale2pmu_read_pmnc(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
	/* bits 1-2 and 4-23 are read-unpredictable */
	return val & 0xff000009;
}

static inline void
xscale2pmu_write_pmnc(u32 val)
{
	/* bits 4-23 are write-as-0, 24-31 are write ignored */
	val &= 0xf;
	asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
}

static inline u32
xscale2pmu_read_overflow_flags(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
	return val;
}

static inline void
xscale2pmu_write_overflow_flags(u32 val)
{
	asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
}

static inline u32
xscale2pmu_read_event_select(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
	return val;
}

static inline void
xscale2pmu_write_event_select(u32 val)
{
	asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r" (val));
}

static inline u32
xscale2pmu_read_int_enable(void)
{
	u32 val;
	asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
	return val;
}

static void
xscale2pmu_write_int_enable(u32 val)
{
	asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
}

static inline int
xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
				    enum xscale_counters counter)
{
	int ret = 0;

	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
		break;
	case XSCALE_COUNTER0:
		ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
		break;
	case XSCALE_COUNTER1:
		ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
		break;
	case XSCALE_COUNTER2:
		ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
		break;
	case XSCALE_COUNTER3:
		ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
	}

	return ret;
}

static irqreturn_t
xscale2pmu_handle_irq(int irq_num, void *dev)
{
	unsigned long pmnc, of_flags;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	/* Disable the PMU. */
	pmnc = xscale2pmu_read_pmnc();
	xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);

	/* Check the overflow flag register. */
	of_flags = xscale2pmu_read_overflow_flags();
	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
		return IRQ_NONE;

	/* Clear the overflow bits. */
	xscale2pmu_write_overflow_flags(of_flags);

	regs = get_irq_regs();

	perf_sample_data_init(&data, 0);

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/* The overflow flags, not the PMNC, tell us which counter hit. */
		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	perf_event_do_pending();

	/*
	 * Re-enable the PMU.
	 */
	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
	xscale2pmu_write_pmnc(pmnc);

	return IRQ_HANDLED;
}

static void
xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags, ien, evtsel;

	ien = xscale2pmu_read_int_enable();
	evtsel = xscale2pmu_read_event_select();

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		ien |= XSCALE2_CCOUNT_INT_EN;
		break;
	case XSCALE_COUNTER0:
		ien |= XSCALE2_COUNT0_INT_EN;
		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
		break;
	case XSCALE_COUNTER1:
		ien |= XSCALE2_COUNT1_INT_EN;
		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
		break;
	case XSCALE_COUNTER2:
		ien |= XSCALE2_COUNT2_INT_EN;
		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
		break;
	case XSCALE_COUNTER3:
		ien |= XSCALE2_COUNT3_INT_EN;
		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	xscale2pmu_write_event_select(evtsel);
	xscale2pmu_write_int_enable(ien);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	unsigned long flags, ien, evtsel;

	ien = xscale2pmu_read_int_enable();
	evtsel = xscale2pmu_read_event_select();

	switch (idx) {
	case XSCALE_CYCLE_COUNTER:
		ien &= ~XSCALE2_CCOUNT_INT_EN;
		break;
	case XSCALE_COUNTER0:
		ien &= ~XSCALE2_COUNT0_INT_EN;
		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
		break;
	case XSCALE_COUNTER1:
		ien &= ~XSCALE2_COUNT1_INT_EN;
		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
		break;
	case XSCALE_COUNTER2:
		ien &= ~XSCALE2_COUNT2_INT_EN;
		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
		break;
	case XSCALE_COUNTER3:
		ien &= ~XSCALE2_COUNT3_INT_EN;
		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
		break;
	default:
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	spin_lock_irqsave(&pmu_lock, flags);
	xscale2pmu_write_event_select(evtsel);
	xscale2pmu_write_int_enable(ien);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static int
xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
			 struct hw_perf_event *event)
{
	int idx = xscale1pmu_get_event_idx(cpuc, event);
	if (idx >= 0)
		goto out;

	if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
		idx = XSCALE_COUNTER3;
	else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
		idx = XSCALE_COUNTER2;
out:
	return idx;
}

static void
xscale2pmu_start(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
	val |= XSCALE_PMU_ENABLE;
	xscale2pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
xscale2pmu_stop(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = xscale2pmu_read_pmnc();
	val &= ~XSCALE_PMU_ENABLE;
	xscale2pmu_write_pmnc(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static inline u32
xscale2pmu_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER2:
		asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
		break;
	case XSCALE_COUNTER3:
		asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
		break;
	}

	return val;
}

static inline void
xscale2pmu_write_counter(int counter, u32 val)
{
	switch (counter) {
	case XSCALE_CYCLE_COUNTER:
		asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER0:
		asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER1:
		asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER2:
		asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
		break;
	case XSCALE_COUNTER3:
		asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
		break;
	}
}

static const struct arm_pmu xscale2pmu = {
	.id		= ARM_PERF_PMU_ID_XSCALE2,
	.handle_irq	= xscale2pmu_handle_irq,
	.enable		= xscale2pmu_enable_event,
	.disable	= xscale2pmu_disable_event,
	.event_map	= xscalepmu_event_map,
	.raw_event	= xscalepmu_raw_event,
	.read_counter	= xscale2pmu_read_counter,
	.write_counter	= xscale2pmu_write_counter,
	.get_event_idx	= xscale2pmu_get_event_idx,
	.start		= xscale2pmu_start,
	.stop		= xscale2pmu_stop,
	.num_events	= 5,
	.max_period	= (1LLU << 32) - 1,
};

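/*
 * Probe the CPU ID register at boot and bind the matching PMU backend:
 * ARM Ltd parts (v6, v6mpcore, Cortex-A8/A9) or Intel xscale variants.
 * Anything else leaves hardware PMU support disabled.
 */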
static int __init
init_hw_perf_events(void)
{
	unsigned long cpuid = read_cpuid_id();
	unsigned long implementor = (cpuid & 0xFF000000) >> 24;
	unsigned long part_number = (cpuid & 0xFFF0);

	/* ARM Ltd CPUs. */
	if (0x41 == implementor) {
		switch (part_number) {
		case 0xB360:	/* ARM1136 */
		case 0xB560:	/* ARM1156 */
		case 0xB760:	/* ARM1176 */
			armpmu = &armv6pmu;
			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
				sizeof(armv6_perf_cache_map));
			perf_max_events = armv6pmu.num_events;
			break;
		case 0xB020:	/* ARM11mpcore */
			armpmu = &armv6mpcore_pmu;
			memcpy(armpmu_perf_cache_map,
			       armv6mpcore_perf_cache_map,
			       sizeof(armv6mpcore_perf_cache_map));
			perf_max_events = armv6mpcore_pmu.num_events;
			break;
		case 0xC080:	/* Cortex-A8 */
			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
			memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
				sizeof(armv7_a8_perf_cache_map));
			armv7pmu.event_map = armv7_a8_pmu_event_map;
			armpmu = &armv7pmu;

			/* Reset PMNC and read the number of CNTx counters
			   supported */
			armv7pmu.num_events = armv7_reset_read_pmnc();
			perf_max_events = armv7pmu.num_events;
			break;
		case 0xC090:	/* Cortex-A9 */
			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
			memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
				sizeof(armv7_a9_perf_cache_map));
			armv7pmu.event_map = armv7_a9_pmu_event_map;
			armpmu = &armv7pmu;

			/* Reset PMNC and read the number of CNTx counters
			   supported */
			armv7pmu.num_events = armv7_reset_read_pmnc();
			perf_max_events = armv7pmu.num_events;
			break;
		}
	/* Intel CPUs [xscale]. */
	} else if (0x69 == implementor) {
		part_number = (cpuid >> 13) & 0x7;
		switch (part_number) {
		case 1:
			armpmu = &xscale1pmu;
			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
				sizeof(xscale_perf_cache_map));
			perf_max_events = xscale1pmu.num_events;
			break;
		case 2:
			armpmu = &xscale2pmu;
			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
				sizeof(xscale_perf_cache_map));
			perf_max_events = xscale2pmu.num_events;
			break;
		}
	}

	if (armpmu) {
		pr_info("enabled with %s PMU driver, %d counters available\n",
			arm_pmu_names[armpmu->id], armpmu->num_events);
	} else {
		pr_info("no hardware support available\n");
		perf_max_events = -1;
	}

	return 0;
}
arch_initcall(init_hw_perf_events);

/*
 * Callchain handling code.
 */
static inline void
callchain_store(struct perf_callchain_entry *entry,
		u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

struct frame_tail {
	struct frame_tail *fp;
	unsigned long sp;
	unsigned long lr;
} __attribute__((packed));

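/*
 * The user stack is walked via these frame records: the frame pointer
 * addresses the word just above a saved {fp, sp, lr} triple, which is why
 * the walkers below step to "tail - 1" before reading each record.
 */
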
/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail *
user_backtrace(struct frame_tail *tail,
	       struct perf_callchain_entry *entry)
{
	struct frame_tail buftail;

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
		return NULL;
	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
		return NULL;

	callchain_store(entry, buftail.lr);

	/*
	 * Frame pointers should strictly progress back up the stack
	 * (towards higher addresses).
	 */
	if (tail >= buftail.fp)
		return NULL;

	return buftail.fp - 1;
}

static void
perf_callchain_user(struct pt_regs *regs,
		    struct perf_callchain_entry *entry)
{
	struct frame_tail *tail;

	callchain_store(entry, PERF_CONTEXT_USER);

	if (!user_mode(regs))
		regs = task_pt_regs(current);

	tail = (struct frame_tail *)regs->ARM_fp - 1;

	while (tail && !((unsigned long)tail & 0x3))
		tail = user_backtrace(tail, entry);
}

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
		void *data)
{
	struct perf_callchain_entry *entry = data;
	callchain_store(entry, fr->pc);
	return 0;
}

static void
perf_callchain_kernel(struct pt_regs *regs,
		      struct perf_callchain_entry *entry)
{
	struct stackframe fr;

	callchain_store(entry, PERF_CONTEXT_KERNEL);
	fr.fp = regs->ARM_fp;
	fr.sp = regs->ARM_sp;
	fr.lr = regs->ARM_lr;
	fr.pc = regs->ARM_pc;
	walk_stackframe(&fr, callchain_trace, entry);
}

static void
perf_do_callchain(struct pt_regs *regs,
		  struct perf_callchain_entry *entry)
{
	int is_user;

	if (!regs)
		return;

	is_user = user_mode(regs);

	if (!current || !current->pid)
		return;

	if (is_user && current->state != TASK_RUNNING)
		return;

	if (!is_user)
		perf_callchain_kernel(regs, entry);

	if (current->mm)
		perf_callchain_user(regs, entry);
}

static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);

struct perf_callchain_entry *
perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);

	entry->nr = 0;
	perf_do_callchain(regs, entry);
	return entry;
}