/**
 * @file op_model_xscale.c
 * XScale Performance Monitor Driver
 *
 * @remark Copyright 2000-2004 Deepak Saxena <dsaxena@mvista.com>
 * @remark Copyright 2000-2004 MontaVista Software Inc
 * @remark Copyright 2004 Dave Jiang <dave.jiang@intel.com>
 * @remark Copyright 2004 Intel Corporation
 * @remark Copyright 2004 Zwane Mwaikambo <zwane@arm.linux.org.uk>
 * @remark Copyright 2004 OProfile Authors
 *
 * @remark Read the file COPYING
 *
 * @author Zwane Mwaikambo
 */

/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>

#include <asm/system.h>

#include "op_counter.h"
#include "op_arm_model.h"

/* PMNC (performance monitor control register) bits */
#define PMU_ENABLE	0x001	/* Enable counters */
#define PMN_RESET	0x002	/* Reset event counters */
#define CCNT_RESET	0x004	/* Reset clock counter */
#define PMU_RESET	(CCNT_RESET | PMN_RESET)
#define PMU_CNT64	0x008	/* Make CCNT count every 64th cycle */

/* TODO do runtime detection */
#ifdef CONFIG_ARCH_IOP32X
#define XSCALE_PMU_IRQ	IRQ_IOP32X_CORE_PMU
#endif
#ifdef CONFIG_ARCH_IOP33X
#define XSCALE_PMU_IRQ	IRQ_IOP33X_CORE_PMU
#endif
#ifdef CONFIG_ARCH_PXA
#define XSCALE_PMU_IRQ	IRQ_PMU
#endif

/*
 * Different types of events that can be counted by the XScale PMU
 * as used by Oprofile userspace. Here primarily for documentation
 * purposes.
 */

#define EVT_ICACHE_MISS			0x00
#define EVT_ICACHE_NO_DELIVER		0x01
#define EVT_DATA_STALL			0x02
#define EVT_ITLB_MISS			0x03
#define EVT_DTLB_MISS			0x04
#define EVT_BRANCH			0x05
#define EVT_BRANCH_MISS			0x06
#define EVT_INSTRUCTION			0x07
#define EVT_DCACHE_FULL_STALL		0x08
#define EVT_DCACHE_FULL_STALL_CONTIG	0x09
#define EVT_DCACHE_ACCESS		0x0A
#define EVT_DCACHE_MISS			0x0B
#define EVT_DCACE_WRITE_BACK		0x0C	/* sic: historical typo, kept for compatibility */
#define EVT_PC_CHANGED			0x0D
#define EVT_BCU_REQUEST			0x10
#define EVT_BCU_FULL			0x11
#define EVT_BCU_DRAIN			0x12
#define EVT_BCU_ECC_NO_ELOG		0x14
#define EVT_BCU_1_BIT_ERR		0x15
#define EVT_RMW				0x16
/* EVT_CCNT is not hardware defined */
#define EVT_CCNT			0xFE
#define EVT_UNUSED			0xFF

/* Per-counter bookkeeping shared between setup and the IRQ handler. */
struct pmu_counter {
	volatile unsigned long ovf;		/* pending overflow count, set in IRQ context */
	unsigned long reset_counter;		/* value to reload after each overflow */
};

enum { CCNT, PMN0, PMN1, PMN2, PMN3, MAX_COUNTERS };

static struct pmu_counter results[MAX_COUNTERS];

/*
 * There are two versions of the PMU in current XScale processors
 * with differing register layouts and number of performance counters.
 * e.g. IOP32x is xsc1 whilst IOP33x is xsc2.
 * We detect which register layout to use in xscale_detect_pmu()
 */
enum { PMU_XSC1, PMU_XSC2 };

struct pmu_type {
	int id;					/* PMU_XSC1 or PMU_XSC2 */
	char *name;				/* name reported to oprofile userspace */
	int num_counters;			/* counters usable on this PMU version */
	unsigned int int_enable;		/* accumulated interrupt-enable mask */
	unsigned int cnt_ovf[MAX_COUNTERS];	/* per-counter overflow flag bit */
	unsigned int int_mask[MAX_COUNTERS];	/* per-counter interrupt enable bit */
};

static struct pmu_type pmu_parms[] = {
	{
		.id		= PMU_XSC1,
		.name		= "arm/xscale1",
		.num_counters	= 3,
		.int_mask	= { [PMN0] = 0x10, [PMN1] = 0x20,
				    [CCNT] = 0x40 },
		.cnt_ovf	= { [CCNT] = 0x400, [PMN0] = 0x100,
				    [PMN1] = 0x200},
	},
	{
		.id		= PMU_XSC2,
		.name		= "arm/xscale2",
		.num_counters	= 5,
		.int_mask	= { [CCNT] = 0x01, [PMN0] = 0x02,
				    [PMN1] = 0x04, [PMN2] = 0x08,
				    [PMN3] = 0x10 },
		.cnt_ovf	= { [CCNT] = 0x01, [PMN0] = 0x02,
				    [PMN1] = 0x04, [PMN2] = 0x08,
				    [PMN3] = 0x10 },
	},
};

/* Set once by xscale_detect_pmu(); selects the register layout everywhere below. */
static struct pmu_type *pmu;

/*
 * Write the PMNC register. The two PMU versions place PMNC at
 * different cp14 registers, and each has bits that must be
 * written as zero.
 */
static void write_pmnc(u32 val)
{
	if (pmu->id == PMU_XSC1) {
		/* upper 4bits and 7, 11 are write-as-0 */
		val &= 0xffff77f;
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
	} else {
		/* bits 4-23 are write-as-0, 24-31 are write ignored */
		val &= 0xf;
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
	}
}

/*
 * Read the PMNC register, masking off bits that are not
 * meaningful on read for the xsc2 layout.
 */
static u32 read_pmnc(void)
{
	u32 val;

	if (pmu->id == PMU_XSC1)
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
	else {
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
		/* bits 1-2 and 4-23 are read-unpredictable */
		val &= 0xff000009;
	}

	return val;
}

/* Read one raw counter value via the xsc1 cp14 register layout. */
static u32 __xsc1_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
		break;
	}
	return val;
}

/* Read one raw counter value via the xsc2 cp14 register layout. */
static u32 __xsc2_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
		break;
	case PMN2:
		__asm__ __volatile__ ("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
		break;
	case PMN3:
		__asm__ __volatile__ ("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
		break;
	}
	return val;
}

/* Dispatch a counter read to the correct PMU-version helper. */
static u32 read_counter(int counter)
{
	u32 val;

	if (pmu->id == PMU_XSC1)
		val = __xsc1_read_counter(counter);
	else
		val = __xsc2_read_counter(counter);

	return val;
}

/* Write one counter via the xsc1 cp14 register layout. */
static void __xsc1_write_counter(int counter, u32 val)
{
	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
		break;
	}
}

/* Write one counter via the xsc2 cp14 register layout. */
static void __xsc2_write_counter(int counter, u32 val)
{
	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
		break;
	case PMN2:
		__asm__ __volatile__ ("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
		break;
	case PMN3:
		__asm__ __volatile__ ("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
		break;
	}
}

/* Dispatch a counter write to the correct PMU-version helper. */
static void write_counter(int counter, u32 val)
{
	if (pmu->id == PMU_XSC1)
		__xsc1_write_counter(counter, val);
	else
		__xsc2_write_counter(counter, val);
}

/*
 * Program event selection and initial counter values from
 * oprofile's counter_config[].
 *
 * Each enabled counter is preloaded with -(count) so that it
 * overflows (and raises the PMU interrupt) after exactly `count`
 * events; pmu->int_enable accumulates the interrupt-enable bits
 * for the enabled counters.  Always returns 0.
 */
static int xscale_setup_ctrs(void)
{
	u32 evtsel, pmnc;
	int i;

	/* Mark every counter the user did not enable as unused. */
	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (counter_config[i].enabled)
			continue;

		counter_config[i].event = EVT_UNUSED;
	}

	switch (pmu->id) {
	case PMU_XSC1:
		/* xsc1 packs the two event selectors into PMNC itself */
		pmnc = (counter_config[PMN1].event << 20) | (counter_config[PMN0].event << 12);
		pr_debug("xscale_setup_ctrs: pmnc: %#08x\n", pmnc);
		write_pmnc(pmnc);
		break;

	case PMU_XSC2:
		/* xsc2 has a dedicated event-select register (EVTSEL) */
		evtsel = counter_config[PMN0].event | (counter_config[PMN1].event << 8) |
			(counter_config[PMN2].event << 16) | (counter_config[PMN3].event << 24);

		pr_debug("xscale_setup_ctrs: evtsel %#08x\n", evtsel);
		__asm__ __volatile__ ("mcr p14, 0, %0, c8, c1, 0" : : "r" (evtsel));
		break;
	}

	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (counter_config[i].event == EVT_UNUSED) {
			counter_config[i].event = 0;
			pmu->int_enable &= ~pmu->int_mask[i];
			continue;
		}

		results[i].reset_counter = counter_config[i].count;
		/* preload so the counter overflows after `count` events */
		write_counter(i, -(u32)counter_config[i].count);
		pmu->int_enable |= pmu->int_mask[i];
		pr_debug("xscale_setup_ctrs: counter%d %#08x from %#08lx\n", i,
			read_counter(i), counter_config[i].count);
	}

	return 0;
}

/*
 * Record which xsc1 counters overflowed into results[].ovf and
 * stop the PMU (the caller re-enables it after sampling).
 * On xsc1 the overflow flags live in PMNC itself.
 */
static void inline __xsc1_check_ctrs(void)
{
	int i;
	u32 pmnc = read_pmnc();

	/*
	 * NOTE: there's an A stepping errata that states if an overflow
	 * bit already exists and another occurs, the previous overflow
	 * bit gets lost (original comment truncated here — confirm
	 * against the XScale specification update).
	 * Fixed in B stepping or later.
	 */

	/*
	 * Write the value back to clear the overflow flags. Overflow
	 * flags remain in pmnc for use below.
	 */
	write_pmnc(pmnc & ~PMU_ENABLE);

	for (i = CCNT; i <= PMN1; i++) {
		if (!(pmu->int_mask[i] & pmu->int_enable))
			continue;

		if (pmnc & pmu->cnt_ovf[i])
			results[i].ovf++;
	}
}

/*
 * Record which xsc2 counters overflowed into results[].ovf and
 * stop the PMU. On xsc2 the overflow flags live in a separate
 * cp14 flag register (read, then written back to clear).
 */
static void inline __xsc2_check_ctrs(void)
{
	int i;
	u32 flag = 0, pmnc = read_pmnc();

	pmnc &= ~PMU_ENABLE;
	write_pmnc(pmnc);

	/* read overflow flag register */
	__asm__ __volatile__ ("mrc p14, 0, %0, c5, c1, 0" : "=r" (flag));

	for (i = CCNT; i <= PMN3; i++) {
		if (!(pmu->int_mask[i] & pmu->int_enable))
			continue;

		if (flag & pmu->cnt_ovf[i])
			results[i].ovf++;
	}

	/* writeback clears overflow bits */
	__asm__ __volatile__ ("mcr p14, 0, %0, c5, c1, 0" : : "r" (flag));
}

/*
 * PMU overflow interrupt handler: collect overflow flags, emit one
 * oprofile sample per overflow, reload each overflowed counter with
 * its reset value, then re-enable the PMU.
 */
static irqreturn_t xscale_pmu_interrupt(int irq, void *arg)
{
	int i;
	u32 pmnc;

	if (pmu->id == PMU_XSC1)
		__xsc1_check_ctrs();
	else
		__xsc2_check_ctrs();

	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (!results[i].ovf)
			continue;

		write_counter(i, -(u32)results[i].reset_counter);
		oprofile_add_sample(get_irq_regs(), i);
		results[i].ovf--;
	}

	pmnc = read_pmnc() | PMU_ENABLE;
	write_pmnc(pmnc);

	return IRQ_HANDLED;
}

/* Disable the PMU and release the overflow interrupt. */
static void xscale_pmu_stop(void)
{
	u32 pmnc = read_pmnc();

	pmnc &= ~PMU_ENABLE;
	write_pmnc(pmnc);

	free_irq(XSCALE_PMU_IRQ, results);
}

/*
 * Claim the PMU overflow interrupt, unmask the counters selected
 * by xscale_setup_ctrs(), and enable the PMU.
 * Returns 0 on success or the negative errno from request_irq().
 */
static int xscale_pmu_start(void)
{
	int ret;
	u32 pmnc = read_pmnc();

	ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED,
			"XScale PMU", (void *)results);

	if (ret < 0) {
		printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n",
			XSCALE_PMU_IRQ);
		return ret;
	}

	if (pmu->id == PMU_XSC1)
		pmnc |= pmu->int_enable;	/* xsc1: int enables live in PMNC */
	else {
		/* xsc2: separate interrupt-enable register */
		__asm__ __volatile__ ("mcr p14, 0, %0, c4, c1, 0" : : "r" (pmu->int_enable));
		pmnc &= ~PMU_CNT64;		/* make CCNT count every cycle */
	}

	pmnc |= PMU_ENABLE;
	write_pmnc(pmnc);
	pr_debug("xscale_pmu_start: pmnc: %#08x mask: %08x\n", pmnc, pmu->int_enable);
	return 0;
}

/*
 * Detect the PMU version from the CPU ID register.
 * The 3-bit field at bits [15:13] distinguishes the core
 * generation: 1 selects the xsc1 layout, 2 the xsc2 layout;
 * anything else is unsupported (-ENODEV).
 */
static int xscale_detect_pmu(void)
{
	int ret = 0;
	u32 id;

	id = (read_cpuid(CPUID_ID) >> 13) & 0x7;

	switch (id) {
	case 1:
		pmu = &pmu_parms[PMU_XSC1];
		break;
	case 2:
		pmu = &pmu_parms[PMU_XSC2];
		break;
	default:
		ret = -ENODEV;
		break;
	}

	if (!ret) {
		op_xscale_spec.name = pmu->name;
		op_xscale_spec.num_counters = pmu->num_counters;
		pr_debug("xscale_detect_pmu: detected %s PMU\n", pmu->name);
	}

	return ret;
}

/* Model hooks exported to the generic ARM oprofile layer. */
struct op_arm_model_spec op_xscale_spec = {
	.init		= xscale_detect_pmu,
	.setup_ctrs	= xscale_setup_ctrs,
	.start		= xscale_pmu_start,
	.stop		= xscale_pmu_stop,
};