/**
 * @file op_model_xscale.c
 * XScale Performance Monitor Driver
 *
 * @remark Copyright 2000-2004 Deepak Saxena <dsaxena@mvista.com>
 * @remark Copyright 2000-2004 MontaVista Software Inc
 * @remark Copyright 2004 Dave Jiang <dave.jiang@intel.com>
 * @remark Copyright 2004 Intel Corporation
 * @remark Copyright 2004 Zwane Mwaikambo <zwane@arm.linux.org.uk>
 * @remark Copyright 2004 OProfile Authors
 *
 * @remark Read the file COPYING
 *
 * @author Zwane Mwaikambo
 */

/* #define DEBUG */
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/interrupt.h>
#include <linux/irq.h>

#include <asm/system.h>

#include "op_counter.h"
#include "op_arm_model.h"

#define	PMU_ENABLE	0x001	/* Enable counters */
#define PMN_RESET	0x002	/* Reset event counters */
#define	CCNT_RESET	0x004	/* Reset clock counter */
#define	PMU_RESET	(CCNT_RESET | PMN_RESET)
#define PMU_CNT64	0x008	/* Make CCNT count every 64th cycle */
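
/*
 * These flags sit in the low control bits of the PMNC register on
 * both PMU variants (the reset bits are write-only commands); e.g.
 * a typical restart sequence would be
 *
 *	write_pmnc(read_pmnc() | PMU_RESET);	zero all counters
 *	write_pmnc(read_pmnc() | PMU_ENABLE);	start counting
 */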

/* TODO do runtime detection */
#ifdef CONFIG_ARCH_IOP32X
#define XSCALE_PMU_IRQ  IRQ_IOP32X_CORE_PMU
#endif
#ifdef CONFIG_ARCH_IOP33X
#define XSCALE_PMU_IRQ  IRQ_IOP33X_CORE_PMU
#endif
#ifdef CONFIG_ARCH_PXA
#define XSCALE_PMU_IRQ  IRQ_PMU
#endif

/*
 * Different types of events that can be counted by the XScale PMU
 * as used by OProfile userspace. Listed here primarily for
 * documentation purposes.
 */

#define EVT_ICACHE_MISS			0x00
#define	EVT_ICACHE_NO_DELIVER		0x01
#define	EVT_DATA_STALL			0x02
#define	EVT_ITLB_MISS			0x03
#define	EVT_DTLB_MISS			0x04
#define	EVT_BRANCH			0x05
#define	EVT_BRANCH_MISS			0x06
#define	EVT_INSTRUCTION			0x07
#define	EVT_DCACHE_FULL_STALL		0x08
#define	EVT_DCACHE_FULL_STALL_CONTIG	0x09
#define	EVT_DCACHE_ACCESS		0x0A
#define	EVT_DCACHE_MISS			0x0B
#define	EVT_DCACHE_WRITE_BACK		0x0C
#define	EVT_PC_CHANGED			0x0D
#define	EVT_BCU_REQUEST			0x10
#define	EVT_BCU_FULL			0x11
#define	EVT_BCU_DRAIN			0x12
#define	EVT_BCU_ECC_NO_ELOG		0x14
#define	EVT_BCU_1_BIT_ERR		0x15
#define	EVT_RMW				0x16
/* EVT_CCNT is not hardware defined */
#define EVT_CCNT			0xFE
#define EVT_UNUSED			0xFF
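
/*
 * OProfile userspace hands one of these event numbers per counter
 * through counter_config[]; xscale_setup_ctrs() below packs them
 * into the hardware event select fields.
 */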
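
/*
 * Per-counter bookkeeping: ovf counts overflows latched by the IRQ
 * handler and not yet delivered; reset_counter is the period that
 * gets reloaded after each sample.
 */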
struct pmu_counter {
	volatile unsigned long ovf;
	unsigned long reset_counter;
};

enum { CCNT, PMN0, PMN1, PMN2, PMN3, MAX_COUNTERS };

static struct pmu_counter results[MAX_COUNTERS];

/*
 * There are two versions of the PMU in current XScale processors,
 * with differing register layouts and numbers of performance counters.
 * e.g. IOP32x is xsc1 whilst IOP33x is xsc2.
 * We detect which register layout to use in xscale_detect_pmu().
 */
enum { PMU_XSC1, PMU_XSC2 };

struct pmu_type {
	int id;
	char *name;
	int num_counters;
	unsigned int int_enable;
	unsigned int cnt_ovf[MAX_COUNTERS];
	unsigned int int_mask[MAX_COUNTERS];
};

static struct pmu_type pmu_parms[] = {
	{
		.id		= PMU_XSC1,
		.name		= "arm/xscale1",
		.num_counters	= 3,
		.int_mask	= { [PMN0] = 0x10, [PMN1] = 0x20,
				    [CCNT] = 0x40 },
		.cnt_ovf	= { [CCNT] = 0x400, [PMN0] = 0x100,
				    [PMN1] = 0x200 },
	},
	{
		.id		= PMU_XSC2,
		.name		= "arm/xscale2",
		.num_counters	= 5,
		.int_mask	= { [CCNT] = 0x01, [PMN0] = 0x02,
				    [PMN1] = 0x04, [PMN2] = 0x08,
				    [PMN3] = 0x10 },
		.cnt_ovf	= { [CCNT] = 0x01, [PMN0] = 0x02,
				    [PMN1] = 0x04, [PMN2] = 0x08,
				    [PMN3] = 0x10 },
	},
};
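
/*
 * The masks differ because xsc1 keeps its interrupt enables (bits
 * 4-6) and overflow flags (bits 8-10) in PMNC itself, while xsc2
 * has dedicated INTEN and FLAG registers that use the same one bit
 * per counter in both.
 */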

static struct pmu_type *pmu;

static void write_pmnc(u32 val)
{
	if (pmu->id == PMU_XSC1) {
		/* the upper 4 bits and bits 7 and 11 are write-as-0 */
		val &= 0xffff77f;
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
	} else {
		/* bits 4-23 are write-as-0, 24-31 are write ignored */
		val &= 0xf;
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
	}
}

static u32 read_pmnc(void)
{
	u32 val;

	if (pmu->id == PMU_XSC1)
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
	else {
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
		/* bits 1-2 and 4-23 are read-unpredictable */
		val &= 0xff000009;
	}

	return val;
}

static u32 __xsc1_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
		break;
	}
	return val;
}

static u32 __xsc2_read_counter(int counter)
{
	u32 val = 0;

	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
		break;
	case PMN2:
		__asm__ __volatile__ ("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
		break;
	case PMN3:
		__asm__ __volatile__ ("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
		break;
	}
	return val;
}

static u32 read_counter(int counter)
{
	u32 val;

	if (pmu->id == PMU_XSC1)
		val = __xsc1_read_counter(counter);
	else
		val = __xsc2_read_counter(counter);

	return val;
}

static void __xsc1_write_counter(int counter, u32 val)
{
	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
		break;
	}
}

static void __xsc2_write_counter(int counter, u32 val)
{
	switch (counter) {
	case CCNT:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
		break;
	case PMN0:
		__asm__ __volatile__ ("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
		break;
	case PMN1:
		__asm__ __volatile__ ("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
		break;
	case PMN2:
		__asm__ __volatile__ ("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
		break;
	case PMN3:
		__asm__ __volatile__ ("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
		break;
	}
}

static void write_counter(int counter, u32 val)
{
	if (pmu->id == PMU_XSC1)
		__xsc1_write_counter(counter, val);
	else
		__xsc2_write_counter(counter, val);
}

static int xscale_setup_ctrs(void)
{
	u32 evtsel, pmnc;
	int i;

	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (counter_config[i].enabled)
			continue;

		counter_config[i].event = EVT_UNUSED;
	}

	switch (pmu->id) {
	case PMU_XSC1:
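		/*
		 * xsc1 event selectors live in PMNC itself, PMN0's in
		 * bits 12-19 and PMN1's in bits 20-27.  E.g. counting
		 * D-cache misses (0x0B) on PMN0 and D-cache accesses
		 * (0x0A) on PMN1 gives pmnc = 0x0a0b000.
		 */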
		pmnc = (counter_config[PMN1].event << 20) | (counter_config[PMN0].event << 12);
		pr_debug("xscale_setup_ctrs: pmnc: %#08x\n", pmnc);
		write_pmnc(pmnc);
		break;

	case PMU_XSC2:
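		/*
		 * xsc2 has a dedicated EVTSEL register holding one
		 * 8-bit selector per counter; the same two events as
		 * above would give evtsel = 0x00000a0b.
		 */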
		evtsel = counter_config[PMN0].event | (counter_config[PMN1].event << 8) |
			(counter_config[PMN2].event << 16) | (counter_config[PMN3].event << 24);

		pr_debug("xscale_setup_ctrs: evtsel %#08x\n", evtsel);
		__asm__ __volatile__ ("mcr p14, 0, %0, c8, c1, 0" : : "r" (evtsel));
		break;
	}

	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (counter_config[i].event == EVT_UNUSED) {
			counter_config[i].event = 0;
			pmu->int_enable &= ~pmu->int_mask[i];
			continue;
		}

		results[i].reset_counter = counter_config[i].count;
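		/*
		 * Preload the two's complement of the period so the
		 * counter overflows (and interrupts) after
		 * counter_config[i].count events; e.g. a count of
		 * 100000 preloads 0xfffe7960.
		 */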
		write_counter(i, -(u32)counter_config[i].count);
		pmu->int_enable |= pmu->int_mask[i];
		pr_debug("xscale_setup_ctrs: counter%d %#08x from %#08lx\n", i,
			read_counter(i), counter_config[i].count);
	}

	return 0;
}

static inline void __xsc1_check_ctrs(void)
{
	int i;
	u32 pmnc = read_pmnc();

	/* NOTE: there's an A stepping errata that states if an overflow */
	/*       bit already exists and another occurs, the previous     */
	/*       overflow bit gets cleared. There's no workaround.       */
	/*       Fixed in B stepping or later.                           */

	/* Write the value back to clear the overflow flags. Overflow */
	/* flags remain in pmnc for use below */
	write_pmnc(pmnc & ~PMU_ENABLE);

	for (i = CCNT; i <= PMN1; i++) {
		if (!(pmu->int_mask[i] & pmu->int_enable))
			continue;

		if (pmnc & pmu->cnt_ovf[i])
			results[i].ovf++;
	}
}

static inline void __xsc2_check_ctrs(void)
{
	int i;
	u32 flag = 0, pmnc = read_pmnc();

	pmnc &= ~PMU_ENABLE;
	write_pmnc(pmnc);

	/* read overflow flag register */
	__asm__ __volatile__ ("mrc p14, 0, %0, c5, c1, 0" : "=r" (flag));

	for (i = CCNT; i <= PMN3; i++) {
		if (!(pmu->int_mask[i] & pmu->int_enable))
			continue;

		if (flag & pmu->cnt_ovf[i])
			results[i].ovf++;
	}

	/* writeback clears overflow bits */
	__asm__ __volatile__ ("mcr p14, 0, %0, c5, c1, 0" : : "r" (flag));
}

static irqreturn_t xscale_pmu_interrupt(int irq, void *arg)
{
	int i;
	u32 pmnc;

	if (pmu->id == PMU_XSC1)
		__xsc1_check_ctrs();
	else
		__xsc2_check_ctrs();

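	/*
	 * The check routines stopped the PMU and latched which counters
	 * wrapped into results[].ovf; reload each one and hand a sample
	 * to the oprofile core.
	 */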
	for (i = CCNT; i < MAX_COUNTERS; i++) {
		if (!results[i].ovf)
			continue;

		write_counter(i, -(u32)results[i].reset_counter);
		oprofile_add_sample(get_irq_regs(), i);
		results[i].ovf--;
	}

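	/* counting was stopped in the check routines; restart the PMU */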
	pmnc = read_pmnc() | PMU_ENABLE;
	write_pmnc(pmnc);

	return IRQ_HANDLED;
}

static void xscale_pmu_stop(void)
{
	u32 pmnc = read_pmnc();

	pmnc &= ~PMU_ENABLE;
	write_pmnc(pmnc);

	free_irq(XSCALE_PMU_IRQ, results);
}

static int xscale_pmu_start(void)
{
	int ret;
	u32 pmnc = read_pmnc();

	ret = request_irq(XSCALE_PMU_IRQ, xscale_pmu_interrupt, IRQF_DISABLED,
			"XScale PMU", (void *)results);

	if (ret < 0) {
		printk(KERN_ERR "oprofile: unable to request IRQ%d for XScale PMU\n",
			XSCALE_PMU_IRQ);
		return ret;
	}

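	/*
	 * xsc1 keeps the per-counter interrupt enables in PMNC itself;
	 * xsc2 has a separate INTEN register.  On xsc2 we also clear
	 * PMU_CNT64 so CCNT counts every cycle rather than every 64th.
	 */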
	if (pmu->id == PMU_XSC1)
		pmnc |= pmu->int_enable;
	else {
		__asm__ __volatile__ ("mcr p14, 0, %0, c4, c1, 0" : : "r" (pmu->int_enable));
		pmnc &= ~PMU_CNT64;
	}

	pmnc |= PMU_ENABLE;
	write_pmnc(pmnc);
	pr_debug("xscale_pmu_start: pmnc: %#08x mask: %08x\n", pmnc, pmu->int_enable);
	return 0;
}

static int xscale_detect_pmu(void)
{
	int ret = 0;
	u32 id;

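	/* bits 13-15 of the main ID register encode the XScale core generation */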
	id = (read_cpuid(CPUID_ID) >> 13) & 0x7;

	switch (id) {
	case 1:
		pmu = &pmu_parms[PMU_XSC1];
		break;
	case 2:
		pmu = &pmu_parms[PMU_XSC2];
		break;
	default:
		ret = -ENODEV;
		break;
	}

	if (!ret) {
		op_xscale_spec.name = pmu->name;
		op_xscale_spec.num_counters = pmu->num_counters;
		pr_debug("xscale_detect_pmu: detected %s PMU\n", pmu->name);
	}

	return ret;
}

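/*
 * Entry points for the common ARM OProfile model layer: ->init picks
 * the register layout at probe time, the others run from the generic
 * start/stop paths.
 */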
struct op_arm_model_spec op_xscale_spec = {
	.init		= xscale_detect_pmu,
	.setup_ctrs	= xscale_setup_ctrs,
	.start		= xscale_pmu_start,
	.stop		= xscale_pmu_stop,
};